diff --git a/docs/utilities.rst b/docs/utilities.rst index 2de6e2a01..ca5af6a3f 100644 --- a/docs/utilities.rst +++ b/docs/utilities.rst @@ -80,6 +80,13 @@ you'll need to have it start in the background -- something you'll need to figure out for your own system. To get you started though, there are Systemd service files in the ``scripts`` directory. +Some command line arguments are available to customize the behavior of the +consumer. By default it will use ``/etc/paperless.conf`` values. Display the +help with: + +.. code-block:: shell-session + + $ /path/to/paperless/src/manage.py document_consumer --help .. _utilities-exporter: diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 74e85b9f0..886b0dd69 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -32,31 +32,31 @@ class Consumer: 5. Delete the document and image(s) """ - SCRATCH = settings.SCRATCH_DIR - CONSUME = settings.CONSUMPTION_DIR - - def __init__(self): + def __init__(self, consume=settings.CONSUMPTION_DIR, + scratch=settings.SCRATCH_DIR): self.logger = logging.getLogger(__name__) self.logging_group = None + self.stats = {} + self._ignore = [] + self.consume = consume + self.scratch = scratch + try: - os.makedirs(self.SCRATCH) + os.makedirs(self.scratch) except FileExistsError: pass - self.stats = {} - self._ignore = [] - - if not self.CONSUME: + if not self.consume: raise ConsumerError( "The CONSUMPTION_DIR settings variable does not appear to be " "set." ) - if not os.path.exists(self.CONSUME): + if not os.path.exists(self.consume): raise ConsumerError( - "Consumption directory {} does not exist".format(self.CONSUME)) + "Consumption directory {} does not exist".format(self.consume)) self.parsers = [] for response in document_consumer_declaration.send(self): @@ -73,11 +73,11 @@ class Consumer: "group": self.logging_group }) - def consume(self): + def run(self): - for doc in os.listdir(self.CONSUME): + for doc in os.listdir(self.consume): - doc = os.path.join(self.CONSUME, doc) + doc = os.path.join(self.consume, doc) if not os.path.isfile(doc): continue @@ -226,8 +226,8 @@ class Consumer: def _is_ready(self, doc): """ - Detect whether `doc` is ready to consume or if it's still being written - to by the uploader. + Detect whether ``doc`` is ready to consume or if it's still being + written to by the uploader. """ t = os.stat(doc).st_mtime diff --git a/src/documents/forms.py b/src/documents/forms.py index 28b30df3e..5f965a1c6 100644 --- a/src/documents/forms.py +++ b/src/documents/forms.py @@ -92,7 +92,7 @@ class UploadForm(forms.Form): t = int(mktime(datetime.now().timetuple())) file_name = os.path.join( - Consumer.CONSUME, + settings.CONSUMPTION_DIR, "{} - {}.{}".format(correspondent, title, self._file_type) ) diff --git a/src/documents/mail.py b/src/documents/mail.py index d41b952ac..d63e04beb 100644 --- a/src/documents/mail.py +++ b/src/documents/mail.py @@ -151,7 +151,7 @@ class Attachment(object): class MailFetcher(Loggable): - def __init__(self): + def __init__(self, consume=settings.CONSUMPTION_DIR): Loggable.__init__(self) @@ -165,6 +165,7 @@ class MailFetcher(Loggable): self._enabled = bool(self._host) self.last_checked = datetime.datetime.now() + self.consume = consume def pull(self): """ @@ -185,7 +186,7 @@ class MailFetcher(Loggable): self.log("info", 'Storing email: "{}"'.format(message.subject)) t = int(time.mktime(message.time.timetuple())) - file_name = os.path.join(Consumer.CONSUME, message.file_name) + file_name = os.path.join(self.consume, message.file_name) with open(file_name, "wb") as f: f.write(message.attachment.data) os.utime(file_name, times=(t, t)) diff --git a/src/documents/management/commands/document_consumer.py b/src/documents/management/commands/document_consumer.py index f50b489ee..f94265b65 100644 --- a/src/documents/management/commands/document_consumer.py +++ b/src/documents/management/commands/document_consumer.py @@ -16,9 +16,6 @@ class Command(BaseCommand): consumption directory, and fetch any mail available. """ - LOOP_TIME = settings.CONSUMER_LOOP_TIME - MAIL_DELTA = datetime.timedelta(minutes=10) - ORIGINAL_DOCS = os.path.join(settings.MEDIA_ROOT, "documents", "originals") THUMB_DOCS = os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails") @@ -32,13 +29,41 @@ class Command(BaseCommand): BaseCommand.__init__(self, *args, **kwargs) + def add_arguments(self, parser): + parser.add_argument( + "directory", + default=settings.CONSUMPTION_DIR, + nargs="?", + help="The consumption directory." + ) + parser.add_argument( + "--loop-time", + default=settings.CONSUMER_LOOP_TIME, + type=int, + help="Wait time between each loop (in seconds)." + ) + parser.add_argument( + "--mail-delta", + default=10, + type=int, + help="Wait time between each mail fetch (in minutes)." + ) + parser.add_argument( + "--oneshot", + action="store_true", + help="Run only once." + ) + def handle(self, *args, **options): self.verbosity = options["verbosity"] + directory = options["directory"] + loop_time = options["loop_time"] + mail_delta = datetime.timedelta(minutes=options["mail_delta"]) try: - self.file_consumer = Consumer() - self.mail_fetcher = MailFetcher() + self.file_consumer = Consumer(consume=directory) + self.mail_fetcher = MailFetcher(consume=directory) except (ConsumerError, MailFetcherError) as e: raise CommandError(e) @@ -49,27 +74,32 @@ class Command(BaseCommand): pass logging.getLogger(__name__).info( - "Starting document consumer at {}".format(settings.CONSUMPTION_DIR) + "Starting document consumer at {}".format(directory) ) - try: - while True: - self.loop() - time.sleep(self.LOOP_TIME) - if self.verbosity > 1: - print(".") - except KeyboardInterrupt: - print("Exiting") + if options["oneshot"]: + self.loop(mail_delta=mail_delta) + else: + try: + while True: + self.loop(mail_delta=mail_delta) + time.sleep(loop_time) + if self.verbosity > 1: + print(".", int(time.time())) + except KeyboardInterrupt: + print("Exiting") - def loop(self): - - # Consume whatever files we can - self.file_consumer.consume() + def loop(self, mail_delta): # Occasionally fetch mail and store it to be consumed on the next loop # We fetch email when we first start up so that it is not necessary to # wait for 10 minutes after making changes to the config file. - delta = self.mail_fetcher.last_checked + self.MAIL_DELTA + delta = self.mail_fetcher.last_checked + mail_delta if self.first_iteration or delta < datetime.datetime.now(): self.first_iteration = False self.mail_fetcher.pull() + + # Consume whatever files we can. + # We have to run twice as the first run checks for file readiness + for i in range(2): + self.file_consumer.run() diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index b7fca45e7..9f2445340 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -1,5 +1,6 @@ from django.test import TestCase from unittest import mock +from tempfile import TemporaryDirectory from ..consumer import Consumer from ..models import FileInfo @@ -16,7 +17,6 @@ class TestConsumer(TestCase): self.DummyParser ) - @mock.patch("documents.consumer.Consumer.CONSUME") @mock.patch("documents.consumer.os.makedirs") @mock.patch("documents.consumer.os.path.exists", return_value=True) @mock.patch("documents.consumer.document_consumer_declaration.send") @@ -32,18 +32,22 @@ class TestConsumer(TestCase): (None, lambda _: {"weight": 0, "parser": DummyParser1}), (None, lambda _: {"weight": 1, "parser": DummyParser2}), ) + with TemporaryDirectory() as tmpdir: + self.assertEqual( + Consumer(consume=tmpdir)._get_parser_class("doc.pdf"), + DummyParser2 + ) - self.assertEqual(Consumer()._get_parser_class("doc.pdf"), DummyParser2) - - @mock.patch("documents.consumer.Consumer.CONSUME") @mock.patch("documents.consumer.os.makedirs") @mock.patch("documents.consumer.os.path.exists", return_value=True) @mock.patch("documents.consumer.document_consumer_declaration.send") def test__get_parser_class_0_parsers(self, m, *args): m.return_value = ((None, lambda _: None),) - self.assertIsNone(Consumer()._get_parser_class("doc.pdf")) + with TemporaryDirectory() as tmpdir: + self.assertIsNone( + Consumer(consume=tmpdir)._get_parser_class("doc.pdf") + ) - @mock.patch("documents.consumer.Consumer.CONSUME") @mock.patch("documents.consumer.os.makedirs") @mock.patch("documents.consumer.os.path.exists", return_value=True) @mock.patch("documents.consumer.document_consumer_declaration.send") @@ -51,7 +55,8 @@ class TestConsumer(TestCase): m.return_value = ( (None, lambda _: {"weight": 0, "parser": self.DummyParser}), ) - return Consumer() + with TemporaryDirectory() as tmpdir: + return Consumer(consume=tmpdir) class TestAttributes(TestCase): diff --git a/src/setup.cfg b/src/setup.cfg index 1112934c0..f9572519b 100644 --- a/src/setup.cfg +++ b/src/setup.cfg @@ -6,7 +6,6 @@ exclude = migrations, paperless/settings.py, .tox DJANGO_SETTINGS_MODULE=paperless.settings addopts = --pythonwarnings=all -n auto env = - PAPERLESS_CONSUME=/tmp PAPERLESS_PASSPHRASE=THISISNOTASECRET PAPERLESS_SECRET=paperless PAPERLESS_EMAIL_SECRET=paperless