From 31c4167535e3966c5d40b02322ac2e10a14962c7 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Mon, 16 Nov 2020 18:52:13 +0100 Subject: [PATCH] added option for polling --- paperless.conf.example | 6 ++++++ .../management/commands/document_consumer.py | 8 +++++++- src/paperless/settings.py | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/paperless.conf.example b/paperless.conf.example index f825daa6b..c64385cbb 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -165,6 +165,12 @@ PAPERLESS_EMAIL_SECRET="" # If you only specify PAPERLESS_TASK_WORKERS, paperless will adjust # PAPERLESS_THREADS_PER_WORKER automatically. +# If paperless does not pick up documents added to your consumption folder, it +# might not be able to automatically detect filesystem changes. In that case, +# specify a polling interval in seconds below, which will then cause paperless +# to periodically check your consumption directory for changes. +#PAPERLESS_CONSUMER_POLLING=10 + # When the consumer detects a duplicate document, it will not touch the # original document. This default behavior can be changed here. diff --git a/src/documents/management/commands/document_consumer.py b/src/documents/management/commands/document_consumer.py index 769d71af2..d991b722a 100644 --- a/src/documents/management/commands/document_consumer.py +++ b/src/documents/management/commands/document_consumer.py @@ -6,6 +6,7 @@ from django.core.management.base import BaseCommand from django_q.tasks import async_task from watchdog.events import FileSystemEventHandler from watchdog.observers import Observer +from watchdog.observers.polling import PollingObserver try: from inotify_simple import INotify, flags @@ -75,7 +76,12 @@ class Command(BaseCommand): async_task("documents.tasks.consume_file", entry.path, task_name=os.path.basename(entry.path)) # Start the watchdog. Woof! 
- observer = Observer() + if settings.CONSUMER_POLLING > 0: + logging.getLogger(__name__).info('Using polling instead of file' + 'system notifications.') + observer = PollingObserver(timeout=settings.CONSUMER_POLLING) + else: + observer = Observer() event_handler = Handler() observer.schedule(event_handler, directory, recursive=True) observer.start() diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 7712844d0..18acf401a 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -286,13 +286,29 @@ TASK_WORKERS = int(os.getenv("PAPERLESS_TASK_WORKERS", default_task_workers())) Q_CLUSTER = { 'name': 'paperless', 'catch_up': False, + 'workers': TASK_WORKERS, 'redis': os.getenv("PAPERLESS_REDIS", "redis://localhost:6379") } + +def default_threads_per_worker(): + try: + return max( + math.floor(multiprocessing.cpu_count() / TASK_WORKERS), + 1 + ) + except NotImplementedError: + return 1 + + +THREADS_PER_WORKER = os.getenv("PAPERLESS_THREADS_PER_WORKER", default_threads_per_worker()) + ############################################################################### # Paperless Specific Settings # ############################################################################### +CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0)) + CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES") # The default language that tesseract will attempt to use when parsing