From 7e1d59377aa46a1158da830119a4489fb2e97e5d Mon Sep 17 00:00:00 2001 From: Erik Arvstedt Date: Fri, 11 May 2018 14:01:21 +0200 Subject: [PATCH] Add inotify support --- paperless.conf.example | 2 + requirements.txt | 1 + .../management/commands/document_consumer.py | 47 ++++++++++++++++++- src/paperless/settings.py | 2 + 4 files changed, 50 insertions(+), 2 deletions(-) diff --git a/paperless.conf.example b/paperless.conf.example index 45c532fe1..0727ac29d 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -165,6 +165,8 @@ PAPERLESS_PASSPHRASE="secret" #PAPERLESS_CONVERT_DENSITY=300 +# (The setting below is ignored when inotify is in use, which is the default +# on Linux unless the consumer is started with --no-inotify.) # The number of seconds that Paperless will wait between checking # PAPERLESS_CONSUMPTION_DIR. If you tend to write documents to this directory # rarely, you may want to use a higher value than the default (10). diff --git a/requirements.txt b/requirements.txt index c90a8eaa9..d51a39039 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,6 +20,7 @@ flake8==3.5.0 fuzzywuzzy==0.15.0 gunicorn==19.7.1 idna==2.6 +inotify_simple==1.1.7; sys_platform == 'linux' langdetect==1.0.7 mccabe==0.6.1 more-itertools==4.1.0 diff --git a/src/documents/management/commands/document_consumer.py b/src/documents/management/commands/document_consumer.py index 9234617e1..a0e2f00fe 100644 --- a/src/documents/management/commands/document_consumer.py +++ b/src/documents/management/commands/document_consumer.py @@ -1,6 +1,7 @@ import datetime import logging import os +import sys import time from django.conf import settings @@ -9,6 +10,11 @@ from django.core.management.base import BaseCommand, CommandError from ...consumer import Consumer, ConsumerError, make_dirs from ...mail import MailFetcher, MailFetcherError +try: + from inotify_simple import INotify, flags +except ImportError: + pass + class Command(BaseCommand): """ @@ -53,6 +59,11 @@ class Command(BaseCommand): action="store_true", help="Run
only once." ) + parser.add_argument( + "--no-inotify", + action="store_true", + help="Don't use inotify, even if it's available." + ) def handle(self, *args, **options): @@ -60,6 +71,8 @@ class Command(BaseCommand): directory = options["directory"] loop_time = options["loop_time"] mail_delta = options["mail_delta"] * 60 + use_inotify = (not options["no_inotify"] + and "inotify_simple" in sys.modules) try: self.file_consumer = Consumer(consume=directory) @@ -70,14 +83,20 @@ class Command(BaseCommand): make_dirs(self.ORIGINAL_DOCS, self.THUMB_DOCS) logging.getLogger(__name__).info( - "Starting document consumer at {}".format(directory) + "Starting document consumer at {}{}".format( + directory, + " with inotify" if use_inotify else "" + ) ) if options["oneshot"]: self.loop_step(mail_delta) else: try: - self.loop(loop_time, mail_delta) + if use_inotify: + self.loop_inotify(mail_delta) + else: + self.loop(loop_time, mail_delta) except KeyboardInterrupt: print("Exiting") @@ -101,3 +120,27 @@ class Command(BaseCommand): self.mail_fetcher.pull() self.file_consumer.consume_new_files() + + def loop_inotify(self, mail_delta): + directory = self.file_consumer.consume + inotify = INotify() + inotify.add_watch(directory, flags.CLOSE_WRITE | flags.MOVED_TO) + + # Run initial mail fetch and consume all currently existing documents + self.loop_step(mail_delta) + next_mail_time = self.mail_fetcher.last_checked + mail_delta + + while True: + # Consume documents until next_mail_time; read() takes milliseconds + while True: + delta = next_mail_time - time.time() + if delta > 0: + for event in inotify.read(timeout=delta * 1000): + file = os.path.join(directory, event.name) + if os.path.isfile(file): + self.file_consumer.try_consume_file(file) + else: + break + + self.mail_fetcher.pull() + next_mail_time = self.mail_fetcher.last_checked + mail_delta diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 314edeb03..15e0674bc 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ 
-246,6 +246,8 @@ SCRATCH_DIR = os.getenv("PAPERLESS_SCRATCH_DIR", "/tmp/paperless") # This is where Paperless will look for PDFs to index CONSUMPTION_DIR = os.getenv("PAPERLESS_CONSUMPTION_DIR") +# (The setting below is ignored when inotify is in use, which is the default +# on Linux unless the consumer is started with --no-inotify.) # The number of seconds that Paperless will wait between checking # CONSUMPTION_DIR. If you tend to write documents to this directory very # slowly, you may want to use a higher value than the default.