Add inotify support

This commit is contained in:
Erik Arvstedt 2018-05-11 14:01:21 +02:00
parent 7357471b9e
commit 7e1d59377a
4 changed files with 50 additions and 2 deletions

View File

@ -165,6 +165,8 @@ PAPERLESS_PASSPHRASE="secret"
#PAPERLESS_CONVERT_DENSITY=300
# (This setting is ignored when inotify is in use, which is the default on
# Linux unless the consumer is started with --no-inotify.)
# The number of seconds that Paperless will wait between checking
# PAPERLESS_CONSUMPTION_DIR. If you tend to write documents to this directory
# rarely, you may want to use a higher value than the default (10).

View File

@ -20,6 +20,7 @@ flake8==3.5.0
fuzzywuzzy==0.15.0
gunicorn==19.7.1
idna==2.6
inotify_simple==1.1.7; sys_platform == 'linux'
langdetect==1.0.7
mccabe==0.6.1
more-itertools==4.1.0

View File

@ -1,6 +1,7 @@
import datetime
import logging
import os
import sys
import time
from django.conf import settings
@ -9,6 +10,11 @@ from django.core.management.base import BaseCommand, CommandError
from ...consumer import Consumer, ConsumerError, make_dirs
from ...mail import MailFetcher, MailFetcherError
try:
from inotify_simple import INotify, flags
except ImportError:
pass
class Command(BaseCommand):
"""
@ -53,6 +59,11 @@ class Command(BaseCommand):
action="store_true",
help="Run only once."
)
parser.add_argument(
"--no-inotify",
action="store_true",
help="Don't use inotify, even if it's available."
)
def handle(self, *args, **options):
@ -60,6 +71,8 @@ class Command(BaseCommand):
directory = options["directory"]
loop_time = options["loop_time"]
mail_delta = options["mail_delta"] * 60
use_inotify = (not options["no_inotify"]
and "inotify_simple" in sys.modules)
try:
self.file_consumer = Consumer(consume=directory)
@ -70,14 +83,20 @@ class Command(BaseCommand):
make_dirs(self.ORIGINAL_DOCS, self.THUMB_DOCS)
logging.getLogger(__name__).info(
"Starting document consumer at {}".format(directory)
"Starting document consumer at {}{}".format(
directory,
" with inotify" if use_inotify else ""
)
)
if options["oneshot"]:
self.loop_step(mail_delta)
else:
try:
self.loop(loop_time, mail_delta)
if use_inotify:
self.loop_inotify(mail_delta)
else:
self.loop(loop_time, mail_delta)
except KeyboardInterrupt:
print("Exiting")
@ -101,3 +120,27 @@ class Command(BaseCommand):
self.mail_fetcher.pull()
self.file_consumer.consume_new_files()
def loop_inotify(self, mail_delta):
    """
    Consume documents as inotify reports them and pull mail periodically.

    Watches the consumption directory for files that were closed after
    writing or moved into it, and hands each such regular file to the file
    consumer. Between inotify events, mail is pulled whenever
    ``mail_delta`` seconds have elapsed since the mail fetcher's
    ``last_checked`` time.

    This method loops forever; it only returns by raising (e.g.
    ``KeyboardInterrupt``). The inotify file descriptor is closed on exit.

    :param mail_delta: Number of seconds between two mail pulls.
    """
    directory = self.file_consumer.consume
    inotify = INotify()
    try:
        inotify.add_watch(directory, flags.CLOSE_WRITE | flags.MOVED_TO)

        # Run initial mail fetch and consume all currently existing documents
        self.loop_step(mail_delta)
        next_mail_time = self.mail_fetcher.last_checked + mail_delta

        while True:
            # Consume documents until next_mail_time
            delta = next_mail_time - time.time()
            while delta > 0:
                # INotify.read() expects its timeout in milliseconds, while
                # delta is in seconds. Wait at least 1 ms so that a tiny
                # remaining delta does not turn into a busy loop.
                timeout_ms = max(1, int(delta * 1000))
                for event in inotify.read(timeout=timeout_ms):
                    path = os.path.join(directory, event.name)
                    # Only regular files are handed to the consumer.
                    if os.path.isfile(path):
                        self.file_consumer.try_consume_file(path)
                delta = next_mail_time - time.time()

            self.mail_fetcher.pull()
            next_mail_time = self.mail_fetcher.last_checked + mail_delta
    finally:
        # Release the inotify file descriptor however the loop ends.
        inotify.close()

View File

@ -246,6 +246,8 @@ SCRATCH_DIR = os.getenv("PAPERLESS_SCRATCH_DIR", "/tmp/paperless")
# This is where Paperless will look for PDFs to index
CONSUMPTION_DIR = os.getenv("PAPERLESS_CONSUMPTION_DIR")
# (This setting is ignored when inotify is in use, which is the default on
# Linux unless the consumer is started with --no-inotify.)
# The number of seconds that Paperless will wait between checking
# CONSUMPTION_DIR. If you tend to write documents to this directory very
# slowly, you may want to use a higher value than the default.