mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 03:16:10 -06:00 
			
		
		
		
	Merge branch 'inotify' of git://github.com/erikarvstedt/paperless into erikarvstedt-inotify
This commit is contained in:
		@@ -49,17 +49,18 @@ The Consumer
 | 
			
		||||
------------
 | 
			
		||||
 | 
			
		||||
The consumer script runs in an infinite loop, constantly looking at a directory
 | 
			
		||||
for PDF files to parse and index.  The process is pretty straightforward:
 | 
			
		||||
for documents to parse and index.  The process is pretty straightforward:
 | 
			
		||||
 | 
			
		||||
1. Look in ``CONSUMPTION_DIR`` for a PDF.  If one is found, go to #2.  If not,
 | 
			
		||||
   wait 10 seconds and try again.
 | 
			
		||||
2. Parse the PDF with Tesseract
 | 
			
		||||
1. Look in ``CONSUMPTION_DIR`` for a document.  If one is found, go to #2.
 | 
			
		||||
   If not, wait 10 seconds and try again.  On Linux, new documents are detected
 | 
			
		||||
   instantly via inotify, so there's no waiting involved.
 | 
			
		||||
2. Parse the document with Tesseract
 | 
			
		||||
3. Create a new record in the database with the OCR'd text
 | 
			
		||||
4. Attempt to automatically assign document attributes by doing some guesswork.
 | 
			
		||||
   Read up on the :ref:`guesswork documentation<guesswork>` for more
 | 
			
		||||
   information about this process.
 | 
			
		||||
5. Encrypt the PDF and store it in the ``media`` directory under
 | 
			
		||||
   ``documents/pdf``.
 | 
			
		||||
5. Encrypt the document and store it in the ``media`` directory under
 | 
			
		||||
   ``documents/originals``.
 | 
			
		||||
6. Go to #1.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -74,8 +75,8 @@ The consumer is started via the ``manage.py`` script:
 | 
			
		||||
 | 
			
		||||
    $ /path/to/paperless/src/manage.py document_consumer
 | 
			
		||||
 | 
			
		||||
This starts the service that will run in a loop, consuming PDF files as they
 | 
			
		||||
appear in ``CONSUMPTION_DIR``.
 | 
			
		||||
This starts the service that will consume documents as they appear in
 | 
			
		||||
``CONSUMPTION_DIR``.
 | 
			
		||||
 | 
			
		||||
Note that this command runs continuously, so exiting it will mean your webserver
 | 
			
		||||
disappears.  If you want to run this full-time (which is kind of the point)
 | 
			
		||||
@@ -97,8 +98,8 @@ The Exporter
 | 
			
		||||
------------
 | 
			
		||||
 | 
			
		||||
Tired of fiddling with Paperless, or just want to do something stupid and are
 | 
			
		||||
afraid of accidentally damaging your files?  You can export all of your PDFs
 | 
			
		||||
into neatly named, dated, and unencrypted.
 | 
			
		||||
afraid of accidentally damaging your files?  You can export all of your
 | 
			
		||||
documents into neatly named, dated, and unencrypted files.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
.. _utilities-exporter-howto:
 | 
			
		||||
@@ -112,10 +113,10 @@ This too is done via the ``manage.py`` script:
 | 
			
		||||
 | 
			
		||||
    $ /path/to/paperless/src/manage.py document_exporter /path/to/somewhere/
 | 
			
		||||
 | 
			
		||||
This will dump all of your unencrypted PDFs into ``/path/to/somewhere`` for you
 | 
			
		||||
to do with as you please.  The files are accompanied with a special file,
 | 
			
		||||
``manifest.json`` which can be used to
 | 
			
		||||
:ref:`import the files <utilities-importer>` at a later date if you wish.
 | 
			
		||||
This will dump all of your unencrypted documents into ``/path/to/somewhere``
 | 
			
		||||
for you to do with as you please.  The files are accompanied with a special
 | 
			
		||||
file, ``manifest.json`` which can be used to :ref:`import the files
 | 
			
		||||
<utilities-importer>` at a later date if you wish.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
.. _utilities-exporter-howto-docker:
 | 
			
		||||
 
 | 
			
		||||
@@ -165,6 +165,8 @@ PAPERLESS_PASSPHRASE="secret"
 | 
			
		||||
#PAPERLESS_CONVERT_DENSITY=300
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# (This setting is ignored on Linux where inotify is used instead of a
 | 
			
		||||
# polling loop.)
 | 
			
		||||
# The number of seconds that Paperless will wait between checking
 | 
			
		||||
# PAPERLESS_CONSUMPTION_DIR.  If you tend to write documents to this directory
 | 
			
		||||
# rarely, you may want to use a higher value than the default (10).
 | 
			
		||||
 
 | 
			
		||||
@@ -20,6 +20,7 @@ flake8==3.5.0
 | 
			
		||||
fuzzywuzzy==0.15.0
 | 
			
		||||
gunicorn==19.8.1
 | 
			
		||||
idna==2.6
 | 
			
		||||
inotify_simple==1.1.7; sys_platform == 'linux'
 | 
			
		||||
langdetect==1.0.7
 | 
			
		||||
mccabe==0.6.1
 | 
			
		||||
more-itertools==4.1.0
 | 
			
		||||
 
 | 
			
		||||
@@ -3,8 +3,10 @@ import hashlib
 | 
			
		||||
import logging
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
import time
 | 
			
		||||
import uuid
 | 
			
		||||
 | 
			
		||||
from operator import itemgetter
 | 
			
		||||
from django.conf import settings
 | 
			
		||||
from django.utils import timezone
 | 
			
		||||
from paperless.db import GnuPG
 | 
			
		||||
@@ -32,21 +34,21 @@ class Consumer:
 | 
			
		||||
      5. Delete the document and image(s)
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    # Files are considered ready for consumption if they have been unmodified
 | 
			
		||||
    # for this duration
 | 
			
		||||
    FILES_MIN_UNMODIFIED_DURATION = 0.5
 | 
			
		||||
 | 
			
		||||
    def __init__(self, consume=settings.CONSUMPTION_DIR,
 | 
			
		||||
                 scratch=settings.SCRATCH_DIR):
 | 
			
		||||
 | 
			
		||||
        self.logger = logging.getLogger(__name__)
 | 
			
		||||
        self.logging_group = None
 | 
			
		||||
 | 
			
		||||
        self.stats = {}
 | 
			
		||||
        self._ignore = []
 | 
			
		||||
        self.consume = consume
 | 
			
		||||
        self.scratch = scratch
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            os.makedirs(self.scratch)
 | 
			
		||||
        except FileExistsError:
 | 
			
		||||
            pass
 | 
			
		||||
        os.makedirs(self.scratch, exists_ok=True)
 | 
			
		||||
 | 
			
		||||
        if not self.consume:
 | 
			
		||||
            raise ConsumerError(
 | 
			
		||||
@@ -73,83 +75,99 @@ class Consumer:
 | 
			
		||||
            "group": self.logging_group
 | 
			
		||||
        })
 | 
			
		||||
 | 
			
		||||
    def run(self):
 | 
			
		||||
    def consume_new_files(self):
 | 
			
		||||
        """
 | 
			
		||||
        Find non-ignored files in consumption dir and consume them if they have
 | 
			
		||||
        been unmodified for FILES_MIN_UNMODIFIED_DURATION.
 | 
			
		||||
        """
 | 
			
		||||
        ignored_files = []
 | 
			
		||||
        files = []
 | 
			
		||||
        for entry in os.scandir(self.consume):
 | 
			
		||||
            if entry.is_file():
 | 
			
		||||
                file = (entry.path, entry.stat().st_mtime)
 | 
			
		||||
                if file in self._ignore:
 | 
			
		||||
                    ignored_files.append(file)
 | 
			
		||||
                else:
 | 
			
		||||
                    files.append(file)
 | 
			
		||||
 | 
			
		||||
        for doc in os.listdir(self.consume):
 | 
			
		||||
        if not files:
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
            doc = os.path.join(self.consume, doc)
 | 
			
		||||
        # Set _ignore to only include files that still exist.
 | 
			
		||||
        # This keeps it from growing indefinitely.
 | 
			
		||||
        self._ignore[:] = ignored_files
 | 
			
		||||
 | 
			
		||||
            if not os.path.isfile(doc):
 | 
			
		||||
                continue
 | 
			
		||||
        files_old_to_new = sorted(files, key=itemgetter(1))
 | 
			
		||||
 | 
			
		||||
            if not re.match(FileInfo.REGEXES["title"], doc):
 | 
			
		||||
                continue
 | 
			
		||||
        time.sleep(self.FILES_MIN_UNMODIFIED_DURATION)
 | 
			
		||||
 | 
			
		||||
            if doc in self._ignore:
 | 
			
		||||
                continue
 | 
			
		||||
        for file, mtime in files_old_to_new:
 | 
			
		||||
            if mtime == os.path.getmtime(file):
 | 
			
		||||
                # File has not been modified and can be consumed
 | 
			
		||||
                if not self.try_consume_file(file):
 | 
			
		||||
                    self._ignore.append((file, mtime))
 | 
			
		||||
 | 
			
		||||
            if not self._is_ready(doc):
 | 
			
		||||
                continue
 | 
			
		||||
    def try_consume_file(self, file):
 | 
			
		||||
        "Return True if file was consumed"
 | 
			
		||||
 | 
			
		||||
            if self._is_duplicate(doc):
 | 
			
		||||
                self.log(
 | 
			
		||||
                    "info",
 | 
			
		||||
                    "Skipping {} as it appears to be a duplicate".format(doc)
 | 
			
		||||
                )
 | 
			
		||||
                self._ignore.append(doc)
 | 
			
		||||
                continue
 | 
			
		||||
        if not re.match(FileInfo.REGEXES["title"], file):
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
            parser_class = self._get_parser_class(doc)
 | 
			
		||||
            if not parser_class:
 | 
			
		||||
                self.log(
 | 
			
		||||
                    "error", "No parsers could be found for {}".format(doc))
 | 
			
		||||
                self._ignore.append(doc)
 | 
			
		||||
                continue
 | 
			
		||||
        doc = file
 | 
			
		||||
 | 
			
		||||
            self.logging_group = uuid.uuid4()
 | 
			
		||||
        if self._is_duplicate(doc):
 | 
			
		||||
            self.log(
 | 
			
		||||
                "info",
 | 
			
		||||
                "Skipping {} as it appears to be a duplicate".format(doc)
 | 
			
		||||
            )
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
            self.log("info", "Consuming {}".format(doc))
 | 
			
		||||
        parser_class = self._get_parser_class(doc)
 | 
			
		||||
        if not parser_class:
 | 
			
		||||
            self.log(
 | 
			
		||||
                "error", "No parsers could be found for {}".format(doc))
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
            document_consumption_started.send(
 | 
			
		||||
                sender=self.__class__,
 | 
			
		||||
                filename=doc,
 | 
			
		||||
                logging_group=self.logging_group
 | 
			
		||||
        self.logging_group = uuid.uuid4()
 | 
			
		||||
 | 
			
		||||
        self.log("info", "Consuming {}".format(doc))
 | 
			
		||||
 | 
			
		||||
        document_consumption_started.send(
 | 
			
		||||
            sender=self.__class__,
 | 
			
		||||
            filename=doc,
 | 
			
		||||
            logging_group=self.logging_group
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        parsed_document = parser_class(doc)
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            thumbnail = parsed_document.get_thumbnail()
 | 
			
		||||
            date = parsed_document.get_date()
 | 
			
		||||
            document = self._store(
 | 
			
		||||
                parsed_document.get_text(),
 | 
			
		||||
                doc,
 | 
			
		||||
                thumbnail,
 | 
			
		||||
                date
 | 
			
		||||
            )
 | 
			
		||||
        except ParseError as e:
 | 
			
		||||
            self.log("error", "PARSE FAILURE for {}: {}".format(doc, e))
 | 
			
		||||
            parsed_document.cleanup()
 | 
			
		||||
            return False
 | 
			
		||||
        else:
 | 
			
		||||
            parsed_document.cleanup()
 | 
			
		||||
            self._cleanup_doc(doc)
 | 
			
		||||
 | 
			
		||||
            self.log(
 | 
			
		||||
                "info",
 | 
			
		||||
                "Document {} consumption finished".format(document)
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
            parsed_document = parser_class(doc)
 | 
			
		||||
 | 
			
		||||
            try:
 | 
			
		||||
                thumbnail = parsed_document.get_thumbnail()
 | 
			
		||||
                date = parsed_document.get_date()
 | 
			
		||||
                document = self._store(
 | 
			
		||||
                    parsed_document.get_text(),
 | 
			
		||||
                    doc,
 | 
			
		||||
                    thumbnail,
 | 
			
		||||
                    date
 | 
			
		||||
                )
 | 
			
		||||
            except ParseError as e:
 | 
			
		||||
 | 
			
		||||
                self._ignore.append(doc)
 | 
			
		||||
                self.log("error", "PARSE FAILURE for {}: {}".format(doc, e))
 | 
			
		||||
                parsed_document.cleanup()
 | 
			
		||||
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            else:
 | 
			
		||||
 | 
			
		||||
                parsed_document.cleanup()
 | 
			
		||||
                self._cleanup_doc(doc)
 | 
			
		||||
 | 
			
		||||
                self.log(
 | 
			
		||||
                    "info",
 | 
			
		||||
                    "Document {} consumption finished".format(document)
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
                document_consumption_finished.send(
 | 
			
		||||
                    sender=self.__class__,
 | 
			
		||||
                    document=document,
 | 
			
		||||
                    logging_group=self.logging_group
 | 
			
		||||
                )
 | 
			
		||||
            document_consumption_finished.send(
 | 
			
		||||
                sender=self.__class__,
 | 
			
		||||
                document=document,
 | 
			
		||||
                logging_group=self.logging_group
 | 
			
		||||
            )
 | 
			
		||||
            return True
 | 
			
		||||
 | 
			
		||||
    def _get_parser_class(self, doc):
 | 
			
		||||
        """
 | 
			
		||||
@@ -224,22 +242,6 @@ class Consumer:
 | 
			
		||||
        self.log("debug", "Deleting document {}".format(doc))
 | 
			
		||||
        os.unlink(doc)
 | 
			
		||||
 | 
			
		||||
    def _is_ready(self, doc):
 | 
			
		||||
        """
 | 
			
		||||
        Detect whether ``doc`` is ready to consume or if it's still being
 | 
			
		||||
        written to by the uploader.
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
        t = os.stat(doc).st_mtime
 | 
			
		||||
 | 
			
		||||
        if self.stats.get(doc) == t:
 | 
			
		||||
            del(self.stats[doc])
 | 
			
		||||
            return True
 | 
			
		||||
 | 
			
		||||
        self.stats[doc] = t
 | 
			
		||||
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _is_duplicate(doc):
 | 
			
		||||
        with open(doc, "rb") as f:
 | 
			
		||||
 
 | 
			
		||||
@@ -20,7 +20,7 @@ class MailFetcherError(Exception):
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class InvalidMessageError(Exception):
 | 
			
		||||
class InvalidMessageError(MailFetcherError):
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -75,6 +75,9 @@ class Message(Loggable):
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            dispositions = content_disposition.strip().split(";")
 | 
			
		||||
            if len(dispositions) < 2:
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            if not dispositions[0].lower() == "attachment" and \
 | 
			
		||||
               "filename" not in dispositions[1].lower():
 | 
			
		||||
                continue
 | 
			
		||||
@@ -159,8 +162,10 @@ class MailFetcher(Loggable):
 | 
			
		||||
        self._inbox = os.getenv("PAPERLESS_CONSUME_MAIL_INBOX", "INBOX")
 | 
			
		||||
 | 
			
		||||
        self._enabled = bool(self._host)
 | 
			
		||||
        if self._enabled and Message.SECRET is None:
 | 
			
		||||
            raise MailFetcherError("No PAPERLESS_EMAIL_SECRET defined")
 | 
			
		||||
 | 
			
		||||
        self.last_checked = datetime.datetime.now()
 | 
			
		||||
        self.last_checked = time.time()
 | 
			
		||||
        self.consume = consume
 | 
			
		||||
 | 
			
		||||
    def pull(self):
 | 
			
		||||
@@ -187,7 +192,7 @@ class MailFetcher(Loggable):
 | 
			
		||||
                    f.write(message.attachment.data)
 | 
			
		||||
                    os.utime(file_name, times=(t, t))
 | 
			
		||||
 | 
			
		||||
        self.last_checked = datetime.datetime.now()
 | 
			
		||||
        self.last_checked = time.time()
 | 
			
		||||
 | 
			
		||||
    def _get_messages(self):
 | 
			
		||||
 | 
			
		||||
@@ -205,7 +210,7 @@ class MailFetcher(Loggable):
 | 
			
		||||
            self._connection.close()
 | 
			
		||||
            self._connection.logout()
 | 
			
		||||
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
        except MailFetcherError as e:
 | 
			
		||||
            self.log("error", str(e))
 | 
			
		||||
 | 
			
		||||
        return r
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,7 @@
 | 
			
		||||
import datetime
 | 
			
		||||
import logging
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from django.conf import settings
 | 
			
		||||
@@ -9,6 +10,11 @@ from django.core.management.base import BaseCommand, CommandError
 | 
			
		||||
from ...consumer import Consumer, ConsumerError
 | 
			
		||||
from ...mail import MailFetcher, MailFetcherError
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    from inotify_simple import INotify, flags
 | 
			
		||||
except ImportError:
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Command(BaseCommand):
 | 
			
		||||
    """
 | 
			
		||||
@@ -53,13 +59,20 @@ class Command(BaseCommand):
 | 
			
		||||
            action="store_true",
 | 
			
		||||
            help="Run only once."
 | 
			
		||||
        )
 | 
			
		||||
        parser.add_argument(
 | 
			
		||||
            "--no-inotify",
 | 
			
		||||
            action="store_true",
 | 
			
		||||
            help="Don't use inotify, even if it's available."
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    def handle(self, *args, **options):
 | 
			
		||||
 | 
			
		||||
        self.verbosity = options["verbosity"]
 | 
			
		||||
        directory = options["directory"]
 | 
			
		||||
        loop_time = options["loop_time"]
 | 
			
		||||
        mail_delta = datetime.timedelta(minutes=options["mail_delta"])
 | 
			
		||||
        mail_delta = options["mail_delta"] * 60
 | 
			
		||||
        use_inotify = (not options["no_inotify"]
 | 
			
		||||
                       and "inotify_simple" in sys.modules)
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            self.file_consumer = Consumer(consume=directory)
 | 
			
		||||
@@ -67,39 +80,68 @@ class Command(BaseCommand):
 | 
			
		||||
        except (ConsumerError, MailFetcherError) as e:
 | 
			
		||||
            raise CommandError(e)
 | 
			
		||||
 | 
			
		||||
        for path in (self.ORIGINAL_DOCS, self.THUMB_DOCS):
 | 
			
		||||
            try:
 | 
			
		||||
                os.makedirs(path)
 | 
			
		||||
            except FileExistsError:
 | 
			
		||||
                pass
 | 
			
		||||
        for d in (self.ORIGINAL_DOCS, self.THUMB_DOCS):
 | 
			
		||||
            os.makedirs(d, exists_ok=True)
 | 
			
		||||
 | 
			
		||||
        logging.getLogger(__name__).info(
 | 
			
		||||
            "Starting document consumer at {}".format(directory)
 | 
			
		||||
            "Starting document consumer at {}{}".format(
 | 
			
		||||
                directory,
 | 
			
		||||
                " with inotify" if use_inotify else ""
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        if options["oneshot"]:
 | 
			
		||||
            self.loop(mail_delta=mail_delta)
 | 
			
		||||
            self.loop_step(mail_delta)
 | 
			
		||||
        else:
 | 
			
		||||
            try:
 | 
			
		||||
                while True:
 | 
			
		||||
                    self.loop(mail_delta=mail_delta)
 | 
			
		||||
                    time.sleep(loop_time)
 | 
			
		||||
                    if self.verbosity > 1:
 | 
			
		||||
                        print(".", int(time.time()))
 | 
			
		||||
                if use_inotify:
 | 
			
		||||
                    self.loop_inotify(mail_delta)
 | 
			
		||||
                else:
 | 
			
		||||
                    self.loop(loop_time, mail_delta)
 | 
			
		||||
            except KeyboardInterrupt:
 | 
			
		||||
                print("Exiting")
 | 
			
		||||
 | 
			
		||||
    def loop(self, mail_delta):
 | 
			
		||||
    def loop(self, loop_time, mail_delta):
 | 
			
		||||
        while True:
 | 
			
		||||
            start_time = time.time()
 | 
			
		||||
            if self.verbosity > 1:
 | 
			
		||||
                print(".", int(start_time))
 | 
			
		||||
            self.loop_step(mail_delta, start_time)
 | 
			
		||||
            # Sleep until the start of the next loop step
 | 
			
		||||
            time.sleep(max(0, start_time + loop_time - time.time()))
 | 
			
		||||
 | 
			
		||||
    def loop_step(self, mail_delta, time_now=None):
 | 
			
		||||
 | 
			
		||||
        # Occasionally fetch mail and store it to be consumed on the next loop
 | 
			
		||||
        # We fetch email when we first start up so that it is not necessary to
 | 
			
		||||
        # wait for 10 minutes after making changes to the config file.
 | 
			
		||||
        delta = self.mail_fetcher.last_checked + mail_delta
 | 
			
		||||
        if self.first_iteration or delta < datetime.datetime.now():
 | 
			
		||||
        next_mail_time = self.mail_fetcher.last_checked + mail_delta
 | 
			
		||||
        if self.first_iteration or time_now > next_mail_time:
 | 
			
		||||
            self.first_iteration = False
 | 
			
		||||
            self.mail_fetcher.pull()
 | 
			
		||||
 | 
			
		||||
        # Consume whatever files we can.
 | 
			
		||||
        # We have to run twice as the first run checks for file readiness
 | 
			
		||||
        for i in range(2):
 | 
			
		||||
            self.file_consumer.run()
 | 
			
		||||
        self.file_consumer.consume_new_files()
 | 
			
		||||
 | 
			
		||||
    def loop_inotify(self, mail_delta):
 | 
			
		||||
        directory = self.file_consumer.consume
 | 
			
		||||
        inotify = INotify()
 | 
			
		||||
        inotify.add_watch(directory, flags.CLOSE_WRITE | flags.MOVED_TO)
 | 
			
		||||
 | 
			
		||||
        # Run initial mail fetch and consume all currently existing documents
 | 
			
		||||
        self.loop_step(mail_delta)
 | 
			
		||||
        next_mail_time = self.mail_fetcher.last_checked + mail_delta
 | 
			
		||||
 | 
			
		||||
        while True:
 | 
			
		||||
            # Consume documents until next_mail_time
 | 
			
		||||
            while True:
 | 
			
		||||
                delta = next_mail_time - time.time()
 | 
			
		||||
                if delta > 0:
 | 
			
		||||
                    for event in inotify.read(timeout=delta):
 | 
			
		||||
                        file = os.path.join(directory, event.name)
 | 
			
		||||
                        if os.path.isfile(file):
 | 
			
		||||
                            self.file_consumer.try_consume_file(file)
 | 
			
		||||
                else:
 | 
			
		||||
                    break
 | 
			
		||||
 | 
			
		||||
            self.mail_fetcher.pull()
 | 
			
		||||
            next_mail_time = self.mail_fetcher.last_checked + mail_delta
 | 
			
		||||
 
 | 
			
		||||
@@ -246,6 +246,8 @@ SCRATCH_DIR = os.getenv("PAPERLESS_SCRATCH_DIR", "/tmp/paperless")
 | 
			
		||||
# This is where Paperless will look for PDFs to index
 | 
			
		||||
CONSUMPTION_DIR = os.getenv("PAPERLESS_CONSUMPTION_DIR")
 | 
			
		||||
 | 
			
		||||
# (This setting is ignored on Linux where inotify is used instead of a
 | 
			
		||||
# polling loop.)
 | 
			
		||||
# The number of seconds that Paperless will wait between checking
 | 
			
		||||
# CONSUMPTION_DIR.  If you tend to write documents to this directory very
 | 
			
		||||
# slowly, you may want to use a higher value than the default.
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user