Merge branch 'ovv-consumer-cli-args'

This commit is contained in:
Daniel Quinn 2018-03-03 18:43:41 +00:00
commit 4a25e9655c
7 changed files with 88 additions and 46 deletions

View File

@ -80,6 +80,13 @@ you'll need to have it start in the background -- something you'll need to
figure out for your own system. To get you started though, there are Systemd figure out for your own system. To get you started though, there are Systemd
service files in the ``scripts`` directory. service files in the ``scripts`` directory.
Several command-line arguments are available to customize the behavior of the
consumer. By default, it uses the values from ``/etc/paperless.conf``. To
display the help, run:

.. code-block:: shell-session

    $ /path/to/paperless/src/manage.py document_consumer --help
.. _utilities-exporter: .. _utilities-exporter:

View File

@ -32,31 +32,31 @@ class Consumer:
5. Delete the document and image(s) 5. Delete the document and image(s)
""" """
SCRATCH = settings.SCRATCH_DIR def __init__(self, consume=settings.CONSUMPTION_DIR,
CONSUME = settings.CONSUMPTION_DIR scratch=settings.SCRATCH_DIR):
def __init__(self):
self.logger = logging.getLogger(__name__) self.logger = logging.getLogger(__name__)
self.logging_group = None self.logging_group = None
self.stats = {}
self._ignore = []
self.consume = consume
self.scratch = scratch
try: try:
os.makedirs(self.SCRATCH) os.makedirs(self.scratch)
except FileExistsError: except FileExistsError:
pass pass
self.stats = {} if not self.consume:
self._ignore = []
if not self.CONSUME:
raise ConsumerError( raise ConsumerError(
"The CONSUMPTION_DIR settings variable does not appear to be " "The CONSUMPTION_DIR settings variable does not appear to be "
"set." "set."
) )
if not os.path.exists(self.CONSUME): if not os.path.exists(self.consume):
raise ConsumerError( raise ConsumerError(
"Consumption directory {} does not exist".format(self.CONSUME)) "Consumption directory {} does not exist".format(self.consume))
self.parsers = [] self.parsers = []
for response in document_consumer_declaration.send(self): for response in document_consumer_declaration.send(self):
@ -73,11 +73,11 @@ class Consumer:
"group": self.logging_group "group": self.logging_group
}) })
def consume(self): def run(self):
for doc in os.listdir(self.CONSUME): for doc in os.listdir(self.consume):
doc = os.path.join(self.CONSUME, doc) doc = os.path.join(self.consume, doc)
if not os.path.isfile(doc): if not os.path.isfile(doc):
continue continue
@ -226,8 +226,8 @@ class Consumer:
def _is_ready(self, doc): def _is_ready(self, doc):
""" """
Detect whether `doc` is ready to consume or if it's still being written Detect whether ``doc`` is ready to consume or if it's still being
to by the uploader. written to by the uploader.
""" """
t = os.stat(doc).st_mtime t = os.stat(doc).st_mtime

View File

@ -92,7 +92,7 @@ class UploadForm(forms.Form):
t = int(mktime(datetime.now().timetuple())) t = int(mktime(datetime.now().timetuple()))
file_name = os.path.join( file_name = os.path.join(
Consumer.CONSUME, settings.CONSUMPTION_DIR,
"{} - {}.{}".format(correspondent, title, self._file_type) "{} - {}.{}".format(correspondent, title, self._file_type)
) )

View File

@ -151,7 +151,7 @@ class Attachment(object):
class MailFetcher(Loggable): class MailFetcher(Loggable):
def __init__(self): def __init__(self, consume=settings.CONSUMPTION_DIR):
Loggable.__init__(self) Loggable.__init__(self)
@ -165,6 +165,7 @@ class MailFetcher(Loggable):
self._enabled = bool(self._host) self._enabled = bool(self._host)
self.last_checked = datetime.datetime.now() self.last_checked = datetime.datetime.now()
self.consume = consume
def pull(self): def pull(self):
""" """
@ -185,7 +186,7 @@ class MailFetcher(Loggable):
self.log("info", 'Storing email: "{}"'.format(message.subject)) self.log("info", 'Storing email: "{}"'.format(message.subject))
t = int(time.mktime(message.time.timetuple())) t = int(time.mktime(message.time.timetuple()))
file_name = os.path.join(Consumer.CONSUME, message.file_name) file_name = os.path.join(self.consume, message.file_name)
with open(file_name, "wb") as f: with open(file_name, "wb") as f:
f.write(message.attachment.data) f.write(message.attachment.data)
os.utime(file_name, times=(t, t)) os.utime(file_name, times=(t, t))

View File

@ -16,9 +16,6 @@ class Command(BaseCommand):
consumption directory, and fetch any mail available. consumption directory, and fetch any mail available.
""" """
LOOP_TIME = settings.CONSUMER_LOOP_TIME
MAIL_DELTA = datetime.timedelta(minutes=10)
ORIGINAL_DOCS = os.path.join(settings.MEDIA_ROOT, "documents", "originals") ORIGINAL_DOCS = os.path.join(settings.MEDIA_ROOT, "documents", "originals")
THUMB_DOCS = os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails") THUMB_DOCS = os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails")
@ -32,13 +29,41 @@ class Command(BaseCommand):
BaseCommand.__init__(self, *args, **kwargs) BaseCommand.__init__(self, *args, **kwargs)
def add_arguments(self, parser):
parser.add_argument(
"directory",
default=settings.CONSUMPTION_DIR,
nargs="?",
help="The consumption directory."
)
parser.add_argument(
"--loop-time",
default=settings.CONSUMER_LOOP_TIME,
type=int,
help="Wait time between each loop (in seconds)."
)
parser.add_argument(
"--mail-delta",
default=10,
type=int,
help="Wait time between each mail fetch (in minutes)."
)
parser.add_argument(
"--oneshot",
action="store_true",
help="Run only once."
)
def handle(self, *args, **options): def handle(self, *args, **options):
self.verbosity = options["verbosity"] self.verbosity = options["verbosity"]
directory = options["directory"]
loop_time = options["loop_time"]
mail_delta = datetime.timedelta(minutes=options["mail_delta"])
try: try:
self.file_consumer = Consumer() self.file_consumer = Consumer(consume=directory)
self.mail_fetcher = MailFetcher() self.mail_fetcher = MailFetcher(consume=directory)
except (ConsumerError, MailFetcherError) as e: except (ConsumerError, MailFetcherError) as e:
raise CommandError(e) raise CommandError(e)
@ -49,27 +74,32 @@ class Command(BaseCommand):
pass pass
logging.getLogger(__name__).info( logging.getLogger(__name__).info(
"Starting document consumer at {}".format(settings.CONSUMPTION_DIR) "Starting document consumer at {}".format(directory)
) )
try: if options["oneshot"]:
while True: self.loop(mail_delta=mail_delta)
self.loop() else:
time.sleep(self.LOOP_TIME) try:
if self.verbosity > 1: while True:
print(".") self.loop(mail_delta=mail_delta)
except KeyboardInterrupt: time.sleep(loop_time)
print("Exiting") if self.verbosity > 1:
print(".", int(time.time()))
except KeyboardInterrupt:
print("Exiting")
def loop(self): def loop(self, mail_delta):
# Consume whatever files we can
self.file_consumer.consume()
# Occasionally fetch mail and store it to be consumed on the next loop # Occasionally fetch mail and store it to be consumed on the next loop
# We fetch email when we first start up so that it is not necessary to # We fetch email when we first start up so that it is not necessary to
# wait for 10 minutes after making changes to the config file. # wait for 10 minutes after making changes to the config file.
delta = self.mail_fetcher.last_checked + self.MAIL_DELTA delta = self.mail_fetcher.last_checked + mail_delta
if self.first_iteration or delta < datetime.datetime.now(): if self.first_iteration or delta < datetime.datetime.now():
self.first_iteration = False self.first_iteration = False
self.mail_fetcher.pull() self.mail_fetcher.pull()
# Consume whatever files we can.
# We have to run twice as the first run checks for file readiness
for i in range(2):
self.file_consumer.run()

View File

@ -1,5 +1,6 @@
from django.test import TestCase from django.test import TestCase
from unittest import mock from unittest import mock
from tempfile import TemporaryDirectory
from ..consumer import Consumer from ..consumer import Consumer
from ..models import FileInfo from ..models import FileInfo
@ -16,7 +17,6 @@ class TestConsumer(TestCase):
self.DummyParser self.DummyParser
) )
@mock.patch("documents.consumer.Consumer.CONSUME")
@mock.patch("documents.consumer.os.makedirs") @mock.patch("documents.consumer.os.makedirs")
@mock.patch("documents.consumer.os.path.exists", return_value=True) @mock.patch("documents.consumer.os.path.exists", return_value=True)
@mock.patch("documents.consumer.document_consumer_declaration.send") @mock.patch("documents.consumer.document_consumer_declaration.send")
@ -32,18 +32,22 @@ class TestConsumer(TestCase):
(None, lambda _: {"weight": 0, "parser": DummyParser1}), (None, lambda _: {"weight": 0, "parser": DummyParser1}),
(None, lambda _: {"weight": 1, "parser": DummyParser2}), (None, lambda _: {"weight": 1, "parser": DummyParser2}),
) )
with TemporaryDirectory() as tmpdir:
self.assertEqual(
Consumer(consume=tmpdir)._get_parser_class("doc.pdf"),
DummyParser2
)
self.assertEqual(Consumer()._get_parser_class("doc.pdf"), DummyParser2)
@mock.patch("documents.consumer.Consumer.CONSUME")
@mock.patch("documents.consumer.os.makedirs") @mock.patch("documents.consumer.os.makedirs")
@mock.patch("documents.consumer.os.path.exists", return_value=True) @mock.patch("documents.consumer.os.path.exists", return_value=True)
@mock.patch("documents.consumer.document_consumer_declaration.send") @mock.patch("documents.consumer.document_consumer_declaration.send")
def test__get_parser_class_0_parsers(self, m, *args): def test__get_parser_class_0_parsers(self, m, *args):
m.return_value = ((None, lambda _: None),) m.return_value = ((None, lambda _: None),)
self.assertIsNone(Consumer()._get_parser_class("doc.pdf")) with TemporaryDirectory() as tmpdir:
self.assertIsNone(
Consumer(consume=tmpdir)._get_parser_class("doc.pdf")
)
@mock.patch("documents.consumer.Consumer.CONSUME")
@mock.patch("documents.consumer.os.makedirs") @mock.patch("documents.consumer.os.makedirs")
@mock.patch("documents.consumer.os.path.exists", return_value=True) @mock.patch("documents.consumer.os.path.exists", return_value=True)
@mock.patch("documents.consumer.document_consumer_declaration.send") @mock.patch("documents.consumer.document_consumer_declaration.send")
@ -51,7 +55,8 @@ class TestConsumer(TestCase):
m.return_value = ( m.return_value = (
(None, lambda _: {"weight": 0, "parser": self.DummyParser}), (None, lambda _: {"weight": 0, "parser": self.DummyParser}),
) )
return Consumer() with TemporaryDirectory() as tmpdir:
return Consumer(consume=tmpdir)
class TestAttributes(TestCase): class TestAttributes(TestCase):

View File

@ -6,7 +6,6 @@ exclude = migrations, paperless/settings.py, .tox
DJANGO_SETTINGS_MODULE=paperless.settings DJANGO_SETTINGS_MODULE=paperless.settings
addopts = --pythonwarnings=all -n auto addopts = --pythonwarnings=all -n auto
env = env =
PAPERLESS_CONSUME=/tmp
PAPERLESS_PASSPHRASE=THISISNOTASECRET PAPERLESS_PASSPHRASE=THISISNOTASECRET
PAPERLESS_SECRET=paperless PAPERLESS_SECRET=paperless
PAPERLESS_EMAIL_SECRET=paperless PAPERLESS_EMAIL_SECRET=paperless