Merge branch 'ovv-consumer-cli-args'

This commit is contained in:
Daniel Quinn 2018-03-03 18:43:41 +00:00
commit 4a25e9655c
7 changed files with 88 additions and 46 deletions

View File

@ -80,6 +80,13 @@ you'll need to have it start in the background -- something you'll need to
figure out for your own system. To get you started though, there are Systemd
service files in the ``scripts`` directory.
Some command line arguments are available to customize the behavior of the
consumer. By default it will use ``/etc/paperless.conf`` values. Display the
help with:
.. code-block:: shell-session
$ /path/to/paperless/src/manage.py document_consumer --help
.. _utilities-exporter:

View File

@ -32,31 +32,31 @@ class Consumer:
5. Delete the document and image(s)
"""
SCRATCH = settings.SCRATCH_DIR
CONSUME = settings.CONSUMPTION_DIR
def __init__(self):
def __init__(self, consume=settings.CONSUMPTION_DIR,
scratch=settings.SCRATCH_DIR):
self.logger = logging.getLogger(__name__)
self.logging_group = None
self.stats = {}
self._ignore = []
self.consume = consume
self.scratch = scratch
try:
os.makedirs(self.SCRATCH)
os.makedirs(self.scratch)
except FileExistsError:
pass
self.stats = {}
self._ignore = []
if not self.CONSUME:
if not self.consume:
raise ConsumerError(
"The CONSUMPTION_DIR settings variable does not appear to be "
"set."
)
if not os.path.exists(self.CONSUME):
if not os.path.exists(self.consume):
raise ConsumerError(
"Consumption directory {} does not exist".format(self.CONSUME))
"Consumption directory {} does not exist".format(self.consume))
self.parsers = []
for response in document_consumer_declaration.send(self):
@ -73,11 +73,11 @@ class Consumer:
"group": self.logging_group
})
def consume(self):
def run(self):
for doc in os.listdir(self.CONSUME):
for doc in os.listdir(self.consume):
doc = os.path.join(self.CONSUME, doc)
doc = os.path.join(self.consume, doc)
if not os.path.isfile(doc):
continue
@ -226,8 +226,8 @@ class Consumer:
def _is_ready(self, doc):
"""
Detect whether `doc` is ready to consume or if it's still being written
to by the uploader.
Detect whether ``doc`` is ready to consume or if it's still being
written to by the uploader.
"""
t = os.stat(doc).st_mtime

View File

@ -92,7 +92,7 @@ class UploadForm(forms.Form):
t = int(mktime(datetime.now().timetuple()))
file_name = os.path.join(
Consumer.CONSUME,
settings.CONSUMPTION_DIR,
"{} - {}.{}".format(correspondent, title, self._file_type)
)

View File

@ -151,7 +151,7 @@ class Attachment(object):
class MailFetcher(Loggable):
def __init__(self):
def __init__(self, consume=settings.CONSUMPTION_DIR):
Loggable.__init__(self)
@ -165,6 +165,7 @@ class MailFetcher(Loggable):
self._enabled = bool(self._host)
self.last_checked = datetime.datetime.now()
self.consume = consume
def pull(self):
"""
@ -185,7 +186,7 @@ class MailFetcher(Loggable):
self.log("info", 'Storing email: "{}"'.format(message.subject))
t = int(time.mktime(message.time.timetuple()))
file_name = os.path.join(Consumer.CONSUME, message.file_name)
file_name = os.path.join(self.consume, message.file_name)
with open(file_name, "wb") as f:
f.write(message.attachment.data)
os.utime(file_name, times=(t, t))

View File

@ -16,9 +16,6 @@ class Command(BaseCommand):
consumption directory, and fetch any mail available.
"""
LOOP_TIME = settings.CONSUMER_LOOP_TIME
MAIL_DELTA = datetime.timedelta(minutes=10)
ORIGINAL_DOCS = os.path.join(settings.MEDIA_ROOT, "documents", "originals")
THUMB_DOCS = os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails")
@ -32,13 +29,41 @@ class Command(BaseCommand):
BaseCommand.__init__(self, *args, **kwargs)
# Register the document consumer's command line options on ``parser``.
# ``handle`` reads each of them back from ``options`` ("directory",
# "loop_time", "mail_delta" and "oneshot").
def add_arguments(self, parser):
# Optional positional argument (nargs="?"): the directory to consume from.
# Falls back to settings.CONSUMPTION_DIR when it is not given.
parser.add_argument(
"directory",
default=settings.CONSUMPTION_DIR,
nargs="?",
help="The consumption directory."
)
# Integer number of seconds; ``handle`` sleeps this long between loops.
parser.add_argument(
"--loop-time",
default=settings.CONSUMER_LOOP_TIME,
type=int,
help="Wait time between each loop (in seconds)."
)
# Integer number of minutes; ``handle`` converts it to a
# ``datetime.timedelta`` before passing it to ``loop``.
parser.add_argument(
"--mail-delta",
default=10,
type=int,
help="Wait time between each mail fetch (in minutes)."
)
# Boolean flag: when present, ``handle`` calls ``loop`` a single time instead
# of looping until interrupted.
parser.add_argument(
"--oneshot",
action="store_true",
help="Run only once."
)
def handle(self, *args, **options):
self.verbosity = options["verbosity"]
directory = options["directory"]
loop_time = options["loop_time"]
mail_delta = datetime.timedelta(minutes=options["mail_delta"])
try:
self.file_consumer = Consumer()
self.mail_fetcher = MailFetcher()
self.file_consumer = Consumer(consume=directory)
self.mail_fetcher = MailFetcher(consume=directory)
except (ConsumerError, MailFetcherError) as e:
raise CommandError(e)
@ -49,27 +74,32 @@ class Command(BaseCommand):
pass
logging.getLogger(__name__).info(
"Starting document consumer at {}".format(settings.CONSUMPTION_DIR)
"Starting document consumer at {}".format(directory)
)
try:
while True:
self.loop()
time.sleep(self.LOOP_TIME)
if self.verbosity > 1:
print(".")
except KeyboardInterrupt:
print("Exiting")
if options["oneshot"]:
self.loop(mail_delta=mail_delta)
else:
try:
while True:
self.loop(mail_delta=mail_delta)
time.sleep(loop_time)
if self.verbosity > 1:
print(".", int(time.time()))
except KeyboardInterrupt:
print("Exiting")
def loop(self):
# Consume whatever files we can
self.file_consumer.consume()
def loop(self, mail_delta):
# Occasionally fetch mail and store it to be consumed on the next loop
# We fetch email when we first start up so that it is not necessary to
# wait for 10 minutes after making changes to the config file.
delta = self.mail_fetcher.last_checked + self.MAIL_DELTA
delta = self.mail_fetcher.last_checked + mail_delta
if self.first_iteration or delta < datetime.datetime.now():
self.first_iteration = False
self.mail_fetcher.pull()
# Consume whatever files we can.
# We have to run twice as the first run checks for file readiness
for i in range(2):
self.file_consumer.run()

View File

@ -1,5 +1,6 @@
from django.test import TestCase
from unittest import mock
from tempfile import TemporaryDirectory
from ..consumer import Consumer
from ..models import FileInfo
@ -16,7 +17,6 @@ class TestConsumer(TestCase):
self.DummyParser
)
@mock.patch("documents.consumer.Consumer.CONSUME")
@mock.patch("documents.consumer.os.makedirs")
@mock.patch("documents.consumer.os.path.exists", return_value=True)
@mock.patch("documents.consumer.document_consumer_declaration.send")
@ -32,18 +32,22 @@ class TestConsumer(TestCase):
(None, lambda _: {"weight": 0, "parser": DummyParser1}),
(None, lambda _: {"weight": 1, "parser": DummyParser2}),
)
with TemporaryDirectory() as tmpdir:
self.assertEqual(
Consumer(consume=tmpdir)._get_parser_class("doc.pdf"),
DummyParser2
)
self.assertEqual(Consumer()._get_parser_class("doc.pdf"), DummyParser2)
@mock.patch("documents.consumer.Consumer.CONSUME")
@mock.patch("documents.consumer.os.makedirs")
@mock.patch("documents.consumer.os.path.exists", return_value=True)
@mock.patch("documents.consumer.document_consumer_declaration.send")
def test__get_parser_class_0_parsers(self, m, *args):
m.return_value = ((None, lambda _: None),)
self.assertIsNone(Consumer()._get_parser_class("doc.pdf"))
with TemporaryDirectory() as tmpdir:
self.assertIsNone(
Consumer(consume=tmpdir)._get_parser_class("doc.pdf")
)
@mock.patch("documents.consumer.Consumer.CONSUME")
@mock.patch("documents.consumer.os.makedirs")
@mock.patch("documents.consumer.os.path.exists", return_value=True)
@mock.patch("documents.consumer.document_consumer_declaration.send")
@ -51,7 +55,8 @@ class TestConsumer(TestCase):
m.return_value = (
(None, lambda _: {"weight": 0, "parser": self.DummyParser}),
)
return Consumer()
with TemporaryDirectory() as tmpdir:
return Consumer(consume=tmpdir)
class TestAttributes(TestCase):

View File

@ -6,7 +6,6 @@ exclude = migrations, paperless/settings.py, .tox
DJANGO_SETTINGS_MODULE=paperless.settings
addopts = --pythonwarnings=all -n auto
env =
PAPERLESS_CONSUME=/tmp
PAPERLESS_PASSPHRASE=THISISNOTASECRET
PAPERLESS_SECRET=paperless
PAPERLESS_EMAIL_SECRET=paperless