mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge branch 'ovv-consumer-cli-args'
This commit is contained in:
commit
4a25e9655c
@ -80,6 +80,13 @@ you'll need to have it start in the background -- something you'll need to
|
|||||||
figure out for your own system. To get you started though, there are Systemd
|
figure out for your own system. To get you started though, there are Systemd
|
||||||
service files in the ``scripts`` directory.
|
service files in the ``scripts`` directory.
|
||||||
|
|
||||||
|
Some command line arguments are available to customize the behavior of the
|
||||||
|
consumer. By default it will use ``/etc/paperless.conf`` values. Display the
|
||||||
|
help with:
|
||||||
|
|
||||||
|
.. code-block:: shell-session
|
||||||
|
|
||||||
|
$ /path/to/paperless/src/manage.py document_consumer --help
|
||||||
|
|
||||||
.. _utilities-exporter:
|
.. _utilities-exporter:
|
||||||
|
|
||||||
|
@ -32,31 +32,31 @@ class Consumer:
|
|||||||
5. Delete the document and image(s)
|
5. Delete the document and image(s)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
SCRATCH = settings.SCRATCH_DIR
|
def __init__(self, consume=settings.CONSUMPTION_DIR,
|
||||||
CONSUME = settings.CONSUMPTION_DIR
|
scratch=settings.SCRATCH_DIR):
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
|
|
||||||
self.logger = logging.getLogger(__name__)
|
self.logger = logging.getLogger(__name__)
|
||||||
self.logging_group = None
|
self.logging_group = None
|
||||||
|
|
||||||
|
self.stats = {}
|
||||||
|
self._ignore = []
|
||||||
|
self.consume = consume
|
||||||
|
self.scratch = scratch
|
||||||
|
|
||||||
try:
|
try:
|
||||||
os.makedirs(self.SCRATCH)
|
os.makedirs(self.scratch)
|
||||||
except FileExistsError:
|
except FileExistsError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
self.stats = {}
|
if not self.consume:
|
||||||
self._ignore = []
|
|
||||||
|
|
||||||
if not self.CONSUME:
|
|
||||||
raise ConsumerError(
|
raise ConsumerError(
|
||||||
"The CONSUMPTION_DIR settings variable does not appear to be "
|
"The CONSUMPTION_DIR settings variable does not appear to be "
|
||||||
"set."
|
"set."
|
||||||
)
|
)
|
||||||
|
|
||||||
if not os.path.exists(self.CONSUME):
|
if not os.path.exists(self.consume):
|
||||||
raise ConsumerError(
|
raise ConsumerError(
|
||||||
"Consumption directory {} does not exist".format(self.CONSUME))
|
"Consumption directory {} does not exist".format(self.consume))
|
||||||
|
|
||||||
self.parsers = []
|
self.parsers = []
|
||||||
for response in document_consumer_declaration.send(self):
|
for response in document_consumer_declaration.send(self):
|
||||||
@ -73,11 +73,11 @@ class Consumer:
|
|||||||
"group": self.logging_group
|
"group": self.logging_group
|
||||||
})
|
})
|
||||||
|
|
||||||
def consume(self):
|
def run(self):
|
||||||
|
|
||||||
for doc in os.listdir(self.CONSUME):
|
for doc in os.listdir(self.consume):
|
||||||
|
|
||||||
doc = os.path.join(self.CONSUME, doc)
|
doc = os.path.join(self.consume, doc)
|
||||||
|
|
||||||
if not os.path.isfile(doc):
|
if not os.path.isfile(doc):
|
||||||
continue
|
continue
|
||||||
@ -226,8 +226,8 @@ class Consumer:
|
|||||||
|
|
||||||
def _is_ready(self, doc):
|
def _is_ready(self, doc):
|
||||||
"""
|
"""
|
||||||
Detect whether `doc` is ready to consume or if it's still being written
|
Detect whether ``doc`` is ready to consume or if it's still being
|
||||||
to by the uploader.
|
written to by the uploader.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
t = os.stat(doc).st_mtime
|
t = os.stat(doc).st_mtime
|
||||||
|
@ -92,7 +92,7 @@ class UploadForm(forms.Form):
|
|||||||
|
|
||||||
t = int(mktime(datetime.now().timetuple()))
|
t = int(mktime(datetime.now().timetuple()))
|
||||||
file_name = os.path.join(
|
file_name = os.path.join(
|
||||||
Consumer.CONSUME,
|
settings.CONSUMPTION_DIR,
|
||||||
"{} - {}.{}".format(correspondent, title, self._file_type)
|
"{} - {}.{}".format(correspondent, title, self._file_type)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -151,7 +151,7 @@ class Attachment(object):
|
|||||||
|
|
||||||
class MailFetcher(Loggable):
|
class MailFetcher(Loggable):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, consume=settings.CONSUMPTION_DIR):
|
||||||
|
|
||||||
Loggable.__init__(self)
|
Loggable.__init__(self)
|
||||||
|
|
||||||
@ -165,6 +165,7 @@ class MailFetcher(Loggable):
|
|||||||
self._enabled = bool(self._host)
|
self._enabled = bool(self._host)
|
||||||
|
|
||||||
self.last_checked = datetime.datetime.now()
|
self.last_checked = datetime.datetime.now()
|
||||||
|
self.consume = consume
|
||||||
|
|
||||||
def pull(self):
|
def pull(self):
|
||||||
"""
|
"""
|
||||||
@ -185,7 +186,7 @@ class MailFetcher(Loggable):
|
|||||||
self.log("info", 'Storing email: "{}"'.format(message.subject))
|
self.log("info", 'Storing email: "{}"'.format(message.subject))
|
||||||
|
|
||||||
t = int(time.mktime(message.time.timetuple()))
|
t = int(time.mktime(message.time.timetuple()))
|
||||||
file_name = os.path.join(Consumer.CONSUME, message.file_name)
|
file_name = os.path.join(self.consume, message.file_name)
|
||||||
with open(file_name, "wb") as f:
|
with open(file_name, "wb") as f:
|
||||||
f.write(message.attachment.data)
|
f.write(message.attachment.data)
|
||||||
os.utime(file_name, times=(t, t))
|
os.utime(file_name, times=(t, t))
|
||||||
|
@ -16,9 +16,6 @@ class Command(BaseCommand):
|
|||||||
consumption directory, and fetch any mail available.
|
consumption directory, and fetch any mail available.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
LOOP_TIME = settings.CONSUMER_LOOP_TIME
|
|
||||||
MAIL_DELTA = datetime.timedelta(minutes=10)
|
|
||||||
|
|
||||||
ORIGINAL_DOCS = os.path.join(settings.MEDIA_ROOT, "documents", "originals")
|
ORIGINAL_DOCS = os.path.join(settings.MEDIA_ROOT, "documents", "originals")
|
||||||
THUMB_DOCS = os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails")
|
THUMB_DOCS = os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails")
|
||||||
|
|
||||||
@ -32,13 +29,41 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
BaseCommand.__init__(self, *args, **kwargs)
|
BaseCommand.__init__(self, *args, **kwargs)
|
||||||
|
|
||||||
|
def add_arguments(self, parser):
|
||||||
|
parser.add_argument(
|
||||||
|
"directory",
|
||||||
|
default=settings.CONSUMPTION_DIR,
|
||||||
|
nargs="?",
|
||||||
|
help="The consumption directory."
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--loop-time",
|
||||||
|
default=settings.CONSUMER_LOOP_TIME,
|
||||||
|
type=int,
|
||||||
|
help="Wait time between each loop (in seconds)."
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--mail-delta",
|
||||||
|
default=10,
|
||||||
|
type=int,
|
||||||
|
help="Wait time between each mail fetch (in minutes)."
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--oneshot",
|
||||||
|
action="store_true",
|
||||||
|
help="Run only once."
|
||||||
|
)
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
|
||||||
self.verbosity = options["verbosity"]
|
self.verbosity = options["verbosity"]
|
||||||
|
directory = options["directory"]
|
||||||
|
loop_time = options["loop_time"]
|
||||||
|
mail_delta = datetime.timedelta(minutes=options["mail_delta"])
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.file_consumer = Consumer()
|
self.file_consumer = Consumer(consume=directory)
|
||||||
self.mail_fetcher = MailFetcher()
|
self.mail_fetcher = MailFetcher(consume=directory)
|
||||||
except (ConsumerError, MailFetcherError) as e:
|
except (ConsumerError, MailFetcherError) as e:
|
||||||
raise CommandError(e)
|
raise CommandError(e)
|
||||||
|
|
||||||
@ -49,27 +74,32 @@ class Command(BaseCommand):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
logging.getLogger(__name__).info(
|
logging.getLogger(__name__).info(
|
||||||
"Starting document consumer at {}".format(settings.CONSUMPTION_DIR)
|
"Starting document consumer at {}".format(directory)
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
if options["oneshot"]:
|
||||||
while True:
|
self.loop(mail_delta=mail_delta)
|
||||||
self.loop()
|
else:
|
||||||
time.sleep(self.LOOP_TIME)
|
try:
|
||||||
if self.verbosity > 1:
|
while True:
|
||||||
print(".")
|
self.loop(mail_delta=mail_delta)
|
||||||
except KeyboardInterrupt:
|
time.sleep(loop_time)
|
||||||
print("Exiting")
|
if self.verbosity > 1:
|
||||||
|
print(".", int(time.time()))
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
print("Exiting")
|
||||||
|
|
||||||
def loop(self):
|
def loop(self, mail_delta):
|
||||||
|
|
||||||
# Consume whatever files we can
|
|
||||||
self.file_consumer.consume()
|
|
||||||
|
|
||||||
# Occasionally fetch mail and store it to be consumed on the next loop
|
# Occasionally fetch mail and store it to be consumed on the next loop
|
||||||
# We fetch email when we first start up so that it is not necessary to
|
# We fetch email when we first start up so that it is not necessary to
|
||||||
# wait for 10 minutes after making changes to the config file.
|
# wait for 10 minutes after making changes to the config file.
|
||||||
delta = self.mail_fetcher.last_checked + self.MAIL_DELTA
|
delta = self.mail_fetcher.last_checked + mail_delta
|
||||||
if self.first_iteration or delta < datetime.datetime.now():
|
if self.first_iteration or delta < datetime.datetime.now():
|
||||||
self.first_iteration = False
|
self.first_iteration = False
|
||||||
self.mail_fetcher.pull()
|
self.mail_fetcher.pull()
|
||||||
|
|
||||||
|
# Consume whatever files we can.
|
||||||
|
# We have to run twice as the first run checks for file readiness
|
||||||
|
for i in range(2):
|
||||||
|
self.file_consumer.run()
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
from tempfile import TemporaryDirectory
|
||||||
|
|
||||||
from ..consumer import Consumer
|
from ..consumer import Consumer
|
||||||
from ..models import FileInfo
|
from ..models import FileInfo
|
||||||
@ -16,7 +17,6 @@ class TestConsumer(TestCase):
|
|||||||
self.DummyParser
|
self.DummyParser
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch("documents.consumer.Consumer.CONSUME")
|
|
||||||
@mock.patch("documents.consumer.os.makedirs")
|
@mock.patch("documents.consumer.os.makedirs")
|
||||||
@mock.patch("documents.consumer.os.path.exists", return_value=True)
|
@mock.patch("documents.consumer.os.path.exists", return_value=True)
|
||||||
@mock.patch("documents.consumer.document_consumer_declaration.send")
|
@mock.patch("documents.consumer.document_consumer_declaration.send")
|
||||||
@ -32,18 +32,22 @@ class TestConsumer(TestCase):
|
|||||||
(None, lambda _: {"weight": 0, "parser": DummyParser1}),
|
(None, lambda _: {"weight": 0, "parser": DummyParser1}),
|
||||||
(None, lambda _: {"weight": 1, "parser": DummyParser2}),
|
(None, lambda _: {"weight": 1, "parser": DummyParser2}),
|
||||||
)
|
)
|
||||||
|
with TemporaryDirectory() as tmpdir:
|
||||||
|
self.assertEqual(
|
||||||
|
Consumer(consume=tmpdir)._get_parser_class("doc.pdf"),
|
||||||
|
DummyParser2
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual(Consumer()._get_parser_class("doc.pdf"), DummyParser2)
|
|
||||||
|
|
||||||
@mock.patch("documents.consumer.Consumer.CONSUME")
|
|
||||||
@mock.patch("documents.consumer.os.makedirs")
|
@mock.patch("documents.consumer.os.makedirs")
|
||||||
@mock.patch("documents.consumer.os.path.exists", return_value=True)
|
@mock.patch("documents.consumer.os.path.exists", return_value=True)
|
||||||
@mock.patch("documents.consumer.document_consumer_declaration.send")
|
@mock.patch("documents.consumer.document_consumer_declaration.send")
|
||||||
def test__get_parser_class_0_parsers(self, m, *args):
|
def test__get_parser_class_0_parsers(self, m, *args):
|
||||||
m.return_value = ((None, lambda _: None),)
|
m.return_value = ((None, lambda _: None),)
|
||||||
self.assertIsNone(Consumer()._get_parser_class("doc.pdf"))
|
with TemporaryDirectory() as tmpdir:
|
||||||
|
self.assertIsNone(
|
||||||
|
Consumer(consume=tmpdir)._get_parser_class("doc.pdf")
|
||||||
|
)
|
||||||
|
|
||||||
@mock.patch("documents.consumer.Consumer.CONSUME")
|
|
||||||
@mock.patch("documents.consumer.os.makedirs")
|
@mock.patch("documents.consumer.os.makedirs")
|
||||||
@mock.patch("documents.consumer.os.path.exists", return_value=True)
|
@mock.patch("documents.consumer.os.path.exists", return_value=True)
|
||||||
@mock.patch("documents.consumer.document_consumer_declaration.send")
|
@mock.patch("documents.consumer.document_consumer_declaration.send")
|
||||||
@ -51,7 +55,8 @@ class TestConsumer(TestCase):
|
|||||||
m.return_value = (
|
m.return_value = (
|
||||||
(None, lambda _: {"weight": 0, "parser": self.DummyParser}),
|
(None, lambda _: {"weight": 0, "parser": self.DummyParser}),
|
||||||
)
|
)
|
||||||
return Consumer()
|
with TemporaryDirectory() as tmpdir:
|
||||||
|
return Consumer(consume=tmpdir)
|
||||||
|
|
||||||
|
|
||||||
class TestAttributes(TestCase):
|
class TestAttributes(TestCase):
|
||||||
|
@ -6,7 +6,6 @@ exclude = migrations, paperless/settings.py, .tox
|
|||||||
DJANGO_SETTINGS_MODULE=paperless.settings
|
DJANGO_SETTINGS_MODULE=paperless.settings
|
||||||
addopts = --pythonwarnings=all -n auto
|
addopts = --pythonwarnings=all -n auto
|
||||||
env =
|
env =
|
||||||
PAPERLESS_CONSUME=/tmp
|
|
||||||
PAPERLESS_PASSPHRASE=THISISNOTASECRET
|
PAPERLESS_PASSPHRASE=THISISNOTASECRET
|
||||||
PAPERLESS_SECRET=paperless
|
PAPERLESS_SECRET=paperless
|
||||||
PAPERLESS_EMAIL_SECRET=paperless
|
PAPERLESS_EMAIL_SECRET=paperless
|
||||||
|
Loading…
x
Reference in New Issue
Block a user