mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-09-06 21:13:43 -05:00
Creates a data model for the document consumption, allowing stronger typing of arguments and setting of some information about the file only once
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import filecmp
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
from time import sleep
|
||||
from unittest import mock
|
||||
@@ -11,9 +12,12 @@ from django.core.management import CommandError
|
||||
from django.test import override_settings
|
||||
from django.test import TransactionTestCase
|
||||
from documents.consumer import ConsumerError
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
from documents.management.commands import document_consumer
|
||||
from documents.models import Tag
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import DocumentConsumeDelayMixin
|
||||
|
||||
|
||||
class ConsumerThread(Thread):
|
||||
@@ -35,18 +39,19 @@ def chunked(size, source):
|
||||
yield source[i : i + size]
|
||||
|
||||
|
||||
class ConsumerMixin:
|
||||
class ConsumerThreadMixin(DocumentConsumeDelayMixin):
|
||||
"""
|
||||
Provides a thread which runs the consumer management command at setUp
|
||||
and stops it at tearDown
|
||||
"""
|
||||
|
||||
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
||||
sample_file: Path = (
|
||||
Path(__file__).parent / Path("samples") / Path("simple.pdf")
|
||||
).resolve()
|
||||
|
||||
def setUp(self) -> None:
|
||||
super().setUp()
|
||||
self.t = None
|
||||
patcher = mock.patch(
|
||||
"documents.tasks.consume_file.delay",
|
||||
)
|
||||
self.task_mock = patcher.start()
|
||||
self.addCleanup(patcher.stop)
|
||||
|
||||
def t_start(self):
|
||||
self.t = ConsumerThread()
|
||||
@@ -67,7 +72,7 @@ class ConsumerMixin:
|
||||
def wait_for_task_mock_call(self, expected_call_count=1):
|
||||
n = 0
|
||||
while n < 50:
|
||||
if self.task_mock.call_count >= expected_call_count:
|
||||
if self.consume_file_mock.call_count >= expected_call_count:
|
||||
# give task_mock some time to finish and raise errors
|
||||
sleep(1)
|
||||
return
|
||||
@@ -76,8 +81,12 @@ class ConsumerMixin:
|
||||
|
||||
# A bogus async_task that will simply check the file for
|
||||
# completeness and raise an exception otherwise.
|
||||
def bogus_task(self, filename, **kwargs):
|
||||
eq = filecmp.cmp(filename, self.sample_file, shallow=False)
|
||||
def bogus_task(
|
||||
self,
|
||||
input_doc: ConsumableDocument,
|
||||
overrides=None,
|
||||
):
|
||||
eq = filecmp.cmp(input_doc.original_file, self.sample_file, shallow=False)
|
||||
if not eq:
|
||||
print("Consumed an INVALID file.")
|
||||
raise ConsumerError("Incomplete File READ FAILED")
|
||||
@@ -103,19 +112,20 @@ class ConsumerMixin:
|
||||
@override_settings(
|
||||
CONSUMER_INOTIFY_DELAY=0.01,
|
||||
)
|
||||
class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||
class TestConsumer(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCase):
|
||||
def test_consume_file(self):
|
||||
self.t_start()
|
||||
|
||||
f = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
|
||||
f = Path(os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
|
||||
shutil.copy(self.sample_file, f)
|
||||
|
||||
self.wait_for_task_mock_call()
|
||||
|
||||
self.task_mock.assert_called_once()
|
||||
self.consume_file_mock.assert_called_once()
|
||||
|
||||
args, kwargs = self.task_mock.call_args
|
||||
self.assertEqual(args[0], f)
|
||||
input_doc, _ = self.get_last_consume_delay_call_args()
|
||||
|
||||
self.assertEqual(input_doc.original_file, f)
|
||||
|
||||
def test_consume_file_invalid_ext(self):
|
||||
self.t_start()
|
||||
@@ -125,26 +135,27 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||
|
||||
self.wait_for_task_mock_call()
|
||||
|
||||
self.task_mock.assert_not_called()
|
||||
self.consume_file_mock.assert_not_called()
|
||||
|
||||
def test_consume_existing_file(self):
|
||||
f = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
|
||||
f = Path(os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
|
||||
shutil.copy(self.sample_file, f)
|
||||
|
||||
self.t_start()
|
||||
self.task_mock.assert_called_once()
|
||||
self.consume_file_mock.assert_called_once()
|
||||
|
||||
args, kwargs = self.task_mock.call_args
|
||||
self.assertEqual(args[0], f)
|
||||
input_doc, _ = self.get_last_consume_delay_call_args()
|
||||
|
||||
self.assertEqual(input_doc.original_file, f)
|
||||
|
||||
@mock.patch("documents.management.commands.document_consumer.logger.error")
|
||||
def test_slow_write_pdf(self, error_logger):
|
||||
|
||||
self.task_mock.side_effect = self.bogus_task
|
||||
self.consume_file_mock.side_effect = self.bogus_task
|
||||
|
||||
self.t_start()
|
||||
|
||||
fname = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
|
||||
fname = Path(os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
|
||||
|
||||
self.slow_write_file(fname)
|
||||
|
||||
@@ -152,48 +163,52 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||
|
||||
error_logger.assert_not_called()
|
||||
|
||||
self.task_mock.assert_called_once()
|
||||
self.consume_file_mock.assert_called_once()
|
||||
|
||||
args, kwargs = self.task_mock.call_args
|
||||
self.assertEqual(args[0], fname)
|
||||
input_doc, _ = self.get_last_consume_delay_call_args()
|
||||
|
||||
self.assertEqual(input_doc.original_file, fname)
|
||||
|
||||
@mock.patch("documents.management.commands.document_consumer.logger.error")
|
||||
def test_slow_write_and_move(self, error_logger):
|
||||
|
||||
self.task_mock.side_effect = self.bogus_task
|
||||
self.consume_file_mock.side_effect = self.bogus_task
|
||||
|
||||
self.t_start()
|
||||
|
||||
fname = os.path.join(self.dirs.consumption_dir, "my_file.~df")
|
||||
fname2 = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
|
||||
fname = Path(os.path.join(self.dirs.consumption_dir, "my_file.~df"))
|
||||
fname2 = Path(os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
|
||||
|
||||
self.slow_write_file(fname)
|
||||
shutil.move(fname, fname2)
|
||||
|
||||
self.wait_for_task_mock_call()
|
||||
|
||||
self.task_mock.assert_called_once()
|
||||
self.consume_file_mock.assert_called_once()
|
||||
|
||||
args, kwargs = self.task_mock.call_args
|
||||
self.assertEqual(args[0], fname2)
|
||||
input_doc, _ = self.get_last_consume_delay_call_args()
|
||||
|
||||
self.assertEqual(input_doc.original_file, fname2)
|
||||
|
||||
error_logger.assert_not_called()
|
||||
|
||||
@mock.patch("documents.management.commands.document_consumer.logger.error")
|
||||
def test_slow_write_incomplete(self, error_logger):
|
||||
|
||||
self.task_mock.side_effect = self.bogus_task
|
||||
self.consume_file_mock.side_effect = self.bogus_task
|
||||
|
||||
self.t_start()
|
||||
|
||||
fname = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
|
||||
fname = Path(os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
|
||||
self.slow_write_file(fname, incomplete=True)
|
||||
|
||||
self.wait_for_task_mock_call()
|
||||
|
||||
self.task_mock.assert_called_once()
|
||||
args, kwargs = self.task_mock.call_args
|
||||
self.assertEqual(args[0], fname)
|
||||
self.consume_file_mock.assert_called_once()
|
||||
|
||||
input_doc, _ = self.get_last_consume_delay_call_args()
|
||||
|
||||
self.assertEqual(input_doc.original_file, fname)
|
||||
|
||||
# assert that we have an error logged with this invalid file.
|
||||
error_logger.assert_called_once()
|
||||
@@ -209,7 +224,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||
self.assertRaises(CommandError, call_command, "document_consumer", "--oneshot")
|
||||
|
||||
def test_mac_write(self):
|
||||
self.task_mock.side_effect = self.bogus_task
|
||||
self.consume_file_mock.side_effect = self.bogus_task
|
||||
|
||||
self.t_start()
|
||||
|
||||
@@ -238,12 +253,13 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||
|
||||
self.wait_for_task_mock_call(expected_call_count=2)
|
||||
|
||||
self.assertEqual(2, self.task_mock.call_count)
|
||||
self.assertEqual(2, self.consume_file_mock.call_count)
|
||||
|
||||
fnames = [
|
||||
os.path.basename(args[0]) for args, _ in self.task_mock.call_args_list
|
||||
]
|
||||
self.assertCountEqual(fnames, ["my_file.pdf", "my_second_file.pdf"])
|
||||
consumed_files = []
|
||||
for input_doc, _ in self.get_all_consume_delay_call_args():
|
||||
consumed_files.append(input_doc.original_file.name)
|
||||
|
||||
self.assertCountEqual(consumed_files, ["my_file.pdf", "my_second_file.pdf"])
|
||||
|
||||
def test_is_ignored(self):
|
||||
test_paths = [
|
||||
@@ -341,7 +357,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||
|
||||
self.wait_for_task_mock_call()
|
||||
|
||||
self.task_mock.assert_not_called()
|
||||
self.consume_file_mock.assert_not_called()
|
||||
|
||||
|
||||
@override_settings(
|
||||
@@ -373,7 +389,7 @@ class TestConsumerRecursivePolling(TestConsumer):
|
||||
pass
|
||||
|
||||
|
||||
class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||
class TestConsumerTags(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCase):
|
||||
@override_settings(CONSUMER_RECURSIVE=True, CONSUMER_SUBDIRS_AS_TAGS=True)
|
||||
def test_consume_file_with_path_tags(self):
|
||||
|
||||
@@ -387,7 +403,7 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||
|
||||
path = os.path.join(self.dirs.consumption_dir, *tag_names)
|
||||
os.makedirs(path, exist_ok=True)
|
||||
f = os.path.join(path, "my_file.pdf")
|
||||
f = Path(os.path.join(path, "my_file.pdf"))
|
||||
# Wait at least inotify read_delay for recursive watchers
|
||||
# to be created for the new directories
|
||||
sleep(1)
|
||||
@@ -395,18 +411,19 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||
|
||||
self.wait_for_task_mock_call()
|
||||
|
||||
self.task_mock.assert_called_once()
|
||||
self.consume_file_mock.assert_called_once()
|
||||
|
||||
# Add the pk of the Tag created by _consume()
|
||||
tag_ids.append(Tag.objects.get(name=tag_names[1]).pk)
|
||||
|
||||
args, kwargs = self.task_mock.call_args
|
||||
self.assertEqual(args[0], f)
|
||||
input_doc, overrides = self.get_last_consume_delay_call_args()
|
||||
|
||||
self.assertEqual(input_doc.original_file, f)
|
||||
|
||||
# assertCountEqual has a bad name, but test that the first
|
||||
# sequence contains the same elements as second, regardless of
|
||||
# their order.
|
||||
self.assertCountEqual(kwargs["override_tag_ids"], tag_ids)
|
||||
self.assertCountEqual(overrides.tag_ids, tag_ids)
|
||||
|
||||
@override_settings(
|
||||
CONSUMER_POLLING=1,
|
||||
|
Reference in New Issue
Block a user