Creates a data model for document consumption, allowing stronger typing of arguments and letting some information about the file be set only once

This commit is contained in:
Trenton H
2023-01-23 15:55:49 -08:00
parent fa60251c18
commit 3c2bbf244d
14 changed files with 596 additions and 433 deletions

View File

@@ -1,6 +1,7 @@
import filecmp
import os
import shutil
from pathlib import Path
from threading import Thread
from time import sleep
from unittest import mock
@@ -11,9 +12,12 @@ from django.core.management import CommandError
from django.test import override_settings
from django.test import TransactionTestCase
from documents.consumer import ConsumerError
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.management.commands import document_consumer
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import DocumentConsumeDelayMixin
class ConsumerThread(Thread):
@@ -35,18 +39,19 @@ def chunked(size, source):
yield source[i : i + size]
class ConsumerMixin:
class ConsumerThreadMixin(DocumentConsumeDelayMixin):
"""
Provides a thread which runs the consumer management command at setUp
and stops it at tearDown
"""
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
sample_file: Path = (
Path(__file__).parent / Path("samples") / Path("simple.pdf")
).resolve()
def setUp(self) -> None:
super().setUp()
self.t = None
patcher = mock.patch(
"documents.tasks.consume_file.delay",
)
self.task_mock = patcher.start()
self.addCleanup(patcher.stop)
def t_start(self):
self.t = ConsumerThread()
@@ -67,7 +72,7 @@ class ConsumerMixin:
def wait_for_task_mock_call(self, expected_call_count=1):
n = 0
while n < 50:
if self.task_mock.call_count >= expected_call_count:
if self.consume_file_mock.call_count >= expected_call_count:
# give the mocked consume task some time to finish and raise errors
sleep(1)
return
@@ -76,8 +81,12 @@ class ConsumerMixin:
# A bogus consume task that simply compares the consumed file against
# the sample file and raises an exception if they differ (incomplete read).
def bogus_task(self, filename, **kwargs):
eq = filecmp.cmp(filename, self.sample_file, shallow=False)
def bogus_task(
self,
input_doc: ConsumableDocument,
overrides=None,
):
eq = filecmp.cmp(input_doc.original_file, self.sample_file, shallow=False)
if not eq:
print("Consumed an INVALID file.")
raise ConsumerError("Incomplete File READ FAILED")
@@ -103,19 +112,20 @@ class ConsumerMixin:
@override_settings(
CONSUMER_INOTIFY_DELAY=0.01,
)
class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
class TestConsumer(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCase):
def test_consume_file(self):
self.t_start()
f = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
f = Path(os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
shutil.copy(self.sample_file, f)
self.wait_for_task_mock_call()
self.task_mock.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[0], f)
input_doc, _ = self.get_last_consume_delay_call_args()
self.assertEqual(input_doc.original_file, f)
def test_consume_file_invalid_ext(self):
self.t_start()
@@ -125,26 +135,27 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.wait_for_task_mock_call()
self.task_mock.assert_not_called()
self.consume_file_mock.assert_not_called()
def test_consume_existing_file(self):
f = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
f = Path(os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
shutil.copy(self.sample_file, f)
self.t_start()
self.task_mock.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[0], f)
input_doc, _ = self.get_last_consume_delay_call_args()
self.assertEqual(input_doc.original_file, f)
@mock.patch("documents.management.commands.document_consumer.logger.error")
def test_slow_write_pdf(self, error_logger):
self.task_mock.side_effect = self.bogus_task
self.consume_file_mock.side_effect = self.bogus_task
self.t_start()
fname = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
fname = Path(os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
self.slow_write_file(fname)
@@ -152,48 +163,52 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
error_logger.assert_not_called()
self.task_mock.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[0], fname)
input_doc, _ = self.get_last_consume_delay_call_args()
self.assertEqual(input_doc.original_file, fname)
@mock.patch("documents.management.commands.document_consumer.logger.error")
def test_slow_write_and_move(self, error_logger):
self.task_mock.side_effect = self.bogus_task
self.consume_file_mock.side_effect = self.bogus_task
self.t_start()
fname = os.path.join(self.dirs.consumption_dir, "my_file.~df")
fname2 = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
fname = Path(os.path.join(self.dirs.consumption_dir, "my_file.~df"))
fname2 = Path(os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
self.slow_write_file(fname)
shutil.move(fname, fname2)
self.wait_for_task_mock_call()
self.task_mock.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[0], fname2)
input_doc, _ = self.get_last_consume_delay_call_args()
self.assertEqual(input_doc.original_file, fname2)
error_logger.assert_not_called()
@mock.patch("documents.management.commands.document_consumer.logger.error")
def test_slow_write_incomplete(self, error_logger):
self.task_mock.side_effect = self.bogus_task
self.consume_file_mock.side_effect = self.bogus_task
self.t_start()
fname = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
fname = Path(os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
self.slow_write_file(fname, incomplete=True)
self.wait_for_task_mock_call()
self.task_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[0], fname)
self.consume_file_mock.assert_called_once()
input_doc, _ = self.get_last_consume_delay_call_args()
self.assertEqual(input_doc.original_file, fname)
# assert that we have an error logged with this invalid file.
error_logger.assert_called_once()
@@ -209,7 +224,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.assertRaises(CommandError, call_command, "document_consumer", "--oneshot")
def test_mac_write(self):
self.task_mock.side_effect = self.bogus_task
self.consume_file_mock.side_effect = self.bogus_task
self.t_start()
@@ -238,12 +253,13 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.wait_for_task_mock_call(expected_call_count=2)
self.assertEqual(2, self.task_mock.call_count)
self.assertEqual(2, self.consume_file_mock.call_count)
fnames = [
os.path.basename(args[0]) for args, _ in self.task_mock.call_args_list
]
self.assertCountEqual(fnames, ["my_file.pdf", "my_second_file.pdf"])
consumed_files = []
for input_doc, _ in self.get_all_consume_delay_call_args():
consumed_files.append(input_doc.original_file.name)
self.assertCountEqual(consumed_files, ["my_file.pdf", "my_second_file.pdf"])
def test_is_ignored(self):
test_paths = [
@@ -341,7 +357,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.wait_for_task_mock_call()
self.task_mock.assert_not_called()
self.consume_file_mock.assert_not_called()
@override_settings(
@@ -373,7 +389,7 @@ class TestConsumerRecursivePolling(TestConsumer):
pass
class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
class TestConsumerTags(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCase):
@override_settings(CONSUMER_RECURSIVE=True, CONSUMER_SUBDIRS_AS_TAGS=True)
def test_consume_file_with_path_tags(self):
@@ -387,7 +403,7 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
path = os.path.join(self.dirs.consumption_dir, *tag_names)
os.makedirs(path, exist_ok=True)
f = os.path.join(path, "my_file.pdf")
f = Path(os.path.join(path, "my_file.pdf"))
# Wait at least inotify read_delay for recursive watchers
# to be created for the new directories
sleep(1)
@@ -395,18 +411,19 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.wait_for_task_mock_call()
self.task_mock.assert_called_once()
self.consume_file_mock.assert_called_once()
# Add the pk of the Tag created by _consume()
tag_ids.append(Tag.objects.get(name=tag_names[1]).pk)
args, kwargs = self.task_mock.call_args
self.assertEqual(args[0], f)
input_doc, overrides = self.get_last_consume_delay_call_args()
self.assertEqual(input_doc.original_file, f)
# Despite its name, assertCountEqual checks that the first
# sequence contains the same elements as the second, regardless of
# their order.
self.assertCountEqual(kwargs["override_tag_ids"], tag_ids)
self.assertCountEqual(overrides.tag_ids, tag_ids)
@override_settings(
CONSUMER_POLLING=1,