From 70d8e8bc56b11146e764f1b3e78246d8081c9985 Mon Sep 17 00:00:00 2001
From: Jonas Winkler
Date: Mon, 16 Nov 2020 23:16:37 +0100
Subject: [PATCH] added more testing

---
 src/documents/consumer.py                     |   6 +-
 .../management/commands/document_consumer.py  |   3 -
 src/documents/tests/test_consumer.py          | 261 +++++++++++++++++-
 src/paperless/settings.py                     |   1 +
 src/setup.cfg                                 |   1 -
 5 files changed, 264 insertions(+), 8 deletions(-)

diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index 7f0fd7d21..6239e7d2a 100755
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -72,8 +72,10 @@ class Consumer:
             )
 
     @staticmethod
-    def pre_check_scratch_fir():
+    def pre_check_directories():
         os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
+        os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True)
+        os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
 
     def log(self, level, message):
         getattr(self.logger, level)(message, extra={
@@ -100,7 +102,7 @@ class Consumer:
 
         self.pre_check_file_exists(filename)
         self.pre_check_consumption_dir()
-        self.pre_check_scratch_fir()
+        self.pre_check_directories()
         self.pre_check_regex(filename)
         self.pre_check_duplicate(filename)
 
diff --git a/src/documents/management/commands/document_consumer.py b/src/documents/management/commands/document_consumer.py
index d991b722a..ec48daa96 100644
--- a/src/documents/management/commands/document_consumer.py
+++ b/src/documents/management/commands/document_consumer.py
@@ -61,9 +61,6 @@ class Command(BaseCommand):
         self.verbosity = options["verbosity"]
         directory = options["directory"]
 
-        for d in (settings.ORIGINALS_DIR, settings.THUMBNAIL_DIR):
-            os.makedirs(d, exist_ok=True)
-
         logging.getLogger(__name__).info(
             "Starting document consumer at {}".format(
                 directory
diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py
index 4d5360c7b..1661eef5b 100644
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -1,8 +1,17 @@
+import os
 import re
+import shutil
+import tempfile
+from unittest import mock
+from unittest.mock import MagicMock
 
-from django.test import TestCase
+from django.conf import settings
+from django.db import DatabaseError
+from django.test import TestCase, override_settings
 
-from ..models import FileInfo, Tag
+from ..consumer import Consumer, ConsumerError
+from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
+from ..parsers import DocumentParser, ParseError
 
 
 class TestAttributes(TestCase):
@@ -394,3 +403,251 @@ class TestFieldPermutations(TestCase):
         self.assertEqual(info.created.year, 2019)
         self.assertEqual(info.created.month, 9)
         self.assertEqual(info.created.day, 8)
+
+
+class DummyParser(DocumentParser):
+
+    def get_thumbnail(self):
+        # not important during tests
+        raise NotImplementedError()
+
+    def __init__(self, path, logging_group, scratch_dir):
+        super(DummyParser, self).__init__(path, logging_group)
+        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
+
+    def get_optimised_thumbnail(self):
+        return self.fake_thumb
+
+    def get_text(self):
+        return "The Text"
+
+
+class FaultyParser(DocumentParser):
+
+    def get_thumbnail(self):
+        # not important during tests
+        raise NotImplementedError()
+
+    def __init__(self, path, logging_group, scratch_dir):
+        super(FaultyParser, self).__init__(path, logging_group)
+        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
+
+    def get_optimised_thumbnail(self):
+        return self.fake_thumb
+
+    def get_text(self):
+        raise ParseError("Does not compute.")
+
+
+class TestConsumer(TestCase):
+
+    def make_dummy_parser(self, path, logging_group):
+        return DummyParser(path, logging_group, self.scratch_dir)
+
+    def make_faulty_parser(self, path, logging_group):
+        return FaultyParser(path, logging_group, self.scratch_dir)
+
+    def setUp(self):
+        self.scratch_dir = tempfile.mkdtemp()
+        self.media_dir = tempfile.mkdtemp()
+
+        override_settings(
+            SCRATCH_DIR=self.scratch_dir,
+            MEDIA_ROOT=self.media_dir,
+            ORIGINALS_DIR=os.path.join(self.media_dir, "documents", "originals"),
+            THUMBNAIL_DIR=os.path.join(self.media_dir, "documents", "thumbnails")
+        ).enable()
+
+        patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
+        m = patcher.start()
+        m.return_value = [(None, {
+            "parser": self.make_dummy_parser,
+            "test": lambda _: True,
+            "weight": 0
+        })]
+
+        self.addCleanup(patcher.stop)
+
+        self.consumer = Consumer()
+
+    def tearDown(self):
+        shutil.rmtree(self.scratch_dir, ignore_errors=True)
+        shutil.rmtree(self.media_dir, ignore_errors=True)
+
+    def get_test_file(self):
+        fd, f = tempfile.mkstemp(suffix=".pdf", dir=self.scratch_dir)
+        return f
+
+    def testNormalOperation(self):
+
+        filename = self.get_test_file()
+        document = self.consumer.try_consume_file(filename)
+
+        self.assertEqual(document.content, "The Text")
+        self.assertEqual(document.title, os.path.splitext(os.path.basename(filename))[0])
+        self.assertIsNone(document.correspondent)
+        self.assertIsNone(document.document_type)
+        self.assertEqual(document.filename, "0000001.pdf")
+
+        self.assertTrue(os.path.isfile(
+            document.source_path
+        ))
+
+        self.assertTrue(os.path.isfile(
+            document.thumbnail_path
+        ))
+
+        self.assertFalse(os.path.isfile(filename))
+
+    def testOverrideFilename(self):
+        filename = self.get_test_file()
+        overrideFilename = "My Bank - Statement for November.pdf"
+
+        document = self.consumer.try_consume_file(filename, original_filename=overrideFilename)
+
+        self.assertEqual(document.correspondent.name, "My Bank")
+        self.assertEqual(document.title, "Statement for November")
+
+    def testOverrideTitle(self):
+
+        document = self.consumer.try_consume_file(self.get_test_file(), force_title="Override Title")
+        self.assertEqual(document.title, "Override Title")
+
+    def testOverrideCorrespondent(self):
+        c = Correspondent.objects.create(name="test")
+
+        document = self.consumer.try_consume_file(self.get_test_file(), force_correspondent_id=c.pk)
+        self.assertEqual(document.correspondent.id, c.id)
+
+    def testOverrideDocumentType(self):
+        dt = DocumentType.objects.create(name="test")
+
+        document = self.consumer.try_consume_file(self.get_test_file(), force_document_type_id=dt.pk)
+        self.assertEqual(document.document_type.id, dt.id)
+
+    def testOverrideTags(self):
+        t1 = Tag.objects.create(name="t1")
+        t2 = Tag.objects.create(name="t2")
+        t3 = Tag.objects.create(name="t3")
+        document = self.consumer.try_consume_file(self.get_test_file(), force_tag_ids=[t1.id, t3.id])
+
+        self.assertIn(t1, document.tags.all())
+        self.assertNotIn(t2, document.tags.all())
+        self.assertIn(t3, document.tags.all())
+
+    def testNotAFile(self):
+        try:
+            self.consumer.try_consume_file("non-existing-file")
+        except ConsumerError as e:
+            self.assertTrue(str(e).endswith('It is not a file'))
+            return
+
+        self.fail("Should throw exception")
+
+    @override_settings(CONSUMPTION_DIR=None)
+    def testConsumptionDirUnset(self):
+        try:
+            self.consumer.try_consume_file(self.get_test_file())
+        except ConsumerError as e:
+            self.assertEqual(str(e), "The CONSUMPTION_DIR settings variable does not appear to be set.")
+            return
+
+        self.fail("Should throw exception")
+
+    @override_settings(CONSUMPTION_DIR="asd")
+    def testNoConsumptionDir(self):
+        try:
+            self.consumer.try_consume_file(self.get_test_file())
+        except ConsumerError as e:
+            self.assertEqual(str(e), "Consumption directory asd does not exist")
+            return
+
+        self.fail("Should throw exception")
+
+    def testDuplicates(self):
+        self.consumer.try_consume_file(self.get_test_file())
+
+        try:
+            self.consumer.try_consume_file(self.get_test_file())
+        except ConsumerError as e:
+            self.assertTrue(str(e).endswith("It is a duplicate."))
+            return
+
+        self.fail("Should throw exception")
+
+    @mock.patch("documents.parsers.document_consumer_declaration.send")
+    def testNoParsers(self, m):
+        m.return_value = []
+
+        try:
+            self.consumer.try_consume_file(self.get_test_file())
+        except ConsumerError as e:
+            self.assertTrue(str(e).startswith("No parsers abvailable"))
+            return
+
+        self.fail("Should throw exception")
+
+    @mock.patch("documents.parsers.document_consumer_declaration.send")
+    def testFaultyParser(self, m):
+        m.return_value = [(None, {
+            "parser": self.make_faulty_parser,
+            "test": lambda _: True,
+            "weight": 0
+        })]
+
+        try:
+            self.consumer.try_consume_file(self.get_test_file())
+        except ConsumerError as e:
+            self.assertEqual(str(e), "Does not compute.")
+            return
+
+        self.fail("Should throw exception.")
+
+    @mock.patch("documents.consumer.Consumer._write")
+    def testPostSaveError(self, m):
+        filename = self.get_test_file()
+        m.side_effect = OSError("NO.")
+        try:
+            self.consumer.try_consume_file(filename)
+        except ConsumerError as e:
+            self.assertEqual(str(e), "NO.")
+        else:
+            self.fail("Should raise exception")
+
+        # file not deleted
+        self.assertTrue(os.path.isfile(filename))
+
+        # Database empty
+        self.assertEqual(len(Document.objects.all()), 0)
+
+    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
+    def testFilenameHandling(self):
+        filename = self.get_test_file()
+
+        document = self.consumer.try_consume_file(filename, original_filename="Bank - Test.pdf", force_title="new docs")
+
+        print(document.source_path)
+        print("===")
+
+        self.assertEqual(document.title, "new docs")
+        self.assertEqual(document.correspondent.name, "Bank")
+        self.assertEqual(document.filename, "bank/new-docs-0000001.pdf")
+
+    @mock.patch("documents.consumer.DocumentClassifier")
+    def testClassifyDocument(self, m):
+        correspondent = Correspondent.objects.create(name="test")
+        dtype = DocumentType.objects.create(name="test")
+        t1 = Tag.objects.create(name="t1")
+        t2 = Tag.objects.create(name="t2")
+
+        m.return_value = MagicMock()
+        m.return_value.predict_correspondent.return_value = correspondent.pk
+        m.return_value.predict_document_type.return_value = dtype.pk
+        m.return_value.predict_tags.return_value = [t1.pk]
+
+        document = self.consumer.try_consume_file(self.get_test_file())
+
+        self.assertEqual(document.correspondent, correspondent)
+        self.assertEqual(document.document_type, dtype)
+        self.assertIn(t1, document.tags.all())
+        self.assertNotIn(t2, document.tags.all())
diff --git a/src/paperless/settings.py b/src/paperless/settings.py
index 18acf401a..0f9d9d7e9 100644
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -359,5 +359,6 @@ FILENAME_PARSE_TRANSFORMS = []
 for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
     FILENAME_PARSE_TRANSFORMS.append((re.compile(t["pattern"]), t["repl"]))
 
+# TODO: this should not have a prefix.
 # Specify the filename format for out files
 PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
diff --git a/src/setup.cfg b/src/setup.cfg
index 33bef4f4e..05b89eb51 100644
--- a/src/setup.cfg
+++ b/src/setup.cfg
@@ -6,7 +6,6 @@ ignore = E501
 DJANGO_SETTINGS_MODULE=paperless.settings
 addopts = --pythonwarnings=all
 env =
-  PAPERLESS_PASSPHRASE=THISISNOTASECRET
   PAPERLESS_SECRET=paperless
   PAPERLESS_EMAIL_SECRET=paperless
 