mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	added more testing
This commit is contained in:
		| @@ -72,8 +72,10 @@ class Consumer: | |||||||
|             ) |             ) | ||||||
|  |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def pre_check_scratch_fir(): |     def pre_check_directories(): | ||||||
|         os.makedirs(settings.SCRATCH_DIR, exist_ok=True) |         os.makedirs(settings.SCRATCH_DIR, exist_ok=True) | ||||||
|  |         os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True) | ||||||
|  |         os.makedirs(settings.ORIGINALS_DIR, exist_ok=True) | ||||||
|  |  | ||||||
|     def log(self, level, message): |     def log(self, level, message): | ||||||
|         getattr(self.logger, level)(message, extra={ |         getattr(self.logger, level)(message, extra={ | ||||||
| @@ -100,7 +102,7 @@ class Consumer: | |||||||
|  |  | ||||||
|         self.pre_check_file_exists(filename) |         self.pre_check_file_exists(filename) | ||||||
|         self.pre_check_consumption_dir() |         self.pre_check_consumption_dir() | ||||||
|         self.pre_check_scratch_fir() |         self.pre_check_directories() | ||||||
|         self.pre_check_regex(filename) |         self.pre_check_regex(filename) | ||||||
|         self.pre_check_duplicate(filename) |         self.pre_check_duplicate(filename) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -61,9 +61,6 @@ class Command(BaseCommand): | |||||||
|         self.verbosity = options["verbosity"] |         self.verbosity = options["verbosity"] | ||||||
|         directory = options["directory"] |         directory = options["directory"] | ||||||
|  |  | ||||||
|         for d in (settings.ORIGINALS_DIR, settings.THUMBNAIL_DIR): |  | ||||||
|             os.makedirs(d, exist_ok=True) |  | ||||||
|  |  | ||||||
|         logging.getLogger(__name__).info( |         logging.getLogger(__name__).info( | ||||||
|             "Starting document consumer at {}".format( |             "Starting document consumer at {}".format( | ||||||
|                 directory |                 directory | ||||||
|   | |||||||
| @@ -1,8 +1,17 @@ | |||||||
|  | import os | ||||||
| import re | import re | ||||||
|  | import shutil | ||||||
|  | import tempfile | ||||||
|  | from unittest import mock | ||||||
|  | from unittest.mock import MagicMock | ||||||
|  |  | ||||||
| from django.test import TestCase | from django.conf import settings | ||||||
|  | from django.db import DatabaseError | ||||||
|  | from django.test import TestCase, override_settings | ||||||
|  |  | ||||||
| from ..models import FileInfo, Tag | from ..consumer import Consumer, ConsumerError | ||||||
|  | from ..models import FileInfo, Tag, Correspondent, DocumentType, Document | ||||||
|  | from ..parsers import DocumentParser, ParseError | ||||||
|  |  | ||||||
|  |  | ||||||
| class TestAttributes(TestCase): | class TestAttributes(TestCase): | ||||||
| @@ -394,3 +403,251 @@ class TestFieldPermutations(TestCase): | |||||||
|             self.assertEqual(info.created.year, 2019) |             self.assertEqual(info.created.year, 2019) | ||||||
|             self.assertEqual(info.created.month, 9) |             self.assertEqual(info.created.month, 9) | ||||||
|             self.assertEqual(info.created.day, 8) |             self.assertEqual(info.created.day, 8) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class DummyParser(DocumentParser): | ||||||
|  |  | ||||||
|  |     def get_thumbnail(self): | ||||||
|  |         # not important during tests | ||||||
|  |         raise NotImplementedError() | ||||||
|  |  | ||||||
|  |     def __init__(self, path, logging_group, scratch_dir): | ||||||
|  |         super(DummyParser, self).__init__(path, logging_group) | ||||||
|  |         _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir) | ||||||
|  |  | ||||||
|  |     def get_optimised_thumbnail(self): | ||||||
|  |         return self.fake_thumb | ||||||
|  |  | ||||||
|  |     def get_text(self): | ||||||
|  |         return "The Text" | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class FaultyParser(DocumentParser): | ||||||
|  |  | ||||||
|  |     def get_thumbnail(self): | ||||||
|  |         # not important during tests | ||||||
|  |         raise NotImplementedError() | ||||||
|  |  | ||||||
|  |     def __init__(self, path, logging_group, scratch_dir): | ||||||
|  |         super(FaultyParser, self).__init__(path, logging_group) | ||||||
|  |         _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir) | ||||||
|  |  | ||||||
|  |     def get_optimised_thumbnail(self): | ||||||
|  |         return self.fake_thumb | ||||||
|  |  | ||||||
|  |     def get_text(self): | ||||||
|  |         raise ParseError("Does not compute.") | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class TestConsumer(TestCase): | ||||||
|  |  | ||||||
|  |     def make_dummy_parser(self, path, logging_group): | ||||||
|  |         return DummyParser(path, logging_group, self.scratch_dir) | ||||||
|  |  | ||||||
|  |     def make_faulty_parser(self, path, logging_group): | ||||||
|  |         return FaultyParser(path, logging_group, self.scratch_dir) | ||||||
|  |  | ||||||
|  |     def setUp(self): | ||||||
|  |         self.scratch_dir = tempfile.mkdtemp() | ||||||
|  |         self.media_dir = tempfile.mkdtemp() | ||||||
|  |  | ||||||
|  |         override_settings( | ||||||
|  |             SCRATCH_DIR=self.scratch_dir, | ||||||
|  |             MEDIA_ROOT=self.media_dir, | ||||||
|  |             ORIGINALS_DIR=os.path.join(self.media_dir, "documents", "originals"), | ||||||
|  |             THUMBNAIL_DIR=os.path.join(self.media_dir, "documents", "thumbnails") | ||||||
|  |         ).enable() | ||||||
|  |  | ||||||
|  |         patcher = mock.patch("documents.parsers.document_consumer_declaration.send") | ||||||
|  |         m = patcher.start() | ||||||
|  |         m.return_value = [(None, { | ||||||
|  |             "parser": self.make_dummy_parser, | ||||||
|  |             "test": lambda _: True, | ||||||
|  |             "weight": 0 | ||||||
|  |         })] | ||||||
|  |  | ||||||
|  |         self.addCleanup(patcher.stop) | ||||||
|  |  | ||||||
|  |         self.consumer = Consumer() | ||||||
|  |  | ||||||
|  |     def tearDown(self): | ||||||
|  |         shutil.rmtree(self.scratch_dir, ignore_errors=True) | ||||||
|  |         shutil.rmtree(self.media_dir, ignore_errors=True) | ||||||
|  |  | ||||||
|  |     def get_test_file(self): | ||||||
|  |         fd, f = tempfile.mkstemp(suffix=".pdf", dir=self.scratch_dir) | ||||||
|  |         return f | ||||||
|  |  | ||||||
|  |     def testNormalOperation(self): | ||||||
|  |  | ||||||
|  |         filename = self.get_test_file() | ||||||
|  |         document = self.consumer.try_consume_file(filename) | ||||||
|  |  | ||||||
|  |         self.assertEqual(document.content, "The Text") | ||||||
|  |         self.assertEqual(document.title, os.path.splitext(os.path.basename(filename))[0]) | ||||||
|  |         self.assertIsNone(document.correspondent) | ||||||
|  |         self.assertIsNone(document.document_type) | ||||||
|  |         self.assertEqual(document.filename, "0000001.pdf") | ||||||
|  |  | ||||||
|  |         self.assertTrue(os.path.isfile( | ||||||
|  |             document.source_path | ||||||
|  |         )) | ||||||
|  |  | ||||||
|  |         self.assertTrue(os.path.isfile( | ||||||
|  |             document.thumbnail_path | ||||||
|  |         )) | ||||||
|  |  | ||||||
|  |         self.assertFalse(os.path.isfile(filename)) | ||||||
|  |  | ||||||
|  |     def testOverrideFilename(self): | ||||||
|  |         filename = self.get_test_file() | ||||||
|  |         overrideFilename = "My Bank - Statement for November.pdf" | ||||||
|  |  | ||||||
|  |         document = self.consumer.try_consume_file(filename, original_filename=overrideFilename) | ||||||
|  |  | ||||||
|  |         self.assertEqual(document.correspondent.name, "My Bank") | ||||||
|  |         self.assertEqual(document.title, "Statement for November") | ||||||
|  |  | ||||||
|  |     def testOverrideTitle(self): | ||||||
|  |  | ||||||
|  |         document = self.consumer.try_consume_file(self.get_test_file(), force_title="Override Title") | ||||||
|  |         self.assertEqual(document.title, "Override Title") | ||||||
|  |  | ||||||
|  |     def testOverrideCorrespondent(self): | ||||||
|  |         c = Correspondent.objects.create(name="test") | ||||||
|  |  | ||||||
|  |         document = self.consumer.try_consume_file(self.get_test_file(), force_correspondent_id=c.pk) | ||||||
|  |         self.assertEqual(document.correspondent.id, c.id) | ||||||
|  |  | ||||||
|  |     def testOverrideDocumentType(self): | ||||||
|  |         dt = DocumentType.objects.create(name="test") | ||||||
|  |  | ||||||
|  |         document = self.consumer.try_consume_file(self.get_test_file(), force_document_type_id=dt.pk) | ||||||
|  |         self.assertEqual(document.document_type.id, dt.id) | ||||||
|  |  | ||||||
|  |     def testOverrideTags(self): | ||||||
|  |         t1 = Tag.objects.create(name="t1") | ||||||
|  |         t2 = Tag.objects.create(name="t2") | ||||||
|  |         t3 = Tag.objects.create(name="t3") | ||||||
|  |         document = self.consumer.try_consume_file(self.get_test_file(), force_tag_ids=[t1.id, t3.id]) | ||||||
|  |  | ||||||
|  |         self.assertIn(t1, document.tags.all()) | ||||||
|  |         self.assertNotIn(t2, document.tags.all()) | ||||||
|  |         self.assertIn(t3, document.tags.all()) | ||||||
|  |  | ||||||
|  |     def testNotAFile(self): | ||||||
|  |         try: | ||||||
|  |             self.consumer.try_consume_file("non-existing-file") | ||||||
|  |         except ConsumerError as e: | ||||||
|  |             self.assertTrue(str(e).endswith('It is not a file')) | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         self.fail("Should throw exception") | ||||||
|  |  | ||||||
|  |     @override_settings(CONSUMPTION_DIR=None) | ||||||
|  |     def testConsumptionDirUnset(self): | ||||||
|  |         try: | ||||||
|  |             self.consumer.try_consume_file(self.get_test_file()) | ||||||
|  |         except ConsumerError as e: | ||||||
|  |             self.assertEqual(str(e), "The CONSUMPTION_DIR settings variable does not appear to be set.") | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         self.fail("Should throw exception") | ||||||
|  |  | ||||||
|  |     @override_settings(CONSUMPTION_DIR="asd") | ||||||
|  |     def testNoConsumptionDir(self): | ||||||
|  |         try: | ||||||
|  |             self.consumer.try_consume_file(self.get_test_file()) | ||||||
|  |         except ConsumerError as e: | ||||||
|  |             self.assertEqual(str(e), "Consumption directory asd does not exist") | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         self.fail("Should throw exception") | ||||||
|  |  | ||||||
|  |     def testDuplicates(self): | ||||||
|  |         self.consumer.try_consume_file(self.get_test_file()) | ||||||
|  |  | ||||||
|  |         try: | ||||||
|  |             self.consumer.try_consume_file(self.get_test_file()) | ||||||
|  |         except ConsumerError as e: | ||||||
|  |             self.assertTrue(str(e).endswith("It is a duplicate.")) | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         self.fail("Should throw exception") | ||||||
|  |  | ||||||
|  |     @mock.patch("documents.parsers.document_consumer_declaration.send") | ||||||
|  |     def testNoParsers(self, m): | ||||||
|  |         m.return_value = [] | ||||||
|  |  | ||||||
|  |         try: | ||||||
|  |             self.consumer.try_consume_file(self.get_test_file()) | ||||||
|  |         except ConsumerError as e: | ||||||
|  |             self.assertTrue(str(e).startswith("No parsers abvailable")) | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         self.fail("Should throw exception") | ||||||
|  |  | ||||||
|  |     @mock.patch("documents.parsers.document_consumer_declaration.send") | ||||||
|  |     def testFaultyParser(self, m): | ||||||
|  |         m.return_value = [(None, { | ||||||
|  |             "parser": self.make_faulty_parser, | ||||||
|  |             "test": lambda _: True, | ||||||
|  |             "weight": 0 | ||||||
|  |         })] | ||||||
|  |  | ||||||
|  |         try: | ||||||
|  |             self.consumer.try_consume_file(self.get_test_file()) | ||||||
|  |         except ConsumerError as e: | ||||||
|  |             self.assertEqual(str(e), "Does not compute.") | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         self.fail("Should throw exception.") | ||||||
|  |  | ||||||
|  |     @mock.patch("documents.consumer.Consumer._write") | ||||||
|  |     def testPostSaveError(self, m): | ||||||
|  |         filename = self.get_test_file() | ||||||
|  |         m.side_effect = OSError("NO.") | ||||||
|  |         try: | ||||||
|  |             self.consumer.try_consume_file(filename) | ||||||
|  |         except ConsumerError as e: | ||||||
|  |             self.assertEqual(str(e), "NO.") | ||||||
|  |         else: | ||||||
|  |             self.fail("Should raise exception") | ||||||
|  |  | ||||||
|  |         # file not deleted | ||||||
|  |         self.assertTrue(os.path.isfile(filename)) | ||||||
|  |  | ||||||
|  |         # Database empty | ||||||
|  |         self.assertEqual(len(Document.objects.all()), 0) | ||||||
|  |  | ||||||
|  |     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}") | ||||||
|  |     def testFilenameHandling(self): | ||||||
|  |         filename = self.get_test_file() | ||||||
|  |  | ||||||
|  |         document = self.consumer.try_consume_file(filename, original_filename="Bank - Test.pdf", force_title="new docs") | ||||||
|  |  | ||||||
|  |         print(document.source_path) | ||||||
|  |         print("===") | ||||||
|  |  | ||||||
|  |         self.assertEqual(document.title, "new docs") | ||||||
|  |         self.assertEqual(document.correspondent.name, "Bank") | ||||||
|  |         self.assertEqual(document.filename, "bank/new-docs-0000001.pdf") | ||||||
|  |  | ||||||
|  |     @mock.patch("documents.consumer.DocumentClassifier") | ||||||
|  |     def testClassifyDocument(self, m): | ||||||
|  |         correspondent = Correspondent.objects.create(name="test") | ||||||
|  |         dtype = DocumentType.objects.create(name="test") | ||||||
|  |         t1 = Tag.objects.create(name="t1") | ||||||
|  |         t2 = Tag.objects.create(name="t2") | ||||||
|  |  | ||||||
|  |         m.return_value = MagicMock() | ||||||
|  |         m.return_value.predict_correspondent.return_value = correspondent.pk | ||||||
|  |         m.return_value.predict_document_type.return_value = dtype.pk | ||||||
|  |         m.return_value.predict_tags.return_value = [t1.pk] | ||||||
|  |  | ||||||
|  |         document = self.consumer.try_consume_file(self.get_test_file()) | ||||||
|  |  | ||||||
|  |         self.assertEqual(document.correspondent, correspondent) | ||||||
|  |         self.assertEqual(document.document_type, dtype) | ||||||
|  |         self.assertIn(t1, document.tags.all()) | ||||||
|  |         self.assertNotIn(t2, document.tags.all()) | ||||||
|   | |||||||
| @@ -359,5 +359,6 @@ FILENAME_PARSE_TRANSFORMS = [] | |||||||
| for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")): | for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")): | ||||||
|     FILENAME_PARSE_TRANSFORMS.append((re.compile(t["pattern"]), t["repl"])) |     FILENAME_PARSE_TRANSFORMS.append((re.compile(t["pattern"]), t["repl"])) | ||||||
|  |  | ||||||
|  | # TODO: this should not have a prefix. | ||||||
| # Specify the filename format for out files | # Specify the filename format for out files | ||||||
| PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT") | PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT") | ||||||
|   | |||||||
| @@ -6,7 +6,6 @@ ignore = E501 | |||||||
| DJANGO_SETTINGS_MODULE=paperless.settings | DJANGO_SETTINGS_MODULE=paperless.settings | ||||||
| addopts = --pythonwarnings=all | addopts = --pythonwarnings=all | ||||||
| env = | env = | ||||||
|   PAPERLESS_PASSPHRASE=THISISNOTASECRET |  | ||||||
|   PAPERLESS_SECRET=paperless |   PAPERLESS_SECRET=paperless | ||||||
|   PAPERLESS_EMAIL_SECRET=paperless |   PAPERLESS_EMAIL_SECRET=paperless | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Jonas Winkler
					Jonas Winkler