diff --git a/src/documents/sanity_checker.py b/src/documents/sanity_checker.py index bc0b689d4..b8fd73f98 100644 --- a/src/documents/sanity_checker.py +++ b/src/documents/sanity_checker.py @@ -56,7 +56,8 @@ def check_sanity(): messages.append(SanityError( f"Thumbnail of document {doc.pk} does not exist.")) else: - present_files.remove(os.path.normpath(doc.thumbnail_path)) + if os.path.normpath(doc.thumbnail_path) in present_files: + present_files.remove(os.path.normpath(doc.thumbnail_path)) try: with doc.thumbnail_file as f: f.read() @@ -71,7 +72,8 @@ def check_sanity(): messages.append(SanityError( f"Original of document {doc.pk} does not exist.")) else: - present_files.remove(os.path.normpath(doc.source_path)) + if os.path.normpath(doc.source_path) in present_files: + present_files.remove(os.path.normpath(doc.source_path)) try: with doc.source_file as f: checksum = hashlib.md5(f.read()).hexdigest() @@ -92,7 +94,8 @@ def check_sanity(): f"Archived version of document {doc.pk} does not exist." )) else: - present_files.remove(os.path.normpath(doc.archive_path)) + if os.path.normpath(doc.archive_path) in present_files: + present_files.remove(os.path.normpath(doc.archive_path)) try: with doc.archive_file as f: checksum = hashlib.md5(f.read()).hexdigest() @@ -103,7 +106,8 @@ def check_sanity(): else: if not checksum == doc.archive_checksum: messages.append(SanityError( - f"Checksum mismatch of archive {doc.pk}. " + f"Checksum mismatch of archived document " + f"{doc.pk}. " f"Stored: {doc.checksum}, actual: {checksum}." 
class CopyParser(DocumentParser):
    """Test parser that "archives" a document by copying it unchanged.

    Both thumbnail accessors hand back the path of one placeholder ``.png``
    file created when the parser is instantiated. ``parse`` stores a fixed
    text and copies the input document to ``archive.pdf`` inside the
    parser's temporary directory.
    """

    def __init__(self, logging_group, progress_callback=None):
        """Initialise the base parser and create the placeholder thumbnail.

        Relies on ``self.tempdir`` being available once the base class
        initialiser has run (presumably set by ``DocumentParser.__init__``;
        verify against the base class).
        """
        super().__init__(logging_group, progress_callback)
        # mkstemp() returns an *open* OS-level file descriptor together with
        # the path. Only the path is needed here, so close the descriptor
        # immediately instead of leaking one per parser instance.
        thumb_fd, self.fake_thumb = tempfile.mkstemp(
            suffix=".png", dir=self.tempdir)
        os.close(thumb_fd)

    def get_thumbnail(self, document_path, mime_type):
        """Return the placeholder thumbnail path; the arguments are unused."""
        return self.fake_thumb

    def get_optimised_thumbnail(self, document_path, mime_type):
        """Return the same placeholder path as ``get_thumbnail``."""
        return self.fake_thumb

    def parse(self, document_path, mime_type, file_name=None):
        """Set a fixed text and copy ``document_path`` to the archive path.

        The archive is always named ``archive.pdf`` regardless of the
        input's mime type; ``mime_type`` and ``file_name`` are ignored.
        """
        self.text = "The text"
        self.archive_path = os.path.join(self.tempdir, "archive.pdf")
        shutil.copy(document_path, self.archive_path)
self._assert_first_last_send_progress(last_status="FAILED") + @mock.patch("documents.parsers.document_consumer_declaration.send") + def test_similar_filenames(self, m): + shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(settings.CONSUMPTION_DIR, "simple.pdf")) + shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.png"), os.path.join(settings.CONSUMPTION_DIR, "simple.png")) + m.return_value = [(None, { + "parser": CopyParser, + "mime_types": {"application/pdf": ".pdf", "image/png": ".zip"}, + "weight": 0 + })] + doc1 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.png")) + doc2 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.pdf")) + + sanity_check() class PreConsumeTestCase(TestCase):