diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 76aa293d0..d9a149ed5 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -5,6 +5,7 @@ import tempfile from enum import Enum from pathlib import Path from typing import TYPE_CHECKING +from typing import Final import magic from django.conf import settings @@ -49,6 +50,8 @@ from documents.utils import copy_file_with_basic_stats from documents.utils import run_subprocess from paperless_mail.parsers import MailDocumentParser +LOGGING_NAME: Final[str] = "paperless.consumer" + class WorkflowTriggerPlugin( NoCleanupPluginMixin, @@ -156,7 +159,7 @@ class ConsumerPlugin( ConsumerPluginMixin, ConsumeTaskPlugin, ): - logging_name = "paperless.consumer" + logging_name = LOGGING_NAME def run_pre_consume_script(self) -> None: """ @@ -756,7 +759,7 @@ class ConsumerPreflightPlugin( ConsumeTaskPlugin, ): NAME: str = "ConsumerPreflightPlugin" - logging_name = "paperless.consumer" + logging_name = LOGGING_NAME def pre_check_file_exists(self) -> None: """ @@ -831,6 +834,32 @@ class ConsumerPreflightPlugin( settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True) settings.ARCHIVE_DIR.mkdir(parents=True, exist_ok=True) + def run(self) -> None: + self._send_progress( + 0, + 100, + ProgressStatusOptions.STARTED, + ConsumerStatusShortMessage.NEW_FILE, + ) + + # Make sure that preconditions for consuming the file are met. + + self.pre_check_file_exists() + self.pre_check_duplicate() + self.pre_check_directories() + + +class AsnCheckPlugin( + NoCleanupPluginMixin, + NoSetupPluginMixin, + AlwaysRunPluginMixin, + LoggingMixin, + ConsumerPluginMixin, + ConsumeTaskPlugin, +): + NAME: str = "AsnCheckPlugin" + logging_name = LOGGING_NAME + def pre_check_asn_value(self) -> None: """ Check that if override_asn is given, it is unique and within a valid range @@ -868,16 +897,4 @@ class ConsumerPreflightPlugin( ) def run(self) -> None: - self._send_progress( - 0, - 100, - ProgressStatusOptions.STARTED, - ConsumerStatusShortMessage.NEW_FILE, - ) - - # Make sure that preconditions for consuming the file are met. - - self.pre_check_file_exists() - self.pre_check_duplicate() - self.pre_check_directories() self.pre_check_asn_value() diff --git a/src/documents/tasks.py b/src/documents/tasks.py index 91a266856..1e8b35891 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -29,6 +29,7 @@ from documents.bulk_download import OriginalsOnlyStrategy from documents.caching import clear_document_caches from documents.classifier import DocumentClassifier from documents.classifier import load_classifier +from documents.consumer import AsnCheckPlugin from documents.consumer import ConsumerPlugin from documents.consumer import ConsumerPreflightPlugin from documents.consumer import WorkflowTriggerPlugin @@ -157,8 +158,10 @@ def consume_file( plugins: list[type[ConsumeTaskPlugin]] = [ ConsumerPreflightPlugin, + AsnCheckPlugin, CollatePlugin, BarcodePlugin, + AsnCheckPlugin, # Re-run ASN check after barcode reading WorkflowTriggerPlugin, ConsumerPlugin, ] diff --git a/src/documents/tests/test_barcodes.py b/src/documents/tests/test_barcodes.py index beb4e2a9a..d7dab5a2d 100644 --- a/src/documents/tests/test_barcodes.py +++ b/src/documents/tests/test_barcodes.py @@ -11,6 +11,7 @@ from django.test import override_settings from documents import tasks from documents.barcodes import BarcodePlugin +from documents.consumer import ConsumerError from documents.data_models import ConsumableDocument from documents.data_models import DocumentMetadataOverrides from documents.data_models import DocumentSource @@ -93,6 +94,41 @@ class TestBarcode( self.assertDictEqual(separator_page_numbers, {1: False}) + @override_settings(CONSUMER_ENABLE_ASN_BARCODE=True) + def test_asn_barcode_duplicate_in_trash_fails(self): + """ + GIVEN: + - A document with ASN barcode 123 is in the trash + WHEN: + - A file with the same barcode ASN is consumed + THEN: + - The ASN check is re-run and consumption fails + """ + test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf" + + first_doc = Document.objects.create( + title="First ASN 123", + content="", + checksum="asn123first", + mime_type="application/pdf", + archive_serial_number=123, + ) + + first_doc.delete() + + dupe_asn = settings.SCRATCH_DIR / "barcode-39-asn-123-second.pdf" + shutil.copy(test_file, dupe_asn) + + with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): + with self.assertRaisesRegex(ConsumerError, r"ASN 123.*trash"): + tasks.consume_file( + ConsumableDocument( + source=DocumentSource.ConsumeFolder, + original_file=dupe_asn, + ), + None, + ) + @override_settings( CONSUMER_BARCODE_TIFF_SUPPORT=True, ) diff --git a/src/documents/tests/utils.py b/src/documents/tests/utils.py index dc89322c9..f099cd92e 100644 --- a/src/documents/tests/utils.py +++ b/src/documents/tests/utils.py @@ -20,6 +20,7 @@ from django.db.migrations.executor import MigrationExecutor from django.test import TransactionTestCase from django.test import override_settings +from documents.consumer import AsnCheckPlugin from documents.consumer import ConsumerPlugin from documents.consumer import ConsumerPreflightPlugin from documents.data_models import ConsumableDocument @@ -371,6 +372,14 @@ class GetConsumerMixin: "task-id", ) preflight_plugin.setup() + asncheck_plugin = AsnCheckPlugin( + doc, + overrides or DocumentMetadataOverrides(), + self.status, # type: ignore + self.dirs.scratch_dir, + "task-id", + ) + asncheck_plugin.setup() reader = ConsumerPlugin( doc, overrides or DocumentMetadataOverrides(), @@ -381,6 +390,7 @@ class GetConsumerMixin: reader.setup() try: preflight_plugin.run() + asncheck_plugin.run() yield reader finally: reader.cleanup()