	Refactor file consumption task to allow beginnings of a plugin system (#5367)
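The diff below refactors the barcode, double-sided collation and workflow-override steps of document consumption into self-contained plugins that the consume task drives in sequence. As orientation before reading the hunks, the driver loop (inlined in consume_file later in this diff) behaves roughly like the following sketch; run_plugins is a hypothetical standalone name used only for illustration:

from documents.plugins.base import ConsumeTaskPlugin, StopConsumeTaskError

def run_plugins(plugin_classes, input_doc, overrides, status_mgr, tmp_dir, task_id):
    # Hypothetical standalone version of the loop inlined in consume_file below.
    for plugin_class in plugin_classes:
        plugin: ConsumeTaskPlugin = plugin_class(input_doc, overrides, status_mgr, tmp_dir, task_id)
        if not plugin.able_to_run:
            continue
        try:
            plugin.setup()
            plugin.run()                # may update plugin.metadata or raise
            overrides = plugin.metadata
        except StopConsumeTaskError as e:
            return e.message            # e.g. the file was split into new consume tasks
        finally:
            plugin.cleanup()
    return overrides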
		| @@ -3,7 +3,6 @@ import re | ||||
| import tempfile | ||||
| from dataclasses import dataclass | ||||
| from pathlib import Path | ||||
| from typing import Final | ||||
| from typing import Optional | ||||
|  | ||||
| from django.conf import settings | ||||
| @@ -15,8 +14,9 @@ from PIL import Image | ||||
|  | ||||
| from documents.converters import convert_from_tiff_to_pdf | ||||
| from documents.data_models import ConsumableDocument | ||||
| from documents.data_models import DocumentMetadataOverrides | ||||
| from documents.data_models import DocumentSource | ||||
| from documents.plugins.base import ConsumeTaskPlugin | ||||
| from documents.plugins.base import StopConsumeTaskError | ||||
| from documents.plugins.helpers import ProgressStatusOptions | ||||
| from documents.utils import copy_basic_file_stats | ||||
| from documents.utils import copy_file_with_basic_stats | ||||
|  | ||||
| @@ -26,7 +26,7 @@ logger = logging.getLogger("paperless.barcodes") | ||||
| @dataclass(frozen=True) | ||||
| class Barcode: | ||||
|     """ | ||||
|     Holds the information about a single barcode and its location | ||||
|     Holds the information about a single barcode and its location in a document | ||||
|     """ | ||||
|  | ||||
|     page: int | ||||
| @@ -49,77 +49,111 @@ class Barcode: | ||||
|         return self.value.startswith(settings.CONSUMER_ASN_BARCODE_PREFIX) | ||||
|  | ||||
|  | ||||
| class BarcodeReader: | ||||
|     def __init__(self, filepath: Path, mime_type: str) -> None: | ||||
|         self.file: Final[Path] = filepath | ||||
|         self.mime: Final[str] = mime_type | ||||
|         self.pdf_file: Path = self.file | ||||
|         self.barcodes: list[Barcode] = [] | ||||
|         self._tiff_conversion_done = False | ||||
|         self.temp_dir: Optional[tempfile.TemporaryDirectory] = None | ||||
| class BarcodePlugin(ConsumeTaskPlugin): | ||||
|     NAME: str = "BarcodePlugin" | ||||
|  | ||||
|     @property | ||||
|     def able_to_run(self) -> bool: | ||||
|         """ | ||||
|         Able to run if: | ||||
|           - ASN from barcode detection is enabled or | ||||
|           - Barcode support is enabled and the mime type is supported | ||||
|         """ | ||||
|         if settings.CONSUMER_BARCODE_TIFF_SUPPORT: | ||||
|             self.SUPPORTED_FILE_MIMES = {"application/pdf", "image/tiff"} | ||||
|             supported_mimes = {"application/pdf", "image/tiff"} | ||||
|         else: | ||||
|             self.SUPPORTED_FILE_MIMES = {"application/pdf"} | ||||
|             supported_mimes = {"application/pdf"} | ||||
|  | ||||
|     def __enter__(self): | ||||
|         if self.supported_mime_type: | ||||
|             self.temp_dir = tempfile.TemporaryDirectory(prefix="paperless-barcodes") | ||||
|         return self | ||||
|         return ( | ||||
|             settings.CONSUMER_ENABLE_ASN_BARCODE or settings.CONSUMER_ENABLE_BARCODES | ||||
|         ) and self.input_doc.mime_type in supported_mimes | ||||
|  | ||||
|     def __exit__(self, exc_type, exc_val, exc_tb): | ||||
|         if self.temp_dir is not None: | ||||
|             self.temp_dir.cleanup() | ||||
|             self.temp_dir = None | ||||
|     def setup(self): | ||||
|         self.temp_dir = tempfile.TemporaryDirectory( | ||||
|             dir=self.base_tmp_dir, | ||||
|             prefix="barcode", | ||||
|         ) | ||||
|         self.pdf_file = self.input_doc.original_file | ||||
|         self._tiff_conversion_done = False | ||||
|         self.barcodes: list[Barcode] = [] | ||||
|  | ||||
|     @property | ||||
|     def supported_mime_type(self) -> bool: | ||||
|         """ | ||||
|         Return True if the given mime type is supported for barcodes, false otherwise | ||||
|         """ | ||||
|         return self.mime in self.SUPPORTED_FILE_MIMES | ||||
|     def run(self) -> Optional[str]: | ||||
|         # Maybe do the conversion of TIFF to PDF | ||||
|         self.convert_from_tiff_to_pdf() | ||||
|  | ||||
|     @property | ||||
|     def asn(self) -> Optional[int]: | ||||
|         """ | ||||
|         Search the parsed barcodes for any ASNs. | ||||
|         The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX | ||||
|         is considered the ASN to be used. | ||||
|         Returns the detected ASN (or None) | ||||
|         """ | ||||
|         asn = None | ||||
|  | ||||
|         if not self.supported_mime_type: | ||||
|             return None | ||||
|  | ||||
|         # Ensure the barcodes have been read | ||||
|         # Locate any barcodes in the files | ||||
|         self.detect() | ||||
|  | ||||
|         # get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX | ||||
|         asn_text = next( | ||||
|             (x.value for x in self.barcodes if x.is_asn), | ||||
|             None, | ||||
|         # Update/overwrite an ASN if possible | ||||
|         located_asn = self.asn | ||||
|         if located_asn is not None: | ||||
|             logger.info(f"Found ASN in barcode: {located_asn}") | ||||
|             self.metadata.asn = located_asn | ||||
|  | ||||
|         separator_pages = self.get_separation_pages() | ||||
|         if not separator_pages: | ||||
|             return "No pages to split on!" | ||||
|  | ||||
|         # We have pages to split against | ||||
|  | ||||
|         # Note this does NOT use the base_temp_dir, as that will be removed | ||||
|         tmp_dir = Path( | ||||
|             tempfile.mkdtemp( | ||||
|                 dir=settings.SCRATCH_DIR, | ||||
|                 prefix="paperless-barcode-split-", | ||||
|             ), | ||||
|         ).resolve() | ||||
|  | ||||
|         from documents import tasks | ||||
|  | ||||
|         # Create the split document tasks | ||||
|         for new_document in self.separate_pages(separator_pages): | ||||
|             copy_file_with_basic_stats(new_document, tmp_dir / new_document.name) | ||||
|  | ||||
|             task = tasks.consume_file.delay( | ||||
|                 ConsumableDocument( | ||||
|                     # Same source, for templates | ||||
|                     source=self.input_doc.source, | ||||
|                     mailrule_id=self.input_doc.mailrule_id, | ||||
|                     # Can't use same folder or the consume might grab it again | ||||
|                     original_file=(tmp_dir / new_document.name).resolve(), | ||||
|                 ), | ||||
|                 # All the same metadata | ||||
|                 self.metadata, | ||||
|             ) | ||||
|             logger.info(f"Created new task {task.id} for {new_document.name}") | ||||
|  | ||||
|         if asn_text: | ||||
|             logger.debug(f"Found ASN Barcode: {asn_text}") | ||||
|             # remove the prefix and remove whitespace | ||||
|             asn_text = asn_text[len(settings.CONSUMER_ASN_BARCODE_PREFIX) :].strip() | ||||
|         # This file is now two or more files | ||||
|         self.input_doc.original_file.unlink() | ||||
|  | ||||
|             # remove non-numeric parts of the remaining string | ||||
|             asn_text = re.sub(r"\D", "", asn_text) | ||||
|         msg = "Barcode splitting complete!" | ||||
|  | ||||
|             # now, try parsing the ASN number | ||||
|             try: | ||||
|                 asn = int(asn_text) | ||||
|             except ValueError as e: | ||||
|                 logger.warning(f"Failed to parse ASN number because: {e}") | ||||
|         # Update the progress to complete | ||||
|         self.status_mgr.send_progress(ProgressStatusOptions.SUCCESS, msg, 100, 100) | ||||
|  | ||||
|         return asn | ||||
|         # Request the consume task stops | ||||
|         raise StopConsumeTaskError(msg) | ||||
|  | ||||
|     def cleanup(self) -> None: | ||||
|         self.temp_dir.cleanup() | ||||
|  | ||||
|     def convert_from_tiff_to_pdf(self): | ||||
|         """ | ||||
|         May convert a TIFF image into a PDF, if the input is a TIFF and | ||||
|         the TIFF has not been made into a PDF | ||||
|         """ | ||||
|         # Nothing to do, pdf_file is already assigned correctly | ||||
|         if self.input_doc.mime_type != "image/tiff" or self._tiff_conversion_done: | ||||
|             return | ||||
|  | ||||
|         self.pdf_file = convert_from_tiff_to_pdf( | ||||
|             self.input_doc.original_file, | ||||
|             Path(self.temp_dir.name), | ||||
|         ) | ||||
|         self._tiff_conversion_done = True | ||||
|  | ||||
|     @staticmethod | ||||
|     def read_barcodes_zxing(image: Image) -> list[str]: | ||||
|     def read_barcodes_zxing(image: Image.Image) -> list[str]: | ||||
|         barcodes = [] | ||||
|  | ||||
|         import zxingcpp | ||||
| @@ -135,7 +169,7 @@ class BarcodeReader: | ||||
|         return barcodes | ||||
|  | ||||
|     @staticmethod | ||||
|     def read_barcodes_pyzbar(image: Image) -> list[str]: | ||||
|     def read_barcodes_pyzbar(image: Image.Image) -> list[str]: | ||||
|         barcodes = [] | ||||
|  | ||||
|         from pyzbar import pyzbar | ||||
| @@ -154,18 +188,6 @@ class BarcodeReader: | ||||
|  | ||||
|         return barcodes | ||||
|  | ||||
|     def convert_from_tiff_to_pdf(self): | ||||
|         """ | ||||
|         May convert a TIFF image into a PDF, if the input is a TIFF and | ||||
|         the TIFF has not been made into a PDF | ||||
|         """ | ||||
|         # Nothing to do, pdf_file is already assigned correctly | ||||
|         if self.mime != "image/tiff" or self._tiff_conversion_done: | ||||
|             return | ||||
|  | ||||
|         self._tiff_conversion_done = True | ||||
|         self.pdf_file = convert_from_tiff_to_pdf(self.file, Path(self.temp_dir.name)) | ||||
|  | ||||
|     def detect(self) -> None: | ||||
|         """ | ||||
|         Scan all pages of the PDF as images, updating barcodes and the pages | ||||
| @@ -218,10 +240,45 @@ class BarcodeReader: | ||||
|         # This file is really borked, allow the consumption to continue | ||||
|         # but it may fail further on | ||||
|         except Exception as e:  # pragma: no cover | ||||
|             logger.exception( | ||||
|             logger.warning( | ||||
|                 f"Exception during barcode scanning: {e}", | ||||
|             ) | ||||
|  | ||||
|     @property | ||||
|     def asn(self) -> Optional[int]: | ||||
|         """ | ||||
|         Search the parsed barcodes for any ASNs. | ||||
|         The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX | ||||
|         is considered the ASN to be used. | ||||
|         Returns the detected ASN (or None) | ||||
|         """ | ||||
|         asn = None | ||||
|  | ||||
|         # Ensure the barcodes have been read | ||||
|         self.detect() | ||||
|  | ||||
|         # get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX | ||||
|         asn_text = next( | ||||
|             (x.value for x in self.barcodes if x.is_asn), | ||||
|             None, | ||||
|         ) | ||||
|  | ||||
|         if asn_text: | ||||
|             logger.debug(f"Found ASN Barcode: {asn_text}") | ||||
|             # remove the prefix and remove whitespace | ||||
|             asn_text = asn_text[len(settings.CONSUMER_ASN_BARCODE_PREFIX) :].strip() | ||||
|  | ||||
|             # remove non-numeric parts of the remaining string | ||||
|             asn_text = re.sub(r"\D", "", asn_text) | ||||
|  | ||||
|             # now, try parsing the ASN number | ||||
|             try: | ||||
|                 asn = int(asn_text) | ||||
|             except ValueError as e: | ||||
|                 logger.warning(f"Failed to parse ASN number because: {e}") | ||||
|  | ||||
|         return asn | ||||
|  | ||||
|     def get_separation_pages(self) -> dict[int, bool]: | ||||
|         """ | ||||
|         Search the parsed barcodes for separators and returns a dict of page | ||||
| @@ -251,7 +308,7 @@ class BarcodeReader: | ||||
|         """ | ||||
|  | ||||
|         document_paths = [] | ||||
|         fname = self.file.stem | ||||
|         fname = self.input_doc.original_file.stem | ||||
|         with Pdf.open(self.pdf_file) as input_pdf: | ||||
|             # Start with an empty document | ||||
|             current_document: list[Page] = [] | ||||
| @@ -292,58 +349,8 @@ class BarcodeReader: | ||||
|                 with open(savepath, "wb") as out: | ||||
|                     dst.save(out) | ||||
|  | ||||
|                 copy_basic_file_stats(self.file, savepath) | ||||
|                 copy_basic_file_stats(self.input_doc.original_file, savepath) | ||||
|  | ||||
|                 document_paths.append(savepath) | ||||
|  | ||||
|             return document_paths | ||||
|  | ||||
|     def separate( | ||||
|         self, | ||||
|         source: DocumentSource, | ||||
|         overrides: DocumentMetadataOverrides, | ||||
|     ) -> bool: | ||||
|         """ | ||||
|         Separates the document, based on barcodes and configuration, creating new | ||||
|         documents as required in the appropriate location. | ||||
|  | ||||
|         Returns True if a split happened, False otherwise | ||||
|         """ | ||||
|         # Do nothing | ||||
|         if not self.supported_mime_type: | ||||
|             logger.warning(f"Unsupported file format for barcode reader: {self.mime}") | ||||
|             return False | ||||
|  | ||||
|         # Does nothing unless needed | ||||
|         self.convert_from_tiff_to_pdf() | ||||
|  | ||||
|         # Actually read the codes, if any | ||||
|         self.detect() | ||||
|  | ||||
|         separator_pages = self.get_separation_pages() | ||||
|  | ||||
|         # Also do nothing | ||||
|         if not separator_pages: | ||||
|             logger.warning("No pages to split on!") | ||||
|             return False | ||||
|  | ||||
|         tmp_dir = Path(tempfile.mkdtemp(prefix="paperless-barcode-split-")).resolve() | ||||
|  | ||||
|         from documents import tasks | ||||
|  | ||||
|         # Create the split document tasks | ||||
|         for new_document in self.separate_pages(separator_pages): | ||||
|             copy_file_with_basic_stats(new_document, tmp_dir / new_document.name) | ||||
|  | ||||
|             tasks.consume_file.delay( | ||||
|                 ConsumableDocument( | ||||
|                     # Same source, for templates | ||||
|                     source=source, | ||||
|                     # Can't use same folder or the consume might grab it again | ||||
|                     original_file=(tmp_dir / new_document.name).resolve(), | ||||
|                 ), | ||||
|                 # All the same metadata | ||||
|                 overrides, | ||||
|             ) | ||||
|         logger.info("Barcode splitting complete!") | ||||
|         return True | ||||
|   | ||||
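For reference, the ASN handling that BarcodePlugin.run() relies on (the asn property shown above in this hunk) boils down to stripping the configured prefix and all non-digit characters. A standalone sketch, assuming the commonly documented default of "ASN" for CONSUMER_ASN_BARCODE_PREFIX:

import re
from typing import Optional

def parse_asn(value: str, prefix: str = "ASN") -> Optional[int]:
    """Reduce a barcode value such as 'ASN00123' to the integer 123."""
    if not value.startswith(prefix):
        return None
    digits = re.sub(r"\D", "", value[len(prefix):].strip())
    try:
        return int(digits)
    except ValueError:
        return None

# parse_asn("ASN00123") -> 123; parse_asn("SPLIT") -> None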
| @@ -21,7 +21,6 @@ from filelock import FileLock | ||||
| from rest_framework.reverse import reverse | ||||
|  | ||||
| from documents.classifier import load_classifier | ||||
| from documents.data_models import ConsumableDocument | ||||
| from documents.data_models import DocumentMetadataOverrides | ||||
| from documents.file_handling import create_source_path_directory | ||||
| from documents.file_handling import generate_unique_filename | ||||
| @@ -42,12 +41,83 @@ from documents.parsers import ParseError | ||||
| from documents.parsers import get_parser_class_for_mime_type | ||||
| from documents.parsers import parse_date | ||||
| from documents.permissions import set_permissions_for_object | ||||
| from documents.plugins.base import AlwaysRunPluginMixin | ||||
| from documents.plugins.base import ConsumeTaskPlugin | ||||
| from documents.plugins.base import NoCleanupPluginMixin | ||||
| from documents.plugins.base import NoSetupPluginMixin | ||||
| from documents.signals import document_consumption_finished | ||||
| from documents.signals import document_consumption_started | ||||
| from documents.utils import copy_basic_file_stats | ||||
| from documents.utils import copy_file_with_basic_stats | ||||
|  | ||||
|  | ||||
| class WorkflowTriggerPlugin( | ||||
|     NoCleanupPluginMixin, | ||||
|     NoSetupPluginMixin, | ||||
|     AlwaysRunPluginMixin, | ||||
|     ConsumeTaskPlugin, | ||||
| ): | ||||
|     NAME: str = "WorkflowTriggerPlugin" | ||||
|  | ||||
|     def run(self) -> Optional[str]: | ||||
|         """ | ||||
|         Get overrides from matching workflows | ||||
|         """ | ||||
|         overrides = DocumentMetadataOverrides() | ||||
|         for workflow in Workflow.objects.filter(enabled=True).order_by("order"): | ||||
|             template_overrides = DocumentMetadataOverrides() | ||||
|  | ||||
|             if document_matches_workflow( | ||||
|                 self.input_doc, | ||||
|                 workflow, | ||||
|                 WorkflowTrigger.WorkflowTriggerType.CONSUMPTION, | ||||
|             ): | ||||
|                 for action in workflow.actions.all(): | ||||
|                     if action.assign_title is not None: | ||||
|                         template_overrides.title = action.assign_title | ||||
|                     if action.assign_tags is not None: | ||||
|                         template_overrides.tag_ids = [ | ||||
|                             tag.pk for tag in action.assign_tags.all() | ||||
|                         ] | ||||
|                     if action.assign_correspondent is not None: | ||||
|                         template_overrides.correspondent_id = ( | ||||
|                             action.assign_correspondent.pk | ||||
|                         ) | ||||
|                     if action.assign_document_type is not None: | ||||
|                         template_overrides.document_type_id = ( | ||||
|                             action.assign_document_type.pk | ||||
|                         ) | ||||
|                     if action.assign_storage_path is not None: | ||||
|                         template_overrides.storage_path_id = ( | ||||
|                             action.assign_storage_path.pk | ||||
|                         ) | ||||
|                     if action.assign_owner is not None: | ||||
|                         template_overrides.owner_id = action.assign_owner.pk | ||||
|                     if action.assign_view_users is not None: | ||||
|                         template_overrides.view_users = [ | ||||
|                             user.pk for user in action.assign_view_users.all() | ||||
|                         ] | ||||
|                     if action.assign_view_groups is not None: | ||||
|                         template_overrides.view_groups = [ | ||||
|                             group.pk for group in action.assign_view_groups.all() | ||||
|                         ] | ||||
|                     if action.assign_change_users is not None: | ||||
|                         template_overrides.change_users = [ | ||||
|                             user.pk for user in action.assign_change_users.all() | ||||
|                         ] | ||||
|                     if action.assign_change_groups is not None: | ||||
|                         template_overrides.change_groups = [ | ||||
|                             group.pk for group in action.assign_change_groups.all() | ||||
|                         ] | ||||
|                     if action.assign_custom_fields is not None: | ||||
|                         template_overrides.custom_field_ids = [ | ||||
|                             field.pk for field in action.assign_custom_fields.all() | ||||
|                         ] | ||||
|  | ||||
|                     overrides.update(template_overrides) | ||||
|         self.metadata.update(overrides) | ||||
|  | ||||
|  | ||||
| class ConsumerError(Exception): | ||||
|     pass | ||||
|  | ||||
| @@ -602,70 +672,6 @@ class Consumer(LoggingMixin): | ||||
|  | ||||
|         return document | ||||
|  | ||||
|     def get_workflow_overrides( | ||||
|         self, | ||||
|         input_doc: ConsumableDocument, | ||||
|     ) -> DocumentMetadataOverrides: | ||||
|         """ | ||||
|         Get overrides from matching workflows | ||||
|         """ | ||||
|         overrides = DocumentMetadataOverrides() | ||||
|         for workflow in Workflow.objects.filter(enabled=True).order_by("order"): | ||||
|             template_overrides = DocumentMetadataOverrides() | ||||
|  | ||||
|             if document_matches_workflow( | ||||
|                 input_doc, | ||||
|                 workflow, | ||||
|                 WorkflowTrigger.WorkflowTriggerType.CONSUMPTION, | ||||
|             ): | ||||
|                 for action in workflow.actions.all(): | ||||
|                     self.log.info( | ||||
|                         f"Applying overrides in {action} from {workflow}", | ||||
|                     ) | ||||
|                     if action.assign_title is not None: | ||||
|                         template_overrides.title = action.assign_title | ||||
|                     if action.assign_tags is not None: | ||||
|                         template_overrides.tag_ids = [ | ||||
|                             tag.pk for tag in action.assign_tags.all() | ||||
|                         ] | ||||
|                     if action.assign_correspondent is not None: | ||||
|                         template_overrides.correspondent_id = ( | ||||
|                             action.assign_correspondent.pk | ||||
|                         ) | ||||
|                     if action.assign_document_type is not None: | ||||
|                         template_overrides.document_type_id = ( | ||||
|                             action.assign_document_type.pk | ||||
|                         ) | ||||
|                     if action.assign_storage_path is not None: | ||||
|                         template_overrides.storage_path_id = ( | ||||
|                             action.assign_storage_path.pk | ||||
|                         ) | ||||
|                     if action.assign_owner is not None: | ||||
|                         template_overrides.owner_id = action.assign_owner.pk | ||||
|                     if action.assign_view_users is not None: | ||||
|                         template_overrides.view_users = [ | ||||
|                             user.pk for user in action.assign_view_users.all() | ||||
|                         ] | ||||
|                     if action.assign_view_groups is not None: | ||||
|                         template_overrides.view_groups = [ | ||||
|                             group.pk for group in action.assign_view_groups.all() | ||||
|                         ] | ||||
|                     if action.assign_change_users is not None: | ||||
|                         template_overrides.change_users = [ | ||||
|                             user.pk for user in action.assign_change_users.all() | ||||
|                         ] | ||||
|                     if action.assign_change_groups is not None: | ||||
|                         template_overrides.change_groups = [ | ||||
|                             group.pk for group in action.assign_change_groups.all() | ||||
|                         ] | ||||
|                     if action.assign_custom_fields is not None: | ||||
|                         template_overrides.custom_field_ids = [ | ||||
|                             field.pk for field in action.assign_custom_fields.all() | ||||
|                         ] | ||||
|  | ||||
|                     overrides.update(template_overrides) | ||||
|         return overrides | ||||
|  | ||||
|     def _parse_title_placeholders(self, title: str) -> str: | ||||
|         local_added = timezone.localtime(timezone.now()) | ||||
|  | ||||
|   | ||||
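WorkflowTriggerPlugin only has to implement run() because the three mixins (defined in src/documents/plugins/base.py later in this diff) stub out setup, cleanup and able_to_run. A hypothetical plugin of the same shape, purely to illustrate the composition:

from typing import Optional

from documents.plugins.base import AlwaysRunPluginMixin
from documents.plugins.base import ConsumeTaskPlugin
from documents.plugins.base import NoCleanupPluginMixin
from documents.plugins.base import NoSetupPluginMixin

class LogOnlyPlugin(
    NoCleanupPluginMixin,
    NoSetupPluginMixin,
    AlwaysRunPluginMixin,
    ConsumeTaskPlugin,
):
    """Hypothetical example: inspects the incoming document, changes nothing."""

    NAME: str = "LogOnlyPlugin"

    def run(self) -> Optional[str]:
        return f"Saw {self.input_doc.original_file.name} ({self.input_doc.mime_type})"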
| @@ -3,24 +3,41 @@ import logging | ||||
| import os | ||||
| import shutil | ||||
| from pathlib import Path | ||||
| from typing import Final | ||||
| from typing import Optional | ||||
|  | ||||
| from django.conf import settings | ||||
| from pikepdf import Pdf | ||||
|  | ||||
| from documents.consumer import ConsumerError | ||||
| from documents.converters import convert_from_tiff_to_pdf | ||||
| from documents.data_models import ConsumableDocument | ||||
| from documents.plugins.base import ConsumeTaskPlugin | ||||
| from documents.plugins.base import NoCleanupPluginMixin | ||||
| from documents.plugins.base import NoSetupPluginMixin | ||||
| from documents.plugins.base import StopConsumeTaskError | ||||
|  | ||||
| logger = logging.getLogger("paperless.double_sided") | ||||
|  | ||||
| # Hardcoded for now, could be made a configurable setting if needed | ||||
| TIMEOUT_MINUTES = 30 | ||||
| TIMEOUT_MINUTES: Final[int] = 30 | ||||
| TIMEOUT_SECONDS: Final[int] = TIMEOUT_MINUTES * 60 | ||||
|  | ||||
| # Used by test cases | ||||
| STAGING_FILE_NAME = "double-sided-staging.pdf" | ||||
|  | ||||
|  | ||||
| def collate(input_doc: ConsumableDocument) -> str: | ||||
| class CollatePlugin(NoCleanupPluginMixin, NoSetupPluginMixin, ConsumeTaskPlugin): | ||||
|     NAME: str = "CollatePlugin" | ||||
|  | ||||
|     @property | ||||
|     def able_to_run(self) -> bool: | ||||
|         return ( | ||||
|             settings.CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED | ||||
|             and settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME | ||||
|             in self.input_doc.original_file.parts | ||||
|         ) | ||||
|  | ||||
|     def run(self) -> Optional[str]: | ||||
|         """ | ||||
|         Tries to collate pages from 2 single sided scans of a double sided | ||||
|         document. | ||||
| @@ -39,33 +56,32 @@ def collate(input_doc: ConsumableDocument) -> str: | ||||
|         in case of failure. | ||||
|         """ | ||||
|  | ||||
|     # Make sure scratch dir exists, Consumer might not have run yet | ||||
|     settings.SCRATCH_DIR.mkdir(exist_ok=True) | ||||
|  | ||||
|     if input_doc.mime_type == "application/pdf": | ||||
|         pdf_file = input_doc.original_file | ||||
|         if self.input_doc.mime_type == "application/pdf": | ||||
|             pdf_file = self.input_doc.original_file | ||||
|         elif ( | ||||
|         input_doc.mime_type == "image/tiff" | ||||
|             self.input_doc.mime_type == "image/tiff" | ||||
|             and settings.CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT | ||||
|         ): | ||||
|             pdf_file = convert_from_tiff_to_pdf( | ||||
|             input_doc.original_file, | ||||
|             settings.SCRATCH_DIR, | ||||
|                 self.input_doc.original_file, | ||||
|                 self.base_tmp_dir, | ||||
|             ) | ||||
|         input_doc.original_file.unlink() | ||||
|             self.input_doc.original_file.unlink() | ||||
|         else: | ||||
|         raise ConsumerError("Unsupported file type for collation of double-sided scans") | ||||
|             raise ConsumerError( | ||||
|                 "Unsupported file type for collation of double-sided scans", | ||||
|             ) | ||||
|  | ||||
|     staging = settings.SCRATCH_DIR / STAGING_FILE_NAME | ||||
|         staging: Path = settings.SCRATCH_DIR / STAGING_FILE_NAME | ||||
|  | ||||
|         valid_staging_exists = False | ||||
|         if staging.exists(): | ||||
|         stats = os.stat(str(staging)) | ||||
|             stats = staging.stat() | ||||
|             # if the file is older than the timeout, we don't consider | ||||
|             # it valid | ||||
|         if dt.datetime.now().timestamp() - stats.st_mtime > TIMEOUT_MINUTES * 60: | ||||
|             if (dt.datetime.now().timestamp() - stats.st_mtime) > TIMEOUT_SECONDS: | ||||
|                 logger.warning("Outdated double sided staging file exists, deleting it") | ||||
|             os.unlink(str(staging)) | ||||
|                 staging.unlink() | ||||
|             else: | ||||
|                 valid_staging_exists = True | ||||
|  | ||||
| @@ -88,23 +104,24 @@ def collate(input_doc: ConsumableDocument) -> str: | ||||
|                     # Merged file has the same path, but without the | ||||
|                     # double-sided subdir. Therefore, it is also in the | ||||
|                     # consumption dir and will be picked up for processing | ||||
|                 old_file = input_doc.original_file | ||||
|                     old_file = self.input_doc.original_file | ||||
|                     new_file = Path( | ||||
|                         *( | ||||
|                             part | ||||
|                             for part in old_file.with_name( | ||||
|                                 f"{old_file.stem}-collated.pdf", | ||||
|                             ).parts | ||||
|                         if part != settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME | ||||
|                             if part | ||||
|                             != settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME | ||||
|                         ), | ||||
|                     ) | ||||
|                     # If the user didn't create the subdirs yet, do it for them | ||||
|                     new_file.parent.mkdir(parents=True, exist_ok=True) | ||||
|                     pdf1.save(new_file) | ||||
|                 logger.info("Collated documents into new file %s", new_file) | ||||
|             return ( | ||||
|                 raise StopConsumeTaskError( | ||||
|                     "Success. Even numbered pages of double sided scan collated " | ||||
|                 "with odd pages" | ||||
|                     "with odd pages", | ||||
|                 ) | ||||
|             finally: | ||||
|                 # Delete staging and recently uploaded file no matter what. | ||||
| @@ -118,12 +135,13 @@ def collate(input_doc: ConsumableDocument) -> str: | ||||
|             shutil.move(pdf_file, staging) | ||||
|             # update access to modification time so we know if the file | ||||
|             # is outdated when another file gets uploaded | ||||
|         os.utime(staging, (dt.datetime.now().timestamp(),) * 2) | ||||
|             timestamp = dt.datetime.now().timestamp() | ||||
|             os.utime(staging, (timestamp, timestamp)) | ||||
|             logger.info( | ||||
|                 "Got scan with odd numbered pages of double-sided scan, moved it to %s", | ||||
|                 staging, | ||||
|             ) | ||||
|         return ( | ||||
|             raise StopConsumeTaskError( | ||||
|                 "Received odd numbered pages of double sided scan, waiting up to " | ||||
|             f"{TIMEOUT_MINUTES} minutes for even numbered pages" | ||||
|                 f"{TIMEOUT_MINUTES} minutes for even numbered pages", | ||||
|             ) | ||||
|   | ||||
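CollatePlugin keeps the existing staging-file logic: the first (odd-pages) upload is parked as double-sided-staging.pdf and only reused if the matching even-pages upload arrives within TIMEOUT_MINUTES. The freshness check reduces to a small predicate, sketched here standalone:

import datetime as dt
from pathlib import Path

TIMEOUT_SECONDS = 30 * 60  # mirrors TIMEOUT_MINUTES above

def staging_is_valid(staging: Path) -> bool:
    """True if a staging file exists and is newer than the collation timeout."""
    if not staging.exists():
        return False
    age = dt.datetime.now().timestamp() - staging.stat().st_mtime
    return age <= TIMEOUT_SECONDS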
src/documents/plugins/__init__.py (new file, 0 lines)
src/documents/plugins/base.py (new file, 131 lines)
							| @@ -0,0 +1,131 @@ | ||||
| import abc | ||||
| from pathlib import Path | ||||
| from typing import Final | ||||
| from typing import Optional | ||||
|  | ||||
| from documents.data_models import ConsumableDocument | ||||
| from documents.data_models import DocumentMetadataOverrides | ||||
| from documents.plugins.helpers import ProgressManager | ||||
|  | ||||
|  | ||||
| class StopConsumeTaskError(Exception): | ||||
|     """ | ||||
|     A plugin setup or run may raise this to exit the asynchronous consume task. | ||||
|  | ||||
|     Most likely, this means it has created one or more new tasks to execute instead, | ||||
|     such as when a barcode has been used to create new documents | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, message: str) -> None: | ||||
|         self.message = message | ||||
|         super().__init__(message) | ||||
|  | ||||
|  | ||||
| class ConsumeTaskPlugin(abc.ABC): | ||||
|     """ | ||||
|     Defines the interface for a plugin for the document consume task | ||||
|     Meanings as per RFC2119 (https://datatracker.ietf.org/doc/html/rfc2119) | ||||
|  | ||||
|     Plugin Implementation | ||||
|  | ||||
|     The plugin SHALL implement property able_to_run and methods setup, run and cleanup. | ||||
|     The plugin property able_to_run SHALL return True if the plugin is able to run, given the conditions, settings and document information. | ||||
|     The plugin property able_to_run MAY be hardcoded to return True. | ||||
|     The plugin setup SHOULD perform any resource creation or additional initialization needed to run the document. | ||||
|     The plugin setup MAY be a non-operation. | ||||
|     The plugin cleanup SHOULD perform resource cleanup, including in the event of an error. | ||||
|     The plugin cleanup MAY be a non-operation. | ||||
|     The plugin run SHALL perform any operations against the document or system state required for the plugin. | ||||
|     The plugin run MAY update the document metadata. | ||||
|     The plugin run MAY return an informational message. | ||||
|     The plugin run MAY raise StopConsumeTaskError to cease any further operations against the document. | ||||
|  | ||||
|     Plugin Manager Implementation | ||||
|  | ||||
|     The plugin manager SHALL provide the plugin with the input document, document metadata, progress manager and a created temporary directory. | ||||
|     The plugin manager SHALL execute the plugin setup, run and cleanup, in that order IF the plugin property able_to_run is True. | ||||
|     The plugin manager SHOULD log the return message of executing a plugin's run. | ||||
|     The plugin manager SHALL always execute the plugin cleanup, IF the plugin property able_to_run is True. | ||||
|     The plugin manager SHALL cease calling plugins and exit the task IF a plugin raises StopConsumeTaskError. | ||||
|     The plugin manager SHOULD return the StopConsumeTaskError message IF a plugin raises StopConsumeTaskError. | ||||
|     """ | ||||
|  | ||||
|     NAME: str = "ConsumeTaskPlugin" | ||||
|  | ||||
|     def __init__( | ||||
|         self, | ||||
|         input_doc: ConsumableDocument, | ||||
|         metadata: DocumentMetadataOverrides, | ||||
|         status_mgr: ProgressManager, | ||||
|         base_tmp_dir: Path, | ||||
|         task_id: str, | ||||
|     ) -> None: | ||||
|         super().__init__() | ||||
|         self.input_doc = input_doc | ||||
|         self.metadata = metadata | ||||
|         self.base_tmp_dir: Final = base_tmp_dir | ||||
|         self.status_mgr = status_mgr | ||||
|         self.task_id: Final = task_id | ||||
|  | ||||
|     @abc.abstractproperty | ||||
|     def able_to_run(self) -> bool: | ||||
|         """ | ||||
|         Return True if the conditions are met for the plugin to run, False otherwise | ||||
|  | ||||
|         If False, setup(), run() and cleanup() will not be called | ||||
|         """ | ||||
|  | ||||
|     @abc.abstractmethod | ||||
|     def setup(self) -> None: | ||||
|         """ | ||||
|         Allows the plugin to perform any additional setup it may need, such as creating | ||||
|         a temporary directory, copying a file somewhere, etc. | ||||
|  | ||||
|         Executed before run() | ||||
|  | ||||
|         In general, this should be the "light" work, not the bulk of processing | ||||
|         """ | ||||
|  | ||||
|     @abc.abstractmethod | ||||
|     def run(self) -> Optional[str]: | ||||
|         """ | ||||
|         The bulk of plugin processing, this does whatever action the plugin is for. | ||||
|  | ||||
|         Executed after setup() and before cleanup() | ||||
|         """ | ||||
|  | ||||
|     @abc.abstractmethod | ||||
|     def cleanup(self) -> None: | ||||
|         """ | ||||
|         Allows the plugin to execute any cleanup it may require | ||||
|  | ||||
|         Executed after run(), even in the case of error | ||||
|         """ | ||||
|  | ||||
|  | ||||
| class AlwaysRunPluginMixin(ConsumeTaskPlugin): | ||||
|     """ | ||||
|     A plugin which is always able to run | ||||
|     """ | ||||
|  | ||||
|     @property | ||||
|     def able_to_run(self) -> bool: | ||||
|         return True | ||||
|  | ||||
|  | ||||
| class NoSetupPluginMixin(ConsumeTaskPlugin): | ||||
|     """ | ||||
|     A plugin which requires no setup | ||||
|     """ | ||||
|  | ||||
|     def setup(self) -> None: | ||||
|         pass | ||||
|  | ||||
|  | ||||
| class NoCleanupPluginMixin(ConsumeTaskPlugin): | ||||
|     """ | ||||
|     A plugin which needs to clean up no files | ||||
|     """ | ||||
|  | ||||
|     def cleanup(self) -> None: | ||||
|         pass | ||||
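Putting the interface together, a plugin that implements all four members directly (rather than via the mixins) follows the same shape as BarcodePlugin above. This is a hypothetical sketch, not a plugin that ships with this commit:

import tempfile
from typing import Optional

from documents.plugins.base import ConsumeTaskPlugin
from documents.plugins.helpers import ProgressStatusOptions

class PdfInspectionPlugin(ConsumeTaskPlugin):
    NAME: str = "PdfInspectionPlugin"

    @property
    def able_to_run(self) -> bool:
        return self.input_doc.mime_type == "application/pdf"

    def setup(self) -> None:
        # Scratch space under the per-task temporary directory the manager provides
        self.temp_dir = tempfile.TemporaryDirectory(dir=self.base_tmp_dir, prefix="inspect")

    def run(self) -> Optional[str]:
        self.status_mgr.send_progress(ProgressStatusOptions.WORKING, "Inspecting", 50, 100)
        return f"Inspected {self.input_doc.original_file.name}"

    def cleanup(self) -> None:
        self.temp_dir.cleanup()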
src/documents/plugins/helpers.py (new file, 82 lines)
							| @@ -0,0 +1,82 @@ | ||||
| import enum | ||||
| from typing import TYPE_CHECKING | ||||
| from typing import Optional | ||||
| from typing import Union | ||||
|  | ||||
| from asgiref.sync import async_to_sync | ||||
| from channels.layers import get_channel_layer | ||||
| from channels_redis.pubsub import RedisPubSubChannelLayer | ||||
|  | ||||
|  | ||||
| class ProgressStatusOptions(str, enum.Enum): | ||||
|     STARTED = "STARTED" | ||||
|     WORKING = "WORKING" | ||||
|     SUCCESS = "SUCCESS" | ||||
|     FAILED = "FAILED" | ||||
|  | ||||
|  | ||||
| class ProgressManager: | ||||
|     """ | ||||
|     Handles sending of progress information via the channel layer, with proper management | ||||
|     of the open/close of the layer to ensure messages go out and everything is cleaned up | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, filename: str, task_id: Optional[str] = None) -> None: | ||||
|         self.filename = filename | ||||
|         self._channel: Optional[RedisPubSubChannelLayer] = None | ||||
|         self.task_id = task_id | ||||
|  | ||||
|     def __enter__(self): | ||||
|         self.open() | ||||
|         return self | ||||
|  | ||||
|     def __exit__(self, exc_type, exc_val, exc_tb): | ||||
|         self.close() | ||||
|  | ||||
|     def open(self) -> None: | ||||
|         """ | ||||
|         If not already opened, gets the default channel layer | ||||
|         opened and ready to send messages | ||||
|         """ | ||||
|         if self._channel is None: | ||||
|             self._channel = get_channel_layer() | ||||
|  | ||||
|     def close(self) -> None: | ||||
|         """ | ||||
|         If it was opened, flushes the channel layer | ||||
|         """ | ||||
|         if self._channel is not None: | ||||
|             async_to_sync(self._channel.flush)() | ||||
|             self._channel = None | ||||
|  | ||||
|     def send_progress( | ||||
|         self, | ||||
|         status: ProgressStatusOptions, | ||||
|         message: str, | ||||
|         current_progress: int, | ||||
|         max_progress: int, | ||||
|         extra_args: Optional[dict[str, Union[str, int]]] = None, | ||||
|     ) -> None: | ||||
|         # Ensure the layer is open | ||||
|         self.open() | ||||
|  | ||||
|         # Just for IDEs | ||||
|         if TYPE_CHECKING: | ||||
|             assert self._channel is not None | ||||
|  | ||||
|         payload = { | ||||
|             "type": "status_update", | ||||
|             "data": { | ||||
|                 "filename": self.filename, | ||||
|                 "task_id": self.task_id, | ||||
|                 "current_progress": current_progress, | ||||
|                 "max_progress": max_progress, | ||||
|                 "status": status, | ||||
|                 "message": message, | ||||
|             }, | ||||
|         } | ||||
|         if extra_args is not None: | ||||
|             payload["data"].update(extra_args) | ||||
|  | ||||
|         # Construct and send the update | ||||
|         async_to_sync(self._channel.group_send)("status_updates", payload) | ||||
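Usage sketch for ProgressManager, assuming a Django Channels layer is configured (the real callers are the plugins and the consume_file task below); the filename and task id here are placeholder values:

from documents.plugins.helpers import ProgressManager
from documents.plugins.helpers import ProgressStatusOptions

with ProgressManager("example.pdf", task_id="example-task-id") as status_mgr:
    status_mgr.send_progress(ProgressStatusOptions.STARTED, "Starting", 0, 100)
    # ... work happens here ...
    status_mgr.send_progress(ProgressStatusOptions.SUCCESS, "Finished", 100, 100)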
| @@ -2,30 +2,30 @@ import hashlib | ||||
| import logging | ||||
| import shutil | ||||
| import uuid | ||||
| from pathlib import Path | ||||
| from tempfile import TemporaryDirectory | ||||
| from typing import Optional | ||||
|  | ||||
| import tqdm | ||||
| from asgiref.sync import async_to_sync | ||||
| from celery import Task | ||||
| from celery import shared_task | ||||
| from channels.layers import get_channel_layer | ||||
| from django.conf import settings | ||||
| from django.db import transaction | ||||
| from django.db.models.signals import post_save | ||||
| from filelock import FileLock | ||||
| from redis.exceptions import ConnectionError | ||||
| from whoosh.writing import AsyncWriter | ||||
|  | ||||
| from documents import index | ||||
| from documents import sanity_checker | ||||
| from documents.barcodes import BarcodeReader | ||||
| from documents.barcodes import BarcodePlugin | ||||
| from documents.classifier import DocumentClassifier | ||||
| from documents.classifier import load_classifier | ||||
| from documents.consumer import Consumer | ||||
| from documents.consumer import ConsumerError | ||||
| from documents.consumer import WorkflowTriggerPlugin | ||||
| from documents.data_models import ConsumableDocument | ||||
| from documents.data_models import DocumentMetadataOverrides | ||||
| from documents.double_sided import collate | ||||
| from documents.double_sided import CollatePlugin | ||||
| from documents.file_handling import create_source_path_directory | ||||
| from documents.file_handling import generate_unique_filename | ||||
| from documents.models import Correspondent | ||||
| @@ -35,6 +35,10 @@ from documents.models import StoragePath | ||||
| from documents.models import Tag | ||||
| from documents.parsers import DocumentParser | ||||
| from documents.parsers import get_parser_class_for_mime_type | ||||
| from documents.plugins.base import ConsumeTaskPlugin | ||||
| from documents.plugins.base import ProgressManager | ||||
| from documents.plugins.base import StopConsumeTaskError | ||||
| from documents.plugins.helpers import ProgressStatusOptions | ||||
| from documents.sanity_checker import SanityCheckFailedException | ||||
| from documents.signals import document_updated | ||||
|  | ||||
| @@ -102,70 +106,60 @@ def consume_file( | ||||
|     input_doc: ConsumableDocument, | ||||
|     overrides: Optional[DocumentMetadataOverrides] = None, | ||||
| ): | ||||
|     def send_progress(status="SUCCESS", message="finished"): | ||||
|         payload = { | ||||
|             "filename": overrides.filename or input_doc.original_file.name, | ||||
|             "task_id": None, | ||||
|             "current_progress": 100, | ||||
|             "max_progress": 100, | ||||
|             "status": status, | ||||
|             "message": message, | ||||
|         } | ||||
|         try: | ||||
|             async_to_sync(get_channel_layer().group_send)( | ||||
|                 "status_updates", | ||||
|                 {"type": "status_update", "data": payload}, | ||||
|             ) | ||||
|         except ConnectionError as e: | ||||
|             logger.warning(f"ConnectionError on status send: {e!s}") | ||||
|  | ||||
|     # Default no overrides | ||||
|     if overrides is None: | ||||
|         overrides = DocumentMetadataOverrides() | ||||
|  | ||||
|     # Handle collation of double-sided documents scanned in two parts | ||||
|     if settings.CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED and ( | ||||
|         settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME | ||||
|         in input_doc.original_file.parts | ||||
|     ): | ||||
|         try: | ||||
|             msg = collate(input_doc) | ||||
|             send_progress(message=msg) | ||||
|             return msg | ||||
|         except ConsumerError as e: | ||||
|             send_progress(status="FAILURE", message=e.args[0]) | ||||
|             raise e | ||||
|     plugins: list[type[ConsumeTaskPlugin]] = [ | ||||
|         CollatePlugin, | ||||
|         BarcodePlugin, | ||||
|         WorkflowTriggerPlugin, | ||||
|     ] | ||||
|  | ||||
|     # read all barcodes in the current document | ||||
|     if settings.CONSUMER_ENABLE_BARCODES or settings.CONSUMER_ENABLE_ASN_BARCODE: | ||||
|         with BarcodeReader(input_doc.original_file, input_doc.mime_type) as reader: | ||||
|             if settings.CONSUMER_ENABLE_BARCODES and reader.separate( | ||||
|                 input_doc.source, | ||||
|     with ProgressManager( | ||||
|         overrides.filename or input_doc.original_file.name, | ||||
|         self.request.id, | ||||
|     ) as status_mgr, TemporaryDirectory(dir=settings.SCRATCH_DIR) as tmp_dir: | ||||
|         tmp_dir = Path(tmp_dir) | ||||
|         for plugin_class in plugins: | ||||
|             plugin_name = plugin_class.NAME | ||||
|  | ||||
|             plugin = plugin_class( | ||||
|                 input_doc, | ||||
|                 overrides, | ||||
|             ): | ||||
|                 # notify the sender, otherwise the progress bar | ||||
|                 # in the UI stays stuck | ||||
|                 send_progress() | ||||
|                 # consuming stops here, since the original document with | ||||
|                 # the barcodes has been split and will be consumed separately | ||||
|                 input_doc.original_file.unlink() | ||||
|                 return "File successfully split" | ||||
|  | ||||
|             # try reading the ASN from barcode | ||||
|             if ( | ||||
|                 settings.CONSUMER_ENABLE_ASN_BARCODE | ||||
|                 and (located_asn := reader.asn) is not None | ||||
|             ): | ||||
|                 # Note this will take precedence over an API provided ASN | ||||
|                 # But it's from a physical barcode, so that's good | ||||
|                 overrides.asn = located_asn | ||||
|                 logger.info(f"Found ASN in barcode: {overrides.asn}") | ||||
|  | ||||
|     template_overrides = Consumer().get_workflow_overrides( | ||||
|         input_doc=input_doc, | ||||
|                 status_mgr, | ||||
|                 tmp_dir, | ||||
|                 self.request.id, | ||||
|             ) | ||||
|  | ||||
|     overrides.update(template_overrides) | ||||
|             if not plugin.able_to_run: | ||||
|                 logger.debug(f"Skipping plugin {plugin_name}") | ||||
|                 continue | ||||
|  | ||||
|             try: | ||||
|                 logger.debug(f"Executing plugin {plugin_name}") | ||||
|                 plugin.setup() | ||||
|  | ||||
|                 msg = plugin.run() | ||||
|  | ||||
|                 if msg is not None: | ||||
|                     logger.info(f"{plugin_name} completed with: {msg}") | ||||
|                 else: | ||||
|                     logger.info(f"{plugin_name} completed with no message") | ||||
|  | ||||
|                 overrides = plugin.metadata | ||||
|  | ||||
|             except StopConsumeTaskError as e: | ||||
|                 logger.info(f"{plugin_name} requested task exit: {e.message}") | ||||
|                 return e.message | ||||
|  | ||||
|             except Exception as e: | ||||
|                 logger.exception(f"{plugin_name} failed: {e}") | ||||
|                 status_mgr.send_progress(ProgressStatusOptions.FAILED, f"{e}", 100, 100) | ||||
|                 raise | ||||
|  | ||||
|             finally: | ||||
|                 plugin.cleanup() | ||||
|  | ||||
|     # continue with consumption if no barcode was found | ||||
|     document = Consumer().try_consume_file( | ||||
|   | ||||
| @@ -1,4 +1,7 @@ | ||||
| import shutil | ||||
| from collections.abc import Generator | ||||
| from contextlib import contextmanager | ||||
| from pathlib import Path | ||||
| from unittest import mock | ||||
|  | ||||
| import pytest | ||||
| @@ -7,14 +10,13 @@ from django.test import TestCase | ||||
| from django.test import override_settings | ||||
|  | ||||
| from documents import tasks | ||||
| from documents.barcodes import BarcodeReader | ||||
| from documents.consumer import ConsumerError | ||||
| from documents.barcodes import BarcodePlugin | ||||
| from documents.data_models import ConsumableDocument | ||||
| from documents.data_models import DocumentMetadataOverrides | ||||
| from documents.data_models import DocumentSource | ||||
| from documents.models import Document | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| from documents.tests.utils import DocumentConsumeDelayMixin | ||||
| from documents.tests.utils import DummyProgressManager | ||||
| from documents.tests.utils import FileSystemAssertsMixin | ||||
| from documents.tests.utils import SampleDirMixin | ||||
|  | ||||
| @@ -26,8 +28,29 @@ except ImportError: | ||||
|     HAS_ZXING_LIB = False | ||||
|  | ||||
|  | ||||
| class GetReaderPluginMixin: | ||||
|     @contextmanager | ||||
|     def get_reader(self, filepath: Path) -> Generator[BarcodePlugin, None, None]: | ||||
|         reader = BarcodePlugin( | ||||
|             ConsumableDocument(DocumentSource.ConsumeFolder, original_file=filepath), | ||||
|             DocumentMetadataOverrides(), | ||||
|             DummyProgressManager(filepath.name, None), | ||||
|             self.dirs.scratch_dir, | ||||
|             "task-id", | ||||
|         ) | ||||
|         reader.setup() | ||||
|         yield reader | ||||
|         reader.cleanup() | ||||
|  | ||||
|  | ||||
| @override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR") | ||||
| class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, TestCase): | ||||
| class TestBarcode( | ||||
|     DirectoriesMixin, | ||||
|     FileSystemAssertsMixin, | ||||
|     SampleDirMixin, | ||||
|     GetReaderPluginMixin, | ||||
|     TestCase, | ||||
| ): | ||||
|     def test_scan_file_for_separating_barcodes(self): | ||||
|         """ | ||||
|         GIVEN: | ||||
| @@ -39,7 +62,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -60,7 +83,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff" | ||||
|  | ||||
|         with BarcodeReader(test_file, "image/tiff") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -80,7 +103,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-alpha.tiff" | ||||
|  | ||||
|         with BarcodeReader(test_file, "image/tiff") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -97,7 +120,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|             - No pages to split on | ||||
|         """ | ||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -115,7 +138,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -133,7 +156,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "several-patcht-codes.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -158,7 +181,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         ]: | ||||
|             test_file = self.BARCODE_SAMPLE_DIR / test_file | ||||
|  | ||||
|             with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|             with self.get_reader(test_file) as reader: | ||||
|                 reader.detect() | ||||
|                 separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -177,7 +200,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-unreadable.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -195,7 +218,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-fax-image.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -214,7 +237,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-qr.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -234,7 +257,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -255,7 +278,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -276,7 +299,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -296,7 +319,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -315,7 +338,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "many-qr-codes.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -334,7 +357,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.SAMPLE_DIR / "password-is-test.pdf" | ||||
|         with self.assertLogs("paperless.barcodes", level="WARNING") as cm: | ||||
|             with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|             with self.get_reader(test_file) as reader: | ||||
|                 reader.detect() | ||||
|                 warning = cm.output[0] | ||||
|                 expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes" | ||||
| @@ -356,7 +379,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             documents = reader.separate_pages({1: False}) | ||||
|  | ||||
|             self.assertEqual(reader.pdf_file, test_file) | ||||
| @@ -373,7 +396,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-double.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             documents = reader.separate_pages({1: False, 2: False}) | ||||
|  | ||||
|             self.assertEqual(len(documents), 2) | ||||
| @@ -385,32 +408,18 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         WHEN: | ||||
|             - No separation pages are provided | ||||
|         THEN: | ||||
|             - No new documents are produced | ||||
|             - A warning is logged | ||||
|             - The plugin reports there are no pages to split on | ||||
|         """ | ||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||
|  | ||||
|         with self.assertLogs("paperless.barcodes", level="WARNING") as cm: | ||||
|             with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|                 self.assertFalse( | ||||
|                     reader.separate( | ||||
|                         DocumentSource.ApiUpload, | ||||
|                         DocumentMetadataOverrides(), | ||||
|                     ), | ||||
|                 ) | ||||
|                 self.assertEqual( | ||||
|                     cm.output, | ||||
|                     [ | ||||
|                         "WARNING:paperless.barcodes:No pages to split on!", | ||||
|                     ], | ||||
|                 ) | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             self.assertEqual("No pages to split on!", reader.run()) | ||||
|  | ||||
|     @override_settings( | ||||
|         CONSUMER_ENABLE_BARCODES=True, | ||||
|         CONSUMER_BARCODE_TIFF_SUPPORT=True, | ||||
|     ) | ||||
|     @mock.patch("documents.consumer.Consumer.try_consume_file") | ||||
|     def test_consume_barcode_unsupported_jpg_file(self, m): | ||||
|     def test_consume_barcode_unsupported_jpg_file(self): | ||||
|         """ | ||||
|         GIVEN: | ||||
|             - JPEG image as input | ||||
| @@ -422,35 +431,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.SAMPLE_DIR / "simple.jpg" | ||||
|  | ||||
|         dst = settings.SCRATCH_DIR / "simple.jpg" | ||||
|         shutil.copy(test_file, dst) | ||||
|  | ||||
|         with self.assertLogs("paperless.barcodes", level="WARNING") as cm: | ||||
|             self.assertIn( | ||||
|                 "Success", | ||||
|                 tasks.consume_file( | ||||
|                     ConsumableDocument( | ||||
|                         source=DocumentSource.ConsumeFolder, | ||||
|                         original_file=dst, | ||||
|                     ), | ||||
|                     None, | ||||
|                 ), | ||||
|             ) | ||||
|  | ||||
|         self.assertListEqual( | ||||
|             cm.output, | ||||
|             [ | ||||
|                 "WARNING:paperless.barcodes:Unsupported file format for barcode reader: image/jpeg", | ||||
|             ], | ||||
|         ) | ||||
|         m.assert_called_once() | ||||
|  | ||||
|         args, kwargs = m.call_args | ||||
|         self.assertIsNone(kwargs["override_filename"]) | ||||
|         self.assertIsNone(kwargs["override_title"]) | ||||
|         self.assertIsNone(kwargs["override_correspondent_id"]) | ||||
|         self.assertIsNone(kwargs["override_document_type_id"]) | ||||
|         self.assertIsNone(kwargs["override_tag_ids"]) | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             self.assertFalse(reader.able_to_run) | ||||
|  | ||||
|     @override_settings( | ||||
|         CONSUMER_ENABLE_BARCODES=True, | ||||
| @@ -467,7 +449,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -504,7 +486,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             separator_page_numbers = reader.get_separation_pages() | ||||
|  | ||||
| @@ -550,7 +532,7 @@ class TestBarcodeNewConsume( | ||||
|  | ||||
|         overrides = DocumentMetadataOverrides(tag_ids=[1, 2, 9]) | ||||
|  | ||||
|         with mock.patch("documents.tasks.async_to_sync") as progress_mocker: | ||||
|         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||
|             self.assertEqual( | ||||
|                 tasks.consume_file( | ||||
|                     ConsumableDocument( | ||||
| @@ -559,10 +541,8 @@ class TestBarcodeNewConsume( | ||||
|                     ), | ||||
|                     overrides, | ||||
|                 ), | ||||
|                 "File successfully split", | ||||
|                 "Barcode splitting complete!", | ||||
|             ) | ||||
|             # We let the consumer know progress is done | ||||
|             progress_mocker.assert_called_once() | ||||
|             # 2 new document consume tasks created | ||||
|             self.assertEqual(self.consume_file_mock.call_count, 2) | ||||
|  | ||||
| @@ -580,7 +560,20 @@ class TestBarcodeNewConsume( | ||||
|                 self.assertEqual(overrides, new_doc_overrides) | ||||
|  | ||||
|  | ||||
| class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | ||||
| class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, GetReaderPluginMixin, TestCase): | ||||
|     @contextmanager | ||||
|     def get_reader(self, filepath: Path) -> BarcodePlugin: | ||||
|         reader = BarcodePlugin( | ||||
|             ConsumableDocument(DocumentSource.ConsumeFolder, original_file=filepath), | ||||
|             DocumentMetadataOverrides(), | ||||
|             DummyProgressManager(filepath.name, None), | ||||
|             self.dirs.scratch_dir, | ||||
|             "task-id", | ||||
|         ) | ||||
|         reader.setup() | ||||
|         yield reader | ||||
|         reader.cleanup() | ||||
|  | ||||
|     @override_settings(CONSUMER_ASN_BARCODE_PREFIX="CUSTOM-PREFIX-") | ||||
|     def test_scan_file_for_asn_custom_prefix(self): | ||||
|         """ | ||||
| @@ -594,7 +587,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | ||||
|             - The ASN integer value is correct | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf" | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             asn = reader.asn | ||||
|  | ||||
|             self.assertEqual(reader.pdf_file, test_file) | ||||
| @@ -613,7 +606,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             asn = reader.asn | ||||
|  | ||||
|             self.assertEqual(reader.pdf_file, test_file) | ||||
| @@ -630,55 +623,12 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             asn = reader.asn | ||||
|  | ||||
|             self.assertEqual(reader.pdf_file, test_file) | ||||
|             self.assertEqual(asn, None) | ||||
|  | ||||
|     @override_settings(CONSUMER_ENABLE_ASN_BARCODE=True) | ||||
|     def test_scan_file_for_asn_already_exists(self): | ||||
|         """ | ||||
|         GIVEN: | ||||
|             - PDF with an ASN barcode | ||||
|             - ASN value already exists | ||||
|         WHEN: | ||||
|             - File is scanned for barcodes | ||||
|         THEN: | ||||
|             - ASN is retrieved from the document | ||||
|             - Consumption fails | ||||
|         """ | ||||
|  | ||||
|         Document.objects.create( | ||||
|             title="WOW", | ||||
|             content="the content", | ||||
|             archive_serial_number=123, | ||||
|             checksum="456", | ||||
|             mime_type="application/pdf", | ||||
|         ) | ||||
|  | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf" | ||||
|  | ||||
|         dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf" | ||||
|         shutil.copy(test_file, dst) | ||||
|  | ||||
|         with mock.patch("documents.consumer.Consumer._send_progress"): | ||||
|             with self.assertRaises(ConsumerError) as cm, self.assertLogs( | ||||
|                 "paperless.consumer", | ||||
|                 level="ERROR", | ||||
|             ) as logs_cm: | ||||
|                 tasks.consume_file( | ||||
|                     ConsumableDocument( | ||||
|                         source=DocumentSource.ConsumeFolder, | ||||
|                         original_file=dst, | ||||
|                     ), | ||||
|                     None, | ||||
|                 ) | ||||
|             self.assertIn("Not consuming barcode-39-asn-123.pdf", str(cm.exception)) | ||||
|             error_str = logs_cm.output[0] | ||||
|             expected_str = "ERROR:paperless.consumer:Not consuming barcode-39-asn-123.pdf: Given ASN already exists!" | ||||
|             self.assertEqual(expected_str, error_str) | ||||
|  | ||||
|     def test_scan_file_for_asn_barcode_invalid(self): | ||||
|         """ | ||||
|         GIVEN: | ||||
| @@ -692,7 +642,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | ||||
|         """ | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             asn = reader.asn | ||||
|  | ||||
|             self.assertEqual(reader.pdf_file, test_file) | ||||
| @@ -718,7 +668,9 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | ||||
|         dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf" | ||||
|         shutil.copy(test_file, dst) | ||||
|  | ||||
|         with mock.patch("documents.consumer.Consumer.try_consume_file") as mocked_call: | ||||
|         with mock.patch( | ||||
|             "documents.consumer.Consumer.try_consume_file", | ||||
|         ) as mocked_consumer: | ||||
|             tasks.consume_file( | ||||
|                 ConsumableDocument( | ||||
|                     source=DocumentSource.ConsumeFolder, | ||||
| @@ -726,40 +678,11 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | ||||
|                 ), | ||||
|                 None, | ||||
|             ) | ||||
|  | ||||
|             args, kwargs = mocked_call.call_args | ||||
|             mocked_consumer.assert_called_once() | ||||
|             args, kwargs = mocked_consumer.call_args | ||||
|  | ||||
|             self.assertEqual(kwargs["override_asn"], 123) | ||||
|  | ||||
|     @override_settings(CONSUMER_ENABLE_ASN_BARCODE=True) | ||||
|     def test_asn_too_large(self): | ||||
|         """ | ||||
|         GIVEN: | ||||
|             - ASN from barcode enabled | ||||
|             - Barcode contains too large an ASN value | ||||
|         WHEN: | ||||
|             - ASN from barcode checked for correctness | ||||
|         THEN: | ||||
|             - Exception is raised regarding size limits | ||||
|         """ | ||||
|         src = self.BARCODE_SAMPLE_DIR / "barcode-128-asn-too-large.pdf" | ||||
|  | ||||
|         dst = self.dirs.scratch_dir / "barcode-128-asn-too-large.pdf" | ||||
|         shutil.copy(src, dst) | ||||
|  | ||||
|         input_doc = ConsumableDocument( | ||||
|             source=DocumentSource.ConsumeFolder, | ||||
|             original_file=dst, | ||||
|         ) | ||||
|  | ||||
|         with mock.patch("documents.consumer.Consumer._send_progress"): | ||||
|             self.assertRaisesMessage( | ||||
|                 ConsumerError, | ||||
|                 "Given ASN 4294967296 is out of range [0, 4,294,967,295]", | ||||
|                 tasks.consume_file, | ||||
|                 input_doc, | ||||
|             ) | ||||
|  | ||||
|     @override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR") | ||||
|     def test_scan_file_for_qrcode_without_upscale(self): | ||||
|         """ | ||||
| @@ -774,7 +697,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | ||||
|  | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             self.assertEqual(len(reader.barcodes), 0) | ||||
|  | ||||
| @@ -796,7 +719,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | ||||
|  | ||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf" | ||||
|  | ||||
|         with BarcodeReader(test_file, "application/pdf") as reader: | ||||
|         with self.get_reader(test_file) as reader: | ||||
|             reader.detect() | ||||
|             self.assertEqual(len(reader.barcodes), 1) | ||||
|             self.assertEqual(reader.asn, 123) | ||||
|   | ||||
| @@ -17,6 +17,7 @@ from documents.data_models import DocumentSource | ||||
| from documents.double_sided import STAGING_FILE_NAME | ||||
| from documents.double_sided import TIMEOUT_MINUTES | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| from documents.tests.utils import DummyProgressManager | ||||
| from documents.tests.utils import FileSystemAssertsMixin | ||||
|  | ||||
|  | ||||
| @@ -42,9 +43,10 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | ||||
|         dst = self.dirs.double_sided_dir / dstname | ||||
|         dst.parent.mkdir(parents=True, exist_ok=True) | ||||
|         shutil.copy(src, dst) | ||||
|         with mock.patch("documents.tasks.async_to_sync"), mock.patch( | ||||
|             "documents.consumer.async_to_sync", | ||||
|         ): | ||||
|         with mock.patch( | ||||
|             "documents.tasks.ProgressManager", | ||||
|             DummyProgressManager, | ||||
|         ), mock.patch("documents.consumer.async_to_sync"): | ||||
|             msg = tasks.consume_file( | ||||
|                 ConsumableDocument( | ||||
|                     source=DocumentSource.ConsumeFolder, | ||||
| @@ -211,7 +213,7 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | ||||
|         """ | ||||
|         msg = self.consume_file("simple.pdf", Path("..") / "simple.pdf") | ||||
|         self.assertIsNotFile(self.staging_file) | ||||
|         self.assertRegex(msg, "Success. New document .* created") | ||||
|         self.assertRegex(msg, r"Success. New document id \d+ created") | ||||
|  | ||||
|     def test_subdirectory_upload(self): | ||||
|         """ | ||||
| @@ -250,4 +252,4 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | ||||
|         """ | ||||
|         msg = self.consume_file("simple.pdf") | ||||
|         self.assertIsNotFile(self.staging_file) | ||||
|         self.assertRegex(msg, "Success. New document .* created") | ||||
|         self.assertRegex(msg, r"Success. New document id \d+ created") | ||||
|   | ||||
| @@ -24,6 +24,7 @@ from documents.models import WorkflowAction | ||||
| from documents.models import WorkflowTrigger | ||||
| from documents.signals import document_consumption_finished | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
| from documents.tests.utils import DummyProgressManager | ||||
| from documents.tests.utils import FileSystemAssertsMixin | ||||
| from paperless_mail.models import MailAccount | ||||
| from paperless_mail.models import MailRule | ||||
| @@ -126,7 +127,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | ||||
|  | ||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||
|  | ||||
|         with mock.patch("documents.tasks.async_to_sync"): | ||||
|         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||
|             with self.assertLogs("paperless.matching", level="INFO") as cm: | ||||
|                 tasks.consume_file( | ||||
|                     ConsumableDocument( | ||||
| @@ -203,7 +204,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | ||||
|         w.save() | ||||
|  | ||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||
|         with mock.patch("documents.tasks.async_to_sync"): | ||||
|         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||
|             with self.assertLogs("paperless.matching", level="INFO") as cm: | ||||
|                 tasks.consume_file( | ||||
|                     ConsumableDocument( | ||||
| @@ -294,7 +295,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | ||||
|  | ||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||
|  | ||||
|         with mock.patch("documents.tasks.async_to_sync"): | ||||
|         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||
|             with self.assertLogs("paperless.matching", level="INFO") as cm: | ||||
|                 tasks.consume_file( | ||||
|                     ConsumableDocument( | ||||
| @@ -356,7 +357,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | ||||
|  | ||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||
|  | ||||
|         with mock.patch("documents.tasks.async_to_sync"): | ||||
|         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||
|             with self.assertLogs("paperless.matching", level="DEBUG") as cm: | ||||
|                 tasks.consume_file( | ||||
|                     ConsumableDocument( | ||||
| @@ -407,7 +408,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | ||||
|  | ||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||
|  | ||||
|         with mock.patch("documents.tasks.async_to_sync"): | ||||
|         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||
|             with self.assertLogs("paperless.matching", level="DEBUG") as cm: | ||||
|                 tasks.consume_file( | ||||
|                     ConsumableDocument( | ||||
| @@ -468,7 +469,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | ||||
|  | ||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||
|  | ||||
|         with mock.patch("documents.tasks.async_to_sync"): | ||||
|         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||
|             with self.assertLogs("paperless.matching", level="DEBUG") as cm: | ||||
|                 tasks.consume_file( | ||||
|                     ConsumableDocument( | ||||
| @@ -529,7 +530,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | ||||
|  | ||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||
|  | ||||
|         with mock.patch("documents.tasks.async_to_sync"): | ||||
|         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||
|             with self.assertLogs("paperless.matching", level="DEBUG") as cm: | ||||
|                 tasks.consume_file( | ||||
|                     ConsumableDocument( | ||||
| @@ -591,7 +592,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | ||||
|  | ||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||
|  | ||||
|         with mock.patch("documents.tasks.async_to_sync"): | ||||
|         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||
|             with self.assertLogs("paperless.matching", level="DEBUG") as cm: | ||||
|                 tasks.consume_file( | ||||
|                     ConsumableDocument( | ||||
| @@ -686,7 +687,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | ||||
|  | ||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||
|  | ||||
|         with mock.patch("documents.tasks.async_to_sync"): | ||||
|         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||
|             with self.assertLogs("paperless.matching", level="INFO") as cm: | ||||
|                 tasks.consume_file( | ||||
|                     ConsumableDocument( | ||||
|   | ||||
| @@ -9,6 +9,7 @@ from os import PathLike | ||||
| from pathlib import Path | ||||
| from typing import Any | ||||
| from typing import Callable | ||||
| from typing import Optional | ||||
| from typing import Union | ||||
| from unittest import mock | ||||
|  | ||||
| @@ -23,6 +24,7 @@ from django.test import override_settings | ||||
| from documents.data_models import ConsumableDocument | ||||
| from documents.data_models import DocumentMetadataOverrides | ||||
| from documents.parsers import ParseError | ||||
| from documents.plugins.helpers import ProgressStatusOptions | ||||
|  | ||||
|  | ||||
| def setup_directories(): | ||||
| @@ -146,6 +148,11 @@ def util_call_with_backoff( | ||||
|  | ||||
|  | ||||
| class DirectoriesMixin: | ||||
|     """ | ||||
|     Creates and overrides settings for all folders and paths, then ensures | ||||
|     they are cleaned up on exit | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         super().__init__(*args, **kwargs) | ||||
|         self.dirs = None | ||||
| @@ -160,6 +167,10 @@ class DirectoriesMixin: | ||||
|  | ||||
|  | ||||
| class FileSystemAssertsMixin: | ||||
|     """ | ||||
|     Utilities for checking various aspects of file system state | ||||
|     """ | ||||
|  | ||||
|     def assertIsFile(self, path: Union[PathLike, str]): | ||||
|         self.assertTrue(Path(path).resolve().is_file(), f"File does not exist: {path}") | ||||
|  | ||||
| @@ -188,6 +199,11 @@ class FileSystemAssertsMixin: | ||||
|  | ||||
|  | ||||
| class ConsumerProgressMixin: | ||||
|     """ | ||||
|     Mocks the Consumer _send_progress, preventing attempts to connect to Redis | ||||
|     and allowing access to its calls for verification | ||||
|     """ | ||||
|  | ||||
|     def setUp(self) -> None: | ||||
|         self.send_progress_patcher = mock.patch( | ||||
|             "documents.consumer.Consumer._send_progress", | ||||
| @@ -310,3 +326,59 @@ class SampleDirMixin: | ||||
|     SAMPLE_DIR = Path(__file__).parent / "samples" | ||||
|  | ||||
|     BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes" | ||||
|  | ||||
|  | ||||
| class DummyProgressManager: | ||||
|     """ | ||||
|     A dummy handler for progress management that doesn't actually try to | ||||
|     connect to Redis.  Payloads are stored for test assertions if needed. | ||||
|  | ||||
|     Use it with | ||||
|       mock.patch("documents.tasks.ProgressManager", DummyProgressManager) | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, filename: str, task_id: Optional[str] = None) -> None: | ||||
|         self.filename = filename | ||||
|         self.task_id = task_id | ||||
|         print("hello world") | ||||
|         self.payloads = [] | ||||
|  | ||||
|     def __enter__(self): | ||||
|         self.open() | ||||
|         return self | ||||
|  | ||||
|     def __exit__(self, exc_type, exc_val, exc_tb): | ||||
|         self.close() | ||||
|  | ||||
|     def open(self) -> None: | ||||
|         pass | ||||
|  | ||||
|     def close(self) -> None: | ||||
|         pass | ||||
|  | ||||
|     def send_progress( | ||||
|         self, | ||||
|         status: ProgressStatusOptions, | ||||
|         message: str, | ||||
|         current_progress: int, | ||||
|         max_progress: int, | ||||
|         extra_args: Optional[dict[str, Union[str, int]]] = None, | ||||
|     ) -> None: | ||||
|         # Ensure the layer is open | ||||
|         self.open() | ||||
|  | ||||
|         payload = { | ||||
|             "type": "status_update", | ||||
|             "data": { | ||||
|                 "filename": self.filename, | ||||
|                 "task_id": self.task_id, | ||||
|                 "current_progress": current_progress, | ||||
|                 "max_progress": max_progress, | ||||
|                 "status": status, | ||||
|                 "message": message, | ||||
|             }, | ||||
|         } | ||||
|         if extra_args is not None: | ||||
|             payload["data"].update(extra_args) | ||||
|  | ||||
|         self.payloads.append(payload) | ||||
|   | ||||
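For context, the new DummyProgressManager is meant to be dropped in via mock.patch wherever a test previously mocked documents.tasks.async_to_sync, and it combines naturally with the existing Consumer.try_consume_file mock when a test only needs to verify that consumption was dispatched. The following is a minimal sketch of that pattern; the test class name and the choice of sample file are illustrative assumptions, not part of this change.

# Minimal sketch of the patching pattern used in these tests; the class name
# and sample file below are illustrative assumptions, not part of this change.
import shutil
from unittest import mock

from django.conf import settings
from django.test import TestCase

from documents import tasks
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentSource
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import DummyProgressManager
from documents.tests.utils import SampleDirMixin


class ExampleConsumeTest(DirectoriesMixin, SampleDirMixin, TestCase):
    def test_consume_dispatches_to_consumer(self):
        # Work on a copy so the sample file is not consumed in place
        dst = settings.SCRATCH_DIR / "simple.pdf"
        shutil.copy(self.SAMPLE_DIR / "simple.pdf", dst)

        # DummyProgressManager avoids any Redis connection; the Consumer is
        # mocked so no real OCR or consumption work happens in the test.
        with mock.patch(
            "documents.tasks.ProgressManager",
            DummyProgressManager,
        ), mock.patch(
            "documents.consumer.Consumer.try_consume_file",
        ) as mocked_consumer:
            tasks.consume_file(
                ConsumableDocument(
                    source=DocumentSource.ConsumeFolder,
                    original_file=dst,
                ),
                None,
            )

        mocked_consumer.assert_called_once()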