Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-10-30 03:56:23 -05:00)
			
		
		
		
	Refactor file consumption task to allow beginnings of a plugin system (#5367)
This commit is contained in:
		| @@ -3,7 +3,6 @@ import re | |||||||
| import tempfile | import tempfile | ||||||
| from dataclasses import dataclass | from dataclasses import dataclass | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
| from typing import Final |  | ||||||
| from typing import Optional | from typing import Optional | ||||||
|  |  | ||||||
| from django.conf import settings | from django.conf import settings | ||||||
| @@ -15,8 +14,9 @@ from PIL import Image | |||||||
|  |  | ||||||
| from documents.converters import convert_from_tiff_to_pdf | from documents.converters import convert_from_tiff_to_pdf | ||||||
| from documents.data_models import ConsumableDocument | from documents.data_models import ConsumableDocument | ||||||
| from documents.data_models import DocumentMetadataOverrides | from documents.plugins.base import ConsumeTaskPlugin | ||||||
| from documents.data_models import DocumentSource | from documents.plugins.base import StopConsumeTaskError | ||||||
|  | from documents.plugins.helpers import ProgressStatusOptions | ||||||
| from documents.utils import copy_basic_file_stats | from documents.utils import copy_basic_file_stats | ||||||
| from documents.utils import copy_file_with_basic_stats | from documents.utils import copy_file_with_basic_stats | ||||||
|  |  | ||||||
| @@ -26,7 +26,7 @@ logger = logging.getLogger("paperless.barcodes") | |||||||
| @dataclass(frozen=True) | @dataclass(frozen=True) | ||||||
| class Barcode: | class Barcode: | ||||||
|     """ |     """ | ||||||
|     Holds the information about a single barcode and its location |     Holds the information about a single barcode and its location in a document | ||||||
|     """ |     """ | ||||||
|  |  | ||||||
|     page: int |     page: int | ||||||
| @@ -49,77 +49,111 @@ class Barcode: | |||||||
|         return self.value.startswith(settings.CONSUMER_ASN_BARCODE_PREFIX) |         return self.value.startswith(settings.CONSUMER_ASN_BARCODE_PREFIX) | ||||||
|  |  | ||||||
|  |  | ||||||
| class BarcodeReader: | class BarcodePlugin(ConsumeTaskPlugin): | ||||||
|     def __init__(self, filepath: Path, mime_type: str) -> None: |     NAME: str = "BarcodePlugin" | ||||||
|         self.file: Final[Path] = filepath |  | ||||||
|         self.mime: Final[str] = mime_type |  | ||||||
|         self.pdf_file: Path = self.file |  | ||||||
|         self.barcodes: list[Barcode] = [] |  | ||||||
|         self._tiff_conversion_done = False |  | ||||||
|         self.temp_dir: Optional[tempfile.TemporaryDirectory] = None |  | ||||||
|  |  | ||||||
|  |     @property | ||||||
|  |     def able_to_run(self) -> bool: | ||||||
|  |         """ | ||||||
|  |         Able to run if: | ||||||
|  |           - ASN from barcode detection is enabled or | ||||||
|  |           - Barcode support is enabled and the mime type is supported | ||||||
|  |         """ | ||||||
|         if settings.CONSUMER_BARCODE_TIFF_SUPPORT: |         if settings.CONSUMER_BARCODE_TIFF_SUPPORT: | ||||||
|             self.SUPPORTED_FILE_MIMES = {"application/pdf", "image/tiff"} |             supported_mimes = {"application/pdf", "image/tiff"} | ||||||
|         else: |         else: | ||||||
|             self.SUPPORTED_FILE_MIMES = {"application/pdf"} |             supported_mimes = {"application/pdf"} | ||||||
|  |  | ||||||
|     def __enter__(self): |         return ( | ||||||
|         if self.supported_mime_type: |             settings.CONSUMER_ENABLE_ASN_BARCODE or settings.CONSUMER_ENABLE_BARCODES | ||||||
|             self.temp_dir = tempfile.TemporaryDirectory(prefix="paperless-barcodes") |         ) and self.input_doc.mime_type in supported_mimes | ||||||
|         return self |  | ||||||
|  |  | ||||||
|     def __exit__(self, exc_type, exc_val, exc_tb): |     def setup(self): | ||||||
|         if self.temp_dir is not None: |         self.temp_dir = tempfile.TemporaryDirectory( | ||||||
|             self.temp_dir.cleanup() |             dir=self.base_tmp_dir, | ||||||
|             self.temp_dir = None |             prefix="barcode", | ||||||
|  |         ) | ||||||
|  |         self.pdf_file = self.input_doc.original_file | ||||||
|  |         self._tiff_conversion_done = False | ||||||
|  |         self.barcodes: list[Barcode] = [] | ||||||
|  |  | ||||||
|     @property |     def run(self) -> Optional[str]: | ||||||
|     def supported_mime_type(self) -> bool: |         # Maybe do the conversion of TIFF to PDF | ||||||
|         """ |         self.convert_from_tiff_to_pdf() | ||||||
|         Return True if the given mime type is supported for barcodes, false otherwise |  | ||||||
|         """ |  | ||||||
|         return self.mime in self.SUPPORTED_FILE_MIMES |  | ||||||
|  |  | ||||||
|     @property |         # Locate any barcodes in the files | ||||||
|     def asn(self) -> Optional[int]: |  | ||||||
|         """ |  | ||||||
|         Search the parsed barcodes for any ASNs. |  | ||||||
|         The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX |  | ||||||
|         is considered the ASN to be used. |  | ||||||
|         Returns the detected ASN (or None) |  | ||||||
|         """ |  | ||||||
|         asn = None |  | ||||||
|  |  | ||||||
|         if not self.supported_mime_type: |  | ||||||
|             return None |  | ||||||
|  |  | ||||||
|         # Ensure the barcodes have been read |  | ||||||
|         self.detect() |         self.detect() | ||||||
|  |  | ||||||
|         # get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX |         # Update/overwrite an ASN if possible | ||||||
|         asn_text = next( |         located_asn = self.asn | ||||||
|             (x.value for x in self.barcodes if x.is_asn), |         if located_asn is not None: | ||||||
|             None, |             logger.info(f"Found ASN in barcode: {located_asn}") | ||||||
|  |             self.metadata.asn = located_asn | ||||||
|  |  | ||||||
|  |         separator_pages = self.get_separation_pages() | ||||||
|  |         if not separator_pages: | ||||||
|  |             return "No pages to split on!" | ||||||
|  |  | ||||||
|  |         # We have pages to split against | ||||||
|  |  | ||||||
|  |         # Note this does NOT use the base_tmp_dir, as that will be removed | ||||||
|  |         tmp_dir = Path( | ||||||
|  |             tempfile.mkdtemp( | ||||||
|  |                 dir=settings.SCRATCH_DIR, | ||||||
|  |                 prefix="paperless-barcode-split-", | ||||||
|  |             ), | ||||||
|  |         ).resolve() | ||||||
|  |  | ||||||
|  |         from documents import tasks | ||||||
|  |  | ||||||
|  |         # Create the split document tasks | ||||||
|  |         for new_document in self.separate_pages(separator_pages): | ||||||
|  |             copy_file_with_basic_stats(new_document, tmp_dir / new_document.name) | ||||||
|  |  | ||||||
|  |             task = tasks.consume_file.delay( | ||||||
|  |                 ConsumableDocument( | ||||||
|  |                     # Same source, for templates | ||||||
|  |                     source=self.input_doc.source, | ||||||
|  |                     mailrule_id=self.input_doc.mailrule_id, | ||||||
|  |                     # Can't use same folder or the consume might grab it again | ||||||
|  |                     original_file=(tmp_dir / new_document.name).resolve(), | ||||||
|  |                 ), | ||||||
|  |                 # All the same metadata | ||||||
|  |                 self.metadata, | ||||||
|             ) |             ) | ||||||
|  |             logger.info(f"Created new task {task.id} for {new_document.name}") | ||||||
|  |  | ||||||
|         if asn_text: |         # This file is now two or more files | ||||||
|             logger.debug(f"Found ASN Barcode: {asn_text}") |         self.input_doc.original_file.unlink() | ||||||
|             # remove the prefix and remove whitespace |  | ||||||
|             asn_text = asn_text[len(settings.CONSUMER_ASN_BARCODE_PREFIX) :].strip() |  | ||||||
|  |  | ||||||
|             # remove non-numeric parts of the remaining string |         msg = "Barcode splitting complete!" | ||||||
|             asn_text = re.sub(r"\D", "", asn_text) |  | ||||||
|  |  | ||||||
|             # now, try parsing the ASN number |         # Update the progress to complete | ||||||
|             try: |         self.status_mgr.send_progress(ProgressStatusOptions.SUCCESS, msg, 100, 100) | ||||||
|                 asn = int(asn_text) |  | ||||||
|             except ValueError as e: |  | ||||||
|                 logger.warning(f"Failed to parse ASN number because: {e}") |  | ||||||
|  |  | ||||||
|         return asn |         # Request the consume task stops | ||||||
|  |         raise StopConsumeTaskError(msg) | ||||||
|  |  | ||||||
|  |     def cleanup(self) -> None: | ||||||
|  |         self.temp_dir.cleanup() | ||||||
|  |  | ||||||
|  |     def convert_from_tiff_to_pdf(self): | ||||||
|  |         """ | ||||||
|  |         May convert a TIFF image into a PDF, if the input is a TIFF and | ||||||
|  |         the TIFF has not been made into a PDF | ||||||
|  |         """ | ||||||
|  |         # Nothing to do, pdf_file is already assigned correctly | ||||||
|  |         if self.input_doc.mime_type != "image/tiff" or self._tiff_conversion_done: | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         self.pdf_file = convert_from_tiff_to_pdf( | ||||||
|  |             self.input_doc.original_file, | ||||||
|  |             Path(self.temp_dir.name), | ||||||
|  |         ) | ||||||
|  |         self._tiff_conversion_done = True | ||||||
|  |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def read_barcodes_zxing(image: Image) -> list[str]: |     def read_barcodes_zxing(image: Image.Image) -> list[str]: | ||||||
|         barcodes = [] |         barcodes = [] | ||||||
|  |  | ||||||
|         import zxingcpp |         import zxingcpp | ||||||
| @@ -135,7 +169,7 @@ class BarcodeReader: | |||||||
|         return barcodes |         return barcodes | ||||||
|  |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def read_barcodes_pyzbar(image: Image) -> list[str]: |     def read_barcodes_pyzbar(image: Image.Image) -> list[str]: | ||||||
|         barcodes = [] |         barcodes = [] | ||||||
|  |  | ||||||
|         from pyzbar import pyzbar |         from pyzbar import pyzbar | ||||||
| @@ -154,18 +188,6 @@ class BarcodeReader: | |||||||
|  |  | ||||||
|         return barcodes |         return barcodes | ||||||
|  |  | ||||||
|     def convert_from_tiff_to_pdf(self): |  | ||||||
|         """ |  | ||||||
|         May convert a TIFF image into a PDF, if the input is a TIFF and |  | ||||||
|         the TIFF has not been made into a PDF |  | ||||||
|         """ |  | ||||||
|         # Nothing to do, pdf_file is already assigned correctly |  | ||||||
|         if self.mime != "image/tiff" or self._tiff_conversion_done: |  | ||||||
|             return |  | ||||||
|  |  | ||||||
|         self._tiff_conversion_done = True |  | ||||||
|         self.pdf_file = convert_from_tiff_to_pdf(self.file, Path(self.temp_dir.name)) |  | ||||||
|  |  | ||||||
|     def detect(self) -> None: |     def detect(self) -> None: | ||||||
|         """ |         """ | ||||||
|         Scan all pages of the PDF as images, updating barcodes and the pages |         Scan all pages of the PDF as images, updating barcodes and the pages | ||||||
| @@ -218,10 +240,45 @@ class BarcodeReader: | |||||||
|         # This file is really borked, allow the consumption to continue |         # This file is really borked, allow the consumption to continue | ||||||
|         # but it may fail further on |         # but it may fail further on | ||||||
|         except Exception as e:  # pragma: no cover |         except Exception as e:  # pragma: no cover | ||||||
|             logger.exception( |             logger.warning( | ||||||
|                 f"Exception during barcode scanning: {e}", |                 f"Exception during barcode scanning: {e}", | ||||||
|             ) |             ) | ||||||
|  |  | ||||||
|  |     @property | ||||||
|  |     def asn(self) -> Optional[int]: | ||||||
|  |         """ | ||||||
|  |         Search the parsed barcodes for any ASNs. | ||||||
|  |         The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX | ||||||
|  |         is considered the ASN to be used. | ||||||
|  |         Returns the detected ASN (or None) | ||||||
|  |         """ | ||||||
|  |         asn = None | ||||||
|  |  | ||||||
|  |         # Ensure the barcodes have been read | ||||||
|  |         self.detect() | ||||||
|  |  | ||||||
|  |         # get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX | ||||||
|  |         asn_text = next( | ||||||
|  |             (x.value for x in self.barcodes if x.is_asn), | ||||||
|  |             None, | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |         if asn_text: | ||||||
|  |             logger.debug(f"Found ASN Barcode: {asn_text}") | ||||||
|  |             # remove the prefix and remove whitespace | ||||||
|  |             asn_text = asn_text[len(settings.CONSUMER_ASN_BARCODE_PREFIX) :].strip() | ||||||
|  |  | ||||||
|  |             # remove non-numeric parts of the remaining string | ||||||
|  |             asn_text = re.sub(r"\D", "", asn_text) | ||||||
|  |  | ||||||
|  |             # now, try parsing the ASN number | ||||||
|  |             try: | ||||||
|  |                 asn = int(asn_text) | ||||||
|  |             except ValueError as e: | ||||||
|  |                 logger.warning(f"Failed to parse ASN number because: {e}") | ||||||
|  |  | ||||||
|  |         return asn | ||||||
|  |  | ||||||
|     def get_separation_pages(self) -> dict[int, bool]: |     def get_separation_pages(self) -> dict[int, bool]: | ||||||
|         """ |         """ | ||||||
|         Search the parsed barcodes for separators and returns a dict of page |         Search the parsed barcodes for separators and returns a dict of page | ||||||
| @@ -251,7 +308,7 @@ class BarcodeReader: | |||||||
|         """ |         """ | ||||||
|  |  | ||||||
|         document_paths = [] |         document_paths = [] | ||||||
|         fname = self.file.stem |         fname = self.input_doc.original_file.stem | ||||||
|         with Pdf.open(self.pdf_file) as input_pdf: |         with Pdf.open(self.pdf_file) as input_pdf: | ||||||
|             # Start with an empty document |             # Start with an empty document | ||||||
|             current_document: list[Page] = [] |             current_document: list[Page] = [] | ||||||
| @@ -292,58 +349,8 @@ class BarcodeReader: | |||||||
|                 with open(savepath, "wb") as out: |                 with open(savepath, "wb") as out: | ||||||
|                     dst.save(out) |                     dst.save(out) | ||||||
|  |  | ||||||
|                 copy_basic_file_stats(self.file, savepath) |                 copy_basic_file_stats(self.input_doc.original_file, savepath) | ||||||
|  |  | ||||||
|                 document_paths.append(savepath) |                 document_paths.append(savepath) | ||||||
|  |  | ||||||
|             return document_paths |             return document_paths | ||||||
|  |  | ||||||
|     def separate( |  | ||||||
|         self, |  | ||||||
|         source: DocumentSource, |  | ||||||
|         overrides: DocumentMetadataOverrides, |  | ||||||
|     ) -> bool: |  | ||||||
|         """ |  | ||||||
|         Separates the document, based on barcodes and configuration, creating new |  | ||||||
|         documents as required in the appropriate location. |  | ||||||
|  |  | ||||||
|         Returns True if a split happened, False otherwise |  | ||||||
|         """ |  | ||||||
|         # Do nothing |  | ||||||
|         if not self.supported_mime_type: |  | ||||||
|             logger.warning(f"Unsupported file format for barcode reader: {self.mime}") |  | ||||||
|             return False |  | ||||||
|  |  | ||||||
|         # Does nothing unless needed |  | ||||||
|         self.convert_from_tiff_to_pdf() |  | ||||||
|  |  | ||||||
|         # Actually read the codes, if any |  | ||||||
|         self.detect() |  | ||||||
|  |  | ||||||
|         separator_pages = self.get_separation_pages() |  | ||||||
|  |  | ||||||
|         # Also do nothing |  | ||||||
|         if not separator_pages: |  | ||||||
|             logger.warning("No pages to split on!") |  | ||||||
|             return False |  | ||||||
|  |  | ||||||
|         tmp_dir = Path(tempfile.mkdtemp(prefix="paperless-barcode-split-")).resolve() |  | ||||||
|  |  | ||||||
|         from documents import tasks |  | ||||||
|  |  | ||||||
|         # Create the split document tasks |  | ||||||
|         for new_document in self.separate_pages(separator_pages): |  | ||||||
|             copy_file_with_basic_stats(new_document, tmp_dir / new_document.name) |  | ||||||
|  |  | ||||||
|             tasks.consume_file.delay( |  | ||||||
|                 ConsumableDocument( |  | ||||||
|                     # Same source, for templates |  | ||||||
|                     source=source, |  | ||||||
|                     # Can't use same folder or the consume might grab it again |  | ||||||
|                     original_file=(tmp_dir / new_document.name).resolve(), |  | ||||||
|                 ), |  | ||||||
|                 # All the same metadata |  | ||||||
|                 overrides, |  | ||||||
|             ) |  | ||||||
|         logger.info("Barcode splitting complete!") |  | ||||||
|         return True |  | ||||||
|   | |||||||
| @@ -21,7 +21,6 @@ from filelock import FileLock | |||||||
| from rest_framework.reverse import reverse | from rest_framework.reverse import reverse | ||||||
|  |  | ||||||
| from documents.classifier import load_classifier | from documents.classifier import load_classifier | ||||||
| from documents.data_models import ConsumableDocument |  | ||||||
| from documents.data_models import DocumentMetadataOverrides | from documents.data_models import DocumentMetadataOverrides | ||||||
| from documents.file_handling import create_source_path_directory | from documents.file_handling import create_source_path_directory | ||||||
| from documents.file_handling import generate_unique_filename | from documents.file_handling import generate_unique_filename | ||||||
| @@ -42,12 +41,83 @@ from documents.parsers import ParseError | |||||||
| from documents.parsers import get_parser_class_for_mime_type | from documents.parsers import get_parser_class_for_mime_type | ||||||
| from documents.parsers import parse_date | from documents.parsers import parse_date | ||||||
| from documents.permissions import set_permissions_for_object | from documents.permissions import set_permissions_for_object | ||||||
|  | from documents.plugins.base import AlwaysRunPluginMixin | ||||||
|  | from documents.plugins.base import ConsumeTaskPlugin | ||||||
|  | from documents.plugins.base import NoCleanupPluginMixin | ||||||
|  | from documents.plugins.base import NoSetupPluginMixin | ||||||
| from documents.signals import document_consumption_finished | from documents.signals import document_consumption_finished | ||||||
| from documents.signals import document_consumption_started | from documents.signals import document_consumption_started | ||||||
| from documents.utils import copy_basic_file_stats | from documents.utils import copy_basic_file_stats | ||||||
| from documents.utils import copy_file_with_basic_stats | from documents.utils import copy_file_with_basic_stats | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class WorkflowTriggerPlugin( | ||||||
|  |     NoCleanupPluginMixin, | ||||||
|  |     NoSetupPluginMixin, | ||||||
|  |     AlwaysRunPluginMixin, | ||||||
|  |     ConsumeTaskPlugin, | ||||||
|  | ): | ||||||
|  |     NAME: str = "WorkflowTriggerPlugin" | ||||||
|  |  | ||||||
|  |     def run(self) -> Optional[str]: | ||||||
|  |         """ | ||||||
|  |         Get overrides from matching workflows | ||||||
|  |         """ | ||||||
|  |         overrides = DocumentMetadataOverrides() | ||||||
|  |         for workflow in Workflow.objects.filter(enabled=True).order_by("order"): | ||||||
|  |             template_overrides = DocumentMetadataOverrides() | ||||||
|  |  | ||||||
|  |             if document_matches_workflow( | ||||||
|  |                 self.input_doc, | ||||||
|  |                 workflow, | ||||||
|  |                 WorkflowTrigger.WorkflowTriggerType.CONSUMPTION, | ||||||
|  |             ): | ||||||
|  |                 for action in workflow.actions.all(): | ||||||
|  |                     if action.assign_title is not None: | ||||||
|  |                         template_overrides.title = action.assign_title | ||||||
|  |                     if action.assign_tags is not None: | ||||||
|  |                         template_overrides.tag_ids = [ | ||||||
|  |                             tag.pk for tag in action.assign_tags.all() | ||||||
|  |                         ] | ||||||
|  |                     if action.assign_correspondent is not None: | ||||||
|  |                         template_overrides.correspondent_id = ( | ||||||
|  |                             action.assign_correspondent.pk | ||||||
|  |                         ) | ||||||
|  |                     if action.assign_document_type is not None: | ||||||
|  |                         template_overrides.document_type_id = ( | ||||||
|  |                             action.assign_document_type.pk | ||||||
|  |                         ) | ||||||
|  |                     if action.assign_storage_path is not None: | ||||||
|  |                         template_overrides.storage_path_id = ( | ||||||
|  |                             action.assign_storage_path.pk | ||||||
|  |                         ) | ||||||
|  |                     if action.assign_owner is not None: | ||||||
|  |                         template_overrides.owner_id = action.assign_owner.pk | ||||||
|  |                     if action.assign_view_users is not None: | ||||||
|  |                         template_overrides.view_users = [ | ||||||
|  |                             user.pk for user in action.assign_view_users.all() | ||||||
|  |                         ] | ||||||
|  |                     if action.assign_view_groups is not None: | ||||||
|  |                         template_overrides.view_groups = [ | ||||||
|  |                             group.pk for group in action.assign_view_groups.all() | ||||||
|  |                         ] | ||||||
|  |                     if action.assign_change_users is not None: | ||||||
|  |                         template_overrides.change_users = [ | ||||||
|  |                             user.pk for user in action.assign_change_users.all() | ||||||
|  |                         ] | ||||||
|  |                     if action.assign_change_groups is not None: | ||||||
|  |                         template_overrides.change_groups = [ | ||||||
|  |                             group.pk for group in action.assign_change_groups.all() | ||||||
|  |                         ] | ||||||
|  |                     if action.assign_custom_fields is not None: | ||||||
|  |                         template_overrides.custom_field_ids = [ | ||||||
|  |                             field.pk for field in action.assign_custom_fields.all() | ||||||
|  |                         ] | ||||||
|  |  | ||||||
|  |                     overrides.update(template_overrides) | ||||||
|  |         self.metadata.update(overrides) | ||||||
|  |  | ||||||
|  |  | ||||||
| class ConsumerError(Exception): | class ConsumerError(Exception): | ||||||
|     pass |     pass | ||||||
|  |  | ||||||
| @@ -602,70 +672,6 @@ class Consumer(LoggingMixin): | |||||||
|  |  | ||||||
|         return document |         return document | ||||||
|  |  | ||||||
|     def get_workflow_overrides( |  | ||||||
|         self, |  | ||||||
|         input_doc: ConsumableDocument, |  | ||||||
|     ) -> DocumentMetadataOverrides: |  | ||||||
|         """ |  | ||||||
|         Get overrides from matching workflows |  | ||||||
|         """ |  | ||||||
|         overrides = DocumentMetadataOverrides() |  | ||||||
|         for workflow in Workflow.objects.filter(enabled=True).order_by("order"): |  | ||||||
|             template_overrides = DocumentMetadataOverrides() |  | ||||||
|  |  | ||||||
|             if document_matches_workflow( |  | ||||||
|                 input_doc, |  | ||||||
|                 workflow, |  | ||||||
|                 WorkflowTrigger.WorkflowTriggerType.CONSUMPTION, |  | ||||||
|             ): |  | ||||||
|                 for action in workflow.actions.all(): |  | ||||||
|                     self.log.info( |  | ||||||
|                         f"Applying overrides in {action} from {workflow}", |  | ||||||
|                     ) |  | ||||||
|                     if action.assign_title is not None: |  | ||||||
|                         template_overrides.title = action.assign_title |  | ||||||
|                     if action.assign_tags is not None: |  | ||||||
|                         template_overrides.tag_ids = [ |  | ||||||
|                             tag.pk for tag in action.assign_tags.all() |  | ||||||
|                         ] |  | ||||||
|                     if action.assign_correspondent is not None: |  | ||||||
|                         template_overrides.correspondent_id = ( |  | ||||||
|                             action.assign_correspondent.pk |  | ||||||
|                         ) |  | ||||||
|                     if action.assign_document_type is not None: |  | ||||||
|                         template_overrides.document_type_id = ( |  | ||||||
|                             action.assign_document_type.pk |  | ||||||
|                         ) |  | ||||||
|                     if action.assign_storage_path is not None: |  | ||||||
|                         template_overrides.storage_path_id = ( |  | ||||||
|                             action.assign_storage_path.pk |  | ||||||
|                         ) |  | ||||||
|                     if action.assign_owner is not None: |  | ||||||
|                         template_overrides.owner_id = action.assign_owner.pk |  | ||||||
|                     if action.assign_view_users is not None: |  | ||||||
|                         template_overrides.view_users = [ |  | ||||||
|                             user.pk for user in action.assign_view_users.all() |  | ||||||
|                         ] |  | ||||||
|                     if action.assign_view_groups is not None: |  | ||||||
|                         template_overrides.view_groups = [ |  | ||||||
|                             group.pk for group in action.assign_view_groups.all() |  | ||||||
|                         ] |  | ||||||
|                     if action.assign_change_users is not None: |  | ||||||
|                         template_overrides.change_users = [ |  | ||||||
|                             user.pk for user in action.assign_change_users.all() |  | ||||||
|                         ] |  | ||||||
|                     if action.assign_change_groups is not None: |  | ||||||
|                         template_overrides.change_groups = [ |  | ||||||
|                             group.pk for group in action.assign_change_groups.all() |  | ||||||
|                         ] |  | ||||||
|                     if action.assign_custom_fields is not None: |  | ||||||
|                         template_overrides.custom_field_ids = [ |  | ||||||
|                             field.pk for field in action.assign_custom_fields.all() |  | ||||||
|                         ] |  | ||||||
|  |  | ||||||
|                     overrides.update(template_overrides) |  | ||||||
|         return overrides |  | ||||||
|  |  | ||||||
|     def _parse_title_placeholders(self, title: str) -> str: |     def _parse_title_placeholders(self, title: str) -> str: | ||||||
|         local_added = timezone.localtime(timezone.now()) |         local_added = timezone.localtime(timezone.now()) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -3,24 +3,41 @@ import logging | |||||||
| import os | import os | ||||||
| import shutil | import shutil | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
|  | from typing import Final | ||||||
|  | from typing import Optional | ||||||
|  |  | ||||||
| from django.conf import settings | from django.conf import settings | ||||||
| from pikepdf import Pdf | from pikepdf import Pdf | ||||||
|  |  | ||||||
| from documents.consumer import ConsumerError | from documents.consumer import ConsumerError | ||||||
| from documents.converters import convert_from_tiff_to_pdf | from documents.converters import convert_from_tiff_to_pdf | ||||||
| from documents.data_models import ConsumableDocument | from documents.plugins.base import ConsumeTaskPlugin | ||||||
|  | from documents.plugins.base import NoCleanupPluginMixin | ||||||
|  | from documents.plugins.base import NoSetupPluginMixin | ||||||
|  | from documents.plugins.base import StopConsumeTaskError | ||||||
|  |  | ||||||
| logger = logging.getLogger("paperless.double_sided") | logger = logging.getLogger("paperless.double_sided") | ||||||
|  |  | ||||||
| # Hardcoded for now, could be made a configurable setting if needed | # Hardcoded for now, could be made a configurable setting if needed | ||||||
| TIMEOUT_MINUTES = 30 | TIMEOUT_MINUTES: Final[int] = 30 | ||||||
|  | TIMEOUT_SECONDS: Final[int] = TIMEOUT_MINUTES * 60 | ||||||
|  |  | ||||||
| # Used by test cases | # Used by test cases | ||||||
| STAGING_FILE_NAME = "double-sided-staging.pdf" | STAGING_FILE_NAME = "double-sided-staging.pdf" | ||||||
|  |  | ||||||
|  |  | ||||||
| def collate(input_doc: ConsumableDocument) -> str: | class CollatePlugin(NoCleanupPluginMixin, NoSetupPluginMixin, ConsumeTaskPlugin): | ||||||
|  |     NAME: str = "CollatePlugin" | ||||||
|  |  | ||||||
|  |     @property | ||||||
|  |     def able_to_run(self) -> bool: | ||||||
|  |         return ( | ||||||
|  |             settings.CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED | ||||||
|  |             and settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME | ||||||
|  |             in self.input_doc.original_file.parts | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |     def run(self) -> Optional[str]: | ||||||
|         """ |         """ | ||||||
|         Tries to collate pages from 2 single sided scans of a double sided |         Tries to collate pages from 2 single sided scans of a double sided | ||||||
|         document. |         document. | ||||||
| @@ -39,33 +56,32 @@ def collate(input_doc: ConsumableDocument) -> str: | |||||||
|         in case of failure. |         in case of failure. | ||||||
|         """ |         """ | ||||||
|  |  | ||||||
|     # Make sure scratch dir exists, Consumer might not have run yet |         if self.input_doc.mime_type == "application/pdf": | ||||||
|     settings.SCRATCH_DIR.mkdir(exist_ok=True) |             pdf_file = self.input_doc.original_file | ||||||
|  |  | ||||||
|     if input_doc.mime_type == "application/pdf": |  | ||||||
|         pdf_file = input_doc.original_file |  | ||||||
|         elif ( |         elif ( | ||||||
|         input_doc.mime_type == "image/tiff" |             self.input_doc.mime_type == "image/tiff" | ||||||
|             and settings.CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT |             and settings.CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT | ||||||
|         ): |         ): | ||||||
|             pdf_file = convert_from_tiff_to_pdf( |             pdf_file = convert_from_tiff_to_pdf( | ||||||
|             input_doc.original_file, |                 self.input_doc.original_file, | ||||||
|             settings.SCRATCH_DIR, |                 self.base_tmp_dir, | ||||||
|             ) |             ) | ||||||
|         input_doc.original_file.unlink() |             self.input_doc.original_file.unlink() | ||||||
|         else: |         else: | ||||||
|         raise ConsumerError("Unsupported file type for collation of double-sided scans") |             raise ConsumerError( | ||||||
|  |                 "Unsupported file type for collation of double-sided scans", | ||||||
|  |             ) | ||||||
|  |  | ||||||
|     staging = settings.SCRATCH_DIR / STAGING_FILE_NAME |         staging: Path = settings.SCRATCH_DIR / STAGING_FILE_NAME | ||||||
|  |  | ||||||
|         valid_staging_exists = False |         valid_staging_exists = False | ||||||
|         if staging.exists(): |         if staging.exists(): | ||||||
|         stats = os.stat(str(staging)) |             stats = staging.stat() | ||||||
|             # if the file is older than the timeout, we don't consider |             # if the file is older than the timeout, we don't consider | ||||||
|             # it valid |             # it valid | ||||||
|         if dt.datetime.now().timestamp() - stats.st_mtime > TIMEOUT_MINUTES * 60: |             if (dt.datetime.now().timestamp() - stats.st_mtime) > TIMEOUT_SECONDS: | ||||||
|                 logger.warning("Outdated double sided staging file exists, deleting it") |                 logger.warning("Outdated double sided staging file exists, deleting it") | ||||||
|             os.unlink(str(staging)) |                 staging.unlink() | ||||||
|             else: |             else: | ||||||
|                 valid_staging_exists = True |                 valid_staging_exists = True | ||||||
|  |  | ||||||
| @@ -88,23 +104,24 @@ def collate(input_doc: ConsumableDocument) -> str: | |||||||
|                     # Merged file has the same path, but without the |                     # Merged file has the same path, but without the | ||||||
|                     # double-sided subdir. Therefore, it is also in the |                     # double-sided subdir. Therefore, it is also in the | ||||||
|                     # consumption dir and will be picked up for processing |                     # consumption dir and will be picked up for processing | ||||||
|                 old_file = input_doc.original_file |                     old_file = self.input_doc.original_file | ||||||
|                     new_file = Path( |                     new_file = Path( | ||||||
|                         *( |                         *( | ||||||
|                             part |                             part | ||||||
|                             for part in old_file.with_name( |                             for part in old_file.with_name( | ||||||
|                                 f"{old_file.stem}-collated.pdf", |                                 f"{old_file.stem}-collated.pdf", | ||||||
|                             ).parts |                             ).parts | ||||||
|                         if part != settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME |                             if part | ||||||
|  |                             != settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME | ||||||
|                         ), |                         ), | ||||||
|                     ) |                     ) | ||||||
|                     # If the user didn't create the subdirs yet, do it for them |                     # If the user didn't create the subdirs yet, do it for them | ||||||
|                     new_file.parent.mkdir(parents=True, exist_ok=True) |                     new_file.parent.mkdir(parents=True, exist_ok=True) | ||||||
|                     pdf1.save(new_file) |                     pdf1.save(new_file) | ||||||
|                 logger.info("Collated documents into new file %s", new_file) |                 logger.info("Collated documents into new file %s", new_file) | ||||||
|             return ( |                 raise StopConsumeTaskError( | ||||||
|                     "Success. Even numbered pages of double sided scan collated " |                     "Success. Even numbered pages of double sided scan collated " | ||||||
|                 "with odd pages" |                     "with odd pages", | ||||||
|                 ) |                 ) | ||||||
|             finally: |             finally: | ||||||
|                 # Delete staging and recently uploaded file no matter what. |                 # Delete staging and recently uploaded file no matter what. | ||||||
| @@ -118,12 +135,13 @@ def collate(input_doc: ConsumableDocument) -> str: | |||||||
|             shutil.move(pdf_file, staging) |             shutil.move(pdf_file, staging) | ||||||
|             # update access to modification time so we know if the file |             # update access to modification time so we know if the file | ||||||
|             # is outdated when another file gets uploaded |             # is outdated when another file gets uploaded | ||||||
|         os.utime(staging, (dt.datetime.now().timestamp(),) * 2) |             timestamp = dt.datetime.now().timestamp() | ||||||
|  |             os.utime(staging, (timestamp, timestamp)) | ||||||
|             logger.info( |             logger.info( | ||||||
|                 "Got scan with odd numbered pages of double-sided scan, moved it to %s", |                 "Got scan with odd numbered pages of double-sided scan, moved it to %s", | ||||||
|                 staging, |                 staging, | ||||||
|             ) |             ) | ||||||
|         return ( |             raise StopConsumeTaskError( | ||||||
|                 "Received odd numbered pages of double sided scan, waiting up to " |                 "Received odd numbered pages of double sided scan, waiting up to " | ||||||
|             f"{TIMEOUT_MINUTES} minutes for even numbered pages" |                 f"{TIMEOUT_MINUTES} minutes for even numbered pages", | ||||||
|             ) |             ) | ||||||
|   | |||||||
							
								
								
									
										0
									
								
								src/documents/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								src/documents/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										131
									
								
								src/documents/plugins/base.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										131
									
								
								src/documents/plugins/base.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,131 @@ | |||||||
|  | import abc | ||||||
|  | from pathlib import Path | ||||||
|  | from typing import Final | ||||||
|  | from typing import Optional | ||||||
|  |  | ||||||
|  | from documents.data_models import ConsumableDocument | ||||||
|  | from documents.data_models import DocumentMetadataOverrides | ||||||
|  | from documents.plugins.helpers import ProgressManager | ||||||
|  |  | ||||||
|  |  | ||||||
class StopConsumeTaskError(Exception):
    """
    Raised from a plugin's setup or run to exit the asynchronous consume task.

    Most likely the plugin has created one or more new tasks to execute in
    place of this one, such as when a barcode has been used to create new
    documents.
    """

    def __init__(self, message: str) -> None:
        # Let Exception record the text in args first, then expose it under
        # a named attribute so handlers do not need to index into args
        super().__init__(message)
        self.message = message
|  |  | ||||||
|  |  | ||||||
class ConsumeTaskPlugin(abc.ABC):
    """
    Defines the interface for a plugin for the document consume task
    Meanings as per RFC2119 (https://datatracker.ietf.org/doc/html/rfc2119)

    Plugin Implementation

    The plugin SHALL implement property able_to_run and methods setup, run and cleanup.
    The plugin property able_to_run SHALL return True if the plugin is able to run, given the conditions, settings and document information.
    The plugin property able_to_run MAY be hardcoded to return True.
    The plugin setup SHOULD perform any resource creation or additional initialization needed to run the document.
    The plugin setup MAY be a non-operation.
    The plugin cleanup SHOULD perform resource cleanup, including in the event of an error.
    The plugin cleanup MAY be a non-operation.
    The plugin run SHALL perform any operations against the document or system state required for the plugin.
    The plugin run MAY update the document metadata.
    The plugin run MAY return an informational message.
    The plugin run MAY raise StopConsumeTaskError to cease any further operations against the document.

    Plugin Manager Implementation

    The plugin manager SHALL provide the plugin with the input document, document metadata, progress manager and a created temporary directory.
    The plugin manager SHALL execute the plugin setup, run and cleanup, in that order IF the plugin property able_to_run is True.
    The plugin manager SHOULD log the return message of executing a plugin's run.
    The plugin manager SHALL always execute the plugin cleanup, IF the plugin property able_to_run is True.
    The plugin manager SHALL cease calling plugins and exit the task IF a plugin raises StopConsumeTaskError.
    The plugin manager SHOULD return the StopConsumeTaskError message IF a plugin raises StopConsumeTaskError.
    """

    # Human readable identifier of the plugin; subclasses override it
    NAME: str = "ConsumeTaskPlugin"

    def __init__(
        self,
        input_doc: ConsumableDocument,
        metadata: DocumentMetadataOverrides,
        status_mgr: ProgressManager,
        base_tmp_dir: Path,
        task_id: str,
    ) -> None:
        """
        Stores everything the plugin manager hands to the plugin.

        Args:
            input_doc: the document being consumed
            metadata: the metadata overrides for the document, which run() MAY update
            status_mgr: the progress manager used to report status
            base_tmp_dir: an already created temporary directory
            task_id: identifier of the consume task this plugin runs in
        """
        super().__init__()
        self.input_doc = input_doc
        self.metadata = metadata
        self.base_tmp_dir: Final = base_tmp_dir
        self.status_mgr = status_mgr
        self.task_id: Final = task_id

    # abc.abstractproperty is deprecated since Python 3.3, stacking property
    # on top of abstractmethod is the supported spelling of the same thing
    @property
    @abc.abstractmethod
    def able_to_run(self) -> bool:
        """
        Return True if the conditions are met for the plugin to run, False otherwise

        If False, setup(), run() and cleanup() will not be called
        """

    @abc.abstractmethod
    def setup(self) -> None:
        """
        Allows the plugin to perform any additional setup it may need, such as creating
        a temporary directory, copying a file somewhere, etc.

        Executed before run()

        In general, this should be the "light" work, not the bulk of processing
        """

    @abc.abstractmethod
    def run(self) -> Optional[str]:
        """
        The bulk of plugin processing, this does whatever action the plugin is for.

        Executed after setup() and before cleanup()

        Returns:
            An optional informational message

        Raises:
            StopConsumeTaskError: MAY be raised to cease any further operations
                against the document
        """

    @abc.abstractmethod
    def cleanup(self) -> None:
        """
        Allows the plugin to execute any cleanup it may require

        Executed after run(), even in the case of error
        """
|  |  | ||||||
|  |  | ||||||
class AlwaysRunPluginMixin(ConsumeTaskPlugin):
    """
    Mixin for plugins with no preconditions: able_to_run is hardcoded to True
    """

    @property
    def able_to_run(self) -> bool:
        """
        Always True, the plugin runs for every document
        """
        return True
|  |  | ||||||
|  |  | ||||||
class NoSetupPluginMixin(ConsumeTaskPlugin):
    """
    Mixin for plugins whose setup step is a non-operation
    """

    def setup(self) -> None:
        """
        Nothing to prepare before run()
        """
|  |  | ||||||
|  |  | ||||||
class NoCleanupPluginMixin(ConsumeTaskPlugin):
    """
    Mixin for plugins whose cleanup step is a non-operation
    """

    def cleanup(self) -> None:
        """
        Nothing to release after run()
        """
							
								
								
									
										82
									
								
								src/documents/plugins/helpers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								src/documents/plugins/helpers.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,82 @@ | |||||||
|  | import enum | ||||||
|  | from typing import TYPE_CHECKING | ||||||
|  | from typing import Optional | ||||||
|  | from typing import Union | ||||||
|  |  | ||||||
|  | from asgiref.sync import async_to_sync | ||||||
|  | from channels.layers import get_channel_layer | ||||||
|  | from channels_redis.pubsub import RedisPubSubChannelLayer | ||||||
|  |  | ||||||
|  |  | ||||||
class ProgressStatusOptions(str, enum.Enum):
    """
    The status values which may be reported with a progress update.

    Members also subclass str, so each one compares equal to its plain
    string value.
    """

    STARTED = "STARTED"
    WORKING = "WORKING"
    SUCCESS = "SUCCESS"
    FAILED = "FAILED"
|  |  | ||||||
|  |  | ||||||
class ProgressManager:
    """
    Handles sending of progress information via the channel layer, with proper management
    of the open/close of the layer to ensure messages go out and everything is cleaned up

    May be used as a context manager, which opens the layer on entry and
    closes it on exit.
    """

    def __init__(self, filename: str, task_id: Optional[str] = None) -> None:
        """
        Args:
            filename: the file name reported with every progress update
            task_id: the task identifier reported with every progress update, if any
        """
        self.filename = filename
        # Set by open(), reset to None by close()
        self._channel: Optional[RedisPubSubChannelLayer] = None
        self.task_id = task_id

    def __enter__(self):
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so an exception from the with body still propagates
        self.close()

    def open(self) -> None:
        """
        If not already opened, gets the default channel layer
        opened and ready to send messages
        """
        if self._channel is None:
            self._channel = get_channel_layer()

    def close(self) -> None:
        """
        If it was opened, flushes the channel layer
        """
        if self._channel is None:
            return
        try:
            # async_to_sync only builds a synchronous wrapper around the
            # coroutine function. The wrapper must be called, otherwise
            # the flush never actually happens
            async_to_sync(self._channel.flush)()
        finally:
            # Drop the reference even if the flush failed, so a later
            # open() fetches the layer again
            self._channel = None

    def send_progress(
        self,
        status: ProgressStatusOptions,
        message: str,
        current_progress: int,
        max_progress: int,
        extra_args: Optional[dict[str, Union[str, int]]] = None,
    ) -> None:
        """
        Sends a single status update to the "status_updates" group.

        Args:
            status: the status to report
            message: the message to report
            current_progress: the current progress value
            max_progress: the value of current_progress which means complete
            extra_args: additional entries merged into the data of the
                update, overriding the standard entries on a key clash
        """
        # Ensure the layer is open
        self.open()

        # Just for IDEs
        if TYPE_CHECKING:
            assert self._channel is not None

        payload = {
            "type": "status_update",
            "data": {
                "filename": self.filename,
                "task_id": self.task_id,
                "current_progress": current_progress,
                "max_progress": max_progress,
                "status": status,
                "message": message,
            },
        }
        if extra_args is not None:
            payload["data"].update(extra_args)

        # Construct and send the update
        async_to_sync(self._channel.group_send)("status_updates", payload)
| @@ -2,30 +2,30 @@ import hashlib | |||||||
| import logging | import logging | ||||||
| import shutil | import shutil | ||||||
| import uuid | import uuid | ||||||
|  | from pathlib import Path | ||||||
|  | from tempfile import TemporaryDirectory | ||||||
| from typing import Optional | from typing import Optional | ||||||
|  |  | ||||||
| import tqdm | import tqdm | ||||||
| from asgiref.sync import async_to_sync |  | ||||||
| from celery import Task | from celery import Task | ||||||
| from celery import shared_task | from celery import shared_task | ||||||
| from channels.layers import get_channel_layer |  | ||||||
| from django.conf import settings | from django.conf import settings | ||||||
| from django.db import transaction | from django.db import transaction | ||||||
| from django.db.models.signals import post_save | from django.db.models.signals import post_save | ||||||
| from filelock import FileLock | from filelock import FileLock | ||||||
| from redis.exceptions import ConnectionError |  | ||||||
| from whoosh.writing import AsyncWriter | from whoosh.writing import AsyncWriter | ||||||
|  |  | ||||||
| from documents import index | from documents import index | ||||||
| from documents import sanity_checker | from documents import sanity_checker | ||||||
| from documents.barcodes import BarcodeReader | from documents.barcodes import BarcodePlugin | ||||||
| from documents.classifier import DocumentClassifier | from documents.classifier import DocumentClassifier | ||||||
| from documents.classifier import load_classifier | from documents.classifier import load_classifier | ||||||
| from documents.consumer import Consumer | from documents.consumer import Consumer | ||||||
| from documents.consumer import ConsumerError | from documents.consumer import ConsumerError | ||||||
|  | from documents.consumer import WorkflowTriggerPlugin | ||||||
| from documents.data_models import ConsumableDocument | from documents.data_models import ConsumableDocument | ||||||
| from documents.data_models import DocumentMetadataOverrides | from documents.data_models import DocumentMetadataOverrides | ||||||
| from documents.double_sided import collate | from documents.double_sided import CollatePlugin | ||||||
| from documents.file_handling import create_source_path_directory | from documents.file_handling import create_source_path_directory | ||||||
| from documents.file_handling import generate_unique_filename | from documents.file_handling import generate_unique_filename | ||||||
| from documents.models import Correspondent | from documents.models import Correspondent | ||||||
| @@ -35,6 +35,10 @@ from documents.models import StoragePath | |||||||
| from documents.models import Tag | from documents.models import Tag | ||||||
| from documents.parsers import DocumentParser | from documents.parsers import DocumentParser | ||||||
| from documents.parsers import get_parser_class_for_mime_type | from documents.parsers import get_parser_class_for_mime_type | ||||||
|  | from documents.plugins.base import ConsumeTaskPlugin | ||||||
|  | from documents.plugins.base import ProgressManager | ||||||
|  | from documents.plugins.base import StopConsumeTaskError | ||||||
|  | from documents.plugins.helpers import ProgressStatusOptions | ||||||
| from documents.sanity_checker import SanityCheckFailedException | from documents.sanity_checker import SanityCheckFailedException | ||||||
| from documents.signals import document_updated | from documents.signals import document_updated | ||||||
|  |  | ||||||
| @@ -102,70 +106,60 @@ def consume_file( | |||||||
|     input_doc: ConsumableDocument, |     input_doc: ConsumableDocument, | ||||||
|     overrides: Optional[DocumentMetadataOverrides] = None, |     overrides: Optional[DocumentMetadataOverrides] = None, | ||||||
| ): | ): | ||||||
|     def send_progress(status="SUCCESS", message="finished"): |  | ||||||
|         payload = { |  | ||||||
|             "filename": overrides.filename or input_doc.original_file.name, |  | ||||||
|             "task_id": None, |  | ||||||
|             "current_progress": 100, |  | ||||||
|             "max_progress": 100, |  | ||||||
|             "status": status, |  | ||||||
|             "message": message, |  | ||||||
|         } |  | ||||||
|         try: |  | ||||||
|             async_to_sync(get_channel_layer().group_send)( |  | ||||||
|                 "status_updates", |  | ||||||
|                 {"type": "status_update", "data": payload}, |  | ||||||
|             ) |  | ||||||
|         except ConnectionError as e: |  | ||||||
|             logger.warning(f"ConnectionError on status send: {e!s}") |  | ||||||
|  |  | ||||||
|     # Default no overrides |     # Default no overrides | ||||||
|     if overrides is None: |     if overrides is None: | ||||||
|         overrides = DocumentMetadataOverrides() |         overrides = DocumentMetadataOverrides() | ||||||
|  |  | ||||||
|     # Handle collation of double-sided documents scanned in two parts |     plugins: list[type[ConsumeTaskPlugin]] = [ | ||||||
|     if settings.CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED and ( |         CollatePlugin, | ||||||
|         settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME |         BarcodePlugin, | ||||||
|         in input_doc.original_file.parts |         WorkflowTriggerPlugin, | ||||||
|     ): |     ] | ||||||
|         try: |  | ||||||
|             msg = collate(input_doc) |  | ||||||
|             send_progress(message=msg) |  | ||||||
|             return msg |  | ||||||
|         except ConsumerError as e: |  | ||||||
|             send_progress(status="FAILURE", message=e.args[0]) |  | ||||||
|             raise e |  | ||||||
|  |  | ||||||
|     # read all barcodes in the current document |     with ProgressManager( | ||||||
|     if settings.CONSUMER_ENABLE_BARCODES or settings.CONSUMER_ENABLE_ASN_BARCODE: |         overrides.filename or input_doc.original_file.name, | ||||||
|         with BarcodeReader(input_doc.original_file, input_doc.mime_type) as reader: |         self.request.id, | ||||||
|             if settings.CONSUMER_ENABLE_BARCODES and reader.separate( |     ) as status_mgr, TemporaryDirectory(dir=settings.SCRATCH_DIR) as tmp_dir: | ||||||
|                 input_doc.source, |         tmp_dir = Path(tmp_dir) | ||||||
|  |         for plugin_class in plugins: | ||||||
|  |             plugin_name = plugin_class.NAME | ||||||
|  |  | ||||||
|  |             plugin = plugin_class( | ||||||
|  |                 input_doc, | ||||||
|                 overrides, |                 overrides, | ||||||
|             ): |                 status_mgr, | ||||||
|                 # notify the sender, otherwise the progress bar |                 tmp_dir, | ||||||
|                 # in the UI stays stuck |                 self.request.id, | ||||||
|                 send_progress() |  | ||||||
|                 # consuming stops here, since the original document with |  | ||||||
|                 # the barcodes has been split and will be consumed separately |  | ||||||
|                 input_doc.original_file.unlink() |  | ||||||
|                 return "File successfully split" |  | ||||||
|  |  | ||||||
|             # try reading the ASN from barcode |  | ||||||
|             if ( |  | ||||||
|                 settings.CONSUMER_ENABLE_ASN_BARCODE |  | ||||||
|                 and (located_asn := reader.asn) is not None |  | ||||||
|             ): |  | ||||||
|                 # Note this will take precedence over an API provided ASN |  | ||||||
|                 # But it's from a physical barcode, so that's good |  | ||||||
|                 overrides.asn = located_asn |  | ||||||
|                 logger.info(f"Found ASN in barcode: {overrides.asn}") |  | ||||||
|  |  | ||||||
|     template_overrides = Consumer().get_workflow_overrides( |  | ||||||
|         input_doc=input_doc, |  | ||||||
|             ) |             ) | ||||||
|  |  | ||||||
|     overrides.update(template_overrides) |             if not plugin.able_to_run: | ||||||
|  |                 logger.debug(f"Skipping plugin {plugin_name}") | ||||||
|  |                 continue | ||||||
|  |  | ||||||
|  |             try: | ||||||
|  |                 logger.debug(f"Executing plugin {plugin_name}") | ||||||
|  |                 plugin.setup() | ||||||
|  |  | ||||||
|  |                 msg = plugin.run() | ||||||
|  |  | ||||||
|  |                 if msg is not None: | ||||||
|  |                     logger.info(f"{plugin_name} completed with: {msg}") | ||||||
|  |                 else: | ||||||
|  |                     logger.info(f"{plugin_name} completed with no message") | ||||||
|  |  | ||||||
|  |                 overrides = plugin.metadata | ||||||
|  |  | ||||||
|  |             except StopConsumeTaskError as e: | ||||||
|  |                 logger.info(f"{plugin_name} requested task exit: {e.message}") | ||||||
|  |                 return e.message | ||||||
|  |  | ||||||
|  |             except Exception as e: | ||||||
|  |                 logger.exception(f"{plugin_name} failed: {e}") | ||||||
|  |                 status_mgr.send_progress(ProgressStatusOptions.FAILED, f"{e}", 100, 100) | ||||||
|  |                 raise | ||||||
|  |  | ||||||
|  |             finally: | ||||||
|  |                 plugin.cleanup() | ||||||
|  |  | ||||||
|     # continue with consumption if no barcode was found |     # continue with consumption if no barcode was found | ||||||
|     document = Consumer().try_consume_file( |     document = Consumer().try_consume_file( | ||||||
|   | |||||||
| @@ -1,4 +1,7 @@ | |||||||
| import shutil | import shutil | ||||||
|  | from collections.abc import Generator | ||||||
|  | from contextlib import contextmanager | ||||||
|  | from pathlib import Path | ||||||
| from unittest import mock | from unittest import mock | ||||||
|  |  | ||||||
| import pytest | import pytest | ||||||
| @@ -7,14 +10,13 @@ from django.test import TestCase | |||||||
| from django.test import override_settings | from django.test import override_settings | ||||||
|  |  | ||||||
| from documents import tasks | from documents import tasks | ||||||
| from documents.barcodes import BarcodeReader | from documents.barcodes import BarcodePlugin | ||||||
| from documents.consumer import ConsumerError |  | ||||||
| from documents.data_models import ConsumableDocument | from documents.data_models import ConsumableDocument | ||||||
| from documents.data_models import DocumentMetadataOverrides | from documents.data_models import DocumentMetadataOverrides | ||||||
| from documents.data_models import DocumentSource | from documents.data_models import DocumentSource | ||||||
| from documents.models import Document |  | ||||||
| from documents.tests.utils import DirectoriesMixin | from documents.tests.utils import DirectoriesMixin | ||||||
| from documents.tests.utils import DocumentConsumeDelayMixin | from documents.tests.utils import DocumentConsumeDelayMixin | ||||||
|  | from documents.tests.utils import DummyProgressManager | ||||||
| from documents.tests.utils import FileSystemAssertsMixin | from documents.tests.utils import FileSystemAssertsMixin | ||||||
| from documents.tests.utils import SampleDirMixin | from documents.tests.utils import SampleDirMixin | ||||||
|  |  | ||||||
| @@ -26,8 +28,29 @@ except ImportError: | |||||||
|     HAS_ZXING_LIB = False |     HAS_ZXING_LIB = False | ||||||
|  |  | ||||||
|  |  | ||||||
class GetReaderPluginMixin:
    """
    Test helper which provides a ready to use BarcodePlugin for a file.

    Reads self.dirs.scratch_dir, so the host test class must provide it
    (presumably through DirectoriesMixin, to be confirmed against the test utils).
    """

    @contextmanager
    def get_reader(self, filepath: Path) -> Generator[BarcodePlugin, None, None]:
        """
        Yields a BarcodePlugin for the given file, with setup() already executed.

        cleanup() is executed when the with block exits, including when the
        block raises, such as on a failed assertion.
        """
        reader = BarcodePlugin(
            ConsumableDocument(DocumentSource.ConsumeFolder, original_file=filepath),
            DocumentMetadataOverrides(),
            DummyProgressManager(filepath.name, None),
            self.dirs.scratch_dir,
            "task-id",
        )
        reader.setup()
        try:
            yield reader
        finally:
            # Without the finally, an exception inside the with block would
            # skip the cleanup entirely
            reader.cleanup()
|  |  | ||||||
|  |  | ||||||
| @override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR") | @override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR") | ||||||
| class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, TestCase): | class TestBarcode( | ||||||
|  |     DirectoriesMixin, | ||||||
|  |     FileSystemAssertsMixin, | ||||||
|  |     SampleDirMixin, | ||||||
|  |     GetReaderPluginMixin, | ||||||
|  |     TestCase, | ||||||
|  | ): | ||||||
|     def test_scan_file_for_separating_barcodes(self): |     def test_scan_file_for_separating_barcodes(self): | ||||||
|         """ |         """ | ||||||
|         GIVEN: |         GIVEN: | ||||||
| @@ -39,7 +62,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -60,7 +83,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff" |         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "image/tiff") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -80,7 +103,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-alpha.tiff" |         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-alpha.tiff" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "image/tiff") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -97,7 +120,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|             - No pages to split on |             - No pages to split on | ||||||
|         """ |         """ | ||||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" |         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -115,7 +138,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -133,7 +156,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "several-patcht-codes.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "several-patcht-codes.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -158,7 +181,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         ]: |         ]: | ||||||
|             test_file = self.BARCODE_SAMPLE_DIR / test_file |             test_file = self.BARCODE_SAMPLE_DIR / test_file | ||||||
|  |  | ||||||
|             with BarcodeReader(test_file, "application/pdf") as reader: |             with self.get_reader(test_file) as reader: | ||||||
|                 reader.detect() |                 reader.detect() | ||||||
|                 separator_page_numbers = reader.get_separation_pages() |                 separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -177,7 +200,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-unreadable.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-unreadable.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -195,7 +218,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-fax-image.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "barcode-fax-image.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -214,7 +237,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-qr.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-qr.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -234,7 +257,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -255,7 +278,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -276,7 +299,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -296,7 +319,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -315,7 +338,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "many-qr-codes.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "many-qr-codes.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -334,7 +357,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.SAMPLE_DIR / "password-is-test.pdf" |         test_file = self.SAMPLE_DIR / "password-is-test.pdf" | ||||||
|         with self.assertLogs("paperless.barcodes", level="WARNING") as cm: |         with self.assertLogs("paperless.barcodes", level="WARNING") as cm: | ||||||
|             with BarcodeReader(test_file, "application/pdf") as reader: |             with self.get_reader(test_file) as reader: | ||||||
|                 reader.detect() |                 reader.detect() | ||||||
|                 warning = cm.output[0] |                 warning = cm.output[0] | ||||||
|                 expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes" |                 expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes" | ||||||
| @@ -356,7 +379,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             documents = reader.separate_pages({1: False}) |             documents = reader.separate_pages({1: False}) | ||||||
|  |  | ||||||
|             self.assertEqual(reader.pdf_file, test_file) |             self.assertEqual(reader.pdf_file, test_file) | ||||||
| @@ -373,7 +396,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-double.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-double.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             documents = reader.separate_pages({1: False, 2: False}) |             documents = reader.separate_pages({1: False, 2: False}) | ||||||
|  |  | ||||||
|             self.assertEqual(len(documents), 2) |             self.assertEqual(len(documents), 2) | ||||||
| @@ -385,32 +408,18 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         WHEN: |         WHEN: | ||||||
|             - No separation pages are provided |             - No separation pages are provided | ||||||
|         THEN: |         THEN: | ||||||
|             - No new documents are produced |             - Nothing happens | ||||||
|             - A warning is logged |  | ||||||
|         """ |         """ | ||||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" |         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||||
|  |  | ||||||
|         with self.assertLogs("paperless.barcodes", level="WARNING") as cm: |         with self.get_reader(test_file) as reader: | ||||||
|             with BarcodeReader(test_file, "application/pdf") as reader: |             self.assertEqual("No pages to split on!", reader.run()) | ||||||
|                 self.assertFalse( |  | ||||||
|                     reader.separate( |  | ||||||
|                         DocumentSource.ApiUpload, |  | ||||||
|                         DocumentMetadataOverrides(), |  | ||||||
|                     ), |  | ||||||
|                 ) |  | ||||||
|                 self.assertEqual( |  | ||||||
|                     cm.output, |  | ||||||
|                     [ |  | ||||||
|                         "WARNING:paperless.barcodes:No pages to split on!", |  | ||||||
|                     ], |  | ||||||
|                 ) |  | ||||||
|  |  | ||||||
|     @override_settings( |     @override_settings( | ||||||
|         CONSUMER_ENABLE_BARCODES=True, |         CONSUMER_ENABLE_BARCODES=True, | ||||||
|         CONSUMER_BARCODE_TIFF_SUPPORT=True, |         CONSUMER_BARCODE_TIFF_SUPPORT=True, | ||||||
|     ) |     ) | ||||||
|     @mock.patch("documents.consumer.Consumer.try_consume_file") |     def test_consume_barcode_unsupported_jpg_file(self): | ||||||
|     def test_consume_barcode_unsupported_jpg_file(self, m): |  | ||||||
|         """ |         """ | ||||||
|         GIVEN: |         GIVEN: | ||||||
|             - JPEG image as input |             - JPEG image as input | ||||||
| @@ -422,35 +431,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.SAMPLE_DIR / "simple.jpg" |         test_file = self.SAMPLE_DIR / "simple.jpg" | ||||||
|  |  | ||||||
|         dst = settings.SCRATCH_DIR / "simple.jpg" |         with self.get_reader(test_file) as reader: | ||||||
|         shutil.copy(test_file, dst) |             self.assertFalse(reader.able_to_run) | ||||||
|  |  | ||||||
|         with self.assertLogs("paperless.barcodes", level="WARNING") as cm: |  | ||||||
|             self.assertIn( |  | ||||||
|                 "Success", |  | ||||||
|                 tasks.consume_file( |  | ||||||
|                     ConsumableDocument( |  | ||||||
|                         source=DocumentSource.ConsumeFolder, |  | ||||||
|                         original_file=dst, |  | ||||||
|                     ), |  | ||||||
|                     None, |  | ||||||
|                 ), |  | ||||||
|             ) |  | ||||||
|  |  | ||||||
|         self.assertListEqual( |  | ||||||
|             cm.output, |  | ||||||
|             [ |  | ||||||
|                 "WARNING:paperless.barcodes:Unsupported file format for barcode reader: image/jpeg", |  | ||||||
|             ], |  | ||||||
|         ) |  | ||||||
|         m.assert_called_once() |  | ||||||
|  |  | ||||||
|         args, kwargs = m.call_args |  | ||||||
|         self.assertIsNone(kwargs["override_filename"]) |  | ||||||
|         self.assertIsNone(kwargs["override_title"]) |  | ||||||
|         self.assertIsNone(kwargs["override_correspondent_id"]) |  | ||||||
|         self.assertIsNone(kwargs["override_document_type_id"]) |  | ||||||
|         self.assertIsNone(kwargs["override_tag_ids"]) |  | ||||||
|  |  | ||||||
|     @override_settings( |     @override_settings( | ||||||
|         CONSUMER_ENABLE_BARCODES=True, |         CONSUMER_ENABLE_BARCODES=True, | ||||||
| @@ -467,7 +449,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -504,7 +486,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             separator_page_numbers = reader.get_separation_pages() |             separator_page_numbers = reader.get_separation_pages() | ||||||
|  |  | ||||||
| @@ -550,7 +532,7 @@ class TestBarcodeNewConsume( | |||||||
|  |  | ||||||
|         overrides = DocumentMetadataOverrides(tag_ids=[1, 2, 9]) |         overrides = DocumentMetadataOverrides(tag_ids=[1, 2, 9]) | ||||||
|  |  | ||||||
|         with mock.patch("documents.tasks.async_to_sync") as progress_mocker: |         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||||
|             self.assertEqual( |             self.assertEqual( | ||||||
|                 tasks.consume_file( |                 tasks.consume_file( | ||||||
|                     ConsumableDocument( |                     ConsumableDocument( | ||||||
| @@ -559,10 +541,8 @@ class TestBarcodeNewConsume( | |||||||
|                     ), |                     ), | ||||||
|                     overrides, |                     overrides, | ||||||
|                 ), |                 ), | ||||||
|                 "File successfully split", |                 "Barcode splitting complete!", | ||||||
|             ) |             ) | ||||||
|             # We let the consumer know progress is done |  | ||||||
|             progress_mocker.assert_called_once() |  | ||||||
|             # 2 new document consume tasks created |             # 2 new document consume tasks created | ||||||
|             self.assertEqual(self.consume_file_mock.call_count, 2) |             self.assertEqual(self.consume_file_mock.call_count, 2) | ||||||
|  |  | ||||||
| @@ -580,7 +560,20 @@ class TestBarcodeNewConsume( | |||||||
|                 self.assertEqual(overrides, new_doc_overrides) |                 self.assertEqual(overrides, new_doc_overrides) | ||||||
|  |  | ||||||
|  |  | ||||||
| class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, GetReaderPluginMixin, TestCase): | ||||||
|  |     @contextmanager | ||||||
|  |     def get_reader(self, filepath: Path) -> BarcodePlugin: | ||||||
|  |         reader = BarcodePlugin( | ||||||
|  |             ConsumableDocument(DocumentSource.ConsumeFolder, original_file=filepath), | ||||||
|  |             DocumentMetadataOverrides(), | ||||||
|  |             DummyProgressManager(filepath.name, None), | ||||||
|  |             self.dirs.scratch_dir, | ||||||
|  |             "task-id", | ||||||
|  |         ) | ||||||
|  |         reader.setup() | ||||||
|  |         yield reader | ||||||
|  |         reader.cleanup() | ||||||
|  |  | ||||||
|     @override_settings(CONSUMER_ASN_BARCODE_PREFIX="CUSTOM-PREFIX-") |     @override_settings(CONSUMER_ASN_BARCODE_PREFIX="CUSTOM-PREFIX-") | ||||||
|     def test_scan_file_for_asn_custom_prefix(self): |     def test_scan_file_for_asn_custom_prefix(self): | ||||||
|         """ |         """ | ||||||
| @@ -594,7 +587,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | |||||||
|             - The ASN integer value is correct |             - The ASN integer value is correct | ||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf" | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             asn = reader.asn |             asn = reader.asn | ||||||
|  |  | ||||||
|             self.assertEqual(reader.pdf_file, test_file) |             self.assertEqual(reader.pdf_file, test_file) | ||||||
| @@ -613,7 +606,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             asn = reader.asn |             asn = reader.asn | ||||||
|  |  | ||||||
|             self.assertEqual(reader.pdf_file, test_file) |             self.assertEqual(reader.pdf_file, test_file) | ||||||
| @@ -630,55 +623,12 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             asn = reader.asn |             asn = reader.asn | ||||||
|  |  | ||||||
|             self.assertEqual(reader.pdf_file, test_file) |             self.assertEqual(reader.pdf_file, test_file) | ||||||
|             self.assertEqual(asn, None) |             self.assertEqual(asn, None) | ||||||
|  |  | ||||||
|     @override_settings(CONSUMER_ENABLE_ASN_BARCODE=True) |  | ||||||
|     def test_scan_file_for_asn_already_exists(self): |  | ||||||
|         """ |  | ||||||
|         GIVEN: |  | ||||||
|             - PDF with an ASN barcode |  | ||||||
|             - ASN value already exists |  | ||||||
|         WHEN: |  | ||||||
|             - File is scanned for barcodes |  | ||||||
|         THEN: |  | ||||||
|             - ASN is retrieved from the document |  | ||||||
|             - Consumption fails |  | ||||||
|         """ |  | ||||||
|  |  | ||||||
|         Document.objects.create( |  | ||||||
|             title="WOW", |  | ||||||
|             content="the content", |  | ||||||
|             archive_serial_number=123, |  | ||||||
|             checksum="456", |  | ||||||
|             mime_type="application/pdf", |  | ||||||
|         ) |  | ||||||
|  |  | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf" |  | ||||||
|  |  | ||||||
|         dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf" |  | ||||||
|         shutil.copy(test_file, dst) |  | ||||||
|  |  | ||||||
|         with mock.patch("documents.consumer.Consumer._send_progress"): |  | ||||||
|             with self.assertRaises(ConsumerError) as cm, self.assertLogs( |  | ||||||
|                 "paperless.consumer", |  | ||||||
|                 level="ERROR", |  | ||||||
|             ) as logs_cm: |  | ||||||
|                 tasks.consume_file( |  | ||||||
|                     ConsumableDocument( |  | ||||||
|                         source=DocumentSource.ConsumeFolder, |  | ||||||
|                         original_file=dst, |  | ||||||
|                     ), |  | ||||||
|                     None, |  | ||||||
|                 ) |  | ||||||
|             self.assertIn("Not consuming barcode-39-asn-123.pdf", str(cm.exception)) |  | ||||||
|             error_str = logs_cm.output[0] |  | ||||||
|             expected_str = "ERROR:paperless.consumer:Not consuming barcode-39-asn-123.pdf: Given ASN already exists!" |  | ||||||
|             self.assertEqual(expected_str, error_str) |  | ||||||
|  |  | ||||||
|     def test_scan_file_for_asn_barcode_invalid(self): |     def test_scan_file_for_asn_barcode_invalid(self): | ||||||
|         """ |         """ | ||||||
|         GIVEN: |         GIVEN: | ||||||
| @@ -692,7 +642,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             asn = reader.asn |             asn = reader.asn | ||||||
|  |  | ||||||
|             self.assertEqual(reader.pdf_file, test_file) |             self.assertEqual(reader.pdf_file, test_file) | ||||||
| @@ -718,7 +668,9 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | |||||||
|         dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf" |         dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf" | ||||||
|         shutil.copy(test_file, dst) |         shutil.copy(test_file, dst) | ||||||
|  |  | ||||||
|         with mock.patch("documents.consumer.Consumer.try_consume_file") as mocked_call: |         with mock.patch( | ||||||
|  |             "documents.consumer.Consumer.try_consume_file", | ||||||
|  |         ) as mocked_consumer: | ||||||
|             tasks.consume_file( |             tasks.consume_file( | ||||||
|                 ConsumableDocument( |                 ConsumableDocument( | ||||||
|                     source=DocumentSource.ConsumeFolder, |                     source=DocumentSource.ConsumeFolder, | ||||||
| @@ -726,40 +678,11 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | |||||||
|                 ), |                 ), | ||||||
|                 None, |                 None, | ||||||
|             ) |             ) | ||||||
|  |             mocked_consumer.assert_called_once() | ||||||
|             args, kwargs = mocked_call.call_args |             args, kwargs = mocked_consumer.call_args | ||||||
|  |  | ||||||
|             self.assertEqual(kwargs["override_asn"], 123) |             self.assertEqual(kwargs["override_asn"], 123) | ||||||
|  |  | ||||||
|     @override_settings(CONSUMER_ENABLE_ASN_BARCODE=True) |  | ||||||
|     def test_asn_too_large(self): |  | ||||||
|         """ |  | ||||||
|         GIVEN: |  | ||||||
|             - ASN from barcode enabled |  | ||||||
|             - Barcode contains too large an ASN value |  | ||||||
|         WHEN: |  | ||||||
|             - ASN from barcode checked for correctness |  | ||||||
|         THEN: |  | ||||||
|             - Exception is raised regarding size limits |  | ||||||
|         """ |  | ||||||
|         src = self.BARCODE_SAMPLE_DIR / "barcode-128-asn-too-large.pdf" |  | ||||||
|  |  | ||||||
|         dst = self.dirs.scratch_dir / "barcode-128-asn-too-large.pdf" |  | ||||||
|         shutil.copy(src, dst) |  | ||||||
|  |  | ||||||
|         input_doc = ConsumableDocument( |  | ||||||
|             source=DocumentSource.ConsumeFolder, |  | ||||||
|             original_file=dst, |  | ||||||
|         ) |  | ||||||
|  |  | ||||||
|         with mock.patch("documents.consumer.Consumer._send_progress"): |  | ||||||
|             self.assertRaisesMessage( |  | ||||||
|                 ConsumerError, |  | ||||||
|                 "Given ASN 4294967296 is out of range [0, 4,294,967,295]", |  | ||||||
|                 tasks.consume_file, |  | ||||||
|                 input_doc, |  | ||||||
|             ) |  | ||||||
|  |  | ||||||
|     @override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR") |     @override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR") | ||||||
|     def test_scan_file_for_qrcode_without_upscale(self): |     def test_scan_file_for_qrcode_without_upscale(self): | ||||||
|         """ |         """ | ||||||
| @@ -774,7 +697,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | |||||||
|  |  | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             self.assertEqual(len(reader.barcodes), 0) |             self.assertEqual(len(reader.barcodes), 0) | ||||||
|  |  | ||||||
| @@ -796,7 +719,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase): | |||||||
|  |  | ||||||
|         test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf" |         test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf" | ||||||
|  |  | ||||||
|         with BarcodeReader(test_file, "application/pdf") as reader: |         with self.get_reader(test_file) as reader: | ||||||
|             reader.detect() |             reader.detect() | ||||||
|             self.assertEqual(len(reader.barcodes), 1) |             self.assertEqual(len(reader.barcodes), 1) | ||||||
|             self.assertEqual(reader.asn, 123) |             self.assertEqual(reader.asn, 123) | ||||||
|   | |||||||
| @@ -17,6 +17,7 @@ from documents.data_models import DocumentSource | |||||||
| from documents.double_sided import STAGING_FILE_NAME | from documents.double_sided import STAGING_FILE_NAME | ||||||
| from documents.double_sided import TIMEOUT_MINUTES | from documents.double_sided import TIMEOUT_MINUTES | ||||||
| from documents.tests.utils import DirectoriesMixin | from documents.tests.utils import DirectoriesMixin | ||||||
|  | from documents.tests.utils import DummyProgressManager | ||||||
| from documents.tests.utils import FileSystemAssertsMixin | from documents.tests.utils import FileSystemAssertsMixin | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -42,9 +43,10 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         dst = self.dirs.double_sided_dir / dstname |         dst = self.dirs.double_sided_dir / dstname | ||||||
|         dst.parent.mkdir(parents=True, exist_ok=True) |         dst.parent.mkdir(parents=True, exist_ok=True) | ||||||
|         shutil.copy(src, dst) |         shutil.copy(src, dst) | ||||||
|         with mock.patch("documents.tasks.async_to_sync"), mock.patch( |         with mock.patch( | ||||||
|             "documents.consumer.async_to_sync", |             "documents.tasks.ProgressManager", | ||||||
|         ): |             DummyProgressManager, | ||||||
|  |         ), mock.patch("documents.consumer.async_to_sync"): | ||||||
|             msg = tasks.consume_file( |             msg = tasks.consume_file( | ||||||
|                 ConsumableDocument( |                 ConsumableDocument( | ||||||
|                     source=DocumentSource.ConsumeFolder, |                     source=DocumentSource.ConsumeFolder, | ||||||
| @@ -211,7 +213,7 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         msg = self.consume_file("simple.pdf", Path("..") / "simple.pdf") |         msg = self.consume_file("simple.pdf", Path("..") / "simple.pdf") | ||||||
|         self.assertIsNotFile(self.staging_file) |         self.assertIsNotFile(self.staging_file) | ||||||
|         self.assertRegex(msg, "Success. New document .* created") |         self.assertRegex(msg, r"Success. New document id \d+ created") | ||||||
|  |  | ||||||
|     def test_subdirectory_upload(self): |     def test_subdirectory_upload(self): | ||||||
|         """ |         """ | ||||||
| @@ -250,4 +252,4 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         msg = self.consume_file("simple.pdf") |         msg = self.consume_file("simple.pdf") | ||||||
|         self.assertIsNotFile(self.staging_file) |         self.assertIsNotFile(self.staging_file) | ||||||
|         self.assertRegex(msg, "Success. New document .* created") |         self.assertRegex(msg, r"Success. New document id \d+ created") | ||||||
|   | |||||||
| @@ -24,6 +24,7 @@ from documents.models import WorkflowAction | |||||||
| from documents.models import WorkflowTrigger | from documents.models import WorkflowTrigger | ||||||
| from documents.signals import document_consumption_finished | from documents.signals import document_consumption_finished | ||||||
| from documents.tests.utils import DirectoriesMixin | from documents.tests.utils import DirectoriesMixin | ||||||
|  | from documents.tests.utils import DummyProgressManager | ||||||
| from documents.tests.utils import FileSystemAssertsMixin | from documents.tests.utils import FileSystemAssertsMixin | ||||||
| from paperless_mail.models import MailAccount | from paperless_mail.models import MailAccount | ||||||
| from paperless_mail.models import MailRule | from paperless_mail.models import MailRule | ||||||
| @@ -126,7 +127,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | |||||||
|  |  | ||||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" |         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||||
|  |  | ||||||
|         with mock.patch("documents.tasks.async_to_sync"): |         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||||
|             with self.assertLogs("paperless.matching", level="INFO") as cm: |             with self.assertLogs("paperless.matching", level="INFO") as cm: | ||||||
|                 tasks.consume_file( |                 tasks.consume_file( | ||||||
|                     ConsumableDocument( |                     ConsumableDocument( | ||||||
| @@ -203,7 +204,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | |||||||
|         w.save() |         w.save() | ||||||
|  |  | ||||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" |         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||||
|         with mock.patch("documents.tasks.async_to_sync"): |         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||||
|             with self.assertLogs("paperless.matching", level="INFO") as cm: |             with self.assertLogs("paperless.matching", level="INFO") as cm: | ||||||
|                 tasks.consume_file( |                 tasks.consume_file( | ||||||
|                     ConsumableDocument( |                     ConsumableDocument( | ||||||
| @@ -294,7 +295,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | |||||||
|  |  | ||||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" |         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||||
|  |  | ||||||
|         with mock.patch("documents.tasks.async_to_sync"): |         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||||
|             with self.assertLogs("paperless.matching", level="INFO") as cm: |             with self.assertLogs("paperless.matching", level="INFO") as cm: | ||||||
|                 tasks.consume_file( |                 tasks.consume_file( | ||||||
|                     ConsumableDocument( |                     ConsumableDocument( | ||||||
| @@ -356,7 +357,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | |||||||
|  |  | ||||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" |         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||||
|  |  | ||||||
|         with mock.patch("documents.tasks.async_to_sync"): |         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||||
|             with self.assertLogs("paperless.matching", level="DEBUG") as cm: |             with self.assertLogs("paperless.matching", level="DEBUG") as cm: | ||||||
|                 tasks.consume_file( |                 tasks.consume_file( | ||||||
|                     ConsumableDocument( |                     ConsumableDocument( | ||||||
| @@ -407,7 +408,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | |||||||
|  |  | ||||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" |         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||||
|  |  | ||||||
|         with mock.patch("documents.tasks.async_to_sync"): |         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||||
|             with self.assertLogs("paperless.matching", level="DEBUG") as cm: |             with self.assertLogs("paperless.matching", level="DEBUG") as cm: | ||||||
|                 tasks.consume_file( |                 tasks.consume_file( | ||||||
|                     ConsumableDocument( |                     ConsumableDocument( | ||||||
| @@ -468,7 +469,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | |||||||
|  |  | ||||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" |         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||||
|  |  | ||||||
|         with mock.patch("documents.tasks.async_to_sync"): |         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||||
|             with self.assertLogs("paperless.matching", level="DEBUG") as cm: |             with self.assertLogs("paperless.matching", level="DEBUG") as cm: | ||||||
|                 tasks.consume_file( |                 tasks.consume_file( | ||||||
|                     ConsumableDocument( |                     ConsumableDocument( | ||||||
| @@ -529,7 +530,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | |||||||
|  |  | ||||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" |         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||||
|  |  | ||||||
|         with mock.patch("documents.tasks.async_to_sync"): |         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||||
|             with self.assertLogs("paperless.matching", level="DEBUG") as cm: |             with self.assertLogs("paperless.matching", level="DEBUG") as cm: | ||||||
|                 tasks.consume_file( |                 tasks.consume_file( | ||||||
|                     ConsumableDocument( |                     ConsumableDocument( | ||||||
| @@ -591,7 +592,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | |||||||
|  |  | ||||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" |         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||||
|  |  | ||||||
|         with mock.patch("documents.tasks.async_to_sync"): |         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||||
|             with self.assertLogs("paperless.matching", level="DEBUG") as cm: |             with self.assertLogs("paperless.matching", level="DEBUG") as cm: | ||||||
|                 tasks.consume_file( |                 tasks.consume_file( | ||||||
|                     ConsumableDocument( |                     ConsumableDocument( | ||||||
| @@ -686,7 +687,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase): | |||||||
|  |  | ||||||
|         test_file = self.SAMPLE_DIR / "simple.pdf" |         test_file = self.SAMPLE_DIR / "simple.pdf" | ||||||
|  |  | ||||||
|         with mock.patch("documents.tasks.async_to_sync"): |         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): | ||||||
|             with self.assertLogs("paperless.matching", level="INFO") as cm: |             with self.assertLogs("paperless.matching", level="INFO") as cm: | ||||||
|                 tasks.consume_file( |                 tasks.consume_file( | ||||||
|                     ConsumableDocument( |                     ConsumableDocument( | ||||||
|   | |||||||
| @@ -9,6 +9,7 @@ from os import PathLike | |||||||
| from pathlib import Path | from pathlib import Path | ||||||
| from typing import Any | from typing import Any | ||||||
| from typing import Callable | from typing import Callable | ||||||
|  | from typing import Optional | ||||||
| from typing import Union | from typing import Union | ||||||
| from unittest import mock | from unittest import mock | ||||||
|  |  | ||||||
| @@ -23,6 +24,7 @@ from django.test import override_settings | |||||||
| from documents.data_models import ConsumableDocument | from documents.data_models import ConsumableDocument | ||||||
| from documents.data_models import DocumentMetadataOverrides | from documents.data_models import DocumentMetadataOverrides | ||||||
| from documents.parsers import ParseError | from documents.parsers import ParseError | ||||||
|  | from documents.plugins.helpers import ProgressStatusOptions | ||||||
|  |  | ||||||
|  |  | ||||||
| def setup_directories(): | def setup_directories(): | ||||||
| @@ -146,6 +148,11 @@ def util_call_with_backoff( | |||||||
|  |  | ||||||
|  |  | ||||||
| class DirectoriesMixin: | class DirectoriesMixin: | ||||||
|  |     """ | ||||||
|  |     Creates and overrides settings for all folders and paths, then ensures | ||||||
|  |     they are cleaned up on exit | ||||||
|  |     """ | ||||||
|  |  | ||||||
|     def __init__(self, *args, **kwargs): |     def __init__(self, *args, **kwargs): | ||||||
|         super().__init__(*args, **kwargs) |         super().__init__(*args, **kwargs) | ||||||
|         self.dirs = None |         self.dirs = None | ||||||
| @@ -160,6 +167,10 @@ class DirectoriesMixin: | |||||||
|  |  | ||||||
|  |  | ||||||
| class FileSystemAssertsMixin: | class FileSystemAssertsMixin: | ||||||
|  |     """ | ||||||
|  |     Utilities for checking various state information of the file system | ||||||
|  |     """ | ||||||
|  |  | ||||||
|     def assertIsFile(self, path: Union[PathLike, str]): |     def assertIsFile(self, path: Union[PathLike, str]): | ||||||
|         self.assertTrue(Path(path).resolve().is_file(), f"File does not exist: {path}") |         self.assertTrue(Path(path).resolve().is_file(), f"File does not exist: {path}") | ||||||
|  |  | ||||||
| @@ -188,6 +199,11 @@ class FileSystemAssertsMixin: | |||||||
|  |  | ||||||
|  |  | ||||||
| class ConsumerProgressMixin: | class ConsumerProgressMixin: | ||||||
|  |     """ | ||||||
|  |     Mocks the Consumer _send_progress, preventing attempts to connect to Redis | ||||||
|  |     and allowing access to its calls for verification | ||||||
|  |     """ | ||||||
|  |  | ||||||
|     def setUp(self) -> None: |     def setUp(self) -> None: | ||||||
|         self.send_progress_patcher = mock.patch( |         self.send_progress_patcher = mock.patch( | ||||||
|             "documents.consumer.Consumer._send_progress", |             "documents.consumer.Consumer._send_progress", | ||||||
| @@ -310,3 +326,59 @@ class SampleDirMixin: | |||||||
|     SAMPLE_DIR = Path(__file__).parent / "samples" |     SAMPLE_DIR = Path(__file__).parent / "samples" | ||||||
|  |  | ||||||
|     BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes" |     BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes" | ||||||
|  |  | ||||||
|  |  | ||||||
class DummyProgressManager:
    """
    A dummy handler for progress management that doesn't actually try to
    connect to Redis.  Payloads are stored for test assertions if needed.

    Use it with
      mock.patch("documents.tasks.ProgressManager", DummyProgressManager)
    """

    def __init__(self, filename: str, task_id: Optional[str] = None) -> None:
        """
        Store the filename and task id which are reported in every payload
        and start with an empty list of recorded payloads.
        """
        self.filename = filename
        self.task_id = task_id
        # Every payload built by send_progress, in call order
        self.payloads: list[dict[str, Any]] = []

    def __enter__(self) -> "DummyProgressManager":
        """
        Opens the (dummy) layer and returns this instance for use in a
        with statement.
        """
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """
        Closes the (dummy) layer.  Returns None, so any exception raised
        inside the with block is not suppressed.
        """
        self.close()

    def open(self) -> None:
        """
        Does nothing, there is no connection to open
        """

    def close(self) -> None:
        """
        Does nothing, there is no connection to close
        """

    def send_progress(
        self,
        status: ProgressStatusOptions,
        message: str,
        current_progress: int,
        max_progress: int,
        extra_args: Optional[dict[str, Union[str, int]]] = None,
    ) -> None:
        """
        Builds a "status_update" payload from the arguments and the stored
        filename and task id, then appends it to self.payloads instead of
        sending it anywhere.

        Entries of extra_args, if given, are merged into the payload's
        "data" dict and overwrite any identically named key.
        """
        # Ensure the layer is open
        self.open()

        payload = {
            "type": "status_update",
            "data": {
                "filename": self.filename,
                "task_id": self.task_id,
                "current_progress": current_progress,
                "max_progress": max_progress,
                "status": status,
                "message": message,
            },
        }
        if extra_args is not None:
            payload["data"].update(extra_args)

        self.payloads.append(payload)
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Trenton H
					Trenton H