mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Refactor file consumption task to allow beginnings of a plugin system (#5367)
This commit is contained in:
parent
4dbf8d7969
commit
2da5e46386
@ -3,7 +3,6 @@ import re
|
||||
import tempfile
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Final
|
||||
from typing import Optional
|
||||
|
||||
from django.conf import settings
|
||||
@ -15,8 +14,9 @@ from PIL import Image
|
||||
|
||||
from documents.converters import convert_from_tiff_to_pdf
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
from documents.data_models import DocumentSource
|
||||
from documents.plugins.base import ConsumeTaskPlugin
|
||||
from documents.plugins.base import StopConsumeTaskError
|
||||
from documents.plugins.helpers import ProgressStatusOptions
|
||||
from documents.utils import copy_basic_file_stats
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
|
||||
@ -26,7 +26,7 @@ logger = logging.getLogger("paperless.barcodes")
|
||||
@dataclass(frozen=True)
|
||||
class Barcode:
|
||||
"""
|
||||
Holds the information about a single barcode and its location
|
||||
Holds the information about a single barcode and its location in a document
|
||||
"""
|
||||
|
||||
page: int
|
||||
@ -49,77 +49,111 @@ class Barcode:
|
||||
return self.value.startswith(settings.CONSUMER_ASN_BARCODE_PREFIX)
|
||||
|
||||
|
||||
class BarcodeReader:
|
||||
def __init__(self, filepath: Path, mime_type: str) -> None:
|
||||
self.file: Final[Path] = filepath
|
||||
self.mime: Final[str] = mime_type
|
||||
self.pdf_file: Path = self.file
|
||||
self.barcodes: list[Barcode] = []
|
||||
self._tiff_conversion_done = False
|
||||
self.temp_dir: Optional[tempfile.TemporaryDirectory] = None
|
||||
class BarcodePlugin(ConsumeTaskPlugin):
|
||||
NAME: str = "BarcodePlugin"
|
||||
|
||||
@property
|
||||
def able_to_run(self) -> bool:
|
||||
"""
|
||||
Able to run if:
|
||||
- ASN from barcode detection is enabled or
|
||||
- Barcode support is enabled and the mime type is supported
|
||||
"""
|
||||
if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
|
||||
self.SUPPORTED_FILE_MIMES = {"application/pdf", "image/tiff"}
|
||||
supported_mimes = {"application/pdf", "image/tiff"}
|
||||
else:
|
||||
self.SUPPORTED_FILE_MIMES = {"application/pdf"}
|
||||
supported_mimes = {"application/pdf"}
|
||||
|
||||
def __enter__(self):
|
||||
if self.supported_mime_type:
|
||||
self.temp_dir = tempfile.TemporaryDirectory(prefix="paperless-barcodes")
|
||||
return self
|
||||
return (
|
||||
settings.CONSUMER_ENABLE_ASN_BARCODE or settings.CONSUMER_ENABLE_BARCODES
|
||||
) and self.input_doc.mime_type in supported_mimes
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
if self.temp_dir is not None:
|
||||
self.temp_dir.cleanup()
|
||||
self.temp_dir = None
|
||||
def setup(self):
|
||||
self.temp_dir = tempfile.TemporaryDirectory(
|
||||
dir=self.base_tmp_dir,
|
||||
prefix="barcode",
|
||||
)
|
||||
self.pdf_file = self.input_doc.original_file
|
||||
self._tiff_conversion_done = False
|
||||
self.barcodes: list[Barcode] = []
|
||||
|
||||
@property
|
||||
def supported_mime_type(self) -> bool:
|
||||
"""
|
||||
Return True if the given mime type is supported for barcodes, false otherwise
|
||||
"""
|
||||
return self.mime in self.SUPPORTED_FILE_MIMES
|
||||
def run(self) -> Optional[str]:
|
||||
# Maybe do the conversion of TIFF to PDF
|
||||
self.convert_from_tiff_to_pdf()
|
||||
|
||||
@property
|
||||
def asn(self) -> Optional[int]:
|
||||
"""
|
||||
Search the parsed barcodes for any ASNs.
|
||||
The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
|
||||
is considered the ASN to be used.
|
||||
Returns the detected ASN (or None)
|
||||
"""
|
||||
asn = None
|
||||
|
||||
if not self.supported_mime_type:
|
||||
return None
|
||||
|
||||
# Ensure the barcodes have been read
|
||||
# Locate any barcodes in the files
|
||||
self.detect()
|
||||
|
||||
# get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
|
||||
asn_text = next(
|
||||
(x.value for x in self.barcodes if x.is_asn),
|
||||
None,
|
||||
# Update/overwrite an ASN if possible
|
||||
located_asn = self.asn
|
||||
if located_asn is not None:
|
||||
logger.info(f"Found ASN in barcode: {located_asn}")
|
||||
self.metadata.asn = located_asn
|
||||
|
||||
separator_pages = self.get_separation_pages()
|
||||
if not separator_pages:
|
||||
return "No pages to split on!"
|
||||
|
||||
# We have pages to split against
|
||||
|
||||
# Note this does NOT use the base_temp_dir, as that will be removed
|
||||
tmp_dir = Path(
|
||||
tempfile.mkdtemp(
|
||||
dir=settings.SCRATCH_DIR,
|
||||
prefix="paperless-barcode-split-",
|
||||
),
|
||||
).resolve()
|
||||
|
||||
from documents import tasks
|
||||
|
||||
# Create the split document tasks
|
||||
for new_document in self.separate_pages(separator_pages):
|
||||
copy_file_with_basic_stats(new_document, tmp_dir / new_document.name)
|
||||
|
||||
task = tasks.consume_file.delay(
|
||||
ConsumableDocument(
|
||||
# Same source, for templates
|
||||
source=self.input_doc.source,
|
||||
mailrule_id=self.input_doc.mailrule_id,
|
||||
# Can't use same folder or the consume might grab it again
|
||||
original_file=(tmp_dir / new_document.name).resolve(),
|
||||
),
|
||||
# All the same metadata
|
||||
self.metadata,
|
||||
)
|
||||
logger.info(f"Created new task {task.id} for {new_document.name}")
|
||||
|
||||
# This file is now two or more files
|
||||
self.input_doc.original_file.unlink()
|
||||
|
||||
msg = "Barcode splitting complete!"
|
||||
|
||||
# Update the progress to complete
|
||||
self.status_mgr.send_progress(ProgressStatusOptions.SUCCESS, msg, 100, 100)
|
||||
|
||||
# Request the consume task stops
|
||||
raise StopConsumeTaskError(msg)
|
||||
|
||||
def cleanup(self) -> None:
|
||||
self.temp_dir.cleanup()
|
||||
|
||||
def convert_from_tiff_to_pdf(self):
|
||||
"""
|
||||
May convert a TIFF image into a PDF, if the input is a TIFF and
|
||||
the TIFF has not been made into a PDF
|
||||
"""
|
||||
# Nothing to do, pdf_file is already assigned correctly
|
||||
if self.input_doc.mime_type != "image/tiff" or self._tiff_conversion_done:
|
||||
return
|
||||
|
||||
self.pdf_file = convert_from_tiff_to_pdf(
|
||||
self.input_doc.original_file,
|
||||
Path(self.temp_dir.name),
|
||||
)
|
||||
|
||||
if asn_text:
|
||||
logger.debug(f"Found ASN Barcode: {asn_text}")
|
||||
# remove the prefix and remove whitespace
|
||||
asn_text = asn_text[len(settings.CONSUMER_ASN_BARCODE_PREFIX) :].strip()
|
||||
|
||||
# remove non-numeric parts of the remaining string
|
||||
asn_text = re.sub(r"\D", "", asn_text)
|
||||
|
||||
# now, try parsing the ASN number
|
||||
try:
|
||||
asn = int(asn_text)
|
||||
except ValueError as e:
|
||||
logger.warning(f"Failed to parse ASN number because: {e}")
|
||||
|
||||
return asn
|
||||
self._tiff_conversion_done = True
|
||||
|
||||
@staticmethod
|
||||
def read_barcodes_zxing(image: Image) -> list[str]:
|
||||
def read_barcodes_zxing(image: Image.Image) -> list[str]:
|
||||
barcodes = []
|
||||
|
||||
import zxingcpp
|
||||
@ -135,7 +169,7 @@ class BarcodeReader:
|
||||
return barcodes
|
||||
|
||||
@staticmethod
|
||||
def read_barcodes_pyzbar(image: Image) -> list[str]:
|
||||
def read_barcodes_pyzbar(image: Image.Image) -> list[str]:
|
||||
barcodes = []
|
||||
|
||||
from pyzbar import pyzbar
|
||||
@ -154,18 +188,6 @@ class BarcodeReader:
|
||||
|
||||
return barcodes
|
||||
|
||||
def convert_from_tiff_to_pdf(self):
|
||||
"""
|
||||
May convert a TIFF image into a PDF, if the input is a TIFF and
|
||||
the TIFF has not been made into a PDF
|
||||
"""
|
||||
# Nothing to do, pdf_file is already assigned correctly
|
||||
if self.mime != "image/tiff" or self._tiff_conversion_done:
|
||||
return
|
||||
|
||||
self._tiff_conversion_done = True
|
||||
self.pdf_file = convert_from_tiff_to_pdf(self.file, Path(self.temp_dir.name))
|
||||
|
||||
def detect(self) -> None:
|
||||
"""
|
||||
Scan all pages of the PDF as images, updating barcodes and the pages
|
||||
@ -218,10 +240,45 @@ class BarcodeReader:
|
||||
# This file is really borked, allow the consumption to continue
|
||||
# but it may fail further on
|
||||
except Exception as e: # pragma: no cover
|
||||
logger.exception(
|
||||
logger.warning(
|
||||
f"Exception during barcode scanning: {e}",
|
||||
)
|
||||
|
||||
@property
|
||||
def asn(self) -> Optional[int]:
|
||||
"""
|
||||
Search the parsed barcodes for any ASNs.
|
||||
The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
|
||||
is considered the ASN to be used.
|
||||
Returns the detected ASN (or None)
|
||||
"""
|
||||
asn = None
|
||||
|
||||
# Ensure the barcodes have been read
|
||||
self.detect()
|
||||
|
||||
# get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
|
||||
asn_text = next(
|
||||
(x.value for x in self.barcodes if x.is_asn),
|
||||
None,
|
||||
)
|
||||
|
||||
if asn_text:
|
||||
logger.debug(f"Found ASN Barcode: {asn_text}")
|
||||
# remove the prefix and remove whitespace
|
||||
asn_text = asn_text[len(settings.CONSUMER_ASN_BARCODE_PREFIX) :].strip()
|
||||
|
||||
# remove non-numeric parts of the remaining string
|
||||
asn_text = re.sub(r"\D", "", asn_text)
|
||||
|
||||
# now, try parsing the ASN number
|
||||
try:
|
||||
asn = int(asn_text)
|
||||
except ValueError as e:
|
||||
logger.warning(f"Failed to parse ASN number because: {e}")
|
||||
|
||||
return asn
|
||||
|
||||
def get_separation_pages(self) -> dict[int, bool]:
|
||||
"""
|
||||
Search the parsed barcodes for separators and returns a dict of page
|
||||
@ -251,7 +308,7 @@ class BarcodeReader:
|
||||
"""
|
||||
|
||||
document_paths = []
|
||||
fname = self.file.stem
|
||||
fname = self.input_doc.original_file.stem
|
||||
with Pdf.open(self.pdf_file) as input_pdf:
|
||||
# Start with an empty document
|
||||
current_document: list[Page] = []
|
||||
@ -292,58 +349,8 @@ class BarcodeReader:
|
||||
with open(savepath, "wb") as out:
|
||||
dst.save(out)
|
||||
|
||||
copy_basic_file_stats(self.file, savepath)
|
||||
copy_basic_file_stats(self.input_doc.original_file, savepath)
|
||||
|
||||
document_paths.append(savepath)
|
||||
|
||||
return document_paths
|
||||
|
||||
def separate(
|
||||
self,
|
||||
source: DocumentSource,
|
||||
overrides: DocumentMetadataOverrides,
|
||||
) -> bool:
|
||||
"""
|
||||
Separates the document, based on barcodes and configuration, creating new
|
||||
documents as required in the appropriate location.
|
||||
|
||||
Returns True if a split happened, False otherwise
|
||||
"""
|
||||
# Do nothing
|
||||
if not self.supported_mime_type:
|
||||
logger.warning(f"Unsupported file format for barcode reader: {self.mime}")
|
||||
return False
|
||||
|
||||
# Does nothing unless needed
|
||||
self.convert_from_tiff_to_pdf()
|
||||
|
||||
# Actually read the codes, if any
|
||||
self.detect()
|
||||
|
||||
separator_pages = self.get_separation_pages()
|
||||
|
||||
# Also do nothing
|
||||
if not separator_pages:
|
||||
logger.warning("No pages to split on!")
|
||||
return False
|
||||
|
||||
tmp_dir = Path(tempfile.mkdtemp(prefix="paperless-barcode-split-")).resolve()
|
||||
|
||||
from documents import tasks
|
||||
|
||||
# Create the split document tasks
|
||||
for new_document in self.separate_pages(separator_pages):
|
||||
copy_file_with_basic_stats(new_document, tmp_dir / new_document.name)
|
||||
|
||||
tasks.consume_file.delay(
|
||||
ConsumableDocument(
|
||||
# Same source, for templates
|
||||
source=source,
|
||||
# Can't use same folder or the consume might grab it again
|
||||
original_file=(tmp_dir / new_document.name).resolve(),
|
||||
),
|
||||
# All the same metadata
|
||||
overrides,
|
||||
)
|
||||
logger.info("Barcode splitting complete!")
|
||||
return True
|
||||
|
@ -21,7 +21,6 @@ from filelock import FileLock
|
||||
from rest_framework.reverse import reverse
|
||||
|
||||
from documents.classifier import load_classifier
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
from documents.file_handling import create_source_path_directory
|
||||
from documents.file_handling import generate_unique_filename
|
||||
@ -42,12 +41,83 @@ from documents.parsers import ParseError
|
||||
from documents.parsers import get_parser_class_for_mime_type
|
||||
from documents.parsers import parse_date
|
||||
from documents.permissions import set_permissions_for_object
|
||||
from documents.plugins.base import AlwaysRunPluginMixin
|
||||
from documents.plugins.base import ConsumeTaskPlugin
|
||||
from documents.plugins.base import NoCleanupPluginMixin
|
||||
from documents.plugins.base import NoSetupPluginMixin
|
||||
from documents.signals import document_consumption_finished
|
||||
from documents.signals import document_consumption_started
|
||||
from documents.utils import copy_basic_file_stats
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
|
||||
|
||||
class WorkflowTriggerPlugin(
|
||||
NoCleanupPluginMixin,
|
||||
NoSetupPluginMixin,
|
||||
AlwaysRunPluginMixin,
|
||||
ConsumeTaskPlugin,
|
||||
):
|
||||
NAME: str = "WorkflowTriggerPlugin"
|
||||
|
||||
def run(self) -> Optional[str]:
|
||||
"""
|
||||
Get overrides from matching workflows
|
||||
"""
|
||||
overrides = DocumentMetadataOverrides()
|
||||
for workflow in Workflow.objects.filter(enabled=True).order_by("order"):
|
||||
template_overrides = DocumentMetadataOverrides()
|
||||
|
||||
if document_matches_workflow(
|
||||
self.input_doc,
|
||||
workflow,
|
||||
WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
|
||||
):
|
||||
for action in workflow.actions.all():
|
||||
if action.assign_title is not None:
|
||||
template_overrides.title = action.assign_title
|
||||
if action.assign_tags is not None:
|
||||
template_overrides.tag_ids = [
|
||||
tag.pk for tag in action.assign_tags.all()
|
||||
]
|
||||
if action.assign_correspondent is not None:
|
||||
template_overrides.correspondent_id = (
|
||||
action.assign_correspondent.pk
|
||||
)
|
||||
if action.assign_document_type is not None:
|
||||
template_overrides.document_type_id = (
|
||||
action.assign_document_type.pk
|
||||
)
|
||||
if action.assign_storage_path is not None:
|
||||
template_overrides.storage_path_id = (
|
||||
action.assign_storage_path.pk
|
||||
)
|
||||
if action.assign_owner is not None:
|
||||
template_overrides.owner_id = action.assign_owner.pk
|
||||
if action.assign_view_users is not None:
|
||||
template_overrides.view_users = [
|
||||
user.pk for user in action.assign_view_users.all()
|
||||
]
|
||||
if action.assign_view_groups is not None:
|
||||
template_overrides.view_groups = [
|
||||
group.pk for group in action.assign_view_groups.all()
|
||||
]
|
||||
if action.assign_change_users is not None:
|
||||
template_overrides.change_users = [
|
||||
user.pk for user in action.assign_change_users.all()
|
||||
]
|
||||
if action.assign_change_groups is not None:
|
||||
template_overrides.change_groups = [
|
||||
group.pk for group in action.assign_change_groups.all()
|
||||
]
|
||||
if action.assign_custom_fields is not None:
|
||||
template_overrides.custom_field_ids = [
|
||||
field.pk for field in action.assign_custom_fields.all()
|
||||
]
|
||||
|
||||
overrides.update(template_overrides)
|
||||
self.metadata.update(overrides)
|
||||
|
||||
|
||||
class ConsumerError(Exception):
|
||||
pass
|
||||
|
||||
@ -602,70 +672,6 @@ class Consumer(LoggingMixin):
|
||||
|
||||
return document
|
||||
|
||||
def get_workflow_overrides(
|
||||
self,
|
||||
input_doc: ConsumableDocument,
|
||||
) -> DocumentMetadataOverrides:
|
||||
"""
|
||||
Get overrides from matching workflows
|
||||
"""
|
||||
overrides = DocumentMetadataOverrides()
|
||||
for workflow in Workflow.objects.filter(enabled=True).order_by("order"):
|
||||
template_overrides = DocumentMetadataOverrides()
|
||||
|
||||
if document_matches_workflow(
|
||||
input_doc,
|
||||
workflow,
|
||||
WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
|
||||
):
|
||||
for action in workflow.actions.all():
|
||||
self.log.info(
|
||||
f"Applying overrides in {action} from {workflow}",
|
||||
)
|
||||
if action.assign_title is not None:
|
||||
template_overrides.title = action.assign_title
|
||||
if action.assign_tags is not None:
|
||||
template_overrides.tag_ids = [
|
||||
tag.pk for tag in action.assign_tags.all()
|
||||
]
|
||||
if action.assign_correspondent is not None:
|
||||
template_overrides.correspondent_id = (
|
||||
action.assign_correspondent.pk
|
||||
)
|
||||
if action.assign_document_type is not None:
|
||||
template_overrides.document_type_id = (
|
||||
action.assign_document_type.pk
|
||||
)
|
||||
if action.assign_storage_path is not None:
|
||||
template_overrides.storage_path_id = (
|
||||
action.assign_storage_path.pk
|
||||
)
|
||||
if action.assign_owner is not None:
|
||||
template_overrides.owner_id = action.assign_owner.pk
|
||||
if action.assign_view_users is not None:
|
||||
template_overrides.view_users = [
|
||||
user.pk for user in action.assign_view_users.all()
|
||||
]
|
||||
if action.assign_view_groups is not None:
|
||||
template_overrides.view_groups = [
|
||||
group.pk for group in action.assign_view_groups.all()
|
||||
]
|
||||
if action.assign_change_users is not None:
|
||||
template_overrides.change_users = [
|
||||
user.pk for user in action.assign_change_users.all()
|
||||
]
|
||||
if action.assign_change_groups is not None:
|
||||
template_overrides.change_groups = [
|
||||
group.pk for group in action.assign_change_groups.all()
|
||||
]
|
||||
if action.assign_custom_fields is not None:
|
||||
template_overrides.custom_field_ids = [
|
||||
field.pk for field in action.assign_custom_fields.all()
|
||||
]
|
||||
|
||||
overrides.update(template_overrides)
|
||||
return overrides
|
||||
|
||||
def _parse_title_placeholders(self, title: str) -> str:
|
||||
local_added = timezone.localtime(timezone.now())
|
||||
|
||||
|
@ -3,127 +3,145 @@ import logging
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Final
|
||||
from typing import Optional
|
||||
|
||||
from django.conf import settings
|
||||
from pikepdf import Pdf
|
||||
|
||||
from documents.consumer import ConsumerError
|
||||
from documents.converters import convert_from_tiff_to_pdf
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.plugins.base import ConsumeTaskPlugin
|
||||
from documents.plugins.base import NoCleanupPluginMixin
|
||||
from documents.plugins.base import NoSetupPluginMixin
|
||||
from documents.plugins.base import StopConsumeTaskError
|
||||
|
||||
logger = logging.getLogger("paperless.double_sided")
|
||||
|
||||
# Hardcoded for now, could be made a configurable setting if needed
|
||||
TIMEOUT_MINUTES = 30
|
||||
TIMEOUT_MINUTES: Final[int] = 30
|
||||
TIMEOUT_SECONDS: Final[int] = TIMEOUT_MINUTES * 60
|
||||
|
||||
# Used by test cases
|
||||
STAGING_FILE_NAME = "double-sided-staging.pdf"
|
||||
|
||||
|
||||
def collate(input_doc: ConsumableDocument) -> str:
|
||||
"""
|
||||
Tries to collate pages from 2 single sided scans of a double sided
|
||||
document.
|
||||
class CollatePlugin(NoCleanupPluginMixin, NoSetupPluginMixin, ConsumeTaskPlugin):
|
||||
NAME: str = "CollatePlugin"
|
||||
|
||||
When called with a file, it checks whether or not a staging file
|
||||
exists, if not, the current file is turned into that staging file
|
||||
containing the odd numbered pages.
|
||||
|
||||
If a staging file exists, and it is not too old, the current file is
|
||||
considered to be the second part (the even numbered pages) and it will
|
||||
collate the pages of both, the pages of the second file will be added
|
||||
in reverse order, since the ADF will have scanned the pages from bottom
|
||||
to top.
|
||||
|
||||
Returns a status message on success, or raises a ConsumerError
|
||||
in case of failure.
|
||||
"""
|
||||
|
||||
# Make sure scratch dir exists, Consumer might not have run yet
|
||||
settings.SCRATCH_DIR.mkdir(exist_ok=True)
|
||||
|
||||
if input_doc.mime_type == "application/pdf":
|
||||
pdf_file = input_doc.original_file
|
||||
elif (
|
||||
input_doc.mime_type == "image/tiff"
|
||||
and settings.CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT
|
||||
):
|
||||
pdf_file = convert_from_tiff_to_pdf(
|
||||
input_doc.original_file,
|
||||
settings.SCRATCH_DIR,
|
||||
)
|
||||
input_doc.original_file.unlink()
|
||||
else:
|
||||
raise ConsumerError("Unsupported file type for collation of double-sided scans")
|
||||
|
||||
staging = settings.SCRATCH_DIR / STAGING_FILE_NAME
|
||||
|
||||
valid_staging_exists = False
|
||||
if staging.exists():
|
||||
stats = os.stat(str(staging))
|
||||
# if the file is older than the timeout, we don't consider
|
||||
# it valid
|
||||
if dt.datetime.now().timestamp() - stats.st_mtime > TIMEOUT_MINUTES * 60:
|
||||
logger.warning("Outdated double sided staging file exists, deleting it")
|
||||
os.unlink(str(staging))
|
||||
else:
|
||||
valid_staging_exists = True
|
||||
|
||||
if valid_staging_exists:
|
||||
try:
|
||||
# Collate pages from second PDF in reverse order
|
||||
with Pdf.open(staging) as pdf1, Pdf.open(pdf_file) as pdf2:
|
||||
pdf2.pages.reverse()
|
||||
try:
|
||||
for i, page in enumerate(pdf2.pages):
|
||||
pdf1.pages.insert(2 * i + 1, page)
|
||||
except IndexError:
|
||||
raise ConsumerError(
|
||||
"This second file (even numbered pages) contains more "
|
||||
"pages than the first/odd numbered one. This means the "
|
||||
"two uploaded files don't belong to the same double-"
|
||||
"sided scan. Please retry, starting with the odd "
|
||||
"numbered pages again.",
|
||||
)
|
||||
# Merged file has the same path, but without the
|
||||
# double-sided subdir. Therefore, it is also in the
|
||||
# consumption dir and will be picked up for processing
|
||||
old_file = input_doc.original_file
|
||||
new_file = Path(
|
||||
*(
|
||||
part
|
||||
for part in old_file.with_name(
|
||||
f"{old_file.stem}-collated.pdf",
|
||||
).parts
|
||||
if part != settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME
|
||||
),
|
||||
)
|
||||
# If the user didn't create the subdirs yet, do it for them
|
||||
new_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
pdf1.save(new_file)
|
||||
logger.info("Collated documents into new file %s", new_file)
|
||||
return (
|
||||
"Success. Even numbered pages of double sided scan collated "
|
||||
"with odd pages"
|
||||
)
|
||||
finally:
|
||||
# Delete staging and recently uploaded file no matter what.
|
||||
# If any error occurs, the user needs to be able to restart
|
||||
# the process from scratch; after all, the staging file
|
||||
# with the odd numbered pages might be the culprit
|
||||
pdf_file.unlink()
|
||||
staging.unlink()
|
||||
|
||||
else:
|
||||
shutil.move(pdf_file, staging)
|
||||
# update access to modification time so we know if the file
|
||||
# is outdated when another file gets uploaded
|
||||
os.utime(staging, (dt.datetime.now().timestamp(),) * 2)
|
||||
logger.info(
|
||||
"Got scan with odd numbered pages of double-sided scan, moved it to %s",
|
||||
staging,
|
||||
)
|
||||
@property
|
||||
def able_to_run(self) -> bool:
|
||||
return (
|
||||
"Received odd numbered pages of double sided scan, waiting up to "
|
||||
f"{TIMEOUT_MINUTES} minutes for even numbered pages"
|
||||
settings.CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED
|
||||
and settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME
|
||||
in self.input_doc.original_file.parts
|
||||
)
|
||||
|
||||
def run(self) -> Optional[str]:
|
||||
"""
|
||||
Tries to collate pages from 2 single sided scans of a double sided
|
||||
document.
|
||||
|
||||
When called with a file, it checks whether or not a staging file
|
||||
exists, if not, the current file is turned into that staging file
|
||||
containing the odd numbered pages.
|
||||
|
||||
If a staging file exists, and it is not too old, the current file is
|
||||
considered to be the second part (the even numbered pages) and it will
|
||||
collate the pages of both, the pages of the second file will be added
|
||||
in reverse order, since the ADF will have scanned the pages from bottom
|
||||
to top.
|
||||
|
||||
Returns a status message on success, or raises a ConsumerError
|
||||
in case of failure.
|
||||
"""
|
||||
|
||||
if self.input_doc.mime_type == "application/pdf":
|
||||
pdf_file = self.input_doc.original_file
|
||||
elif (
|
||||
self.input_doc.mime_type == "image/tiff"
|
||||
and settings.CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT
|
||||
):
|
||||
pdf_file = convert_from_tiff_to_pdf(
|
||||
self.input_doc.original_file,
|
||||
self.base_tmp_dir,
|
||||
)
|
||||
self.input_doc.original_file.unlink()
|
||||
else:
|
||||
raise ConsumerError(
|
||||
"Unsupported file type for collation of double-sided scans",
|
||||
)
|
||||
|
||||
staging: Path = settings.SCRATCH_DIR / STAGING_FILE_NAME
|
||||
|
||||
valid_staging_exists = False
|
||||
if staging.exists():
|
||||
stats = staging.stat()
|
||||
# if the file is older than the timeout, we don't consider
|
||||
# it valid
|
||||
if (dt.datetime.now().timestamp() - stats.st_mtime) > TIMEOUT_SECONDS:
|
||||
logger.warning("Outdated double sided staging file exists, deleting it")
|
||||
staging.unlink()
|
||||
else:
|
||||
valid_staging_exists = True
|
||||
|
||||
if valid_staging_exists:
|
||||
try:
|
||||
# Collate pages from second PDF in reverse order
|
||||
with Pdf.open(staging) as pdf1, Pdf.open(pdf_file) as pdf2:
|
||||
pdf2.pages.reverse()
|
||||
try:
|
||||
for i, page in enumerate(pdf2.pages):
|
||||
pdf1.pages.insert(2 * i + 1, page)
|
||||
except IndexError:
|
||||
raise ConsumerError(
|
||||
"This second file (even numbered pages) contains more "
|
||||
"pages than the first/odd numbered one. This means the "
|
||||
"two uploaded files don't belong to the same double-"
|
||||
"sided scan. Please retry, starting with the odd "
|
||||
"numbered pages again.",
|
||||
)
|
||||
# Merged file has the same path, but without the
|
||||
# double-sided subdir. Therefore, it is also in the
|
||||
# consumption dir and will be picked up for processing
|
||||
old_file = self.input_doc.original_file
|
||||
new_file = Path(
|
||||
*(
|
||||
part
|
||||
for part in old_file.with_name(
|
||||
f"{old_file.stem}-collated.pdf",
|
||||
).parts
|
||||
if part
|
||||
!= settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME
|
||||
),
|
||||
)
|
||||
# If the user didn't create the subdirs yet, do it for them
|
||||
new_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
pdf1.save(new_file)
|
||||
logger.info("Collated documents into new file %s", new_file)
|
||||
raise StopConsumeTaskError(
|
||||
"Success. Even numbered pages of double sided scan collated "
|
||||
"with odd pages",
|
||||
)
|
||||
finally:
|
||||
# Delete staging and recently uploaded file no matter what.
|
||||
# If any error occurs, the user needs to be able to restart
|
||||
# the process from scratch; after all, the staging file
|
||||
# with the odd numbered pages might be the culprit
|
||||
pdf_file.unlink()
|
||||
staging.unlink()
|
||||
|
||||
else:
|
||||
shutil.move(pdf_file, staging)
|
||||
# update access to modification time so we know if the file
|
||||
# is outdated when another file gets uploaded
|
||||
timestamp = dt.datetime.now().timestamp()
|
||||
os.utime(staging, (timestamp, timestamp))
|
||||
logger.info(
|
||||
"Got scan with odd numbered pages of double-sided scan, moved it to %s",
|
||||
staging,
|
||||
)
|
||||
raise StopConsumeTaskError(
|
||||
"Received odd numbered pages of double sided scan, waiting up to "
|
||||
f"{TIMEOUT_MINUTES} minutes for even numbered pages",
|
||||
)
|
||||
|
0
src/documents/plugins/__init__.py
Normal file
0
src/documents/plugins/__init__.py
Normal file
131
src/documents/plugins/base.py
Normal file
131
src/documents/plugins/base.py
Normal file
@ -0,0 +1,131 @@
|
||||
import abc
|
||||
from pathlib import Path
|
||||
from typing import Final
|
||||
from typing import Optional
|
||||
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
from documents.plugins.helpers import ProgressManager
|
||||
|
||||
|
||||
class StopConsumeTaskError(Exception):
|
||||
"""
|
||||
A plugin setup or run may raise this to exit the asynchronous consume task.
|
||||
|
||||
Most likely, this means it has created one or more new tasks to execute instead,
|
||||
such as when a barcode has been used to create new documents
|
||||
"""
|
||||
|
||||
def __init__(self, message: str) -> None:
|
||||
self.message = message
|
||||
super().__init__(message)
|
||||
|
||||
|
||||
class ConsumeTaskPlugin(abc.ABC):
|
||||
"""
|
||||
Defines the interface for a plugin for the document consume task
|
||||
Meanings as per RFC2119 (https://datatracker.ietf.org/doc/html/rfc2119)
|
||||
|
||||
Plugin Implementation
|
||||
|
||||
The plugin SHALL implement property able_to_run and methods setup, run and cleanup.
|
||||
The plugin property able_to_run SHALL return True if the plugin is able to run, given the conditions, settings and document information.
|
||||
The plugin property able_to_run MAY be hardcoded to return True.
|
||||
The plugin setup SHOULD perform any resource creation or additional initialization needed to run the document.
|
||||
The plugin setup MAY be a non-operation.
|
||||
The plugin cleanup SHOULD perform resource cleanup, including in the event of an error.
|
||||
The plugin cleanup MAY be a non-operation.
|
||||
The plugin run SHALL perform any operations against the document or system state required for the plugin.
|
||||
The plugin run MAY update the document metadata.
|
||||
The plugin run MAY return an informational message.
|
||||
The plugin run MAY raise StopConsumeTaskError to cease any further operations against the document.
|
||||
|
||||
Plugin Manager Implementation
|
||||
|
||||
The plugin manager SHALL provide the plugin with the input document, document metadata, progress manager and a created temporary directory.
|
||||
The plugin manager SHALL execute the plugin setup, run and cleanup, in that order IF the plugin property able_to_run is True.
|
||||
The plugin manager SHOULD log the return message of executing a plugin's run.
|
||||
The plugin manager SHALL always execute the plugin cleanup, IF the plugin property able_to_run is True.
|
||||
The plugin manager SHALL cease calling plugins and exit the task IF a plugin raises StopConsumeTaskError.
|
||||
The plugin manager SHOULD return the StopConsumeTaskError message IF a plugin raises StopConsumeTaskError.
|
||||
"""
|
||||
|
||||
NAME: str = "ConsumeTaskPlugin"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
input_doc: ConsumableDocument,
|
||||
metadata: DocumentMetadataOverrides,
|
||||
status_mgr: ProgressManager,
|
||||
base_tmp_dir: Path,
|
||||
task_id: str,
|
||||
) -> None:
|
||||
super().__init__()
|
||||
self.input_doc = input_doc
|
||||
self.metadata = metadata
|
||||
self.base_tmp_dir: Final = base_tmp_dir
|
||||
self.status_mgr = status_mgr
|
||||
self.task_id: Final = task_id
|
||||
|
||||
@abc.abstractproperty
|
||||
def able_to_run(self) -> bool:
|
||||
"""
|
||||
Return True if the conditions are met for the plugin to run, False otherwise
|
||||
|
||||
If False, setup(), run() and cleanup() will not be called
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def setup(self) -> None:
|
||||
"""
|
||||
Allows the plugin to perform any additional setup it may need, such as creating
|
||||
a temporary directory, copying a file somewhere, etc.
|
||||
|
||||
Executed before run()
|
||||
|
||||
In general, this should be the "light" work, not the bulk of processing
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def run(self) -> Optional[str]:
|
||||
"""
|
||||
The bulk of plugin processing, this does whatever action the plugin is for.
|
||||
|
||||
Executed after setup() and before cleanup()
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def cleanup(self) -> None:
|
||||
"""
|
||||
Allows the plugin to execute any cleanup it may require
|
||||
|
||||
Executed after run(), even in the case of error
|
||||
"""
|
||||
|
||||
|
||||
class AlwaysRunPluginMixin(ConsumeTaskPlugin):
|
||||
"""
|
||||
A plugin which is always able to run
|
||||
"""
|
||||
|
||||
@property
|
||||
def able_to_run(self) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
class NoSetupPluginMixin(ConsumeTaskPlugin):
|
||||
"""
|
||||
A plugin which requires no setup
|
||||
"""
|
||||
|
||||
def setup(self) -> None:
|
||||
pass
|
||||
|
||||
|
||||
class NoCleanupPluginMixin(ConsumeTaskPlugin):
|
||||
"""
|
||||
A plugin which needs to clean up no files
|
||||
"""
|
||||
|
||||
def cleanup(self) -> None:
|
||||
pass
|
82
src/documents/plugins/helpers.py
Normal file
82
src/documents/plugins/helpers.py
Normal file
@ -0,0 +1,82 @@
|
||||
import enum
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Optional
|
||||
from typing import Union
|
||||
|
||||
from asgiref.sync import async_to_sync
|
||||
from channels.layers import get_channel_layer
|
||||
from channels_redis.pubsub import RedisPubSubChannelLayer
|
||||
|
||||
|
||||
class ProgressStatusOptions(str, enum.Enum):
|
||||
STARTED = "STARTED"
|
||||
WORKING = "WORKING"
|
||||
SUCCESS = "SUCCESS"
|
||||
FAILED = "FAILED"
|
||||
|
||||
|
||||
class ProgressManager:
|
||||
"""
|
||||
Handles sending of progress information via the channel layer, with proper management
|
||||
of the open/close of the layer to ensure messages go out and everything is cleaned up
|
||||
"""
|
||||
|
||||
def __init__(self, filename: str, task_id: Optional[str] = None) -> None:
|
||||
self.filename = filename
|
||||
self._channel: Optional[RedisPubSubChannelLayer] = None
|
||||
self.task_id = task_id
|
||||
|
||||
def __enter__(self):
|
||||
self.open()
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.close()
|
||||
|
||||
def open(self) -> None:
|
||||
"""
|
||||
If not already opened, gets the default channel layer
|
||||
opened and ready to send messages
|
||||
"""
|
||||
if self._channel is None:
|
||||
self._channel = get_channel_layer()
|
||||
|
||||
def close(self) -> None:
|
||||
"""
|
||||
If it was opened, flushes the channel layer
|
||||
"""
|
||||
if self._channel is not None:
|
||||
async_to_sync(self._channel.flush)
|
||||
self._channel = None
|
||||
|
||||
def send_progress(
|
||||
self,
|
||||
status: ProgressStatusOptions,
|
||||
message: str,
|
||||
current_progress: int,
|
||||
max_progress: int,
|
||||
extra_args: Optional[dict[str, Union[str, int]]] = None,
|
||||
) -> None:
|
||||
# Ensure the layer is open
|
||||
self.open()
|
||||
|
||||
# Just for IDEs
|
||||
if TYPE_CHECKING:
|
||||
assert self._channel is not None
|
||||
|
||||
payload = {
|
||||
"type": "status_update",
|
||||
"data": {
|
||||
"filename": self.filename,
|
||||
"task_id": self.task_id,
|
||||
"current_progress": current_progress,
|
||||
"max_progress": max_progress,
|
||||
"status": status,
|
||||
"message": message,
|
||||
},
|
||||
}
|
||||
if extra_args is not None:
|
||||
payload["data"].update(extra_args)
|
||||
|
||||
# Construct and send the update
|
||||
async_to_sync(self._channel.group_send)("status_updates", payload)
|
@ -2,30 +2,30 @@ import hashlib
|
||||
import logging
|
||||
import shutil
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
from typing import Optional
|
||||
|
||||
import tqdm
|
||||
from asgiref.sync import async_to_sync
|
||||
from celery import Task
|
||||
from celery import shared_task
|
||||
from channels.layers import get_channel_layer
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
from django.db.models.signals import post_save
|
||||
from filelock import FileLock
|
||||
from redis.exceptions import ConnectionError
|
||||
from whoosh.writing import AsyncWriter
|
||||
|
||||
from documents import index
|
||||
from documents import sanity_checker
|
||||
from documents.barcodes import BarcodeReader
|
||||
from documents.barcodes import BarcodePlugin
|
||||
from documents.classifier import DocumentClassifier
|
||||
from documents.classifier import load_classifier
|
||||
from documents.consumer import Consumer
|
||||
from documents.consumer import ConsumerError
|
||||
from documents.consumer import WorkflowTriggerPlugin
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
from documents.double_sided import collate
|
||||
from documents.double_sided import CollatePlugin
|
||||
from documents.file_handling import create_source_path_directory
|
||||
from documents.file_handling import generate_unique_filename
|
||||
from documents.models import Correspondent
|
||||
@ -35,6 +35,10 @@ from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.parsers import DocumentParser
|
||||
from documents.parsers import get_parser_class_for_mime_type
|
||||
from documents.plugins.base import ConsumeTaskPlugin
|
||||
from documents.plugins.base import ProgressManager
|
||||
from documents.plugins.base import StopConsumeTaskError
|
||||
from documents.plugins.helpers import ProgressStatusOptions
|
||||
from documents.sanity_checker import SanityCheckFailedException
|
||||
from documents.signals import document_updated
|
||||
|
||||
@ -102,70 +106,60 @@ def consume_file(
|
||||
input_doc: ConsumableDocument,
|
||||
overrides: Optional[DocumentMetadataOverrides] = None,
|
||||
):
|
||||
def send_progress(status="SUCCESS", message="finished"):
|
||||
payload = {
|
||||
"filename": overrides.filename or input_doc.original_file.name,
|
||||
"task_id": None,
|
||||
"current_progress": 100,
|
||||
"max_progress": 100,
|
||||
"status": status,
|
||||
"message": message,
|
||||
}
|
||||
try:
|
||||
async_to_sync(get_channel_layer().group_send)(
|
||||
"status_updates",
|
||||
{"type": "status_update", "data": payload},
|
||||
)
|
||||
except ConnectionError as e:
|
||||
logger.warning(f"ConnectionError on status send: {e!s}")
|
||||
|
||||
# Default no overrides
|
||||
if overrides is None:
|
||||
overrides = DocumentMetadataOverrides()
|
||||
|
||||
# Handle collation of double-sided documents scanned in two parts
|
||||
if settings.CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED and (
|
||||
settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME
|
||||
in input_doc.original_file.parts
|
||||
):
|
||||
try:
|
||||
msg = collate(input_doc)
|
||||
send_progress(message=msg)
|
||||
return msg
|
||||
except ConsumerError as e:
|
||||
send_progress(status="FAILURE", message=e.args[0])
|
||||
raise e
|
||||
plugins: list[type[ConsumeTaskPlugin]] = [
|
||||
CollatePlugin,
|
||||
BarcodePlugin,
|
||||
WorkflowTriggerPlugin,
|
||||
]
|
||||
|
||||
# read all barcodes in the current document
|
||||
if settings.CONSUMER_ENABLE_BARCODES or settings.CONSUMER_ENABLE_ASN_BARCODE:
|
||||
with BarcodeReader(input_doc.original_file, input_doc.mime_type) as reader:
|
||||
if settings.CONSUMER_ENABLE_BARCODES and reader.separate(
|
||||
input_doc.source,
|
||||
with ProgressManager(
|
||||
overrides.filename or input_doc.original_file.name,
|
||||
self.request.id,
|
||||
) as status_mgr, TemporaryDirectory(dir=settings.SCRATCH_DIR) as tmp_dir:
|
||||
tmp_dir = Path(tmp_dir)
|
||||
for plugin_class in plugins:
|
||||
plugin_name = plugin_class.NAME
|
||||
|
||||
plugin = plugin_class(
|
||||
input_doc,
|
||||
overrides,
|
||||
):
|
||||
# notify the sender, otherwise the progress bar
|
||||
# in the UI stays stuck
|
||||
send_progress()
|
||||
# consuming stops here, since the original document with
|
||||
# the barcodes has been split and will be consumed separately
|
||||
input_doc.original_file.unlink()
|
||||
return "File successfully split"
|
||||
status_mgr,
|
||||
tmp_dir,
|
||||
self.request.id,
|
||||
)
|
||||
|
||||
# try reading the ASN from barcode
|
||||
if (
|
||||
settings.CONSUMER_ENABLE_ASN_BARCODE
|
||||
and (located_asn := reader.asn) is not None
|
||||
):
|
||||
# Note this will take precedence over an API provided ASN
|
||||
# But it's from a physical barcode, so that's good
|
||||
overrides.asn = located_asn
|
||||
logger.info(f"Found ASN in barcode: {overrides.asn}")
|
||||
if not plugin.able_to_run:
|
||||
logger.debug(f"Skipping plugin {plugin_name}")
|
||||
continue
|
||||
|
||||
template_overrides = Consumer().get_workflow_overrides(
|
||||
input_doc=input_doc,
|
||||
)
|
||||
try:
|
||||
logger.debug(f"Executing plugin {plugin_name}")
|
||||
plugin.setup()
|
||||
|
||||
overrides.update(template_overrides)
|
||||
msg = plugin.run()
|
||||
|
||||
if msg is not None:
|
||||
logger.info(f"{plugin_name} completed with: {msg}")
|
||||
else:
|
||||
logger.info(f"{plugin_name} completed with no message")
|
||||
|
||||
overrides = plugin.metadata
|
||||
|
||||
except StopConsumeTaskError as e:
|
||||
logger.info(f"{plugin_name} requested task exit: {e.message}")
|
||||
return e.message
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"{plugin_name} failed: {e}")
|
||||
status_mgr.send_progress(ProgressStatusOptions.FAILED, f"{e}", 100, 100)
|
||||
raise
|
||||
|
||||
finally:
|
||||
plugin.cleanup()
|
||||
|
||||
# continue with consumption if no barcode was found
|
||||
document = Consumer().try_consume_file(
|
||||
|
@ -1,4 +1,7 @@
|
||||
import shutil
|
||||
from collections.abc import Generator
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
@ -7,14 +10,13 @@ from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
|
||||
from documents import tasks
|
||||
from documents.barcodes import BarcodeReader
|
||||
from documents.consumer import ConsumerError
|
||||
from documents.barcodes import BarcodePlugin
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
from documents.data_models import DocumentSource
|
||||
from documents.models import Document
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import DocumentConsumeDelayMixin
|
||||
from documents.tests.utils import DummyProgressManager
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
from documents.tests.utils import SampleDirMixin
|
||||
|
||||
@ -26,8 +28,29 @@ except ImportError:
|
||||
HAS_ZXING_LIB = False
|
||||
|
||||
|
||||
class GetReaderPluginMixin:
|
||||
@contextmanager
|
||||
def get_reader(self, filepath: Path) -> Generator[BarcodePlugin, None, None]:
|
||||
reader = BarcodePlugin(
|
||||
ConsumableDocument(DocumentSource.ConsumeFolder, original_file=filepath),
|
||||
DocumentMetadataOverrides(),
|
||||
DummyProgressManager(filepath.name, None),
|
||||
self.dirs.scratch_dir,
|
||||
"task-id",
|
||||
)
|
||||
reader.setup()
|
||||
yield reader
|
||||
reader.cleanup()
|
||||
|
||||
|
||||
@override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
|
||||
class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, TestCase):
|
||||
class TestBarcode(
|
||||
DirectoriesMixin,
|
||||
FileSystemAssertsMixin,
|
||||
SampleDirMixin,
|
||||
GetReaderPluginMixin,
|
||||
TestCase,
|
||||
):
|
||||
def test_scan_file_for_separating_barcodes(self):
|
||||
"""
|
||||
GIVEN:
|
||||
@ -39,7 +62,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -60,7 +83,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff"
|
||||
|
||||
with BarcodeReader(test_file, "image/tiff") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -80,7 +103,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-alpha.tiff"
|
||||
|
||||
with BarcodeReader(test_file, "image/tiff") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -97,7 +120,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
- No pages to split on
|
||||
"""
|
||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -115,7 +138,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -133,7 +156,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "several-patcht-codes.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -158,7 +181,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
]:
|
||||
test_file = self.BARCODE_SAMPLE_DIR / test_file
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -177,7 +200,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-unreadable.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -195,7 +218,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-fax-image.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -214,7 +237,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-qr.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -234,7 +257,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -255,7 +278,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -276,7 +299,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -296,7 +319,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -315,7 +338,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "many-qr-codes.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -334,7 +357,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.SAMPLE_DIR / "password-is-test.pdf"
|
||||
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
warning = cm.output[0]
|
||||
expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes"
|
||||
@ -356,7 +379,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
documents = reader.separate_pages({1: False})
|
||||
|
||||
self.assertEqual(reader.pdf_file, test_file)
|
||||
@ -373,7 +396,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-double.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
documents = reader.separate_pages({1: False, 2: False})
|
||||
|
||||
self.assertEqual(len(documents), 2)
|
||||
@ -385,32 +408,18 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
WHEN:
|
||||
- No separation pages are provided
|
||||
THEN:
|
||||
- No new documents are produced
|
||||
- A warning is logged
|
||||
- Nothing happens
|
||||
"""
|
||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||
|
||||
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
self.assertFalse(
|
||||
reader.separate(
|
||||
DocumentSource.ApiUpload,
|
||||
DocumentMetadataOverrides(),
|
||||
),
|
||||
)
|
||||
self.assertEqual(
|
||||
cm.output,
|
||||
[
|
||||
"WARNING:paperless.barcodes:No pages to split on!",
|
||||
],
|
||||
)
|
||||
with self.get_reader(test_file) as reader:
|
||||
self.assertEqual("No pages to split on!", reader.run())
|
||||
|
||||
@override_settings(
|
||||
CONSUMER_ENABLE_BARCODES=True,
|
||||
CONSUMER_BARCODE_TIFF_SUPPORT=True,
|
||||
)
|
||||
@mock.patch("documents.consumer.Consumer.try_consume_file")
|
||||
def test_consume_barcode_unsupported_jpg_file(self, m):
|
||||
def test_consume_barcode_unsupported_jpg_file(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- JPEG image as input
|
||||
@ -422,35 +431,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.SAMPLE_DIR / "simple.jpg"
|
||||
|
||||
dst = settings.SCRATCH_DIR / "simple.jpg"
|
||||
shutil.copy(test_file, dst)
|
||||
|
||||
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
||||
self.assertIn(
|
||||
"Success",
|
||||
tasks.consume_file(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file=dst,
|
||||
),
|
||||
None,
|
||||
),
|
||||
)
|
||||
|
||||
self.assertListEqual(
|
||||
cm.output,
|
||||
[
|
||||
"WARNING:paperless.barcodes:Unsupported file format for barcode reader: image/jpeg",
|
||||
],
|
||||
)
|
||||
m.assert_called_once()
|
||||
|
||||
args, kwargs = m.call_args
|
||||
self.assertIsNone(kwargs["override_filename"])
|
||||
self.assertIsNone(kwargs["override_title"])
|
||||
self.assertIsNone(kwargs["override_correspondent_id"])
|
||||
self.assertIsNone(kwargs["override_document_type_id"])
|
||||
self.assertIsNone(kwargs["override_tag_ids"])
|
||||
with self.get_reader(test_file) as reader:
|
||||
self.assertFalse(reader.able_to_run)
|
||||
|
||||
@override_settings(
|
||||
CONSUMER_ENABLE_BARCODES=True,
|
||||
@ -467,7 +449,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -504,7 +486,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
separator_page_numbers = reader.get_separation_pages()
|
||||
|
||||
@ -550,7 +532,7 @@ class TestBarcodeNewConsume(
|
||||
|
||||
overrides = DocumentMetadataOverrides(tag_ids=[1, 2, 9])
|
||||
|
||||
with mock.patch("documents.tasks.async_to_sync") as progress_mocker:
|
||||
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||
self.assertEqual(
|
||||
tasks.consume_file(
|
||||
ConsumableDocument(
|
||||
@ -559,10 +541,8 @@ class TestBarcodeNewConsume(
|
||||
),
|
||||
overrides,
|
||||
),
|
||||
"File successfully split",
|
||||
"Barcode splitting complete!",
|
||||
)
|
||||
# We let the consumer know progress is done
|
||||
progress_mocker.assert_called_once()
|
||||
# 2 new document consume tasks created
|
||||
self.assertEqual(self.consume_file_mock.call_count, 2)
|
||||
|
||||
@ -580,7 +560,20 @@ class TestBarcodeNewConsume(
|
||||
self.assertEqual(overrides, new_doc_overrides)
|
||||
|
||||
|
||||
class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
||||
class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, GetReaderPluginMixin, TestCase):
|
||||
@contextmanager
|
||||
def get_reader(self, filepath: Path) -> BarcodePlugin:
|
||||
reader = BarcodePlugin(
|
||||
ConsumableDocument(DocumentSource.ConsumeFolder, original_file=filepath),
|
||||
DocumentMetadataOverrides(),
|
||||
DummyProgressManager(filepath.name, None),
|
||||
self.dirs.scratch_dir,
|
||||
"task-id",
|
||||
)
|
||||
reader.setup()
|
||||
yield reader
|
||||
reader.cleanup()
|
||||
|
||||
@override_settings(CONSUMER_ASN_BARCODE_PREFIX="CUSTOM-PREFIX-")
|
||||
def test_scan_file_for_asn_custom_prefix(self):
|
||||
"""
|
||||
@ -594,7 +587,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
||||
- The ASN integer value is correct
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
asn = reader.asn
|
||||
|
||||
self.assertEqual(reader.pdf_file, test_file)
|
||||
@ -613,7 +606,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
asn = reader.asn
|
||||
|
||||
self.assertEqual(reader.pdf_file, test_file)
|
||||
@ -630,55 +623,12 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
asn = reader.asn
|
||||
|
||||
self.assertEqual(reader.pdf_file, test_file)
|
||||
self.assertEqual(asn, None)
|
||||
|
||||
@override_settings(CONSUMER_ENABLE_ASN_BARCODE=True)
|
||||
def test_scan_file_for_asn_already_exists(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- PDF with an ASN barcode
|
||||
- ASN value already exists
|
||||
WHEN:
|
||||
- File is scanned for barcodes
|
||||
THEN:
|
||||
- ASN is retrieved from the document
|
||||
- Consumption fails
|
||||
"""
|
||||
|
||||
Document.objects.create(
|
||||
title="WOW",
|
||||
content="the content",
|
||||
archive_serial_number=123,
|
||||
checksum="456",
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
|
||||
|
||||
dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf"
|
||||
shutil.copy(test_file, dst)
|
||||
|
||||
with mock.patch("documents.consumer.Consumer._send_progress"):
|
||||
with self.assertRaises(ConsumerError) as cm, self.assertLogs(
|
||||
"paperless.consumer",
|
||||
level="ERROR",
|
||||
) as logs_cm:
|
||||
tasks.consume_file(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file=dst,
|
||||
),
|
||||
None,
|
||||
)
|
||||
self.assertIn("Not consuming barcode-39-asn-123.pdf", str(cm.exception))
|
||||
error_str = logs_cm.output[0]
|
||||
expected_str = "ERROR:paperless.consumer:Not consuming barcode-39-asn-123.pdf: Given ASN already exists!"
|
||||
self.assertEqual(expected_str, error_str)
|
||||
|
||||
def test_scan_file_for_asn_barcode_invalid(self):
|
||||
"""
|
||||
GIVEN:
|
||||
@ -692,7 +642,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
||||
"""
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
asn = reader.asn
|
||||
|
||||
self.assertEqual(reader.pdf_file, test_file)
|
||||
@ -718,7 +668,9 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
||||
dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf"
|
||||
shutil.copy(test_file, dst)
|
||||
|
||||
with mock.patch("documents.consumer.Consumer.try_consume_file") as mocked_call:
|
||||
with mock.patch(
|
||||
"documents.consumer.Consumer.try_consume_file",
|
||||
) as mocked_consumer:
|
||||
tasks.consume_file(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
@ -726,40 +678,11 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
||||
),
|
||||
None,
|
||||
)
|
||||
|
||||
args, kwargs = mocked_call.call_args
|
||||
mocked_consumer.assert_called_once()
|
||||
args, kwargs = mocked_consumer.call_args
|
||||
|
||||
self.assertEqual(kwargs["override_asn"], 123)
|
||||
|
||||
@override_settings(CONSUMER_ENABLE_ASN_BARCODE=True)
|
||||
def test_asn_too_large(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- ASN from barcode enabled
|
||||
- Barcode contains too large an ASN value
|
||||
WHEN:
|
||||
- ASN from barcode checked for correctness
|
||||
THEN:
|
||||
- Exception is raised regarding size limits
|
||||
"""
|
||||
src = self.BARCODE_SAMPLE_DIR / "barcode-128-asn-too-large.pdf"
|
||||
|
||||
dst = self.dirs.scratch_dir / "barcode-128-asn-too-large.pdf"
|
||||
shutil.copy(src, dst)
|
||||
|
||||
input_doc = ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
original_file=dst,
|
||||
)
|
||||
|
||||
with mock.patch("documents.consumer.Consumer._send_progress"):
|
||||
self.assertRaisesMessage(
|
||||
ConsumerError,
|
||||
"Given ASN 4294967296 is out of range [0, 4,294,967,295]",
|
||||
tasks.consume_file,
|
||||
input_doc,
|
||||
)
|
||||
|
||||
@override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
|
||||
def test_scan_file_for_qrcode_without_upscale(self):
|
||||
"""
|
||||
@ -774,7 +697,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
||||
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
self.assertEqual(len(reader.barcodes), 0)
|
||||
|
||||
@ -796,7 +719,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
||||
|
||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf"
|
||||
|
||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||
with self.get_reader(test_file) as reader:
|
||||
reader.detect()
|
||||
self.assertEqual(len(reader.barcodes), 1)
|
||||
self.assertEqual(reader.asn, 123)
|
||||
|
@ -17,6 +17,7 @@ from documents.data_models import DocumentSource
|
||||
from documents.double_sided import STAGING_FILE_NAME
|
||||
from documents.double_sided import TIMEOUT_MINUTES
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import DummyProgressManager
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
|
||||
|
||||
@ -42,9 +43,10 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
dst = self.dirs.double_sided_dir / dstname
|
||||
dst.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy(src, dst)
|
||||
with mock.patch("documents.tasks.async_to_sync"), mock.patch(
|
||||
"documents.consumer.async_to_sync",
|
||||
):
|
||||
with mock.patch(
|
||||
"documents.tasks.ProgressManager",
|
||||
DummyProgressManager,
|
||||
), mock.patch("documents.consumer.async_to_sync"):
|
||||
msg = tasks.consume_file(
|
||||
ConsumableDocument(
|
||||
source=DocumentSource.ConsumeFolder,
|
||||
@ -211,7 +213,7 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
"""
|
||||
msg = self.consume_file("simple.pdf", Path("..") / "simple.pdf")
|
||||
self.assertIsNotFile(self.staging_file)
|
||||
self.assertRegex(msg, "Success. New document .* created")
|
||||
self.assertRegex(msg, r"Success. New document id \d+ created")
|
||||
|
||||
def test_subdirectory_upload(self):
|
||||
"""
|
||||
@ -250,4 +252,4 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
"""
|
||||
msg = self.consume_file("simple.pdf")
|
||||
self.assertIsNotFile(self.staging_file)
|
||||
self.assertRegex(msg, "Success. New document .* created")
|
||||
self.assertRegex(msg, r"Success. New document id \d+ created")
|
||||
|
@ -24,6 +24,7 @@ from documents.models import WorkflowAction
|
||||
from documents.models import WorkflowTrigger
|
||||
from documents.signals import document_consumption_finished
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import DummyProgressManager
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
from paperless_mail.models import MailAccount
|
||||
from paperless_mail.models import MailRule
|
||||
@ -126,7 +127,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
||||
|
||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||
|
||||
with mock.patch("documents.tasks.async_to_sync"):
|
||||
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||
with self.assertLogs("paperless.matching", level="INFO") as cm:
|
||||
tasks.consume_file(
|
||||
ConsumableDocument(
|
||||
@ -203,7 +204,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
||||
w.save()
|
||||
|
||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||
with mock.patch("documents.tasks.async_to_sync"):
|
||||
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||
with self.assertLogs("paperless.matching", level="INFO") as cm:
|
||||
tasks.consume_file(
|
||||
ConsumableDocument(
|
||||
@ -294,7 +295,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
||||
|
||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||
|
||||
with mock.patch("documents.tasks.async_to_sync"):
|
||||
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||
with self.assertLogs("paperless.matching", level="INFO") as cm:
|
||||
tasks.consume_file(
|
||||
ConsumableDocument(
|
||||
@ -356,7 +357,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
||||
|
||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||
|
||||
with mock.patch("documents.tasks.async_to_sync"):
|
||||
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
||||
tasks.consume_file(
|
||||
ConsumableDocument(
|
||||
@ -407,7 +408,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
||||
|
||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||
|
||||
with mock.patch("documents.tasks.async_to_sync"):
|
||||
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
||||
tasks.consume_file(
|
||||
ConsumableDocument(
|
||||
@ -468,7 +469,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
||||
|
||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||
|
||||
with mock.patch("documents.tasks.async_to_sync"):
|
||||
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
||||
tasks.consume_file(
|
||||
ConsumableDocument(
|
||||
@ -529,7 +530,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
||||
|
||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||
|
||||
with mock.patch("documents.tasks.async_to_sync"):
|
||||
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
||||
tasks.consume_file(
|
||||
ConsumableDocument(
|
||||
@ -591,7 +592,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
||||
|
||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||
|
||||
with mock.patch("documents.tasks.async_to_sync"):
|
||||
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
||||
tasks.consume_file(
|
||||
ConsumableDocument(
|
||||
@ -686,7 +687,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
||||
|
||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||
|
||||
with mock.patch("documents.tasks.async_to_sync"):
|
||||
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||
with self.assertLogs("paperless.matching", level="INFO") as cm:
|
||||
tasks.consume_file(
|
||||
ConsumableDocument(
|
||||
|
@ -9,6 +9,7 @@ from os import PathLike
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from typing import Callable
|
||||
from typing import Optional
|
||||
from typing import Union
|
||||
from unittest import mock
|
||||
|
||||
@ -23,6 +24,7 @@ from django.test import override_settings
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
from documents.parsers import ParseError
|
||||
from documents.plugins.helpers import ProgressStatusOptions
|
||||
|
||||
|
||||
def setup_directories():
|
||||
@ -146,6 +148,11 @@ def util_call_with_backoff(
|
||||
|
||||
|
||||
class DirectoriesMixin:
|
||||
"""
|
||||
Creates and overrides settings for all folders and paths, then ensures
|
||||
they are cleaned up on exit
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.dirs = None
|
||||
@ -160,6 +167,10 @@ class DirectoriesMixin:
|
||||
|
||||
|
||||
class FileSystemAssertsMixin:
|
||||
"""
|
||||
Utilities for checks various state information of the file system
|
||||
"""
|
||||
|
||||
def assertIsFile(self, path: Union[PathLike, str]):
|
||||
self.assertTrue(Path(path).resolve().is_file(), f"File does not exist: {path}")
|
||||
|
||||
@ -188,6 +199,11 @@ class FileSystemAssertsMixin:
|
||||
|
||||
|
||||
class ConsumerProgressMixin:
|
||||
"""
|
||||
Mocks the Consumer _send_progress, preventing attempts to connect to Redis
|
||||
and allowing access to its calls for verification
|
||||
"""
|
||||
|
||||
def setUp(self) -> None:
|
||||
self.send_progress_patcher = mock.patch(
|
||||
"documents.consumer.Consumer._send_progress",
|
||||
@ -310,3 +326,59 @@ class SampleDirMixin:
|
||||
SAMPLE_DIR = Path(__file__).parent / "samples"
|
||||
|
||||
BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
|
||||
|
||||
|
||||
class DummyProgressManager:
    """
    A dummy handler for progress management that doesn't actually try to
    connect to Redis.  Payloads are stored for test assertions if needed.

    Use it with
      mock.patch("documents.tasks.ProgressManager", DummyProgressManager)
    """

    def __init__(self, filename: str, task_id: Optional[str] = None) -> None:
        self.filename = filename
        self.task_id = task_id
        # Every payload handed to send_progress is recorded here so tests
        # can assert on the exact progress messages that were emitted.
        self.payloads: list[dict] = []

    def __enter__(self):
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def open(self) -> None:
        # No real channel layer to open; present to mirror ProgressManager's API
        pass

    def close(self) -> None:
        # Nothing to release; present to mirror ProgressManager's API
        pass

    def send_progress(
        self,
        status: "ProgressStatusOptions",
        message: str,
        current_progress: int,
        max_progress: int,
        extra_args: Optional[dict[str, Union[str, int]]] = None,
    ) -> None:
        """
        Record a status update payload instead of sending it over the
        channel layer.  The payload shape matches the real ProgressManager
        so assertions against it stay representative.
        """
        # Ensure the layer is open
        self.open()

        payload = {
            "type": "status_update",
            "data": {
                "filename": self.filename,
                "task_id": self.task_id,
                "current_progress": current_progress,
                "max_progress": max_progress,
                "status": status,
                "message": message,
            },
        }
        if extra_args is not None:
            payload["data"].update(extra_args)

        self.payloads.append(payload)
|
||||
|
Loading…
x
Reference in New Issue
Block a user