mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-19 10:19:27 -05:00
Refactor file consumption task to allow beginnings of a plugin system (#5367)
This commit is contained in:
parent
4dbf8d7969
commit
2da5e46386
@ -3,7 +3,6 @@ import re
|
|||||||
import tempfile
|
import tempfile
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Final
|
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@ -15,8 +14,9 @@ from PIL import Image
|
|||||||
|
|
||||||
from documents.converters import convert_from_tiff_to_pdf
|
from documents.converters import convert_from_tiff_to_pdf
|
||||||
from documents.data_models import ConsumableDocument
|
from documents.data_models import ConsumableDocument
|
||||||
from documents.data_models import DocumentMetadataOverrides
|
from documents.plugins.base import ConsumeTaskPlugin
|
||||||
from documents.data_models import DocumentSource
|
from documents.plugins.base import StopConsumeTaskError
|
||||||
|
from documents.plugins.helpers import ProgressStatusOptions
|
||||||
from documents.utils import copy_basic_file_stats
|
from documents.utils import copy_basic_file_stats
|
||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
|
|
||||||
@ -26,7 +26,7 @@ logger = logging.getLogger("paperless.barcodes")
|
|||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class Barcode:
|
class Barcode:
|
||||||
"""
|
"""
|
||||||
Holds the information about a single barcode and its location
|
Holds the information about a single barcode and its location in a document
|
||||||
"""
|
"""
|
||||||
|
|
||||||
page: int
|
page: int
|
||||||
@ -49,77 +49,111 @@ class Barcode:
|
|||||||
return self.value.startswith(settings.CONSUMER_ASN_BARCODE_PREFIX)
|
return self.value.startswith(settings.CONSUMER_ASN_BARCODE_PREFIX)
|
||||||
|
|
||||||
|
|
||||||
class BarcodeReader:
|
class BarcodePlugin(ConsumeTaskPlugin):
|
||||||
def __init__(self, filepath: Path, mime_type: str) -> None:
|
NAME: str = "BarcodePlugin"
|
||||||
self.file: Final[Path] = filepath
|
|
||||||
self.mime: Final[str] = mime_type
|
|
||||||
self.pdf_file: Path = self.file
|
|
||||||
self.barcodes: list[Barcode] = []
|
|
||||||
self._tiff_conversion_done = False
|
|
||||||
self.temp_dir: Optional[tempfile.TemporaryDirectory] = None
|
|
||||||
|
|
||||||
|
@property
|
||||||
|
def able_to_run(self) -> bool:
|
||||||
|
"""
|
||||||
|
Able to run if:
|
||||||
|
- ASN from barcode detection is enabled or
|
||||||
|
- Barcode support is enabled and the mime type is supported
|
||||||
|
"""
|
||||||
if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
|
if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
|
||||||
self.SUPPORTED_FILE_MIMES = {"application/pdf", "image/tiff"}
|
supported_mimes = {"application/pdf", "image/tiff"}
|
||||||
else:
|
else:
|
||||||
self.SUPPORTED_FILE_MIMES = {"application/pdf"}
|
supported_mimes = {"application/pdf"}
|
||||||
|
|
||||||
def __enter__(self):
|
return (
|
||||||
if self.supported_mime_type:
|
settings.CONSUMER_ENABLE_ASN_BARCODE or settings.CONSUMER_ENABLE_BARCODES
|
||||||
self.temp_dir = tempfile.TemporaryDirectory(prefix="paperless-barcodes")
|
) and self.input_doc.mime_type in supported_mimes
|
||||||
return self
|
|
||||||
|
|
||||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
def setup(self):
|
||||||
if self.temp_dir is not None:
|
self.temp_dir = tempfile.TemporaryDirectory(
|
||||||
self.temp_dir.cleanup()
|
dir=self.base_tmp_dir,
|
||||||
self.temp_dir = None
|
prefix="barcode",
|
||||||
|
)
|
||||||
|
self.pdf_file = self.input_doc.original_file
|
||||||
|
self._tiff_conversion_done = False
|
||||||
|
self.barcodes: list[Barcode] = []
|
||||||
|
|
||||||
@property
|
def run(self) -> Optional[str]:
|
||||||
def supported_mime_type(self) -> bool:
|
# Maybe do the conversion of TIFF to PDF
|
||||||
"""
|
self.convert_from_tiff_to_pdf()
|
||||||
Return True if the given mime type is supported for barcodes, false otherwise
|
|
||||||
"""
|
|
||||||
return self.mime in self.SUPPORTED_FILE_MIMES
|
|
||||||
|
|
||||||
@property
|
# Locate any barcodes in the files
|
||||||
def asn(self) -> Optional[int]:
|
|
||||||
"""
|
|
||||||
Search the parsed barcodes for any ASNs.
|
|
||||||
The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
|
|
||||||
is considered the ASN to be used.
|
|
||||||
Returns the detected ASN (or None)
|
|
||||||
"""
|
|
||||||
asn = None
|
|
||||||
|
|
||||||
if not self.supported_mime_type:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Ensure the barcodes have been read
|
|
||||||
self.detect()
|
self.detect()
|
||||||
|
|
||||||
# get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
|
# Update/overwrite an ASN if possible
|
||||||
asn_text = next(
|
located_asn = self.asn
|
||||||
(x.value for x in self.barcodes if x.is_asn),
|
if located_asn is not None:
|
||||||
None,
|
logger.info(f"Found ASN in barcode: {located_asn}")
|
||||||
|
self.metadata.asn = located_asn
|
||||||
|
|
||||||
|
separator_pages = self.get_separation_pages()
|
||||||
|
if not separator_pages:
|
||||||
|
return "No pages to split on!"
|
||||||
|
|
||||||
|
# We have pages to split against
|
||||||
|
|
||||||
|
# Note this does NOT use the base_tmp_dir, as that will be removed
|
||||||
|
tmp_dir = Path(
|
||||||
|
tempfile.mkdtemp(
|
||||||
|
dir=settings.SCRATCH_DIR,
|
||||||
|
prefix="paperless-barcode-split-",
|
||||||
|
),
|
||||||
|
).resolve()
|
||||||
|
|
||||||
|
from documents import tasks
|
||||||
|
|
||||||
|
# Create the split document tasks
|
||||||
|
for new_document in self.separate_pages(separator_pages):
|
||||||
|
copy_file_with_basic_stats(new_document, tmp_dir / new_document.name)
|
||||||
|
|
||||||
|
task = tasks.consume_file.delay(
|
||||||
|
ConsumableDocument(
|
||||||
|
# Same source, for templates
|
||||||
|
source=self.input_doc.source,
|
||||||
|
mailrule_id=self.input_doc.mailrule_id,
|
||||||
|
# Can't use same folder or the consume might grab it again
|
||||||
|
original_file=(tmp_dir / new_document.name).resolve(),
|
||||||
|
),
|
||||||
|
# All the same metadata
|
||||||
|
self.metadata,
|
||||||
)
|
)
|
||||||
|
logger.info(f"Created new task {task.id} for {new_document.name}")
|
||||||
|
|
||||||
if asn_text:
|
# This file is now two or more files
|
||||||
logger.debug(f"Found ASN Barcode: {asn_text}")
|
self.input_doc.original_file.unlink()
|
||||||
# remove the prefix and remove whitespace
|
|
||||||
asn_text = asn_text[len(settings.CONSUMER_ASN_BARCODE_PREFIX) :].strip()
|
|
||||||
|
|
||||||
# remove non-numeric parts of the remaining string
|
msg = "Barcode splitting complete!"
|
||||||
asn_text = re.sub(r"\D", "", asn_text)
|
|
||||||
|
|
||||||
# now, try parsing the ASN number
|
# Update the progress to complete
|
||||||
try:
|
self.status_mgr.send_progress(ProgressStatusOptions.SUCCESS, msg, 100, 100)
|
||||||
asn = int(asn_text)
|
|
||||||
except ValueError as e:
|
|
||||||
logger.warning(f"Failed to parse ASN number because: {e}")
|
|
||||||
|
|
||||||
return asn
|
# Request the consume task stops
|
||||||
|
raise StopConsumeTaskError(msg)
|
||||||
|
|
||||||
|
def cleanup(self) -> None:
|
||||||
|
self.temp_dir.cleanup()
|
||||||
|
|
||||||
|
def convert_from_tiff_to_pdf(self):
|
||||||
|
"""
|
||||||
|
May convert a TIFF image into a PDF, if the input is a TIFF and
|
||||||
|
the TIFF has not been made into a PDF
|
||||||
|
"""
|
||||||
|
# Nothing to do, pdf_file is already assigned correctly
|
||||||
|
if self.input_doc.mime_type != "image/tiff" or self._tiff_conversion_done:
|
||||||
|
return
|
||||||
|
|
||||||
|
self.pdf_file = convert_from_tiff_to_pdf(
|
||||||
|
self.input_doc.original_file,
|
||||||
|
Path(self.temp_dir.name),
|
||||||
|
)
|
||||||
|
self._tiff_conversion_done = True
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def read_barcodes_zxing(image: Image) -> list[str]:
|
def read_barcodes_zxing(image: Image.Image) -> list[str]:
|
||||||
barcodes = []
|
barcodes = []
|
||||||
|
|
||||||
import zxingcpp
|
import zxingcpp
|
||||||
@ -135,7 +169,7 @@ class BarcodeReader:
|
|||||||
return barcodes
|
return barcodes
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def read_barcodes_pyzbar(image: Image) -> list[str]:
|
def read_barcodes_pyzbar(image: Image.Image) -> list[str]:
|
||||||
barcodes = []
|
barcodes = []
|
||||||
|
|
||||||
from pyzbar import pyzbar
|
from pyzbar import pyzbar
|
||||||
@ -154,18 +188,6 @@ class BarcodeReader:
|
|||||||
|
|
||||||
return barcodes
|
return barcodes
|
||||||
|
|
||||||
def convert_from_tiff_to_pdf(self):
|
|
||||||
"""
|
|
||||||
May convert a TIFF image into a PDF, if the input is a TIFF and
|
|
||||||
the TIFF has not been made into a PDF
|
|
||||||
"""
|
|
||||||
# Nothing to do, pdf_file is already assigned correctly
|
|
||||||
if self.mime != "image/tiff" or self._tiff_conversion_done:
|
|
||||||
return
|
|
||||||
|
|
||||||
self._tiff_conversion_done = True
|
|
||||||
self.pdf_file = convert_from_tiff_to_pdf(self.file, Path(self.temp_dir.name))
|
|
||||||
|
|
||||||
def detect(self) -> None:
|
def detect(self) -> None:
|
||||||
"""
|
"""
|
||||||
Scan all pages of the PDF as images, updating barcodes and the pages
|
Scan all pages of the PDF as images, updating barcodes and the pages
|
||||||
@ -218,10 +240,45 @@ class BarcodeReader:
|
|||||||
# This file is really borked, allow the consumption to continue
|
# This file is really borked, allow the consumption to continue
|
||||||
# but it may fail further on
|
# but it may fail further on
|
||||||
except Exception as e: # pragma: no cover
|
except Exception as e: # pragma: no cover
|
||||||
logger.exception(
|
logger.warning(
|
||||||
f"Exception during barcode scanning: {e}",
|
f"Exception during barcode scanning: {e}",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def asn(self) -> Optional[int]:
|
||||||
|
"""
|
||||||
|
Search the parsed barcodes for any ASNs.
|
||||||
|
The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
|
||||||
|
is considered the ASN to be used.
|
||||||
|
Returns the detected ASN (or None)
|
||||||
|
"""
|
||||||
|
asn = None
|
||||||
|
|
||||||
|
# Ensure the barcodes have been read
|
||||||
|
self.detect()
|
||||||
|
|
||||||
|
# get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
|
||||||
|
asn_text = next(
|
||||||
|
(x.value for x in self.barcodes if x.is_asn),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
|
||||||
|
if asn_text:
|
||||||
|
logger.debug(f"Found ASN Barcode: {asn_text}")
|
||||||
|
# remove the prefix and remove whitespace
|
||||||
|
asn_text = asn_text[len(settings.CONSUMER_ASN_BARCODE_PREFIX) :].strip()
|
||||||
|
|
||||||
|
# remove non-numeric parts of the remaining string
|
||||||
|
asn_text = re.sub(r"\D", "", asn_text)
|
||||||
|
|
||||||
|
# now, try parsing the ASN number
|
||||||
|
try:
|
||||||
|
asn = int(asn_text)
|
||||||
|
except ValueError as e:
|
||||||
|
logger.warning(f"Failed to parse ASN number because: {e}")
|
||||||
|
|
||||||
|
return asn
|
||||||
|
|
||||||
def get_separation_pages(self) -> dict[int, bool]:
|
def get_separation_pages(self) -> dict[int, bool]:
|
||||||
"""
|
"""
|
||||||
Search the parsed barcodes for separators and returns a dict of page
|
Search the parsed barcodes for separators and returns a dict of page
|
||||||
@ -251,7 +308,7 @@ class BarcodeReader:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
document_paths = []
|
document_paths = []
|
||||||
fname = self.file.stem
|
fname = self.input_doc.original_file.stem
|
||||||
with Pdf.open(self.pdf_file) as input_pdf:
|
with Pdf.open(self.pdf_file) as input_pdf:
|
||||||
# Start with an empty document
|
# Start with an empty document
|
||||||
current_document: list[Page] = []
|
current_document: list[Page] = []
|
||||||
@ -292,58 +349,8 @@ class BarcodeReader:
|
|||||||
with open(savepath, "wb") as out:
|
with open(savepath, "wb") as out:
|
||||||
dst.save(out)
|
dst.save(out)
|
||||||
|
|
||||||
copy_basic_file_stats(self.file, savepath)
|
copy_basic_file_stats(self.input_doc.original_file, savepath)
|
||||||
|
|
||||||
document_paths.append(savepath)
|
document_paths.append(savepath)
|
||||||
|
|
||||||
return document_paths
|
return document_paths
|
||||||
|
|
||||||
def separate(
|
|
||||||
self,
|
|
||||||
source: DocumentSource,
|
|
||||||
overrides: DocumentMetadataOverrides,
|
|
||||||
) -> bool:
|
|
||||||
"""
|
|
||||||
Separates the document, based on barcodes and configuration, creating new
|
|
||||||
documents as required in the appropriate location.
|
|
||||||
|
|
||||||
Returns True if a split happened, False otherwise
|
|
||||||
"""
|
|
||||||
# Do nothing
|
|
||||||
if not self.supported_mime_type:
|
|
||||||
logger.warning(f"Unsupported file format for barcode reader: {self.mime}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Does nothing unless needed
|
|
||||||
self.convert_from_tiff_to_pdf()
|
|
||||||
|
|
||||||
# Actually read the codes, if any
|
|
||||||
self.detect()
|
|
||||||
|
|
||||||
separator_pages = self.get_separation_pages()
|
|
||||||
|
|
||||||
# Also do nothing
|
|
||||||
if not separator_pages:
|
|
||||||
logger.warning("No pages to split on!")
|
|
||||||
return False
|
|
||||||
|
|
||||||
tmp_dir = Path(tempfile.mkdtemp(prefix="paperless-barcode-split-")).resolve()
|
|
||||||
|
|
||||||
from documents import tasks
|
|
||||||
|
|
||||||
# Create the split document tasks
|
|
||||||
for new_document in self.separate_pages(separator_pages):
|
|
||||||
copy_file_with_basic_stats(new_document, tmp_dir / new_document.name)
|
|
||||||
|
|
||||||
tasks.consume_file.delay(
|
|
||||||
ConsumableDocument(
|
|
||||||
# Same source, for templates
|
|
||||||
source=source,
|
|
||||||
# Can't use same folder or the consume might grab it again
|
|
||||||
original_file=(tmp_dir / new_document.name).resolve(),
|
|
||||||
),
|
|
||||||
# All the same metadata
|
|
||||||
overrides,
|
|
||||||
)
|
|
||||||
logger.info("Barcode splitting complete!")
|
|
||||||
return True
|
|
||||||
|
@ -21,7 +21,6 @@ from filelock import FileLock
|
|||||||
from rest_framework.reverse import reverse
|
from rest_framework.reverse import reverse
|
||||||
|
|
||||||
from documents.classifier import load_classifier
|
from documents.classifier import load_classifier
|
||||||
from documents.data_models import ConsumableDocument
|
|
||||||
from documents.data_models import DocumentMetadataOverrides
|
from documents.data_models import DocumentMetadataOverrides
|
||||||
from documents.file_handling import create_source_path_directory
|
from documents.file_handling import create_source_path_directory
|
||||||
from documents.file_handling import generate_unique_filename
|
from documents.file_handling import generate_unique_filename
|
||||||
@ -42,12 +41,83 @@ from documents.parsers import ParseError
|
|||||||
from documents.parsers import get_parser_class_for_mime_type
|
from documents.parsers import get_parser_class_for_mime_type
|
||||||
from documents.parsers import parse_date
|
from documents.parsers import parse_date
|
||||||
from documents.permissions import set_permissions_for_object
|
from documents.permissions import set_permissions_for_object
|
||||||
|
from documents.plugins.base import AlwaysRunPluginMixin
|
||||||
|
from documents.plugins.base import ConsumeTaskPlugin
|
||||||
|
from documents.plugins.base import NoCleanupPluginMixin
|
||||||
|
from documents.plugins.base import NoSetupPluginMixin
|
||||||
from documents.signals import document_consumption_finished
|
from documents.signals import document_consumption_finished
|
||||||
from documents.signals import document_consumption_started
|
from documents.signals import document_consumption_started
|
||||||
from documents.utils import copy_basic_file_stats
|
from documents.utils import copy_basic_file_stats
|
||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
|
|
||||||
|
|
||||||
|
class WorkflowTriggerPlugin(
|
||||||
|
NoCleanupPluginMixin,
|
||||||
|
NoSetupPluginMixin,
|
||||||
|
AlwaysRunPluginMixin,
|
||||||
|
ConsumeTaskPlugin,
|
||||||
|
):
|
||||||
|
NAME: str = "WorkflowTriggerPlugin"
|
||||||
|
|
||||||
|
def run(self) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Get overrides from matching workflows
|
||||||
|
"""
|
||||||
|
overrides = DocumentMetadataOverrides()
|
||||||
|
for workflow in Workflow.objects.filter(enabled=True).order_by("order"):
|
||||||
|
template_overrides = DocumentMetadataOverrides()
|
||||||
|
|
||||||
|
if document_matches_workflow(
|
||||||
|
self.input_doc,
|
||||||
|
workflow,
|
||||||
|
WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
|
||||||
|
):
|
||||||
|
for action in workflow.actions.all():
|
||||||
|
if action.assign_title is not None:
|
||||||
|
template_overrides.title = action.assign_title
|
||||||
|
if action.assign_tags is not None:
|
||||||
|
template_overrides.tag_ids = [
|
||||||
|
tag.pk for tag in action.assign_tags.all()
|
||||||
|
]
|
||||||
|
if action.assign_correspondent is not None:
|
||||||
|
template_overrides.correspondent_id = (
|
||||||
|
action.assign_correspondent.pk
|
||||||
|
)
|
||||||
|
if action.assign_document_type is not None:
|
||||||
|
template_overrides.document_type_id = (
|
||||||
|
action.assign_document_type.pk
|
||||||
|
)
|
||||||
|
if action.assign_storage_path is not None:
|
||||||
|
template_overrides.storage_path_id = (
|
||||||
|
action.assign_storage_path.pk
|
||||||
|
)
|
||||||
|
if action.assign_owner is not None:
|
||||||
|
template_overrides.owner_id = action.assign_owner.pk
|
||||||
|
if action.assign_view_users is not None:
|
||||||
|
template_overrides.view_users = [
|
||||||
|
user.pk for user in action.assign_view_users.all()
|
||||||
|
]
|
||||||
|
if action.assign_view_groups is not None:
|
||||||
|
template_overrides.view_groups = [
|
||||||
|
group.pk for group in action.assign_view_groups.all()
|
||||||
|
]
|
||||||
|
if action.assign_change_users is not None:
|
||||||
|
template_overrides.change_users = [
|
||||||
|
user.pk for user in action.assign_change_users.all()
|
||||||
|
]
|
||||||
|
if action.assign_change_groups is not None:
|
||||||
|
template_overrides.change_groups = [
|
||||||
|
group.pk for group in action.assign_change_groups.all()
|
||||||
|
]
|
||||||
|
if action.assign_custom_fields is not None:
|
||||||
|
template_overrides.custom_field_ids = [
|
||||||
|
field.pk for field in action.assign_custom_fields.all()
|
||||||
|
]
|
||||||
|
|
||||||
|
overrides.update(template_overrides)
|
||||||
|
self.metadata.update(overrides)
|
||||||
|
|
||||||
|
|
||||||
class ConsumerError(Exception):
|
class ConsumerError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -602,70 +672,6 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
return document
|
return document
|
||||||
|
|
||||||
def get_workflow_overrides(
|
|
||||||
self,
|
|
||||||
input_doc: ConsumableDocument,
|
|
||||||
) -> DocumentMetadataOverrides:
|
|
||||||
"""
|
|
||||||
Get overrides from matching workflows
|
|
||||||
"""
|
|
||||||
overrides = DocumentMetadataOverrides()
|
|
||||||
for workflow in Workflow.objects.filter(enabled=True).order_by("order"):
|
|
||||||
template_overrides = DocumentMetadataOverrides()
|
|
||||||
|
|
||||||
if document_matches_workflow(
|
|
||||||
input_doc,
|
|
||||||
workflow,
|
|
||||||
WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
|
|
||||||
):
|
|
||||||
for action in workflow.actions.all():
|
|
||||||
self.log.info(
|
|
||||||
f"Applying overrides in {action} from {workflow}",
|
|
||||||
)
|
|
||||||
if action.assign_title is not None:
|
|
||||||
template_overrides.title = action.assign_title
|
|
||||||
if action.assign_tags is not None:
|
|
||||||
template_overrides.tag_ids = [
|
|
||||||
tag.pk for tag in action.assign_tags.all()
|
|
||||||
]
|
|
||||||
if action.assign_correspondent is not None:
|
|
||||||
template_overrides.correspondent_id = (
|
|
||||||
action.assign_correspondent.pk
|
|
||||||
)
|
|
||||||
if action.assign_document_type is not None:
|
|
||||||
template_overrides.document_type_id = (
|
|
||||||
action.assign_document_type.pk
|
|
||||||
)
|
|
||||||
if action.assign_storage_path is not None:
|
|
||||||
template_overrides.storage_path_id = (
|
|
||||||
action.assign_storage_path.pk
|
|
||||||
)
|
|
||||||
if action.assign_owner is not None:
|
|
||||||
template_overrides.owner_id = action.assign_owner.pk
|
|
||||||
if action.assign_view_users is not None:
|
|
||||||
template_overrides.view_users = [
|
|
||||||
user.pk for user in action.assign_view_users.all()
|
|
||||||
]
|
|
||||||
if action.assign_view_groups is not None:
|
|
||||||
template_overrides.view_groups = [
|
|
||||||
group.pk for group in action.assign_view_groups.all()
|
|
||||||
]
|
|
||||||
if action.assign_change_users is not None:
|
|
||||||
template_overrides.change_users = [
|
|
||||||
user.pk for user in action.assign_change_users.all()
|
|
||||||
]
|
|
||||||
if action.assign_change_groups is not None:
|
|
||||||
template_overrides.change_groups = [
|
|
||||||
group.pk for group in action.assign_change_groups.all()
|
|
||||||
]
|
|
||||||
if action.assign_custom_fields is not None:
|
|
||||||
template_overrides.custom_field_ids = [
|
|
||||||
field.pk for field in action.assign_custom_fields.all()
|
|
||||||
]
|
|
||||||
|
|
||||||
overrides.update(template_overrides)
|
|
||||||
return overrides
|
|
||||||
|
|
||||||
def _parse_title_placeholders(self, title: str) -> str:
|
def _parse_title_placeholders(self, title: str) -> str:
|
||||||
local_added = timezone.localtime(timezone.now())
|
local_added = timezone.localtime(timezone.now())
|
||||||
|
|
||||||
|
@ -3,24 +3,41 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Final
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from pikepdf import Pdf
|
from pikepdf import Pdf
|
||||||
|
|
||||||
from documents.consumer import ConsumerError
|
from documents.consumer import ConsumerError
|
||||||
from documents.converters import convert_from_tiff_to_pdf
|
from documents.converters import convert_from_tiff_to_pdf
|
||||||
from documents.data_models import ConsumableDocument
|
from documents.plugins.base import ConsumeTaskPlugin
|
||||||
|
from documents.plugins.base import NoCleanupPluginMixin
|
||||||
|
from documents.plugins.base import NoSetupPluginMixin
|
||||||
|
from documents.plugins.base import StopConsumeTaskError
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.double_sided")
|
logger = logging.getLogger("paperless.double_sided")
|
||||||
|
|
||||||
# Hardcoded for now, could be made a configurable setting if needed
|
# Hardcoded for now, could be made a configurable setting if needed
|
||||||
TIMEOUT_MINUTES = 30
|
TIMEOUT_MINUTES: Final[int] = 30
|
||||||
|
TIMEOUT_SECONDS: Final[int] = TIMEOUT_MINUTES * 60
|
||||||
|
|
||||||
# Used by test cases
|
# Used by test cases
|
||||||
STAGING_FILE_NAME = "double-sided-staging.pdf"
|
STAGING_FILE_NAME = "double-sided-staging.pdf"
|
||||||
|
|
||||||
|
|
||||||
def collate(input_doc: ConsumableDocument) -> str:
|
class CollatePlugin(NoCleanupPluginMixin, NoSetupPluginMixin, ConsumeTaskPlugin):
|
||||||
|
NAME: str = "CollatePlugin"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def able_to_run(self) -> bool:
|
||||||
|
return (
|
||||||
|
settings.CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED
|
||||||
|
and settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME
|
||||||
|
in self.input_doc.original_file.parts
|
||||||
|
)
|
||||||
|
|
||||||
|
def run(self) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
Tries to collate pages from 2 single sided scans of a double sided
|
Tries to collate pages from 2 single sided scans of a double sided
|
||||||
document.
|
document.
|
||||||
@ -39,33 +56,32 @@ def collate(input_doc: ConsumableDocument) -> str:
|
|||||||
in case of failure.
|
in case of failure.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Make sure scratch dir exists, Consumer might not have run yet
|
if self.input_doc.mime_type == "application/pdf":
|
||||||
settings.SCRATCH_DIR.mkdir(exist_ok=True)
|
pdf_file = self.input_doc.original_file
|
||||||
|
|
||||||
if input_doc.mime_type == "application/pdf":
|
|
||||||
pdf_file = input_doc.original_file
|
|
||||||
elif (
|
elif (
|
||||||
input_doc.mime_type == "image/tiff"
|
self.input_doc.mime_type == "image/tiff"
|
||||||
and settings.CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT
|
and settings.CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT
|
||||||
):
|
):
|
||||||
pdf_file = convert_from_tiff_to_pdf(
|
pdf_file = convert_from_tiff_to_pdf(
|
||||||
input_doc.original_file,
|
self.input_doc.original_file,
|
||||||
settings.SCRATCH_DIR,
|
self.base_tmp_dir,
|
||||||
)
|
)
|
||||||
input_doc.original_file.unlink()
|
self.input_doc.original_file.unlink()
|
||||||
else:
|
else:
|
||||||
raise ConsumerError("Unsupported file type for collation of double-sided scans")
|
raise ConsumerError(
|
||||||
|
"Unsupported file type for collation of double-sided scans",
|
||||||
|
)
|
||||||
|
|
||||||
staging = settings.SCRATCH_DIR / STAGING_FILE_NAME
|
staging: Path = settings.SCRATCH_DIR / STAGING_FILE_NAME
|
||||||
|
|
||||||
valid_staging_exists = False
|
valid_staging_exists = False
|
||||||
if staging.exists():
|
if staging.exists():
|
||||||
stats = os.stat(str(staging))
|
stats = staging.stat()
|
||||||
# if the file is older than the timeout, we don't consider
|
# if the file is older than the timeout, we don't consider
|
||||||
# it valid
|
# it valid
|
||||||
if dt.datetime.now().timestamp() - stats.st_mtime > TIMEOUT_MINUTES * 60:
|
if (dt.datetime.now().timestamp() - stats.st_mtime) > TIMEOUT_SECONDS:
|
||||||
logger.warning("Outdated double sided staging file exists, deleting it")
|
logger.warning("Outdated double sided staging file exists, deleting it")
|
||||||
os.unlink(str(staging))
|
staging.unlink()
|
||||||
else:
|
else:
|
||||||
valid_staging_exists = True
|
valid_staging_exists = True
|
||||||
|
|
||||||
@ -88,23 +104,24 @@ def collate(input_doc: ConsumableDocument) -> str:
|
|||||||
# Merged file has the same path, but without the
|
# Merged file has the same path, but without the
|
||||||
# double-sided subdir. Therefore, it is also in the
|
# double-sided subdir. Therefore, it is also in the
|
||||||
# consumption dir and will be picked up for processing
|
# consumption dir and will be picked up for processing
|
||||||
old_file = input_doc.original_file
|
old_file = self.input_doc.original_file
|
||||||
new_file = Path(
|
new_file = Path(
|
||||||
*(
|
*(
|
||||||
part
|
part
|
||||||
for part in old_file.with_name(
|
for part in old_file.with_name(
|
||||||
f"{old_file.stem}-collated.pdf",
|
f"{old_file.stem}-collated.pdf",
|
||||||
).parts
|
).parts
|
||||||
if part != settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME
|
if part
|
||||||
|
!= settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
# If the user didn't create the subdirs yet, do it for them
|
# If the user didn't create the subdirs yet, do it for them
|
||||||
new_file.parent.mkdir(parents=True, exist_ok=True)
|
new_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
pdf1.save(new_file)
|
pdf1.save(new_file)
|
||||||
logger.info("Collated documents into new file %s", new_file)
|
logger.info("Collated documents into new file %s", new_file)
|
||||||
return (
|
raise StopConsumeTaskError(
|
||||||
"Success. Even numbered pages of double sided scan collated "
|
"Success. Even numbered pages of double sided scan collated "
|
||||||
"with odd pages"
|
"with odd pages",
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
# Delete staging and recently uploaded file no matter what.
|
# Delete staging and recently uploaded file no matter what.
|
||||||
@ -118,12 +135,13 @@ def collate(input_doc: ConsumableDocument) -> str:
|
|||||||
shutil.move(pdf_file, staging)
|
shutil.move(pdf_file, staging)
|
||||||
# update access and modification time so we know if the file
|
# update access and modification time so we know if the file
|
||||||
# is outdated when another file gets uploaded
|
# is outdated when another file gets uploaded
|
||||||
os.utime(staging, (dt.datetime.now().timestamp(),) * 2)
|
timestamp = dt.datetime.now().timestamp()
|
||||||
|
os.utime(staging, (timestamp, timestamp))
|
||||||
logger.info(
|
logger.info(
|
||||||
"Got scan with odd numbered pages of double-sided scan, moved it to %s",
|
"Got scan with odd numbered pages of double-sided scan, moved it to %s",
|
||||||
staging,
|
staging,
|
||||||
)
|
)
|
||||||
return (
|
raise StopConsumeTaskError(
|
||||||
"Received odd numbered pages of double sided scan, waiting up to "
|
"Received odd numbered pages of double sided scan, waiting up to "
|
||||||
f"{TIMEOUT_MINUTES} minutes for even numbered pages"
|
f"{TIMEOUT_MINUTES} minutes for even numbered pages",
|
||||||
)
|
)
|
||||||
|
0
src/documents/plugins/__init__.py
Normal file
0
src/documents/plugins/__init__.py
Normal file
131
src/documents/plugins/base.py
Normal file
131
src/documents/plugins/base.py
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
import abc
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Final
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from documents.data_models import ConsumableDocument
|
||||||
|
from documents.data_models import DocumentMetadataOverrides
|
||||||
|
from documents.plugins.helpers import ProgressManager
|
||||||
|
|
||||||
|
|
||||||
|
class StopConsumeTaskError(Exception):
|
||||||
|
"""
|
||||||
|
A plugin setup or run may raise this to exit the asynchronous consume task.
|
||||||
|
|
||||||
|
Most likely, this means it has created one or more new tasks to execute instead,
|
||||||
|
such as when a barcode has been used to create new documents
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, message: str) -> None:
|
||||||
|
self.message = message
|
||||||
|
super().__init__(message)
|
||||||
|
|
||||||
|
|
||||||
|
class ConsumeTaskPlugin(abc.ABC):
|
||||||
|
"""
|
||||||
|
Defines the interface for a plugin for the document consume task
|
||||||
|
Meanings as per RFC2119 (https://datatracker.ietf.org/doc/html/rfc2119)
|
||||||
|
|
||||||
|
Plugin Implementation
|
||||||
|
|
||||||
|
The plugin SHALL implement property able_to_run and methods setup, run and cleanup.
|
||||||
|
The plugin property able_to_run SHALL return True if the plugin is able to run, given the conditions, settings and document information.
|
||||||
|
The plugin property able_to_run MAY be hardcoded to return True.
|
||||||
|
The plugin setup SHOULD perform any resource creation or additional initialization needed to run the document.
|
||||||
|
The plugin setup MAY be a non-operation.
|
||||||
|
The plugin cleanup SHOULD perform resource cleanup, including in the event of an error.
|
||||||
|
The plugin cleanup MAY be a non-operation.
|
||||||
|
The plugin run SHALL perform any operations against the document or system state required for the plugin.
|
||||||
|
The plugin run MAY update the document metadata.
|
||||||
|
The plugin run MAY return an informational message.
|
||||||
|
The plugin run MAY raise StopConsumeTaskError to cease any further operations against the document.
|
||||||
|
|
||||||
|
Plugin Manager Implementation
|
||||||
|
|
||||||
|
The plugin manager SHALL provide the plugin with the input document, document metadata, progress manager and a created temporary directory.
|
||||||
|
The plugin manager SHALL execute the plugin setup, run and cleanup, in that order IF the plugin property able_to_run is True.
|
||||||
|
The plugin manager SHOULD log the return message of executing a plugin's run.
|
||||||
|
The plugin manager SHALL always execute the plugin cleanup, IF the plugin property able_to_run is True.
|
||||||
|
The plugin manager SHALL cease calling plugins and exit the task IF a plugin raises StopConsumeTaskError.
|
||||||
|
The plugin manager SHOULD return the StopConsumeTaskError message IF a plugin raises StopConsumeTaskError.
|
||||||
|
"""
|
||||||
|
|
||||||
|
NAME: str = "ConsumeTaskPlugin"
|
||||||
|
|
||||||
|
def __init__(
    self,
    input_doc: ConsumableDocument,
    metadata: DocumentMetadataOverrides,
    status_mgr: ProgressManager,
    base_tmp_dir: Path,
    task_id: str,
) -> None:
    """
    Stores everything the plugin manager hands to a plugin.

    input_doc: the document being consumed
    metadata: the metadata overrides for the document
    status_mgr: the manager used to send progress updates
    base_tmp_dir: the temporary directory created for this task
    task_id: the id of the running consume task
    """
    super().__init__()

    # Marked Final: not meant to be re-bound once the plugin is constructed
    self.task_id: Final = task_id
    self.base_tmp_dir: Final = base_tmp_dir

    self.status_mgr = status_mgr
    self.metadata = metadata
    self.input_doc = input_doc
|
||||||
|
|
||||||
|
@property
@abc.abstractmethod
def able_to_run(self) -> bool:
    """
    Return True if the conditions are met for the plugin to run, False otherwise

    If False, setup(), run() and cleanup() will not be called
    """
    # abc.abstractproperty is deprecated since Python 3.3; stacking
    # @property on top of @abc.abstractmethod is the supported spelling
    # and still prevents instantiating subclasses which do not override it
|
||||||
|
|
||||||
|
@abc.abstractmethod
|
||||||
|
def setup(self) -> None:
|
||||||
|
"""
|
||||||
|
Allows the plugin to perform any additional setup it may need, such as creating
|
||||||
|
a temporary directory, copying a file somewhere, etc.
|
||||||
|
|
||||||
|
Executed before run()
|
||||||
|
|
||||||
|
In general, this should be the "light" work, not the bulk of processing
|
||||||
|
"""
|
||||||
|
|
||||||
|
@abc.abstractmethod
def run(self) -> Optional[str]:
    """
    Performs whatever action the plugin exists for; this is where the
    bulk of the plugin processing happens.

    Executed once setup() has finished and before cleanup() is called.
    May return an informational message, or None
    """
|
||||||
|
|
||||||
|
@abc.abstractmethod
def cleanup(self) -> None:
    """
    Hook for releasing anything the plugin created or acquired

    Executed once run() has finished, including when it ended in an error
    """
|
||||||
|
|
||||||
|
|
||||||
|
class AlwaysRunPluginMixin(ConsumeTaskPlugin):
    """
    Mixin for plugins with no preconditions: able_to_run is always True
    """

    @property
    def able_to_run(self) -> bool:
        # Unconditionally runnable, regardless of settings or document
        return True
|
||||||
|
|
||||||
|
|
||||||
|
class NoSetupPluginMixin(ConsumeTaskPlugin):
    """
    Mixin for plugins which have nothing to prepare: setup() does nothing
    """

    def setup(self) -> None:
        # Intentionally a no-op
        return None
|
||||||
|
|
||||||
|
|
||||||
|
class NoCleanupPluginMixin(ConsumeTaskPlugin):
    """
    Mixin for plugins which leave nothing behind: cleanup() does nothing
    """

    def cleanup(self) -> None:
        # Intentionally a no-op
        return None
|
82
src/documents/plugins/helpers.py
Normal file
82
src/documents/plugins/helpers.py
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
import enum
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
from typing import Optional
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
|
from asgiref.sync import async_to_sync
|
||||||
|
from channels.layers import get_channel_layer
|
||||||
|
from channels_redis.pubsub import RedisPubSubChannelLayer
|
||||||
|
|
||||||
|
|
||||||
|
class ProgressStatusOptions(str, enum.Enum):
    """
    The status values which may be sent along with a progress update.

    Members are also str instances, so they compare equal to their
    plain string values
    """

    STARTED = "STARTED"
    WORKING = "WORKING"
    SUCCESS = "SUCCESS"
    FAILED = "FAILED"
|
||||||
|
|
||||||
|
|
||||||
|
class ProgressManager:
|
||||||
|
"""
|
||||||
|
Handles sending of progress information via the channel layer, with proper management
|
||||||
|
of the open/close of the layer to ensure messages go out and everything is cleaned up
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, filename: str, task_id: Optional[str] = None) -> None:
|
||||||
|
self.filename = filename
|
||||||
|
self._channel: Optional[RedisPubSubChannelLayer] = None
|
||||||
|
self.task_id = task_id
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
self.open()
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
|
self.close()
|
||||||
|
|
||||||
|
def open(self) -> None:
|
||||||
|
"""
|
||||||
|
If not already opened, gets the default channel layer
|
||||||
|
opened and ready to send messages
|
||||||
|
"""
|
||||||
|
if self._channel is None:
|
||||||
|
self._channel = get_channel_layer()
|
||||||
|
|
||||||
|
def close(self) -> None:
|
||||||
|
"""
|
||||||
|
If it was opened, flushes the channel layer
|
||||||
|
"""
|
||||||
|
if self._channel is not None:
|
||||||
|
async_to_sync(self._channel.flush)
|
||||||
|
self._channel = None
|
||||||
|
|
||||||
|
def send_progress(
|
||||||
|
self,
|
||||||
|
status: ProgressStatusOptions,
|
||||||
|
message: str,
|
||||||
|
current_progress: int,
|
||||||
|
max_progress: int,
|
||||||
|
extra_args: Optional[dict[str, Union[str, int]]] = None,
|
||||||
|
) -> None:
|
||||||
|
# Ensure the layer is open
|
||||||
|
self.open()
|
||||||
|
|
||||||
|
# Just for IDEs
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
assert self._channel is not None
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"type": "status_update",
|
||||||
|
"data": {
|
||||||
|
"filename": self.filename,
|
||||||
|
"task_id": self.task_id,
|
||||||
|
"current_progress": current_progress,
|
||||||
|
"max_progress": max_progress,
|
||||||
|
"status": status,
|
||||||
|
"message": message,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if extra_args is not None:
|
||||||
|
payload["data"].update(extra_args)
|
||||||
|
|
||||||
|
# Construct and send the update
|
||||||
|
async_to_sync(self._channel.group_send)("status_updates", payload)
|
@ -2,30 +2,30 @@ import hashlib
|
|||||||
import logging
|
import logging
|
||||||
import shutil
|
import shutil
|
||||||
import uuid
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
|
from tempfile import TemporaryDirectory
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import tqdm
|
import tqdm
|
||||||
from asgiref.sync import async_to_sync
|
|
||||||
from celery import Task
|
from celery import Task
|
||||||
from celery import shared_task
|
from celery import shared_task
|
||||||
from channels.layers import get_channel_layer
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
from django.db.models.signals import post_save
|
from django.db.models.signals import post_save
|
||||||
from filelock import FileLock
|
from filelock import FileLock
|
||||||
from redis.exceptions import ConnectionError
|
|
||||||
from whoosh.writing import AsyncWriter
|
from whoosh.writing import AsyncWriter
|
||||||
|
|
||||||
from documents import index
|
from documents import index
|
||||||
from documents import sanity_checker
|
from documents import sanity_checker
|
||||||
from documents.barcodes import BarcodeReader
|
from documents.barcodes import BarcodePlugin
|
||||||
from documents.classifier import DocumentClassifier
|
from documents.classifier import DocumentClassifier
|
||||||
from documents.classifier import load_classifier
|
from documents.classifier import load_classifier
|
||||||
from documents.consumer import Consumer
|
from documents.consumer import Consumer
|
||||||
from documents.consumer import ConsumerError
|
from documents.consumer import ConsumerError
|
||||||
|
from documents.consumer import WorkflowTriggerPlugin
|
||||||
from documents.data_models import ConsumableDocument
|
from documents.data_models import ConsumableDocument
|
||||||
from documents.data_models import DocumentMetadataOverrides
|
from documents.data_models import DocumentMetadataOverrides
|
||||||
from documents.double_sided import collate
|
from documents.double_sided import CollatePlugin
|
||||||
from documents.file_handling import create_source_path_directory
|
from documents.file_handling import create_source_path_directory
|
||||||
from documents.file_handling import generate_unique_filename
|
from documents.file_handling import generate_unique_filename
|
||||||
from documents.models import Correspondent
|
from documents.models import Correspondent
|
||||||
@ -35,6 +35,10 @@ from documents.models import StoragePath
|
|||||||
from documents.models import Tag
|
from documents.models import Tag
|
||||||
from documents.parsers import DocumentParser
|
from documents.parsers import DocumentParser
|
||||||
from documents.parsers import get_parser_class_for_mime_type
|
from documents.parsers import get_parser_class_for_mime_type
|
||||||
|
from documents.plugins.base import ConsumeTaskPlugin
|
||||||
|
from documents.plugins.base import ProgressManager
|
||||||
|
from documents.plugins.base import StopConsumeTaskError
|
||||||
|
from documents.plugins.helpers import ProgressStatusOptions
|
||||||
from documents.sanity_checker import SanityCheckFailedException
|
from documents.sanity_checker import SanityCheckFailedException
|
||||||
from documents.signals import document_updated
|
from documents.signals import document_updated
|
||||||
|
|
||||||
@ -102,70 +106,60 @@ def consume_file(
|
|||||||
input_doc: ConsumableDocument,
|
input_doc: ConsumableDocument,
|
||||||
overrides: Optional[DocumentMetadataOverrides] = None,
|
overrides: Optional[DocumentMetadataOverrides] = None,
|
||||||
):
|
):
|
||||||
def send_progress(status="SUCCESS", message="finished"):
|
|
||||||
payload = {
|
|
||||||
"filename": overrides.filename or input_doc.original_file.name,
|
|
||||||
"task_id": None,
|
|
||||||
"current_progress": 100,
|
|
||||||
"max_progress": 100,
|
|
||||||
"status": status,
|
|
||||||
"message": message,
|
|
||||||
}
|
|
||||||
try:
|
|
||||||
async_to_sync(get_channel_layer().group_send)(
|
|
||||||
"status_updates",
|
|
||||||
{"type": "status_update", "data": payload},
|
|
||||||
)
|
|
||||||
except ConnectionError as e:
|
|
||||||
logger.warning(f"ConnectionError on status send: {e!s}")
|
|
||||||
|
|
||||||
# Default no overrides
|
# Default no overrides
|
||||||
if overrides is None:
|
if overrides is None:
|
||||||
overrides = DocumentMetadataOverrides()
|
overrides = DocumentMetadataOverrides()
|
||||||
|
|
||||||
# Handle collation of double-sided documents scanned in two parts
|
plugins: list[type[ConsumeTaskPlugin]] = [
|
||||||
if settings.CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED and (
|
CollatePlugin,
|
||||||
settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME
|
BarcodePlugin,
|
||||||
in input_doc.original_file.parts
|
WorkflowTriggerPlugin,
|
||||||
):
|
]
|
||||||
try:
|
|
||||||
msg = collate(input_doc)
|
|
||||||
send_progress(message=msg)
|
|
||||||
return msg
|
|
||||||
except ConsumerError as e:
|
|
||||||
send_progress(status="FAILURE", message=e.args[0])
|
|
||||||
raise e
|
|
||||||
|
|
||||||
# read all barcodes in the current document
|
with ProgressManager(
|
||||||
if settings.CONSUMER_ENABLE_BARCODES or settings.CONSUMER_ENABLE_ASN_BARCODE:
|
overrides.filename or input_doc.original_file.name,
|
||||||
with BarcodeReader(input_doc.original_file, input_doc.mime_type) as reader:
|
self.request.id,
|
||||||
if settings.CONSUMER_ENABLE_BARCODES and reader.separate(
|
) as status_mgr, TemporaryDirectory(dir=settings.SCRATCH_DIR) as tmp_dir:
|
||||||
input_doc.source,
|
tmp_dir = Path(tmp_dir)
|
||||||
|
for plugin_class in plugins:
|
||||||
|
plugin_name = plugin_class.NAME
|
||||||
|
|
||||||
|
plugin = plugin_class(
|
||||||
|
input_doc,
|
||||||
overrides,
|
overrides,
|
||||||
):
|
status_mgr,
|
||||||
# notify the sender, otherwise the progress bar
|
tmp_dir,
|
||||||
# in the UI stays stuck
|
self.request.id,
|
||||||
send_progress()
|
|
||||||
# consuming stops here, since the original document with
|
|
||||||
# the barcodes has been split and will be consumed separately
|
|
||||||
input_doc.original_file.unlink()
|
|
||||||
return "File successfully split"
|
|
||||||
|
|
||||||
# try reading the ASN from barcode
|
|
||||||
if (
|
|
||||||
settings.CONSUMER_ENABLE_ASN_BARCODE
|
|
||||||
and (located_asn := reader.asn) is not None
|
|
||||||
):
|
|
||||||
# Note this will take precedence over an API provided ASN
|
|
||||||
# But it's from a physical barcode, so that's good
|
|
||||||
overrides.asn = located_asn
|
|
||||||
logger.info(f"Found ASN in barcode: {overrides.asn}")
|
|
||||||
|
|
||||||
template_overrides = Consumer().get_workflow_overrides(
|
|
||||||
input_doc=input_doc,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
overrides.update(template_overrides)
|
if not plugin.able_to_run:
|
||||||
|
logger.debug(f"Skipping plugin {plugin_name}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
logger.debug(f"Executing plugin {plugin_name}")
|
||||||
|
plugin.setup()
|
||||||
|
|
||||||
|
msg = plugin.run()
|
||||||
|
|
||||||
|
if msg is not None:
|
||||||
|
logger.info(f"{plugin_name} completed with: {msg}")
|
||||||
|
else:
|
||||||
|
logger.info(f"{plugin_name} completed with no message")
|
||||||
|
|
||||||
|
overrides = plugin.metadata
|
||||||
|
|
||||||
|
except StopConsumeTaskError as e:
|
||||||
|
logger.info(f"{plugin_name} requested task exit: {e.message}")
|
||||||
|
return e.message
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"{plugin_name} failed: {e}")
|
||||||
|
status_mgr.send_progress(ProgressStatusOptions.FAILED, f"{e}", 100, 100)
|
||||||
|
raise
|
||||||
|
|
||||||
|
finally:
|
||||||
|
plugin.cleanup()
|
||||||
|
|
||||||
# continue with consumption if no barcode was found
|
# continue with consumption if no barcode was found
|
||||||
document = Consumer().try_consume_file(
|
document = Consumer().try_consume_file(
|
||||||
|
@ -1,4 +1,7 @@
|
|||||||
import shutil
|
import shutil
|
||||||
|
from collections.abc import Generator
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from pathlib import Path
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@ -7,14 +10,13 @@ from django.test import TestCase
|
|||||||
from django.test import override_settings
|
from django.test import override_settings
|
||||||
|
|
||||||
from documents import tasks
|
from documents import tasks
|
||||||
from documents.barcodes import BarcodeReader
|
from documents.barcodes import BarcodePlugin
|
||||||
from documents.consumer import ConsumerError
|
|
||||||
from documents.data_models import ConsumableDocument
|
from documents.data_models import ConsumableDocument
|
||||||
from documents.data_models import DocumentMetadataOverrides
|
from documents.data_models import DocumentMetadataOverrides
|
||||||
from documents.data_models import DocumentSource
|
from documents.data_models import DocumentSource
|
||||||
from documents.models import Document
|
|
||||||
from documents.tests.utils import DirectoriesMixin
|
from documents.tests.utils import DirectoriesMixin
|
||||||
from documents.tests.utils import DocumentConsumeDelayMixin
|
from documents.tests.utils import DocumentConsumeDelayMixin
|
||||||
|
from documents.tests.utils import DummyProgressManager
|
||||||
from documents.tests.utils import FileSystemAssertsMixin
|
from documents.tests.utils import FileSystemAssertsMixin
|
||||||
from documents.tests.utils import SampleDirMixin
|
from documents.tests.utils import SampleDirMixin
|
||||||
|
|
||||||
@ -26,8 +28,29 @@ except ImportError:
|
|||||||
HAS_ZXING_LIB = False
|
HAS_ZXING_LIB = False
|
||||||
|
|
||||||
|
|
||||||
|
class GetReaderPluginMixin:
    """
    Test helper providing a ready to use BarcodePlugin for a given file
    """

    @contextmanager
    def get_reader(self, filepath: Path) -> Generator[BarcodePlugin, None, None]:
        """
        Context manager yielding a BarcodePlugin for filepath, with setup()
        already called. cleanup() is called when the with block is left.

        Relies on self.dirs.scratch_dir being available on the test case
        """
        reader = BarcodePlugin(
            ConsumableDocument(DocumentSource.ConsumeFolder, original_file=filepath),
            DocumentMetadataOverrides(),
            DummyProgressManager(filepath.name, None),
            self.dirs.scratch_dir,
            "task-id",
        )
        reader.setup()
        try:
            yield reader
        finally:
            # Without the finally, a failing assertion inside the with
            # block would skip the cleanup entirely
            reader.cleanup()
|
||||||
|
|
||||||
|
|
||||||
@override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
|
@override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
|
||||||
class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, TestCase):
|
class TestBarcode(
|
||||||
|
DirectoriesMixin,
|
||||||
|
FileSystemAssertsMixin,
|
||||||
|
SampleDirMixin,
|
||||||
|
GetReaderPluginMixin,
|
||||||
|
TestCase,
|
||||||
|
):
|
||||||
def test_scan_file_for_separating_barcodes(self):
|
def test_scan_file_for_separating_barcodes(self):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@ -39,7 +62,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -60,7 +83,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff"
|
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "image/tiff") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -80,7 +103,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-alpha.tiff"
|
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-alpha.tiff"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "image/tiff") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -97,7 +120,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
- No pages to split on
|
- No pages to split on
|
||||||
"""
|
"""
|
||||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -115,7 +138,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -133,7 +156,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "several-patcht-codes.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "several-patcht-codes.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -158,7 +181,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
]:
|
]:
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / test_file
|
test_file = self.BARCODE_SAMPLE_DIR / test_file
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -177,7 +200,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-unreadable.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-unreadable.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -195,7 +218,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-fax-image.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-fax-image.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -214,7 +237,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-qr.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-qr.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -234,7 +257,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -255,7 +278,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -276,7 +299,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -296,7 +319,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -315,7 +338,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "many-qr-codes.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "many-qr-codes.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -334,7 +357,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.SAMPLE_DIR / "password-is-test.pdf"
|
test_file = self.SAMPLE_DIR / "password-is-test.pdf"
|
||||||
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
warning = cm.output[0]
|
warning = cm.output[0]
|
||||||
expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes"
|
expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes"
|
||||||
@ -356,7 +379,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
documents = reader.separate_pages({1: False})
|
documents = reader.separate_pages({1: False})
|
||||||
|
|
||||||
self.assertEqual(reader.pdf_file, test_file)
|
self.assertEqual(reader.pdf_file, test_file)
|
||||||
@ -373,7 +396,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-double.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-double.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
documents = reader.separate_pages({1: False, 2: False})
|
documents = reader.separate_pages({1: False, 2: False})
|
||||||
|
|
||||||
self.assertEqual(len(documents), 2)
|
self.assertEqual(len(documents), 2)
|
||||||
@ -385,32 +408,18 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
WHEN:
|
WHEN:
|
||||||
- No separation pages are provided
|
- No separation pages are provided
|
||||||
THEN:
|
THEN:
|
||||||
- No new documents are produced
|
- Nothing happens
|
||||||
- A warning is logged
|
|
||||||
"""
|
"""
|
||||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||||
|
|
||||||
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
with self.get_reader(test_file) as reader:
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
self.assertEqual("No pages to split on!", reader.run())
|
||||||
self.assertFalse(
|
|
||||||
reader.separate(
|
|
||||||
DocumentSource.ApiUpload,
|
|
||||||
DocumentMetadataOverrides(),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
cm.output,
|
|
||||||
[
|
|
||||||
"WARNING:paperless.barcodes:No pages to split on!",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
@override_settings(
|
@override_settings(
|
||||||
CONSUMER_ENABLE_BARCODES=True,
|
CONSUMER_ENABLE_BARCODES=True,
|
||||||
CONSUMER_BARCODE_TIFF_SUPPORT=True,
|
CONSUMER_BARCODE_TIFF_SUPPORT=True,
|
||||||
)
|
)
|
||||||
@mock.patch("documents.consumer.Consumer.try_consume_file")
|
def test_consume_barcode_unsupported_jpg_file(self):
|
||||||
def test_consume_barcode_unsupported_jpg_file(self, m):
|
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- JPEG image as input
|
- JPEG image as input
|
||||||
@ -422,35 +431,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.SAMPLE_DIR / "simple.jpg"
|
test_file = self.SAMPLE_DIR / "simple.jpg"
|
||||||
|
|
||||||
dst = settings.SCRATCH_DIR / "simple.jpg"
|
with self.get_reader(test_file) as reader:
|
||||||
shutil.copy(test_file, dst)
|
self.assertFalse(reader.able_to_run)
|
||||||
|
|
||||||
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
|
||||||
self.assertIn(
|
|
||||||
"Success",
|
|
||||||
tasks.consume_file(
|
|
||||||
ConsumableDocument(
|
|
||||||
source=DocumentSource.ConsumeFolder,
|
|
||||||
original_file=dst,
|
|
||||||
),
|
|
||||||
None,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertListEqual(
|
|
||||||
cm.output,
|
|
||||||
[
|
|
||||||
"WARNING:paperless.barcodes:Unsupported file format for barcode reader: image/jpeg",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
m.assert_called_once()
|
|
||||||
|
|
||||||
args, kwargs = m.call_args
|
|
||||||
self.assertIsNone(kwargs["override_filename"])
|
|
||||||
self.assertIsNone(kwargs["override_title"])
|
|
||||||
self.assertIsNone(kwargs["override_correspondent_id"])
|
|
||||||
self.assertIsNone(kwargs["override_document_type_id"])
|
|
||||||
self.assertIsNone(kwargs["override_tag_ids"])
|
|
||||||
|
|
||||||
@override_settings(
|
@override_settings(
|
||||||
CONSUMER_ENABLE_BARCODES=True,
|
CONSUMER_ENABLE_BARCODES=True,
|
||||||
@ -467,7 +449,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -504,7 +486,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
@ -550,7 +532,7 @@ class TestBarcodeNewConsume(
|
|||||||
|
|
||||||
overrides = DocumentMetadataOverrides(tag_ids=[1, 2, 9])
|
overrides = DocumentMetadataOverrides(tag_ids=[1, 2, 9])
|
||||||
|
|
||||||
with mock.patch("documents.tasks.async_to_sync") as progress_mocker:
|
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
tasks.consume_file(
|
tasks.consume_file(
|
||||||
ConsumableDocument(
|
ConsumableDocument(
|
||||||
@ -559,10 +541,8 @@ class TestBarcodeNewConsume(
|
|||||||
),
|
),
|
||||||
overrides,
|
overrides,
|
||||||
),
|
),
|
||||||
"File successfully split",
|
"Barcode splitting complete!",
|
||||||
)
|
)
|
||||||
# We let the consumer know progress is done
|
|
||||||
progress_mocker.assert_called_once()
|
|
||||||
# 2 new document consume tasks created
|
# 2 new document consume tasks created
|
||||||
self.assertEqual(self.consume_file_mock.call_count, 2)
|
self.assertEqual(self.consume_file_mock.call_count, 2)
|
||||||
|
|
||||||
@ -580,7 +560,20 @@ class TestBarcodeNewConsume(
|
|||||||
self.assertEqual(overrides, new_doc_overrides)
|
self.assertEqual(overrides, new_doc_overrides)
|
||||||
|
|
||||||
|
|
||||||
class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, GetReaderPluginMixin, TestCase):
|
||||||
|
@contextmanager
def get_reader(self, filepath: Path) -> BarcodePlugin:
    """
    Context manager which builds a BarcodePlugin for the given file, runs
    its setup() and yields it to the test body.

    The plugin is constructed as if the file arrived via the consume folder,
    with empty metadata overrides, a DummyProgressManager (so no progress
    backend is contacted), the test's scratch directory and a fixed task id.

    cleanup() is always called on exit from the ``with`` block, including
    when the test body raises (e.g. a failed assertion).
    """
    reader = BarcodePlugin(
        ConsumableDocument(DocumentSource.ConsumeFolder, original_file=filepath),
        DocumentMetadataOverrides(),
        DummyProgressManager(filepath.name, None),
        self.dirs.scratch_dir,
        "task-id",
    )
    reader.setup()
    try:
        yield reader
    finally:
        # Without try/finally an exception raised inside the ``with`` body
        # is re-raised at the yield and cleanup() would be skipped
        reader.cleanup()
|
||||||
|
|
||||||
@override_settings(CONSUMER_ASN_BARCODE_PREFIX="CUSTOM-PREFIX-")
|
@override_settings(CONSUMER_ASN_BARCODE_PREFIX="CUSTOM-PREFIX-")
|
||||||
def test_scan_file_for_asn_custom_prefix(self):
|
def test_scan_file_for_asn_custom_prefix(self):
|
||||||
"""
|
"""
|
||||||
@ -594,7 +587,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
|||||||
- The ASN integer value is correct
|
- The ASN integer value is correct
|
||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
asn = reader.asn
|
asn = reader.asn
|
||||||
|
|
||||||
self.assertEqual(reader.pdf_file, test_file)
|
self.assertEqual(reader.pdf_file, test_file)
|
||||||
@ -613,7 +606,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
asn = reader.asn
|
asn = reader.asn
|
||||||
|
|
||||||
self.assertEqual(reader.pdf_file, test_file)
|
self.assertEqual(reader.pdf_file, test_file)
|
||||||
@ -630,55 +623,12 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
asn = reader.asn
|
asn = reader.asn
|
||||||
|
|
||||||
self.assertEqual(reader.pdf_file, test_file)
|
self.assertEqual(reader.pdf_file, test_file)
|
||||||
self.assertEqual(asn, None)
|
self.assertEqual(asn, None)
|
||||||
|
|
||||||
@override_settings(CONSUMER_ENABLE_ASN_BARCODE=True)
|
|
||||||
def test_scan_file_for_asn_already_exists(self):
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- PDF with an ASN barcode
|
|
||||||
- ASN value already exists
|
|
||||||
WHEN:
|
|
||||||
- File is scanned for barcodes
|
|
||||||
THEN:
|
|
||||||
- ASN is retrieved from the document
|
|
||||||
- Consumption fails
|
|
||||||
"""
|
|
||||||
|
|
||||||
Document.objects.create(
|
|
||||||
title="WOW",
|
|
||||||
content="the content",
|
|
||||||
archive_serial_number=123,
|
|
||||||
checksum="456",
|
|
||||||
mime_type="application/pdf",
|
|
||||||
)
|
|
||||||
|
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
|
|
||||||
|
|
||||||
dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf"
|
|
||||||
shutil.copy(test_file, dst)
|
|
||||||
|
|
||||||
with mock.patch("documents.consumer.Consumer._send_progress"):
|
|
||||||
with self.assertRaises(ConsumerError) as cm, self.assertLogs(
|
|
||||||
"paperless.consumer",
|
|
||||||
level="ERROR",
|
|
||||||
) as logs_cm:
|
|
||||||
tasks.consume_file(
|
|
||||||
ConsumableDocument(
|
|
||||||
source=DocumentSource.ConsumeFolder,
|
|
||||||
original_file=dst,
|
|
||||||
),
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
self.assertIn("Not consuming barcode-39-asn-123.pdf", str(cm.exception))
|
|
||||||
error_str = logs_cm.output[0]
|
|
||||||
expected_str = "ERROR:paperless.consumer:Not consuming barcode-39-asn-123.pdf: Given ASN already exists!"
|
|
||||||
self.assertEqual(expected_str, error_str)
|
|
||||||
|
|
||||||
def test_scan_file_for_asn_barcode_invalid(self):
|
def test_scan_file_for_asn_barcode_invalid(self):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@ -692,7 +642,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
asn = reader.asn
|
asn = reader.asn
|
||||||
|
|
||||||
self.assertEqual(reader.pdf_file, test_file)
|
self.assertEqual(reader.pdf_file, test_file)
|
||||||
@ -718,7 +668,9 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
|||||||
dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf"
|
dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf"
|
||||||
shutil.copy(test_file, dst)
|
shutil.copy(test_file, dst)
|
||||||
|
|
||||||
with mock.patch("documents.consumer.Consumer.try_consume_file") as mocked_call:
|
with mock.patch(
|
||||||
|
"documents.consumer.Consumer.try_consume_file",
|
||||||
|
) as mocked_consumer:
|
||||||
tasks.consume_file(
|
tasks.consume_file(
|
||||||
ConsumableDocument(
|
ConsumableDocument(
|
||||||
source=DocumentSource.ConsumeFolder,
|
source=DocumentSource.ConsumeFolder,
|
||||||
@ -726,40 +678,11 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
|||||||
),
|
),
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
|
mocked_consumer.assert_called_once()
|
||||||
args, kwargs = mocked_call.call_args
|
args, kwargs = mocked_consumer.call_args
|
||||||
|
|
||||||
self.assertEqual(kwargs["override_asn"], 123)
|
self.assertEqual(kwargs["override_asn"], 123)
|
||||||
|
|
||||||
@override_settings(CONSUMER_ENABLE_ASN_BARCODE=True)
|
|
||||||
def test_asn_too_large(self):
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- ASN from barcode enabled
|
|
||||||
- Barcode contains too large an ASN value
|
|
||||||
WHEN:
|
|
||||||
- ASN from barcode checked for correctness
|
|
||||||
THEN:
|
|
||||||
- Exception is raised regarding size limits
|
|
||||||
"""
|
|
||||||
src = self.BARCODE_SAMPLE_DIR / "barcode-128-asn-too-large.pdf"
|
|
||||||
|
|
||||||
dst = self.dirs.scratch_dir / "barcode-128-asn-too-large.pdf"
|
|
||||||
shutil.copy(src, dst)
|
|
||||||
|
|
||||||
input_doc = ConsumableDocument(
|
|
||||||
source=DocumentSource.ConsumeFolder,
|
|
||||||
original_file=dst,
|
|
||||||
)
|
|
||||||
|
|
||||||
with mock.patch("documents.consumer.Consumer._send_progress"):
|
|
||||||
self.assertRaisesMessage(
|
|
||||||
ConsumerError,
|
|
||||||
"Given ASN 4294967296 is out of range [0, 4,294,967,295]",
|
|
||||||
tasks.consume_file,
|
|
||||||
input_doc,
|
|
||||||
)
|
|
||||||
|
|
||||||
@override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
|
@override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
|
||||||
def test_scan_file_for_qrcode_without_upscale(self):
|
def test_scan_file_for_qrcode_without_upscale(self):
|
||||||
"""
|
"""
|
||||||
@ -774,7 +697,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
|||||||
|
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
self.assertEqual(len(reader.barcodes), 0)
|
self.assertEqual(len(reader.barcodes), 0)
|
||||||
|
|
||||||
@ -796,7 +719,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
|||||||
|
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf"
|
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf"
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with self.get_reader(test_file) as reader:
|
||||||
reader.detect()
|
reader.detect()
|
||||||
self.assertEqual(len(reader.barcodes), 1)
|
self.assertEqual(len(reader.barcodes), 1)
|
||||||
self.assertEqual(reader.asn, 123)
|
self.assertEqual(reader.asn, 123)
|
||||||
|
@ -17,6 +17,7 @@ from documents.data_models import DocumentSource
|
|||||||
from documents.double_sided import STAGING_FILE_NAME
|
from documents.double_sided import STAGING_FILE_NAME
|
||||||
from documents.double_sided import TIMEOUT_MINUTES
|
from documents.double_sided import TIMEOUT_MINUTES
|
||||||
from documents.tests.utils import DirectoriesMixin
|
from documents.tests.utils import DirectoriesMixin
|
||||||
|
from documents.tests.utils import DummyProgressManager
|
||||||
from documents.tests.utils import FileSystemAssertsMixin
|
from documents.tests.utils import FileSystemAssertsMixin
|
||||||
|
|
||||||
|
|
||||||
@ -42,9 +43,10 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
dst = self.dirs.double_sided_dir / dstname
|
dst = self.dirs.double_sided_dir / dstname
|
||||||
dst.parent.mkdir(parents=True, exist_ok=True)
|
dst.parent.mkdir(parents=True, exist_ok=True)
|
||||||
shutil.copy(src, dst)
|
shutil.copy(src, dst)
|
||||||
with mock.patch("documents.tasks.async_to_sync"), mock.patch(
|
with mock.patch(
|
||||||
"documents.consumer.async_to_sync",
|
"documents.tasks.ProgressManager",
|
||||||
):
|
DummyProgressManager,
|
||||||
|
), mock.patch("documents.consumer.async_to_sync"):
|
||||||
msg = tasks.consume_file(
|
msg = tasks.consume_file(
|
||||||
ConsumableDocument(
|
ConsumableDocument(
|
||||||
source=DocumentSource.ConsumeFolder,
|
source=DocumentSource.ConsumeFolder,
|
||||||
@ -211,7 +213,7 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
msg = self.consume_file("simple.pdf", Path("..") / "simple.pdf")
|
msg = self.consume_file("simple.pdf", Path("..") / "simple.pdf")
|
||||||
self.assertIsNotFile(self.staging_file)
|
self.assertIsNotFile(self.staging_file)
|
||||||
self.assertRegex(msg, "Success. New document .* created")
|
self.assertRegex(msg, r"Success. New document id \d+ created")
|
||||||
|
|
||||||
def test_subdirectory_upload(self):
|
def test_subdirectory_upload(self):
|
||||||
"""
|
"""
|
||||||
@ -250,4 +252,4 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
msg = self.consume_file("simple.pdf")
|
msg = self.consume_file("simple.pdf")
|
||||||
self.assertIsNotFile(self.staging_file)
|
self.assertIsNotFile(self.staging_file)
|
||||||
self.assertRegex(msg, "Success. New document .* created")
|
self.assertRegex(msg, r"Success. New document id \d+ created")
|
||||||
|
@ -24,6 +24,7 @@ from documents.models import WorkflowAction
|
|||||||
from documents.models import WorkflowTrigger
|
from documents.models import WorkflowTrigger
|
||||||
from documents.signals import document_consumption_finished
|
from documents.signals import document_consumption_finished
|
||||||
from documents.tests.utils import DirectoriesMixin
|
from documents.tests.utils import DirectoriesMixin
|
||||||
|
from documents.tests.utils import DummyProgressManager
|
||||||
from documents.tests.utils import FileSystemAssertsMixin
|
from documents.tests.utils import FileSystemAssertsMixin
|
||||||
from paperless_mail.models import MailAccount
|
from paperless_mail.models import MailAccount
|
||||||
from paperless_mail.models import MailRule
|
from paperless_mail.models import MailRule
|
||||||
@ -126,7 +127,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
|||||||
|
|
||||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||||
|
|
||||||
with mock.patch("documents.tasks.async_to_sync"):
|
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||||
with self.assertLogs("paperless.matching", level="INFO") as cm:
|
with self.assertLogs("paperless.matching", level="INFO") as cm:
|
||||||
tasks.consume_file(
|
tasks.consume_file(
|
||||||
ConsumableDocument(
|
ConsumableDocument(
|
||||||
@ -203,7 +204,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
|||||||
w.save()
|
w.save()
|
||||||
|
|
||||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||||
with mock.patch("documents.tasks.async_to_sync"):
|
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||||
with self.assertLogs("paperless.matching", level="INFO") as cm:
|
with self.assertLogs("paperless.matching", level="INFO") as cm:
|
||||||
tasks.consume_file(
|
tasks.consume_file(
|
||||||
ConsumableDocument(
|
ConsumableDocument(
|
||||||
@ -294,7 +295,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
|||||||
|
|
||||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||||
|
|
||||||
with mock.patch("documents.tasks.async_to_sync"):
|
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||||
with self.assertLogs("paperless.matching", level="INFO") as cm:
|
with self.assertLogs("paperless.matching", level="INFO") as cm:
|
||||||
tasks.consume_file(
|
tasks.consume_file(
|
||||||
ConsumableDocument(
|
ConsumableDocument(
|
||||||
@ -356,7 +357,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
|||||||
|
|
||||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||||
|
|
||||||
with mock.patch("documents.tasks.async_to_sync"):
|
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||||
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
||||||
tasks.consume_file(
|
tasks.consume_file(
|
||||||
ConsumableDocument(
|
ConsumableDocument(
|
||||||
@ -407,7 +408,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
|||||||
|
|
||||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||||
|
|
||||||
with mock.patch("documents.tasks.async_to_sync"):
|
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||||
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
||||||
tasks.consume_file(
|
tasks.consume_file(
|
||||||
ConsumableDocument(
|
ConsumableDocument(
|
||||||
@ -468,7 +469,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
|||||||
|
|
||||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||||
|
|
||||||
with mock.patch("documents.tasks.async_to_sync"):
|
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||||
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
||||||
tasks.consume_file(
|
tasks.consume_file(
|
||||||
ConsumableDocument(
|
ConsumableDocument(
|
||||||
@ -529,7 +530,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
|||||||
|
|
||||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||||
|
|
||||||
with mock.patch("documents.tasks.async_to_sync"):
|
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||||
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
||||||
tasks.consume_file(
|
tasks.consume_file(
|
||||||
ConsumableDocument(
|
ConsumableDocument(
|
||||||
@ -591,7 +592,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
|||||||
|
|
||||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||||
|
|
||||||
with mock.patch("documents.tasks.async_to_sync"):
|
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||||
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
with self.assertLogs("paperless.matching", level="DEBUG") as cm:
|
||||||
tasks.consume_file(
|
tasks.consume_file(
|
||||||
ConsumableDocument(
|
ConsumableDocument(
|
||||||
@ -686,7 +687,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
|
|||||||
|
|
||||||
test_file = self.SAMPLE_DIR / "simple.pdf"
|
test_file = self.SAMPLE_DIR / "simple.pdf"
|
||||||
|
|
||||||
with mock.patch("documents.tasks.async_to_sync"):
|
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
|
||||||
with self.assertLogs("paperless.matching", level="INFO") as cm:
|
with self.assertLogs("paperless.matching", level="INFO") as cm:
|
||||||
tasks.consume_file(
|
tasks.consume_file(
|
||||||
ConsumableDocument(
|
ConsumableDocument(
|
||||||
|
@ -9,6 +9,7 @@ from os import PathLike
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from typing import Callable
|
from typing import Callable
|
||||||
|
from typing import Optional
|
||||||
from typing import Union
|
from typing import Union
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
@ -23,6 +24,7 @@ from django.test import override_settings
|
|||||||
from documents.data_models import ConsumableDocument
|
from documents.data_models import ConsumableDocument
|
||||||
from documents.data_models import DocumentMetadataOverrides
|
from documents.data_models import DocumentMetadataOverrides
|
||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
|
from documents.plugins.helpers import ProgressStatusOptions
|
||||||
|
|
||||||
|
|
||||||
def setup_directories():
|
def setup_directories():
|
||||||
@ -146,6 +148,11 @@ def util_call_with_backoff(
|
|||||||
|
|
||||||
|
|
||||||
class DirectoriesMixin:
|
class DirectoriesMixin:
|
||||||
|
"""
|
||||||
|
Creates and overrides settings for all folders and paths, then ensures
|
||||||
|
they are cleaned up on exit
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.dirs = None
|
self.dirs = None
|
||||||
@ -160,6 +167,10 @@ class DirectoriesMixin:
|
|||||||
|
|
||||||
|
|
||||||
class FileSystemAssertsMixin:
|
class FileSystemAssertsMixin:
|
||||||
|
"""
|
||||||
|
Utilities for checking various state information of the file system
|
||||||
|
"""
|
||||||
|
|
||||||
def assertIsFile(self, path: Union[PathLike, str]):
|
def assertIsFile(self, path: Union[PathLike, str]):
|
||||||
self.assertTrue(Path(path).resolve().is_file(), f"File does not exist: {path}")
|
self.assertTrue(Path(path).resolve().is_file(), f"File does not exist: {path}")
|
||||||
|
|
||||||
@ -188,6 +199,11 @@ class FileSystemAssertsMixin:
|
|||||||
|
|
||||||
|
|
||||||
class ConsumerProgressMixin:
|
class ConsumerProgressMixin:
|
||||||
|
"""
|
||||||
|
Mocks the Consumer _send_progress, preventing attempts to connect to Redis
|
||||||
|
and allowing access to its calls for verification
|
||||||
|
"""
|
||||||
|
|
||||||
def setUp(self) -> None:
|
def setUp(self) -> None:
|
||||||
self.send_progress_patcher = mock.patch(
|
self.send_progress_patcher = mock.patch(
|
||||||
"documents.consumer.Consumer._send_progress",
|
"documents.consumer.Consumer._send_progress",
|
||||||
@ -310,3 +326,59 @@ class SampleDirMixin:
|
|||||||
SAMPLE_DIR = Path(__file__).parent / "samples"
|
SAMPLE_DIR = Path(__file__).parent / "samples"
|
||||||
|
|
||||||
BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
|
BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
|
||||||
|
|
||||||
|
|
||||||
|
class DummyProgressManager:
    """
    A dummy handler for progress management that doesn't actually try to
    connect to Redis.  Payloads are stored for test assertions if needed.

    Use it with
      mock.patch("documents.tasks.ProgressManager", DummyProgressManager)
    """

    def __init__(self, filename: str, task_id: Optional[str] = None) -> None:
        """
        Store the filename and task id which are echoed into every payload
        and start with an empty list of recorded payloads.
        """
        self.filename = filename
        self.task_id = task_id
        # Every payload built by send_progress, in call order
        self.payloads: list[dict] = []

    def __enter__(self):
        """
        Open the (dummy) connection and return this manager
        """
        self.open()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Close the (dummy) connection.  Exceptions are not suppressed
        """
        self.close()

    def open(self) -> None:
        """
        No-op, there is no connection to open
        """

    def close(self) -> None:
        """
        No-op, there is no connection to close
        """

    def send_progress(
        self,
        status: ProgressStatusOptions,
        message: str,
        current_progress: int,
        max_progress: int,
        extra_args: Optional[dict[str, Union[str, int]]] = None,
    ) -> None:
        """
        Build a "status_update" payload from the arguments and record it in
        self.payloads instead of sending it anywhere.

        Any extra_args are merged into the payload's "data" mapping and
        override keys of the same name.
        """
        # Ensure the layer is open
        self.open()

        payload = {
            "type": "status_update",
            "data": {
                "filename": self.filename,
                "task_id": self.task_id,
                "current_progress": current_progress,
                "max_progress": max_progress,
                "status": status,
                "message": message,
            },
        }
        if extra_args is not None:
            payload["data"].update(extra_args)

        self.payloads.append(payload)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user