Chore: Convert the consumer to a plugin (#6361)

Trenton H 2024-04-17 19:59:14 -07:00 committed by GitHub
parent e837f1e85b
commit b720aa3cd1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 922 additions and 617 deletions

View File

@@ -0,0 +1,36 @@
+{
+  "folders": [
+    {
+      "path": "."
+    },
+    {
+      "path": "./src",
+      "name": "Backend"
+    },
+    {
+      "path": "./src-ui",
+      "name": "Frontend"
+    },
+    {
+      "path": "./.github",
+      "name": "CI/CD"
+    },
+    {
+      "path": "./docs",
+      "name": "Documentation"
+    }
+  ],
+  "settings": {
+    "files.exclude": {
+      "**/__pycache__": true,
+      "**/.mypy_cache": true,
+      "**/.ruff_cache": true,
+      "**/.pytest_cache": true,
+      "**/.idea": true,
+      "**/.venv": true,
+      "**/.coverage": true,
+      "**/coverage.json": true
+    }
+  }
+}

View File

@@ -4,7 +4,7 @@ import { environment } from 'src/environments/environment'
 import { WebsocketConsumerStatusMessage } from '../data/websocket-consumer-status-message'
 import { SettingsService } from './settings.service'

-// see ConsumerFilePhase in src/documents/consumer.py
+// see ProgressStatusOptions in src/documents/plugins/helpers.py
 export enum FileStatusPhase {
   STARTED = 0,
   UPLOADING = 1,

View File

@@ -2,15 +2,13 @@ import datetime
 import hashlib
 import os
 import tempfile
-import uuid
 from enum import Enum
 from pathlib import Path
 from typing import TYPE_CHECKING
 from typing import Optional
+from typing import Union
 import magic
-from asgiref.sync import async_to_sync
-from channels.layers import get_channel_layer
 from django.conf import settings
 from django.contrib.auth.models import User
 from django.db import transaction
@@ -20,6 +18,7 @@ from filelock import FileLock
 from rest_framework.reverse import reverse
 from documents.classifier import load_classifier
+from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentMetadataOverrides
 from documents.file_handling import create_source_path_directory
 from documents.file_handling import generate_unique_filename
@@ -45,6 +44,8 @@ from documents.plugins.base import AlwaysRunPluginMixin
 from documents.plugins.base import ConsumeTaskPlugin
 from documents.plugins.base import NoCleanupPluginMixin
 from documents.plugins.base import NoSetupPluginMixin
+from documents.plugins.helpers import ProgressManager
+from documents.plugins.helpers import ProgressStatusOptions
 from documents.signals import document_consumption_finished
 from documents.signals import document_consumption_started
 from documents.utils import copy_basic_file_stats
@@ -247,88 +248,81 @@ class ConsumerStatusShortMessage(str, Enum):
     FAILED = "failed"

-class ConsumerFilePhase(str, Enum):
-    STARTED = "STARTED"
-    WORKING = "WORKING"
-    SUCCESS = "SUCCESS"
-    FAILED = "FAILED"
-
-class Consumer(LoggingMixin):
+class ConsumerPlugin(
+    AlwaysRunPluginMixin,
+    NoSetupPluginMixin,
+    NoCleanupPluginMixin,
+    LoggingMixin,
+    ConsumeTaskPlugin,
+):
     logging_name = "paperless.consumer"

+    def __init__(
+        self,
+        input_doc: ConsumableDocument,
+        metadata: DocumentMetadataOverrides,
+        status_mgr: ProgressManager,
+        base_tmp_dir: Path,
+        task_id: str,
+    ) -> None:
+        super().__init__(input_doc, metadata, status_mgr, base_tmp_dir, task_id)
+
+        self.renew_logging_group()
+
+        self.filename = self.metadata.filename or self.input_doc.original_file.name
+
     def _send_progress(
         self,
         current_progress: int,
         max_progress: int,
-        status: ConsumerFilePhase,
-        message: Optional[ConsumerStatusShortMessage] = None,
+        status: ProgressStatusOptions,
+        message: Optional[Union[ConsumerStatusShortMessage, str]] = None,
         document_id=None,
     ):  # pragma: no cover
-        payload = {
-            "filename": os.path.basename(self.filename) if self.filename else None,
-            "task_id": self.task_id,
-            "current_progress": current_progress,
-            "max_progress": max_progress,
-            "status": status,
-            "message": message,
-            "document_id": document_id,
-            "owner_id": self.override_owner_id if self.override_owner_id else None,
-        }
-        async_to_sync(self.channel_layer.group_send)(
-            "status_updates",
-            {"type": "status_update", "data": payload},
+        self.status_mgr.send_progress(
+            status,
+            message,
+            current_progress,
+            max_progress,
+            extra_args={
+                "document_id": document_id,
+                "owner_id": self.metadata.owner_id if self.metadata.owner_id else None,
+            },
         )

     def _fail(
         self,
-        message: ConsumerStatusShortMessage,
+        message: Union[ConsumerStatusShortMessage, str],
         log_message: Optional[str] = None,
         exc_info=None,
         exception: Optional[Exception] = None,
     ):
-        self._send_progress(100, 100, ConsumerFilePhase.FAILED, message)
+        self._send_progress(100, 100, ProgressStatusOptions.FAILED, message)
         self.log.error(log_message or message, exc_info=exc_info)
         raise ConsumerError(f"{self.filename}: {log_message or message}") from exception

-    def __init__(self):
-        super().__init__()
-        self.path: Optional[Path] = None
-        self.original_path: Optional[Path] = None
-        self.filename = None
-        self.override_title = None
-        self.override_correspondent_id = None
-        self.override_tag_ids = None
-        self.override_document_type_id = None
-        self.override_asn = None
-        self.task_id = None
-        self.override_owner_id = None
-        self.override_custom_field_ids = None
-        self.channel_layer = get_channel_layer()
-
     def pre_check_file_exists(self):
         """
         Confirm the input file still exists where it should
         """
-        if not os.path.isfile(self.original_path):
+        if not os.path.isfile(self.input_doc.original_file):
             self._fail(
                 ConsumerStatusShortMessage.FILE_NOT_FOUND,
-                f"Cannot consume {self.original_path}: File not found.",
+                f"Cannot consume {self.input_doc.original_file}: File not found.",
             )

     def pre_check_duplicate(self):
         """
         Using the MD5 of the file, check this exact file doesn't already exist
         """
-        with open(self.original_path, "rb") as f:
+        with open(self.input_doc.original_file, "rb") as f:
             checksum = hashlib.md5(f.read()).hexdigest()
         existing_doc = Document.objects.filter(
             Q(checksum=checksum) | Q(archive_checksum=checksum),
         )
         if existing_doc.exists():
             if settings.CONSUMER_DELETE_DUPLICATES:
-                os.unlink(self.original_path)
+                os.unlink(self.input_doc.original_file)
             self._fail(
                 ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS,
                 f"Not consuming {self.filename}: It is a duplicate of"
@@ -348,26 +342,26 @@ class Consumer(LoggingMixin):
         """
         Check that if override_asn is given, it is unique and within a valid range
         """
-        if not self.override_asn:
+        if not self.metadata.asn:
             # check not necessary in case no ASN gets set
             return
         # Validate the range is above zero and less than uint32_t max
         # otherwise, Whoosh can't handle it in the index
         if (
-            self.override_asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
-            or self.override_asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
+            self.metadata.asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
+            or self.metadata.asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
         ):
             self._fail(
                 ConsumerStatusShortMessage.ASN_RANGE,
                 f"Not consuming {self.filename}: "
-                f"Given ASN {self.override_asn} is out of range "
+                f"Given ASN {self.metadata.asn} is out of range "
                 f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
                 f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}]",
             )
-        if Document.objects.filter(archive_serial_number=self.override_asn).exists():
+        if Document.objects.filter(archive_serial_number=self.metadata.asn).exists():
             self._fail(
                 ConsumerStatusShortMessage.ASN_ALREADY_EXISTS,
-                f"Not consuming {self.filename}: Given ASN {self.override_asn} already exists!",
+                f"Not consuming {self.filename}: Given ASN {self.metadata.asn} already exists!",
             )

     def run_pre_consume_script(self):
@@ -388,7 +382,7 @@
         self.log.info(f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")

         working_file_path = str(self.working_copy)
-        original_file_path = str(self.original_path)
+        original_file_path = str(self.input_doc.original_file)

         script_env = os.environ.copy()
         script_env["DOCUMENT_SOURCE_PATH"] = original_file_path
@@ -486,50 +480,15 @@
                 exception=e,
             )

-    def try_consume_file(
-        self,
-        path: Path,
-        override_filename=None,
-        override_title=None,
-        override_correspondent_id=None,
-        override_document_type_id=None,
-        override_tag_ids=None,
-        override_storage_path_id=None,
-        task_id=None,
-        override_created=None,
-        override_asn=None,
-        override_owner_id=None,
-        override_view_users=None,
-        override_view_groups=None,
-        override_change_users=None,
-        override_change_groups=None,
-        override_custom_field_ids=None,
-    ) -> Document:
+    def run(self) -> str:
         """
         Return the document object if it was successfully created.
         """
-        self.original_path = Path(path).resolve()
-        self.filename = override_filename or self.original_path.name
-        self.override_title = override_title
-        self.override_correspondent_id = override_correspondent_id
-        self.override_document_type_id = override_document_type_id
-        self.override_tag_ids = override_tag_ids
-        self.override_storage_path_id = override_storage_path_id
-        self.task_id = task_id or str(uuid.uuid4())
-        self.override_created = override_created
-        self.override_asn = override_asn
-        self.override_owner_id = override_owner_id
-        self.override_view_users = override_view_users
-        self.override_view_groups = override_view_groups
-        self.override_change_users = override_change_users
-        self.override_change_groups = override_change_groups
-        self.override_custom_field_ids = override_custom_field_ids
-
         self._send_progress(
             0,
             100,
-            ConsumerFilePhase.STARTED,
+            ProgressStatusOptions.STARTED,
             ConsumerStatusShortMessage.NEW_FILE,
         )
@@ -548,7 +507,7 @@
             dir=settings.SCRATCH_DIR,
         )
         self.working_copy = Path(tempdir.name) / Path(self.filename)
-        copy_file_with_basic_stats(self.original_path, self.working_copy)
+        copy_file_with_basic_stats(self.input_doc.original_file, self.working_copy)

         # Determine the parser class.
@@ -580,7 +539,7 @@
         def progress_callback(current_progress, max_progress):  # pragma: no cover
             # recalculate progress to be within 20 and 80
             p = int((current_progress / max_progress) * 50 + 20)
-            self._send_progress(p, 100, ConsumerFilePhase.WORKING)
+            self._send_progress(p, 100, ProgressStatusOptions.WORKING)

         # This doesn't parse the document yet, but gives us a parser.
@@ -591,9 +550,6 @@
         self.log.debug(f"Parser: {type(document_parser).__name__}")

-        # However, this already created working directories which we have to
-        # clean up.
-
         # Parse the document. This may take some time.

         text = None
@@ -605,7 +561,7 @@
             self._send_progress(
                 20,
                 100,
-                ConsumerFilePhase.WORKING,
+                ProgressStatusOptions.WORKING,
                 ConsumerStatusShortMessage.PARSING_DOCUMENT,
             )
             self.log.debug(f"Parsing {self.filename}...")
@@ -615,7 +571,7 @@
             self._send_progress(
                 70,
                 100,
-                ConsumerFilePhase.WORKING,
+                ProgressStatusOptions.WORKING,
                 ConsumerStatusShortMessage.GENERATING_THUMBNAIL,
             )
             thumbnail = document_parser.get_thumbnail(
@@ -630,7 +586,7 @@
             self._send_progress(
                 90,
                 100,
-                ConsumerFilePhase.WORKING,
+                ProgressStatusOptions.WORKING,
                 ConsumerStatusShortMessage.PARSE_DATE,
             )
             date = parse_date(self.filename, text)
@@ -664,7 +620,7 @@
         self._send_progress(
             95,
             100,
-            ConsumerFilePhase.WORKING,
+            ProgressStatusOptions.WORKING,
             ConsumerStatusShortMessage.SAVE_DOCUMENT,
         )
         # now that everything is done, we can start to store the document
@@ -726,13 +682,13 @@
                 # Delete the file only if it was successfully consumed
                 self.log.debug(f"Deleting file {self.working_copy}")
-                self.original_path.unlink()
+                self.input_doc.original_file.unlink()
                 self.working_copy.unlink()

                 # https://github.com/jonaswinkler/paperless-ng/discussions/1037
                 shadow_file = os.path.join(
-                    os.path.dirname(self.original_path),
-                    "._" + os.path.basename(self.original_path),
+                    os.path.dirname(self.input_doc.original_file),
+                    "._" + os.path.basename(self.input_doc.original_file),
                 )

                 if os.path.isfile(shadow_file):
@@ -758,7 +714,7 @@
         self._send_progress(
             100,
             100,
-            ConsumerFilePhase.SUCCESS,
+            ProgressStatusOptions.SUCCESS,
             ConsumerStatusShortMessage.FINISHED,
             document.id,
         )
@@ -766,24 +722,24 @@
         # Return the most up to date fields
         document.refresh_from_db()

-        return document
+        return f"Success. New document id {document.pk} created"

     def _parse_title_placeholders(self, title: str) -> str:
         local_added = timezone.localtime(timezone.now())

         correspondent_name = (
-            Correspondent.objects.get(pk=self.override_correspondent_id).name
-            if self.override_correspondent_id is not None
+            Correspondent.objects.get(pk=self.metadata.correspondent_id).name
+            if self.metadata.correspondent_id is not None
             else None
         )
         doc_type_name = (
-            DocumentType.objects.get(pk=self.override_document_type_id).name
-            if self.override_document_type_id is not None
+            DocumentType.objects.get(pk=self.metadata.document_type_id).name
+            if self.metadata.document_type_id is not None
             else None
         )
         owner_username = (
-            User.objects.get(pk=self.override_owner_id).username
-            if self.override_owner_id is not None
+            User.objects.get(pk=self.metadata.owner_id).username
+            if self.metadata.owner_id is not None
             else None
         )
@@ -808,8 +764,8 @@
         self.log.debug("Saving record to database")

-        if self.override_created is not None:
-            create_date = self.override_created
+        if self.metadata.created is not None:
+            create_date = self.metadata.created
             self.log.debug(
                 f"Creation date from post_documents parameter: {create_date}",
             )
@@ -820,7 +776,7 @@
             create_date = date
             self.log.debug(f"Creation date from parse_date: {create_date}")
         else:
-            stats = os.stat(self.original_path)
+            stats = os.stat(self.input_doc.original_file)
             create_date = timezone.make_aware(
                 datetime.datetime.fromtimestamp(stats.st_mtime),
             )
@@ -829,12 +785,12 @@
         storage_type = Document.STORAGE_TYPE_UNENCRYPTED

         title = file_info.title
-        if self.override_title is not None:
+        if self.metadata.title is not None:
             try:
-                title = self._parse_title_placeholders(self.override_title)
+                title = self._parse_title_placeholders(self.metadata.title)
             except Exception as e:
                 self.log.error(
-                    f"Error occurred parsing title override '{self.override_title}', falling back to original. Exception: {e}",
+                    f"Error occurred parsing title override '{self.metadata.title}', falling back to original. Exception: {e}",
                 )

         document = Document.objects.create(
@@ -855,53 +811,53 @@
         return document

     def apply_overrides(self, document):
-        if self.override_correspondent_id:
+        if self.metadata.correspondent_id:
             document.correspondent = Correspondent.objects.get(
-                pk=self.override_correspondent_id,
+                pk=self.metadata.correspondent_id,
             )

-        if self.override_document_type_id:
+        if self.metadata.document_type_id:
             document.document_type = DocumentType.objects.get(
-                pk=self.override_document_type_id,
+                pk=self.metadata.document_type_id,
             )

-        if self.override_tag_ids:
-            for tag_id in self.override_tag_ids:
+        if self.metadata.tag_ids:
+            for tag_id in self.metadata.tag_ids:
                 document.tags.add(Tag.objects.get(pk=tag_id))

-        if self.override_storage_path_id:
+        if self.metadata.storage_path_id:
             document.storage_path = StoragePath.objects.get(
-                pk=self.override_storage_path_id,
+                pk=self.metadata.storage_path_id,
             )

-        if self.override_asn:
-            document.archive_serial_number = self.override_asn
+        if self.metadata.asn:
+            document.archive_serial_number = self.metadata.asn

-        if self.override_owner_id:
+        if self.metadata.owner_id:
             document.owner = User.objects.get(
-                pk=self.override_owner_id,
+                pk=self.metadata.owner_id,
             )

         if (
-            self.override_view_users is not None
-            or self.override_view_groups is not None
-            or self.override_change_users is not None
-            or self.override_change_users is not None
+            self.metadata.view_users is not None
+            or self.metadata.view_groups is not None
+            or self.metadata.change_users is not None
+            or self.metadata.change_users is not None
         ):
             permissions = {
                 "view": {
-                    "users": self.override_view_users or [],
-                    "groups": self.override_view_groups or [],
+                    "users": self.metadata.view_users or [],
+                    "groups": self.metadata.view_groups or [],
                 },
                 "change": {
-                    "users": self.override_change_users or [],
-                    "groups": self.override_change_groups or [],
+                    "users": self.metadata.change_users or [],
+                    "groups": self.metadata.change_groups or [],
                 },
             }
             set_permissions_for_object(permissions=permissions, object=document)

-        if self.override_custom_field_ids:
-            for field_id in self.override_custom_field_ids:
+        if self.metadata.custom_field_ids:
+            for field_id in self.metadata.custom_field_ids:
                 field = CustomField.objects.get(pk=field_id)
                 CustomFieldInstance.objects.create(
                     field=field,
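Editor's note: with this change the consumer participates in the same plugin protocol as the barcode, collation, and workflow plugins: construction with (input_doc, metadata, status_mgr, base_tmp_dir, task_id), then setup()/run()/cleanup(). A minimal sketch of driving the plugin by hand, outside the Celery task; the ProgressManager constructor arguments here follow the test helper's usage and are an assumption:

```python
from pathlib import Path

from documents.consumer import ConsumerPlugin
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.plugins.helpers import ProgressManager


def consume_one(filepath: Path, scratch_dir: Path, task_id: str) -> str:
    # Assumed signature: ProgressManager(filename, task_id), as in the tests
    status_mgr = ProgressManager(filepath.name, task_id)
    plugin = ConsumerPlugin(
        ConsumableDocument(DocumentSource.ConsumeFolder, original_file=filepath),
        DocumentMetadataOverrides(),
        status_mgr,
        scratch_dir,
        task_id,
    )
    plugin.setup()  # no-op via NoSetupPluginMixin
    try:
        # Returns e.g. "Success. New document id 42 created"; raises ConsumerError on failure
        return plugin.run()
    finally:
        plugin.cleanup()  # no-op via NoCleanupPluginMixin
```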

View File

@@ -3,9 +3,6 @@ import uuid
 class LoggingMixin:
-    def __init__(self) -> None:
-        self.renew_logging_group()
-
     def renew_logging_group(self):
         """
         Creates a new UUID to group subsequent log calls together with

View File

@@ -328,6 +328,7 @@ class DocumentParser(LoggingMixin):
     def __init__(self, logging_group, progress_callback=None):
         super().__init__()
+        self.renew_logging_group()
         self.logging_group = logging_group
         self.settings = self.get_settings()
         settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
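Editor's note: since LoggingMixin no longer defines an __init__ (see the loggers.py hunk above), each class that wants a fresh per-instance logging group now calls renew_logging_group() from its own initializer, as DocumentParser does here and MailAccountHandler does below. A hedged sketch of the resulting pattern:

```python
import uuid


class LoggingMixin:
    # Sketch of the mixin after this change: it only offers the method and
    # no longer hooks into __init__, keeping cooperative inheritance simple.
    def renew_logging_group(self):
        # New UUID to group subsequent log calls together
        self.logging_group = uuid.uuid4()


class MyParser(LoggingMixin):
    def __init__(self):
        super().__init__()
        self.renew_logging_group()  # now the subclass's responsibility
```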

View File

@@ -67,7 +67,8 @@ class ConsumeTaskPlugin(abc.ABC):
         self.status_mgr = status_mgr
         self.task_id: Final = task_id

-    @abc.abstractproperty
+    @property
+    @abc.abstractmethod
     def able_to_run(self) -> bool:
         """
         Return True if the conditions are met for the plugin to run, False otherwise
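Editor's note: `abc.abstractproperty` has been deprecated since Python 3.3; stacking `@property` over `@abc.abstractmethod` is the documented replacement and enforces the same contract. A self-contained sketch:

```python
import abc


class Base(abc.ABC):
    @property
    @abc.abstractmethod
    def able_to_run(self) -> bool:
        """Subclasses must provide this as a property."""


class Always(Base):
    @property
    def able_to_run(self) -> bool:
        return True


assert Always().able_to_run  # instantiating Base() itself raises TypeError
```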

View File

@@ -57,7 +57,7 @@ class ProgressManager:
         message: str,
         current_progress: int,
         max_progress: int,
-        extra_args: Optional[dict[str, Union[str, int]]] = None,
+        extra_args: Optional[dict[str, Union[str, int, None]]] = None,
     ) -> None:
         # Ensure the layer is open
         self.open()
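Editor's note: the widened value type matches the new caller. ConsumerPlugin._send_progress puts document_id (None until the document is saved) and a possibly-None owner_id into extra_args, which dict[str, Union[str, int]] would reject under type checking. Illustrative values only:

```python
from typing import Optional, Union

# Values may legitimately be absent, hence Union[str, int, None]
extra_args: Optional[dict[str, Union[str, int, None]]] = {
    "document_id": None,  # document not yet created
    "owner_id": 2,
}
```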

View File

@@ -21,8 +21,7 @@ from documents.barcodes import BarcodePlugin
 from documents.caching import clear_document_caches
 from documents.classifier import DocumentClassifier
 from documents.classifier import load_classifier
-from documents.consumer import Consumer
-from documents.consumer import ConsumerError
+from documents.consumer import ConsumerPlugin
 from documents.consumer import WorkflowTriggerPlugin
 from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentMetadataOverrides
@@ -115,6 +114,7 @@ def consume_file(
         CollatePlugin,
         BarcodePlugin,
         WorkflowTriggerPlugin,
+        ConsumerPlugin,
     ]

     with ProgressManager(
@@ -162,33 +162,7 @@ def consume_file(
         finally:
             plugin.cleanup()

-    # continue with consumption if no barcode was found
-    document = Consumer().try_consume_file(
-        input_doc.original_file,
-        override_filename=overrides.filename,
-        override_title=overrides.title,
-        override_correspondent_id=overrides.correspondent_id,
-        override_document_type_id=overrides.document_type_id,
-        override_tag_ids=overrides.tag_ids,
-        override_storage_path_id=overrides.storage_path_id,
-        override_created=overrides.created,
-        override_asn=overrides.asn,
-        override_owner_id=overrides.owner_id,
-        override_view_users=overrides.view_users,
-        override_view_groups=overrides.view_groups,
-        override_change_users=overrides.change_users,
-        override_change_groups=overrides.change_groups,
-        override_custom_field_ids=overrides.custom_field_ids,
-        task_id=self.request.id,
-    )
-
-    if document:
-        return f"Success. New document id {document.pk} created"
-    else:
-        raise ConsumerError(
-            "Unknown error: Returned document was null, but "
-            "no error message was given.",
-        )
+    return msg


 @shared_task
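Editor's note: consume_file is thus reduced to running its plugin list in order; ConsumerPlugin, appended last, produces the msg the task returns. A hypothetical reduction of the loop shape implied by this diff (real error handling and StopConsumeTaskError short-circuiting omitted):

```python
def run_plugins(plugins, input_doc, overrides, status_mgr, tmp_dir, task_id) -> str:
    msg = ""
    for plugin_class in plugins:
        plugin = plugin_class(input_doc, overrides, status_mgr, tmp_dir, task_id)
        if not plugin.able_to_run:
            continue
        plugin.setup()
        try:
            msg = plugin.run()
        finally:
            plugin.cleanup()
    # With ConsumerPlugin last, this is its "Success. New document id ..." string
    return msg
```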

View File

@@ -14,6 +14,7 @@ from documents.barcodes import BarcodePlugin
 from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentMetadataOverrides
 from documents.data_models import DocumentSource
+from documents.models import Document
 from documents.models import Tag
 from documents.plugins.base import StopConsumeTaskError
 from documents.tests.utils import DirectoriesMixin
@@ -674,9 +675,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, GetReaderPluginMixin, TestCase):
         dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf"
         shutil.copy(test_file, dst)

-        with mock.patch(
-            "documents.consumer.Consumer.try_consume_file",
-        ) as mocked_consumer:
+        with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
             tasks.consume_file(
                 ConsumableDocument(
                     source=DocumentSource.ConsumeFolder,
@@ -684,10 +683,10 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, GetReaderPluginMixin, TestCase):
                 ),
                 None,
             )

-        mocked_consumer.assert_called_once()
-        args, kwargs = mocked_consumer.call_args
-        self.assertEqual(kwargs["override_asn"], 123)
+        document = Document.objects.first()
+        self.assertEqual(document.archive_serial_number, 123)

     @override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
     def test_scan_file_for_qrcode_without_upscale(self):

File diff suppressed because it is too large

View File

@@ -46,7 +46,7 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         with mock.patch(
             "documents.tasks.ProgressManager",
             DummyProgressManager,
-        ), mock.patch("documents.consumer.async_to_sync"):
+        ):
             msg = tasks.consume_file(
                 ConsumableDocument(
                     source=DocumentSource.ConsumeFolder,

View File

@@ -1,3 +1,4 @@
+import shutil
 from datetime import timedelta
 from pathlib import Path
 from typing import TYPE_CHECKING
@@ -88,8 +89,7 @@
         return super().setUp()

-    @mock.patch("documents.consumer.Consumer.try_consume_file")
-    def test_workflow_match(self, m):
+    def test_workflow_match(self):
         """
         GIVEN:
             - Existing workflow
@@ -102,7 +102,7 @@
             type=WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
             sources=f"{DocumentSource.ApiUpload},{DocumentSource.ConsumeFolder},{DocumentSource.MailFetch}",
             filter_filename="*simple*",
-            filter_path="*/samples/*",
+            filter_path=f"*/{self.dirs.scratch_dir.parts[-1]}/*",
         )
         action = WorkflowAction.objects.create(
             assign_title="Doc from {correspondent}",
@@ -133,7 +133,10 @@
         self.assertEqual(trigger.__str__(), "WorkflowTrigger 1")
         self.assertEqual(action.__str__(), "WorkflowAction 1")

-        test_file = self.SAMPLE_DIR / "simple.pdf"
+        test_file = shutil.copy(
+            self.SAMPLE_DIR / "simple.pdf",
+            self.dirs.scratch_dir / "simple.pdf",
+        )

         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
             with self.assertLogs("paperless.matching", level="INFO") as cm:
@@ -144,26 +147,53 @@
                     ),
                     None,
                 )

-            m.assert_called_once()
-            _, overrides = m.call_args
-            self.assertEqual(overrides["override_correspondent_id"], self.c.pk)
-            self.assertEqual(overrides["override_document_type_id"], self.dt.pk)
-            self.assertEqual(
-                overrides["override_tag_ids"],
-                [self.t1.pk, self.t2.pk, self.t3.pk],
-            )
-            self.assertEqual(overrides["override_storage_path_id"], self.sp.pk)
-            self.assertEqual(overrides["override_owner_id"], self.user2.pk)
-            self.assertEqual(overrides["override_view_users"], [self.user3.pk])
-            self.assertEqual(overrides["override_view_groups"], [self.group1.pk])
-            self.assertEqual(overrides["override_change_users"], [self.user3.pk])
-            self.assertEqual(overrides["override_change_groups"], [self.group1.pk])
-            self.assertEqual(
-                overrides["override_title"],
-                "Doc from {correspondent}",
-            )
-            self.assertEqual(
-                overrides["override_custom_field_ids"],
-                [self.cf1.pk, self.cf2.pk],
-            )
+            document = Document.objects.first()
+            self.assertEqual(document.correspondent, self.c)
+            self.assertEqual(document.document_type, self.dt)
+            self.assertEqual(list(document.tags.all()), [self.t1, self.t2, self.t3])
+            self.assertEqual(document.storage_path, self.sp)
+            self.assertEqual(document.owner, self.user2)
+            self.assertEqual(
+                list(
+                    get_users_with_perms(
+                        document,
+                        only_with_perms_in=["view_document"],
+                    ),
+                ),
+                [self.user3],
+            )
+            self.assertEqual(
+                list(
+                    get_groups_with_perms(
+                        document,
+                    ),
+                ),
+                [self.group1],
+            )
+            self.assertEqual(
+                list(
+                    get_users_with_perms(
+                        document,
+                        only_with_perms_in=["change_document"],
+                    ),
+                ),
+                [self.user3],
+            )
+            self.assertEqual(
+                list(
+                    get_groups_with_perms(
+                        document,
+                    ),
+                ),
+                [self.group1],
+            )
+            self.assertEqual(
+                document.title,
+                f"Doc from {self.c.name}",
+            )
+            self.assertEqual(
+                list(document.custom_fields.all().values_list("field", flat=True)),
+                [self.cf1.pk, self.cf2.pk],
+            )
@@ -171,8 +201,7 @@
             expected_str = f"Document matched {trigger} from {w}"
             self.assertIn(expected_str, info)

-    @mock.patch("documents.consumer.Consumer.try_consume_file")
-    def test_workflow_match_mailrule(self, m):
+    def test_workflow_match_mailrule(self):
         """
         GIVEN:
             - Existing workflow
@@ -211,7 +240,11 @@
         w.actions.add(action)
         w.save()

-        test_file = self.SAMPLE_DIR / "simple.pdf"
+        test_file = shutil.copy(
+            self.SAMPLE_DIR / "simple.pdf",
+            self.dirs.scratch_dir / "simple.pdf",
+        )

         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
             with self.assertLogs("paperless.matching", level="INFO") as cm:
                 tasks.consume_file(
@@ -222,31 +255,55 @@
                     ),
                     None,
                 )

-            m.assert_called_once()
-            _, overrides = m.call_args
-            self.assertEqual(overrides["override_correspondent_id"], self.c.pk)
-            self.assertEqual(overrides["override_document_type_id"], self.dt.pk)
-            self.assertEqual(
-                overrides["override_tag_ids"],
-                [self.t1.pk, self.t2.pk, self.t3.pk],
-            )
-            self.assertEqual(overrides["override_storage_path_id"], self.sp.pk)
-            self.assertEqual(overrides["override_owner_id"], self.user2.pk)
-            self.assertEqual(overrides["override_view_users"], [self.user3.pk])
-            self.assertEqual(overrides["override_view_groups"], [self.group1.pk])
-            self.assertEqual(overrides["override_change_users"], [self.user3.pk])
-            self.assertEqual(overrides["override_change_groups"], [self.group1.pk])
-            self.assertEqual(
-                overrides["override_title"],
-                "Doc from {correspondent}",
-            )
+            document = Document.objects.first()
+            self.assertEqual(document.correspondent, self.c)
+            self.assertEqual(document.document_type, self.dt)
+            self.assertEqual(list(document.tags.all()), [self.t1, self.t2, self.t3])
+            self.assertEqual(document.storage_path, self.sp)
+            self.assertEqual(document.owner, self.user2)
+            self.assertEqual(
+                list(
+                    get_users_with_perms(
+                        document,
+                        only_with_perms_in=["view_document"],
+                    ),
+                ),
+                [self.user3],
+            )
+            self.assertEqual(
+                list(
+                    get_groups_with_perms(
+                        document,
+                    ),
+                ),
+                [self.group1],
+            )
+            self.assertEqual(
+                list(
+                    get_users_with_perms(
+                        document,
+                        only_with_perms_in=["change_document"],
+                    ),
+                ),
+                [self.user3],
+            )
+            self.assertEqual(
+                list(
+                    get_groups_with_perms(
+                        document,
+                    ),
+                ),
+                [self.group1],
+            )
+            self.assertEqual(
+                document.title,
+                f"Doc from {self.c.name}",
+            )

             info = cm.output[0]
             expected_str = f"Document matched {trigger} from {w}"
             self.assertIn(expected_str, info)

-    @mock.patch("documents.consumer.Consumer.try_consume_file")
-    def test_workflow_match_multiple(self, m):
+    def test_workflow_match_multiple(self):
         """
         GIVEN:
             - Multiple existing workflow
@@ -259,7 +316,7 @@
         trigger1 = WorkflowTrigger.objects.create(
             type=WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
             sources=f"{DocumentSource.ApiUpload},{DocumentSource.ConsumeFolder},{DocumentSource.MailFetch}",
-            filter_path="*/samples/*",
+            filter_path=f"*/{self.dirs.scratch_dir.parts[-1]}/*",
         )
         action1 = WorkflowAction.objects.create(
             assign_title="Doc from {correspondent}",
@@ -301,7 +358,10 @@
         w2.actions.add(action2)
         w2.save()

-        test_file = self.SAMPLE_DIR / "simple.pdf"
+        test_file = shutil.copy(
+            self.SAMPLE_DIR / "simple.pdf",
+            self.dirs.scratch_dir / "simple.pdf",
+        )

         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
             with self.assertLogs("paperless.matching", level="INFO") as cm:
@@ -312,21 +372,25 @@
                     ),
                     None,
                 )

-            m.assert_called_once()
-            _, overrides = m.call_args
+            document = Document.objects.first()
             # template 1
-            self.assertEqual(overrides["override_document_type_id"], self.dt.pk)
+            self.assertEqual(document.document_type, self.dt)
             # template 2
-            self.assertEqual(overrides["override_correspondent_id"], self.c2.pk)
-            self.assertEqual(overrides["override_storage_path_id"], self.sp.pk)
+            self.assertEqual(document.correspondent, self.c2)
+            self.assertEqual(document.storage_path, self.sp)
             # template 1 & 2
             self.assertEqual(
-                overrides["override_tag_ids"],
-                [self.t1.pk, self.t2.pk, self.t3.pk],
+                list(document.tags.all()),
+                [self.t1, self.t2, self.t3],
             )
             self.assertEqual(
-                overrides["override_view_users"],
-                [self.user2.pk, self.user3.pk],
+                list(
+                    get_users_with_perms(
+                        document,
+                        only_with_perms_in=["view_document"],
+                    ),
+                ),
+                [self.user2, self.user3],
             )

             expected_str = f"Document matched {trigger1} from {w1}"
@@ -334,8 +398,7 @@
             expected_str = f"Document matched {trigger2} from {w2}"
             self.assertIn(expected_str, cm.output[1])

-    @mock.patch("documents.consumer.Consumer.try_consume_file")
-    def test_workflow_fnmatch_path(self, m):
+    def test_workflow_fnmatch_path(self):
         """
         GIVEN:
             - Existing workflow
@@ -348,7 +411,7 @@
         trigger = WorkflowTrigger.objects.create(
             type=WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
             sources=f"{DocumentSource.ApiUpload},{DocumentSource.ConsumeFolder},{DocumentSource.MailFetch}",
-            filter_path="*sample*",
+            filter_path=f"*{self.dirs.scratch_dir.parts[-1]}*",
         )
         action = WorkflowAction.objects.create(
             assign_title="Doc fnmatch title",
@@ -363,7 +426,10 @@
         w.actions.add(action)
         w.save()

-        test_file = self.SAMPLE_DIR / "simple.pdf"
+        test_file = shutil.copy(
+            self.SAMPLE_DIR / "simple.pdf",
+            self.dirs.scratch_dir / "simple.pdf",
+        )

         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
             with self.assertLogs("paperless.matching", level="DEBUG") as cm:
@@ -374,15 +440,13 @@
                     ),
                     None,
                 )

-            m.assert_called_once()
-            _, overrides = m.call_args
-            self.assertEqual(overrides["override_title"], "Doc fnmatch title")
+            document = Document.objects.first()
+            self.assertEqual(document.title, "Doc fnmatch title")

             expected_str = f"Document matched {trigger} from {w}"
             self.assertIn(expected_str, cm.output[0])

-    @mock.patch("documents.consumer.Consumer.try_consume_file")
-    def test_workflow_no_match_filename(self, m):
+    def test_workflow_no_match_filename(self):
         """
         GIVEN:
             - Existing workflow
@@ -414,7 +478,10 @@
         w.actions.add(action)
         w.save()

-        test_file = self.SAMPLE_DIR / "simple.pdf"
+        test_file = shutil.copy(
+            self.SAMPLE_DIR / "simple.pdf",
+            self.dirs.scratch_dir / "simple.pdf",
+        )

         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
             with self.assertLogs("paperless.matching", level="DEBUG") as cm:
@@ -425,26 +492,36 @@
                     ),
                     None,
                 )

-            m.assert_called_once()
-            _, overrides = m.call_args
-            self.assertIsNone(overrides["override_correspondent_id"])
-            self.assertIsNone(overrides["override_document_type_id"])
-            self.assertIsNone(overrides["override_tag_ids"])
-            self.assertIsNone(overrides["override_storage_path_id"])
-            self.assertIsNone(overrides["override_owner_id"])
-            self.assertIsNone(overrides["override_view_users"])
-            self.assertIsNone(overrides["override_view_groups"])
-            self.assertIsNone(overrides["override_change_users"])
-            self.assertIsNone(overrides["override_change_groups"])
-            self.assertIsNone(overrides["override_title"])
+            document = Document.objects.first()
+            self.assertIsNone(document.correspondent)
+            self.assertIsNone(document.document_type)
+            self.assertEqual(document.tags.all().count(), 0)
+            self.assertIsNone(document.storage_path)
+            self.assertIsNone(document.owner)
+            self.assertEqual(
+                get_users_with_perms(
+                    document,
+                    only_with_perms_in=["view_document"],
+                ).count(),
+                0,
+            )
+            self.assertEqual(get_groups_with_perms(document).count(), 0)
+            self.assertEqual(
+                get_users_with_perms(
+                    document,
+                    only_with_perms_in=["change_document"],
+                ).count(),
+                0,
+            )
+            self.assertEqual(get_groups_with_perms(document).count(), 0)
+            self.assertEqual(document.title, "simple")

             expected_str = f"Document did not match {w}"
             self.assertIn(expected_str, cm.output[0])
             expected_str = f"Document filename {test_file.name} does not match"
             self.assertIn(expected_str, cm.output[1])

-    @mock.patch("documents.consumer.Consumer.try_consume_file")
-    def test_workflow_no_match_path(self, m):
+    def test_workflow_no_match_path(self):
         """
         GIVEN:
             - Existing workflow
@@ -475,7 +552,10 @@
         w.actions.add(action)
         w.save()

-        test_file = self.SAMPLE_DIR / "simple.pdf"
+        test_file = shutil.copy(
+            self.SAMPLE_DIR / "simple.pdf",
+            self.dirs.scratch_dir / "simple.pdf",
+        )

         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
             with self.assertLogs("paperless.matching", level="DEBUG") as cm:
@@ -486,26 +566,46 @@
                     ),
                     None,
                 )

-            m.assert_called_once()
-            _, overrides = m.call_args
-            self.assertIsNone(overrides["override_correspondent_id"])
-            self.assertIsNone(overrides["override_document_type_id"])
-            self.assertIsNone(overrides["override_tag_ids"])
-            self.assertIsNone(overrides["override_storage_path_id"])
-            self.assertIsNone(overrides["override_owner_id"])
-            self.assertIsNone(overrides["override_view_users"])
-            self.assertIsNone(overrides["override_view_groups"])
-            self.assertIsNone(overrides["override_change_users"])
-            self.assertIsNone(overrides["override_change_groups"])
-            self.assertIsNone(overrides["override_title"])
+            document = Document.objects.first()
+            self.assertIsNone(document.correspondent)
+            self.assertIsNone(document.document_type)
+            self.assertEqual(document.tags.all().count(), 0)
+            self.assertIsNone(document.storage_path)
+            self.assertIsNone(document.owner)
+            self.assertEqual(
+                get_users_with_perms(
+                    document,
+                    only_with_perms_in=["view_document"],
+                ).count(),
+                0,
+            )
+            self.assertEqual(
+                get_groups_with_perms(
+                    document,
+                ).count(),
+                0,
+            )
+            self.assertEqual(
+                get_users_with_perms(
+                    document,
+                    only_with_perms_in=["change_document"],
+                ).count(),
+                0,
+            )
+            self.assertEqual(
+                get_groups_with_perms(
+                    document,
+                ).count(),
+                0,
+            )
+            self.assertEqual(document.title, "simple")

             expected_str = f"Document did not match {w}"
             self.assertIn(expected_str, cm.output[0])
             expected_str = f"Document path {test_file} does not match"
             self.assertIn(expected_str, cm.output[1])

-    @mock.patch("documents.consumer.Consumer.try_consume_file")
-    def test_workflow_no_match_mail_rule(self, m):
+    def test_workflow_no_match_mail_rule(self):
         """
         GIVEN:
             - Existing workflow
@@ -536,7 +636,10 @@
         w.actions.add(action)
         w.save()

-        test_file = self.SAMPLE_DIR / "simple.pdf"
+        test_file = shutil.copy(
+            self.SAMPLE_DIR / "simple.pdf",
+            self.dirs.scratch_dir / "simple.pdf",
+        )

         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
             with self.assertLogs("paperless.matching", level="DEBUG") as cm:
@@ -548,26 +651,46 @@
                     ),
                     None,
                 )

-            m.assert_called_once()
-            _, overrides = m.call_args
-            self.assertIsNone(overrides["override_correspondent_id"])
-            self.assertIsNone(overrides["override_document_type_id"])
-            self.assertIsNone(overrides["override_tag_ids"])
-            self.assertIsNone(overrides["override_storage_path_id"])
-            self.assertIsNone(overrides["override_owner_id"])
-            self.assertIsNone(overrides["override_view_users"])
-            self.assertIsNone(overrides["override_view_groups"])
-            self.assertIsNone(overrides["override_change_users"])
-            self.assertIsNone(overrides["override_change_groups"])
-            self.assertIsNone(overrides["override_title"])
+            document = Document.objects.first()
+            self.assertIsNone(document.correspondent)
+            self.assertIsNone(document.document_type)
+            self.assertEqual(document.tags.all().count(), 0)
+            self.assertIsNone(document.storage_path)
+            self.assertIsNone(document.owner)
+            self.assertEqual(
+                get_users_with_perms(
+                    document,
+                    only_with_perms_in=["view_document"],
+                ).count(),
+                0,
+            )
+            self.assertEqual(
+                get_groups_with_perms(
+                    document,
+                ).count(),
+                0,
+            )
+            self.assertEqual(
+                get_users_with_perms(
+                    document,
+                    only_with_perms_in=["change_document"],
+                ).count(),
+                0,
+            )
+            self.assertEqual(
+                get_groups_with_perms(
+                    document,
+                ).count(),
+                0,
+            )
+            self.assertEqual(document.title, "simple")

             expected_str = f"Document did not match {w}"
             self.assertIn(expected_str, cm.output[0])
             expected_str = "Document mail rule 99 !="
             self.assertIn(expected_str, cm.output[1])

-    @mock.patch("documents.consumer.Consumer.try_consume_file")
-    def test_workflow_no_match_source(self, m):
+    def test_workflow_no_match_source(self):
         """
         GIVEN:
             - Existing workflow
@@ -598,7 +721,10 @@
         w.actions.add(action)
         w.save()

-        test_file = self.SAMPLE_DIR / "simple.pdf"
+        test_file = shutil.copy(
+            self.SAMPLE_DIR / "simple.pdf",
+            self.dirs.scratch_dir / "simple.pdf",
+        )

         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
             with self.assertLogs("paperless.matching", level="DEBUG") as cm:
@@ -609,18 +735,39 @@
                     ),
                     None,
                 )

-            m.assert_called_once()
-            _, overrides = m.call_args
-            self.assertIsNone(overrides["override_correspondent_id"])
-            self.assertIsNone(overrides["override_document_type_id"])
-            self.assertIsNone(overrides["override_tag_ids"])
-            self.assertIsNone(overrides["override_storage_path_id"])
-            self.assertIsNone(overrides["override_owner_id"])
-            self.assertIsNone(overrides["override_view_users"])
-            self.assertIsNone(overrides["override_view_groups"])
-            self.assertIsNone(overrides["override_change_users"])
-            self.assertIsNone(overrides["override_change_groups"])
-            self.assertIsNone(overrides["override_title"])
+            document = Document.objects.first()
+            self.assertIsNone(document.correspondent)
+            self.assertIsNone(document.document_type)
+            self.assertEqual(document.tags.all().count(), 0)
+            self.assertIsNone(document.storage_path)
+            self.assertIsNone(document.owner)
+            self.assertEqual(
+                get_users_with_perms(
+                    document,
+                    only_with_perms_in=["view_document"],
+                ).count(),
+                0,
+            )
+            self.assertEqual(
+                get_groups_with_perms(
+                    document,
+                ).count(),
+                0,
+            )
+            self.assertEqual(
+                get_users_with_perms(
+                    document,
+                    only_with_perms_in=["change_document"],
+                ).count(),
+                0,
+            )
+            self.assertEqual(
+                get_groups_with_perms(
+                    document,
+                ).count(),
+                0,
+            )
+            self.assertEqual(document.title, "simple")

             expected_str = f"Document did not match {w}"
             self.assertIn(expected_str, cm.output[0])
@@ -662,8 +809,7 @@
             expected_str = f"No matching triggers with type {WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED} found"
             self.assertIn(expected_str, cm.output[1])

-    @mock.patch("documents.consumer.Consumer.try_consume_file")
-    def test_workflow_repeat_custom_fields(self, m):
+    def test_workflow_repeat_custom_fields(self):
         """
         GIVEN:
             - Existing workflows which assign the same custom field
@@ -693,7 +839,10 @@
         w.actions.add(action1, action2)
         w.save()

-        test_file = self.SAMPLE_DIR / "simple.pdf"
+        test_file = shutil.copy(
+            self.SAMPLE_DIR / "simple.pdf",
+            self.dirs.scratch_dir / "simple.pdf",
+        )

         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
             with self.assertLogs("paperless.matching", level="INFO") as cm:
with self.assertLogs("paperless.matching", level="INFO") as cm: with self.assertLogs("paperless.matching", level="INFO") as cm:
@ -704,10 +853,9 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
), ),
None, None,
) )
m.assert_called_once() document = Document.objects.first()
_, overrides = m.call_args
self.assertEqual( self.assertEqual(
overrides["override_custom_field_ids"], list(document.custom_fields.all().values_list("field", flat=True)),
[self.cf1.pk], [self.cf1.pk],
) )
@@ -1369,8 +1517,7 @@
         group_perms: QuerySet = get_groups_with_perms(doc)
         self.assertNotIn(self.group1, group_perms)

-    @mock.patch("documents.consumer.Consumer.try_consume_file")
-    def test_removal_action_document_consumed(self, m):
+    def test_removal_action_document_consumed(self):
         """
         GIVEN:
             - Workflow with assignment and removal actions
@@ -1429,7 +1576,10 @@
         w.actions.add(action2)
         w.save()

-        test_file = self.SAMPLE_DIR / "simple.pdf"
+        test_file = shutil.copy(
+            self.SAMPLE_DIR / "simple.pdf",
+            self.dirs.scratch_dir / "simple.pdf",
+        )

         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
             with self.assertLogs("paperless.matching", level="INFO") as cm:
with self.assertLogs("paperless.matching", level="INFO") as cm: with self.assertLogs("paperless.matching", level="INFO") as cm:
@ -1440,26 +1590,57 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
), ),
None, None,
) )
m.assert_called_once()
_, overrides = m.call_args document = Document.objects.first()
self.assertIsNone(overrides["override_correspondent_id"])
self.assertIsNone(overrides["override_document_type_id"]) self.assertIsNone(document.correspondent)
self.assertIsNone(document.document_type)
self.assertEqual( self.assertEqual(
overrides["override_tag_ids"], list(document.tags.all()),
[self.t2.pk, self.t3.pk], [self.t2, self.t3],
) )
self.assertIsNone(overrides["override_storage_path_id"]) self.assertIsNone(document.storage_path)
self.assertIsNone(overrides["override_owner_id"]) self.assertIsNone(document.owner)
self.assertEqual(overrides["override_view_users"], [self.user2.pk])
self.assertEqual(overrides["override_view_groups"], [self.group2.pk])
self.assertEqual(overrides["override_change_users"], [self.user2.pk])
self.assertEqual(overrides["override_change_groups"], [self.group2.pk])
self.assertEqual( self.assertEqual(
overrides["override_title"], list(
"Doc from {correspondent}", get_users_with_perms(
document,
only_with_perms_in=["view_document"],
),
),
[self.user2],
) )
self.assertEqual( self.assertEqual(
overrides["override_custom_field_ids"], list(
get_groups_with_perms(
document,
),
),
[self.group2],
)
self.assertEqual(
list(
get_users_with_perms(
document,
only_with_perms_in=["change_document"],
),
),
[self.user2],
)
self.assertEqual(
list(
get_groups_with_perms(
document,
),
),
[self.group2],
)
self.assertEqual(
document.title,
"Doc from None",
)
self.assertEqual(
list(document.custom_fields.all().values_list("field", flat=True)),
[self.cf2.pk], [self.cf2.pk],
) )
@@ -1467,8 +1648,7 @@
             expected_str = f"Document matched {trigger} from {w}"
             self.assertIn(expected_str, info)

-    @mock.patch("documents.consumer.Consumer.try_consume_file")
-    def test_removal_action_document_consumed_removeall(self, m):
+    def test_removal_action_document_consumed_remove_all(self):
         """
         GIVEN:
             - Workflow with assignment and removal actions with remove all fields set
@@ -1519,7 +1699,10 @@
         w.actions.add(action2)
         w.save()

-        test_file = self.SAMPLE_DIR / "simple.pdf"
+        test_file = shutil.copy(
+            self.SAMPLE_DIR / "simple.pdf",
+            self.dirs.scratch_dir / "simple.pdf",
+        )

         with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
             with self.assertLogs("paperless.matching", level="INFO") as cm:
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager): with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
with self.assertLogs("paperless.matching", level="INFO") as cm: with self.assertLogs("paperless.matching", level="INFO") as cm:
@ -1530,23 +1713,46 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
), ),
None, None,
) )
m.assert_called_once() document = Document.objects.first()
_, overrides = m.call_args self.assertIsNone(document.correspondent)
self.assertIsNone(overrides["override_correspondent_id"]) self.assertIsNone(document.document_type)
self.assertIsNone(overrides["override_document_type_id"]) self.assertEqual(document.tags.all().count(), 0)
self.assertIsNone(document.storage_path)
self.assertIsNone(document.owner)
self.assertEqual( self.assertEqual(
overrides["override_tag_ids"], get_users_with_perms(
[], document,
only_with_perms_in=["view_document"],
).count(),
0,
) )
self.assertIsNone(overrides["override_storage_path_id"])
self.assertIsNone(overrides["override_owner_id"])
self.assertEqual(overrides["override_view_users"], [])
self.assertEqual(overrides["override_view_groups"], [])
self.assertEqual(overrides["override_change_users"], [])
self.assertEqual(overrides["override_change_groups"], [])
self.assertEqual( self.assertEqual(
overrides["override_custom_field_ids"], get_groups_with_perms(
[], document,
).count(),
0,
)
self.assertEqual(
get_users_with_perms(
document,
only_with_perms_in=["change_document"],
).count(),
0,
)
self.assertEqual(
get_groups_with_perms(
document,
).count(),
0,
)
self.assertEqual(
document.custom_fields.all()
.values_list(
"field",
)
.count(),
0,
) )
info = cm.output[0] info = cm.output[0]

View File

@@ -3,6 +3,7 @@ import tempfile
 import time
 import warnings
 from collections import namedtuple
+from collections.abc import Generator
 from collections.abc import Iterator
 from contextlib import contextmanager
 from os import PathLike
@@ -21,8 +22,10 @@ from django.db.migrations.executor import MigrationExecutor
 from django.test import TransactionTestCase
 from django.test import override_settings

+from documents.consumer import ConsumerPlugin
 from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentMetadataOverrides
+from documents.data_models import DocumentSource
 from documents.parsers import ParseError
 from documents.plugins.helpers import ProgressStatusOptions
@@ -326,6 +329,30 @@
     BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"

+class GetConsumerMixin:
+    @contextmanager
+    def get_consumer(
+        self,
+        filepath: Path,
+        overrides: Union[DocumentMetadataOverrides, None] = None,
+        source: DocumentSource = DocumentSource.ConsumeFolder,
+    ) -> Generator[ConsumerPlugin, None, None]:
+        # Store this for verification
+        self.status = DummyProgressManager(filepath.name, None)
+        reader = ConsumerPlugin(
+            ConsumableDocument(source, original_file=filepath),
+            overrides or DocumentMetadataOverrides(),
+            self.status,  # type: ignore
+            self.dirs.scratch_dir,
+            "task-id",
+        )
+        reader.setup()
+        try:
+            yield reader
+        finally:
+            reader.cleanup()
+
 class DummyProgressManager:
     """
     A dummy handler for progress management that doesn't actually try to
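Editor's note: tests can then drive a real consumption instead of mocking Consumer.try_consume_file. A hypothetical test combining the mixins above:

```python
from django.test import TestCase

from documents.models import Document
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import GetConsumerMixin
from documents.tests.utils import SampleDirMixin


class TestConsumeSimple(DirectoriesMixin, SampleDirMixin, GetConsumerMixin, TestCase):
    def test_consume_simple_pdf(self):
        # get_consumer() builds a ConsumerPlugin against a DummyProgressManager
        with self.get_consumer(self.SAMPLE_DIR / "simple.pdf") as consumer:
            msg = consumer.run()
        self.assertIn("Success", msg)
        self.assertEqual(Document.objects.count(), 1)
```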

View File

@@ -7,7 +7,6 @@ import re
 import tempfile
 from os import PathLike
 from pathlib import Path
-from platform import machine
 from typing import Final
 from typing import Optional
 from typing import Union
@@ -112,7 +111,7 @@ def __get_list(
     return []

-def _parse_redis_url(env_redis: Optional[str]) -> tuple[str]:
+def _parse_redis_url(env_redis: Optional[str]) -> tuple[str, str]:
     """
     Gets the Redis information from the environment or a default and handles
     converting from incompatible django_channels and celery formats.
@@ -371,10 +370,7 @@ ASGI_APPLICATION = "paperless.asgi.application"
 STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", BASE_URL + "static/")
 WHITENOISE_STATIC_PREFIX = "/static/"

-if machine().lower() == "aarch64":  # pragma: no cover
-    _static_backend = "django.contrib.staticfiles.storage.StaticFilesStorage"
-else:
-    _static_backend = "whitenoise.storage.CompressedStaticFilesStorage"
+_static_backend = "django.contrib.staticfiles.storage.StaticFilesStorage"

 STORAGES = {
     "staticfiles": {

View File

@@ -425,6 +425,10 @@ class MailAccountHandler(LoggingMixin):
     logging_name = "paperless_mail"

+    def __init__(self) -> None:
+        super().__init__()
+        self.renew_logging_group()
+
     def _correspondent_from_name(self, name: str) -> Optional[Correspondent]:
         try:
             return Correspondent.objects.get_or_create(name=name)[0]