mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-01-20 22:24:24 -06:00
Compare commits
5 Commits
feature-pw
...
feature-be
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a071579f64 | ||
|
|
9a29195792 | ||
|
|
bdd00498a1 | ||
|
|
92deebddd4 | ||
|
|
c7efcee3d6 |
@@ -430,24 +430,6 @@
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
@case (WorkflowActionType.PasswordRemoval) {
|
||||
<div class="row">
|
||||
<div class="col">
|
||||
<p class="small" i18n>
|
||||
One or more passwords separated by commas or new lines. The workflow will try them in order until one succeeds.
|
||||
</p>
|
||||
<pngx-input-textarea
|
||||
i18n-title
|
||||
title="Passwords"
|
||||
formControlName="passwords"
|
||||
rows="4"
|
||||
[error]="error?.actions?.[i]?.passwords"
|
||||
hint="Passwords are stored in plain text. Use with caution."
|
||||
i18n-hint
|
||||
></pngx-input-textarea>
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
}
|
||||
</div>
|
||||
</ng-template>
|
||||
|
||||
@@ -139,10 +139,6 @@ export const WORKFLOW_ACTION_OPTIONS = [
|
||||
id: WorkflowActionType.Webhook,
|
||||
name: $localize`Webhook`,
|
||||
},
|
||||
{
|
||||
id: WorkflowActionType.PasswordRemoval,
|
||||
name: $localize`Password removal`,
|
||||
},
|
||||
]
|
||||
|
||||
export enum TriggerFilterType {
|
||||
@@ -1137,7 +1133,6 @@ export class WorkflowEditDialogComponent
|
||||
headers: new FormControl(action.webhook?.headers),
|
||||
include_document: new FormControl(!!action.webhook?.include_document),
|
||||
}),
|
||||
passwords: new FormControl(action.passwords),
|
||||
}),
|
||||
{ emitEvent }
|
||||
)
|
||||
|
||||
@@ -176,7 +176,6 @@ export enum ZoomSetting {
|
||||
NgxBootstrapIconsModule,
|
||||
PdfViewerModule,
|
||||
TextAreaComponent,
|
||||
PasswordRemovalConfirmDialogComponent,
|
||||
],
|
||||
})
|
||||
export class DocumentDetailComponent
|
||||
|
||||
@@ -5,7 +5,6 @@ export enum WorkflowActionType {
|
||||
Removal = 2,
|
||||
Email = 3,
|
||||
Webhook = 4,
|
||||
PasswordRemoval = 5,
|
||||
}
|
||||
|
||||
export interface WorkflowActionEmail extends ObjectWithId {
|
||||
@@ -98,6 +97,4 @@ export interface WorkflowAction extends ObjectWithId {
|
||||
email?: WorkflowActionEmail
|
||||
|
||||
webhook?: WorkflowActionWebhook
|
||||
|
||||
passwords?: string
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ from pikepdf import Pdf
|
||||
from documents.converters import convert_from_tiff_to_pdf
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
from documents.models import Document
|
||||
from documents.models import Tag
|
||||
from documents.plugins.base import ConsumeTaskPlugin
|
||||
from documents.plugins.base import StopConsumeTaskError
|
||||
@@ -115,6 +116,24 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
||||
self._tiff_conversion_done = False
|
||||
self.barcodes: list[Barcode] = []
|
||||
|
||||
def _apply_detected_asn(self, detected_asn: int) -> None:
|
||||
"""
|
||||
Apply a detected ASN to metadata if allowed.
|
||||
"""
|
||||
if (
|
||||
self.metadata.skip_asn_if_exists
|
||||
and Document.global_objects.filter(
|
||||
archive_serial_number=detected_asn,
|
||||
).exists()
|
||||
):
|
||||
logger.info(
|
||||
f"Found ASN in barcode {detected_asn} but skipping because it already exists.",
|
||||
)
|
||||
return
|
||||
|
||||
logger.info(f"Found ASN in barcode: {detected_asn}")
|
||||
self.metadata.asn = detected_asn
|
||||
|
||||
def run(self) -> None:
|
||||
# Some operations may use PIL, override pixel setting if needed
|
||||
maybe_override_pixel_limit()
|
||||
@@ -186,13 +205,8 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
||||
|
||||
# Update/overwrite an ASN if possible
|
||||
# After splitting, as otherwise each split document gets the same ASN
|
||||
if (
|
||||
self.settings.barcode_enable_asn
|
||||
and not self.metadata.skip_asn
|
||||
and (located_asn := self.asn) is not None
|
||||
):
|
||||
logger.info(f"Found ASN in barcode: {located_asn}")
|
||||
self.metadata.asn = located_asn
|
||||
if self.settings.barcode_enable_asn and (located_asn := self.asn) is not None:
|
||||
self._apply_detected_asn(located_asn)
|
||||
|
||||
def cleanup(self) -> None:
|
||||
self.temp_dir.cleanup()
|
||||
|
||||
@@ -7,7 +7,6 @@ from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Literal
|
||||
|
||||
from celery import chain
|
||||
from celery import chord
|
||||
from celery import group
|
||||
from celery import shared_task
|
||||
@@ -38,6 +37,42 @@ if TYPE_CHECKING:
|
||||
logger: logging.Logger = logging.getLogger("paperless.bulk_edit")
|
||||
|
||||
|
||||
@shared_task(bind=True)
|
||||
def restore_archive_serial_numbers_task(
|
||||
self,
|
||||
backup: dict[int, int],
|
||||
*args,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
restore_archive_serial_numbers(backup)
|
||||
|
||||
|
||||
def release_archive_serial_numbers(doc_ids: list[int]) -> dict[int, int]:
|
||||
"""
|
||||
Clears ASNs on documents that are about to be replaced so new documents
|
||||
can be assigned ASNs without uniqueness collisions. Returns a backup map
|
||||
of doc_id -> previous ASN for potential restoration.
|
||||
"""
|
||||
qs = Document.objects.filter(
|
||||
id__in=doc_ids,
|
||||
archive_serial_number__isnull=False,
|
||||
).only("pk", "archive_serial_number")
|
||||
backup = dict(qs.values_list("pk", "archive_serial_number"))
|
||||
qs.update(archive_serial_number=None)
|
||||
logger.info(f"Released archive serial numbers for documents {list(backup.keys())}")
|
||||
return backup
|
||||
|
||||
|
||||
def restore_archive_serial_numbers(backup: dict[int, int]) -> None:
|
||||
"""
|
||||
Restores ASNs using the provided backup map, intended for
|
||||
rollback when replacement consumption fails.
|
||||
"""
|
||||
for doc_id, asn in backup.items():
|
||||
Document.objects.filter(pk=doc_id).update(archive_serial_number=asn)
|
||||
logger.info(f"Restored archive serial numbers for documents {list(backup.keys())}")
|
||||
|
||||
|
||||
def set_correspondent(
|
||||
doc_ids: list[int],
|
||||
correspondent: Correspondent,
|
||||
@@ -386,6 +421,7 @@ def merge(
|
||||
|
||||
merged_pdf = pikepdf.new()
|
||||
version: str = merged_pdf.pdf_version
|
||||
handoff_asn: int | None = None
|
||||
# use doc_ids to preserve order
|
||||
for doc_id in doc_ids:
|
||||
doc = qs.get(id=doc_id)
|
||||
@@ -401,6 +437,8 @@ def merge(
|
||||
version = max(version, pdf.pdf_version)
|
||||
merged_pdf.pages.extend(pdf.pages)
|
||||
affected_docs.append(doc.id)
|
||||
if handoff_asn is None and doc.archive_serial_number is not None:
|
||||
handoff_asn = doc.archive_serial_number
|
||||
except Exception as e:
|
||||
logger.exception(
|
||||
f"Error merging document {doc.id}, it will not be included in the merge: {e}",
|
||||
@@ -426,6 +464,8 @@ def merge(
|
||||
DocumentMetadataOverrides.from_document(metadata_document)
|
||||
)
|
||||
overrides.title = metadata_document.title + " (merged)"
|
||||
if metadata_document.archive_serial_number is not None:
|
||||
handoff_asn = metadata_document.archive_serial_number
|
||||
else:
|
||||
overrides = DocumentMetadataOverrides()
|
||||
else:
|
||||
@@ -433,8 +473,11 @@ def merge(
|
||||
|
||||
if user is not None:
|
||||
overrides.owner_id = user.id
|
||||
# Avoid copying or detecting ASN from merged PDFs to prevent collision
|
||||
overrides.skip_asn = True
|
||||
if not delete_originals:
|
||||
overrides.skip_asn_if_exists = True
|
||||
|
||||
if delete_originals and handoff_asn is not None:
|
||||
overrides.asn = handoff_asn
|
||||
|
||||
logger.info("Adding merged document to the task queue.")
|
||||
|
||||
@@ -447,12 +490,20 @@ def merge(
|
||||
)
|
||||
|
||||
if delete_originals:
|
||||
backup = release_archive_serial_numbers(affected_docs)
|
||||
logger.info(
|
||||
"Queueing removal of original documents after consumption of merged document",
|
||||
)
|
||||
chain(consume_task, delete.si(affected_docs)).delay()
|
||||
else:
|
||||
consume_task.delay()
|
||||
try:
|
||||
consume_task.apply_async(
|
||||
link=[delete.si(affected_docs)],
|
||||
link_error=[restore_archive_serial_numbers_task.s(backup)],
|
||||
)
|
||||
except Exception:
|
||||
restore_archive_serial_numbers(backup)
|
||||
raise
|
||||
else:
|
||||
consume_task.delay()
|
||||
|
||||
return "OK"
|
||||
|
||||
@@ -494,6 +545,8 @@ def split(
|
||||
overrides.title = f"{doc.title} (split {idx + 1})"
|
||||
if user is not None:
|
||||
overrides.owner_id = user.id
|
||||
if not delete_originals:
|
||||
overrides.skip_asn_if_exists = True
|
||||
logger.info(
|
||||
f"Adding split document with pages {split_doc} to the task queue.",
|
||||
)
|
||||
@@ -508,10 +561,20 @@ def split(
|
||||
)
|
||||
|
||||
if delete_originals:
|
||||
backup = release_archive_serial_numbers([doc.id])
|
||||
logger.info(
|
||||
"Queueing removal of original document after consumption of the split documents",
|
||||
)
|
||||
chord(header=consume_tasks, body=delete.si([doc.id])).delay()
|
||||
try:
|
||||
chord(
|
||||
header=consume_tasks,
|
||||
body=delete.si([doc.id]),
|
||||
).apply_async(
|
||||
link_error=[restore_archive_serial_numbers_task.s(backup)],
|
||||
)
|
||||
except Exception:
|
||||
restore_archive_serial_numbers(backup)
|
||||
raise
|
||||
else:
|
||||
group(consume_tasks).delay()
|
||||
|
||||
@@ -614,7 +677,10 @@ def edit_pdf(
|
||||
)
|
||||
if user is not None:
|
||||
overrides.owner_id = user.id
|
||||
|
||||
if not delete_original:
|
||||
overrides.skip_asn_if_exists = True
|
||||
if delete_original and len(pdf_docs) == 1:
|
||||
overrides.asn = doc.archive_serial_number
|
||||
for idx, pdf in enumerate(pdf_docs, start=1):
|
||||
filepath: Path = (
|
||||
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
|
||||
@@ -633,7 +699,17 @@ def edit_pdf(
|
||||
)
|
||||
|
||||
if delete_original:
|
||||
chord(header=consume_tasks, body=delete.si([doc.id])).delay()
|
||||
backup = release_archive_serial_numbers([doc.id])
|
||||
try:
|
||||
chord(
|
||||
header=consume_tasks,
|
||||
body=delete.si([doc.id]),
|
||||
).apply_async(
|
||||
link_error=[restore_archive_serial_numbers_task.s(backup)],
|
||||
)
|
||||
except Exception:
|
||||
restore_archive_serial_numbers(backup)
|
||||
raise
|
||||
else:
|
||||
group(consume_tasks).delay()
|
||||
|
||||
|
||||
@@ -696,7 +696,7 @@ class ConsumerPlugin(
|
||||
pk=self.metadata.storage_path_id,
|
||||
)
|
||||
|
||||
if self.metadata.asn is not None and not self.metadata.skip_asn:
|
||||
if self.metadata.asn is not None:
|
||||
document.archive_serial_number = self.metadata.asn
|
||||
|
||||
if self.metadata.owner_id:
|
||||
@@ -812,8 +812,8 @@ class ConsumerPreflightPlugin(
|
||||
"""
|
||||
Check that if override_asn is given, it is unique and within a valid range
|
||||
"""
|
||||
if self.metadata.skip_asn or self.metadata.asn is None:
|
||||
# if skip is set or ASN is None
|
||||
if self.metadata.asn is None:
|
||||
# if ASN is None
|
||||
return
|
||||
# Validate the range is above zero and less than uint32_t max
|
||||
# otherwise, Whoosh can't handle it in the index
|
||||
|
||||
@@ -30,7 +30,7 @@ class DocumentMetadataOverrides:
|
||||
change_users: list[int] | None = None
|
||||
change_groups: list[int] | None = None
|
||||
custom_fields: dict | None = None
|
||||
skip_asn: bool = False
|
||||
skip_asn_if_exists: bool = False
|
||||
|
||||
def update(self, other: "DocumentMetadataOverrides") -> "DocumentMetadataOverrides":
|
||||
"""
|
||||
@@ -50,8 +50,8 @@ class DocumentMetadataOverrides:
|
||||
self.storage_path_id = other.storage_path_id
|
||||
if other.owner_id is not None:
|
||||
self.owner_id = other.owner_id
|
||||
if other.skip_asn:
|
||||
self.skip_asn = True
|
||||
if other.skip_asn_if_exists:
|
||||
self.skip_asn_if_exists = True
|
||||
|
||||
# merge
|
||||
if self.tag_ids is None:
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
# Generated by Django 5.2.7 on 2025-12-29 03:56
|
||||
|
||||
from django.db import migrations
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
("documents", "1074_workflowrun_deleted_at_workflowrun_restored_at_and_more"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name="workflowaction",
|
||||
name="passwords",
|
||||
field=models.TextField(
|
||||
blank=True,
|
||||
help_text="Passwords to try when removing PDF protection. Separate with commas or new lines.",
|
||||
null=True,
|
||||
verbose_name="passwords",
|
||||
),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name="workflowaction",
|
||||
name="type",
|
||||
field=models.PositiveIntegerField(
|
||||
choices=[
|
||||
(1, "Assignment"),
|
||||
(2, "Removal"),
|
||||
(3, "Email"),
|
||||
(4, "Webhook"),
|
||||
(5, "Password removal"),
|
||||
],
|
||||
default=1,
|
||||
verbose_name="Workflow Action Type",
|
||||
),
|
||||
),
|
||||
]
|
||||
@@ -1287,10 +1287,6 @@ class WorkflowAction(models.Model):
|
||||
4,
|
||||
_("Webhook"),
|
||||
)
|
||||
PASSWORD_REMOVAL = (
|
||||
5,
|
||||
_("Password removal"),
|
||||
)
|
||||
|
||||
type = models.PositiveIntegerField(
|
||||
_("Workflow Action Type"),
|
||||
@@ -1518,15 +1514,6 @@ class WorkflowAction(models.Model):
|
||||
verbose_name=_("webhook"),
|
||||
)
|
||||
|
||||
passwords = models.TextField(
|
||||
_("passwords"),
|
||||
null=True,
|
||||
blank=True,
|
||||
help_text=_(
|
||||
"Passwords to try when removing PDF protection. Separate with commas or new lines.",
|
||||
),
|
||||
)
|
||||
|
||||
class Meta:
|
||||
verbose_name = _("workflow action")
|
||||
verbose_name_plural = _("workflow actions")
|
||||
|
||||
@@ -2449,7 +2449,6 @@ class WorkflowActionSerializer(serializers.ModelSerializer):
|
||||
"remove_change_groups",
|
||||
"email",
|
||||
"webhook",
|
||||
"passwords",
|
||||
]
|
||||
|
||||
def validate(self, attrs):
|
||||
@@ -2506,20 +2505,6 @@ class WorkflowActionSerializer(serializers.ModelSerializer):
|
||||
"Webhook data is required for webhook actions",
|
||||
)
|
||||
|
||||
if (
|
||||
"type" in attrs
|
||||
and attrs["type"] == WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL
|
||||
):
|
||||
passwords = attrs.get("passwords")
|
||||
if passwords is None or not isinstance(passwords, str):
|
||||
raise serializers.ValidationError(
|
||||
"Passwords are required for password removal actions",
|
||||
)
|
||||
if not passwords.strip():
|
||||
raise serializers.ValidationError(
|
||||
"Passwords are required for password removal actions",
|
||||
)
|
||||
|
||||
return attrs
|
||||
|
||||
|
||||
|
||||
@@ -46,7 +46,6 @@ from documents.permissions import get_objects_for_user_owner_aware
|
||||
from documents.templating.utils import convert_format_str_to_template_format
|
||||
from documents.workflows.actions import build_workflow_action_context
|
||||
from documents.workflows.actions import execute_email_action
|
||||
from documents.workflows.actions import execute_password_removal_action
|
||||
from documents.workflows.actions import execute_webhook_action
|
||||
from documents.workflows.mutations import apply_assignment_to_document
|
||||
from documents.workflows.mutations import apply_assignment_to_overrides
|
||||
@@ -793,8 +792,6 @@ def run_workflows(
|
||||
logging_group,
|
||||
original_file,
|
||||
)
|
||||
elif action.type == WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL:
|
||||
execute_password_removal_action(action, document, logging_group)
|
||||
|
||||
if not use_overrides:
|
||||
# limit title to 128 characters
|
||||
|
||||
@@ -808,57 +808,3 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.action.refresh_from_db()
|
||||
self.assertEqual(self.action.assign_title, "Patched Title")
|
||||
|
||||
def test_password_action_passwords_field(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Nothing
|
||||
WHEN:
|
||||
- A workflow password removal action is created with passwords set
|
||||
THEN:
|
||||
- The passwords field is correctly stored and retrieved
|
||||
"""
|
||||
passwords = "password1,password2\npassword3"
|
||||
response = self.client.post(
|
||||
"/api/workflow_actions/",
|
||||
{
|
||||
"type": WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL,
|
||||
"passwords": passwords,
|
||||
},
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
|
||||
self.assertEqual(response.data["passwords"], passwords)
|
||||
|
||||
def test_password_action_no_passwords_field(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Nothing
|
||||
WHEN:
|
||||
- A workflow password removal action is created with no passwords set
|
||||
- A workflow password removal action is created with passwords set to empty string
|
||||
THEN:
|
||||
- The required validation error is raised
|
||||
"""
|
||||
response = self.client.post(
|
||||
"/api/workflow_actions/",
|
||||
{
|
||||
"type": WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL,
|
||||
},
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(
|
||||
"Passwords are required",
|
||||
str(response.data["non_field_errors"][0]),
|
||||
)
|
||||
response = self.client.post(
|
||||
"/api/workflow_actions/",
|
||||
{
|
||||
"type": WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL,
|
||||
"passwords": "",
|
||||
},
|
||||
)
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(
|
||||
"Passwords are required",
|
||||
str(response.data["non_field_errors"][0]),
|
||||
)
|
||||
|
||||
@@ -603,23 +603,21 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
expected_filename,
|
||||
)
|
||||
self.assertEqual(consume_file_args[1].title, None)
|
||||
self.assertTrue(consume_file_args[1].skip_asn)
|
||||
# No metadata_document_id, delete_originals False, so ASN should be None
|
||||
self.assertIsNone(consume_file_args[1].asn)
|
||||
|
||||
# With metadata_document_id overrides
|
||||
result = bulk_edit.merge(doc_ids, metadata_document_id=metadata_document_id)
|
||||
consume_file_args, _ = mock_consume_file.call_args
|
||||
self.assertEqual(consume_file_args[1].title, "B (merged)")
|
||||
self.assertEqual(consume_file_args[1].created, self.doc2.created)
|
||||
self.assertTrue(consume_file_args[1].skip_asn)
|
||||
|
||||
self.assertEqual(result, "OK")
|
||||
|
||||
@mock.patch("documents.bulk_edit.delete.si")
|
||||
@mock.patch("documents.tasks.consume_file.s")
|
||||
@mock.patch("documents.bulk_edit.chain")
|
||||
def test_merge_and_delete_originals(
|
||||
self,
|
||||
mock_chain,
|
||||
mock_consume_file,
|
||||
mock_delete_documents,
|
||||
):
|
||||
@@ -633,6 +631,12 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
- Document deletion task should be called
|
||||
"""
|
||||
doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
|
||||
self.doc1.archive_serial_number = 101
|
||||
self.doc2.archive_serial_number = 102
|
||||
self.doc3.archive_serial_number = 103
|
||||
self.doc1.save()
|
||||
self.doc2.save()
|
||||
self.doc3.save()
|
||||
|
||||
result = bulk_edit.merge(doc_ids, delete_originals=True)
|
||||
self.assertEqual(result, "OK")
|
||||
@@ -643,7 +647,8 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
|
||||
mock_consume_file.assert_called()
|
||||
mock_delete_documents.assert_called()
|
||||
mock_chain.assert_called_once()
|
||||
consume_sig = mock_consume_file.return_value
|
||||
consume_sig.apply_async.assert_called_once()
|
||||
|
||||
consume_file_args, _ = mock_consume_file.call_args
|
||||
self.assertEqual(
|
||||
@@ -651,7 +656,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
expected_filename,
|
||||
)
|
||||
self.assertEqual(consume_file_args[1].title, None)
|
||||
self.assertTrue(consume_file_args[1].skip_asn)
|
||||
self.assertEqual(consume_file_args[1].asn, 101)
|
||||
|
||||
delete_documents_args, _ = mock_delete_documents.call_args
|
||||
self.assertEqual(
|
||||
@@ -659,6 +664,92 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
doc_ids,
|
||||
)
|
||||
|
||||
self.doc1.refresh_from_db()
|
||||
self.doc2.refresh_from_db()
|
||||
self.doc3.refresh_from_db()
|
||||
self.assertIsNone(self.doc1.archive_serial_number)
|
||||
self.assertIsNone(self.doc2.archive_serial_number)
|
||||
self.assertIsNone(self.doc3.archive_serial_number)
|
||||
|
||||
@mock.patch("documents.bulk_edit.delete.si")
|
||||
@mock.patch("documents.tasks.consume_file.s")
|
||||
def test_merge_and_delete_originals_restore_on_failure(
|
||||
self,
|
||||
mock_consume_file,
|
||||
mock_delete_documents,
|
||||
):
|
||||
"""
|
||||
GIVEN:
|
||||
- Existing documents
|
||||
WHEN:
|
||||
- Merge action with deleting documents is called with 1 document
|
||||
- Error occurs when queuing consume file task
|
||||
THEN:
|
||||
- Archive serial numbers are restored
|
||||
"""
|
||||
doc_ids = [self.doc1.id]
|
||||
self.doc1.archive_serial_number = 111
|
||||
self.doc1.save()
|
||||
sig = mock.Mock()
|
||||
sig.apply_async.side_effect = Exception("boom")
|
||||
mock_consume_file.return_value = sig
|
||||
|
||||
with self.assertRaises(Exception):
|
||||
bulk_edit.merge(doc_ids, delete_originals=True)
|
||||
|
||||
self.doc1.refresh_from_db()
|
||||
self.assertEqual(self.doc1.archive_serial_number, 111)
|
||||
|
||||
@mock.patch("documents.bulk_edit.delete.si")
|
||||
@mock.patch("documents.tasks.consume_file.s")
|
||||
def test_merge_and_delete_originals_metadata_handoff(
|
||||
self,
|
||||
mock_consume_file,
|
||||
mock_delete_documents,
|
||||
):
|
||||
"""
|
||||
GIVEN:
|
||||
- Existing documents with ASNs
|
||||
WHEN:
|
||||
- Merge with delete_originals=True and metadata_document_id set
|
||||
THEN:
|
||||
- Handoff ASN uses metadata document ASN
|
||||
"""
|
||||
doc_ids = [self.doc1.id, self.doc2.id]
|
||||
self.doc1.archive_serial_number = 101
|
||||
self.doc2.archive_serial_number = 202
|
||||
self.doc1.save()
|
||||
self.doc2.save()
|
||||
|
||||
result = bulk_edit.merge(
|
||||
doc_ids,
|
||||
metadata_document_id=self.doc2.id,
|
||||
delete_originals=True,
|
||||
)
|
||||
self.assertEqual(result, "OK")
|
||||
|
||||
consume_file_args, _ = mock_consume_file.call_args
|
||||
self.assertEqual(consume_file_args[1].asn, 202)
|
||||
|
||||
def test_restore_archive_serial_numbers_task(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Existing document with no archive serial number
|
||||
WHEN:
|
||||
- Restore archive serial number task is called with backup data
|
||||
THEN:
|
||||
- Document archive serial number is restored
|
||||
"""
|
||||
self.doc1.archive_serial_number = 444
|
||||
self.doc1.save()
|
||||
Document.objects.filter(pk=self.doc1.id).update(archive_serial_number=None)
|
||||
|
||||
backup = {self.doc1.id: 444}
|
||||
bulk_edit.restore_archive_serial_numbers_task(backup)
|
||||
|
||||
self.doc1.refresh_from_db()
|
||||
self.assertEqual(self.doc1.archive_serial_number, 444)
|
||||
|
||||
@mock.patch("documents.tasks.consume_file.s")
|
||||
def test_merge_with_archive_fallback(self, mock_consume_file):
|
||||
"""
|
||||
@@ -727,6 +818,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(mock_consume_file.call_count, 2)
|
||||
consume_file_args, _ = mock_consume_file.call_args
|
||||
self.assertEqual(consume_file_args[1].title, "B (split 2)")
|
||||
self.assertIsNone(consume_file_args[1].asn)
|
||||
|
||||
self.assertEqual(result, "OK")
|
||||
|
||||
@@ -751,6 +843,8 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
"""
|
||||
doc_ids = [self.doc2.id]
|
||||
pages = [[1, 2], [3]]
|
||||
self.doc2.archive_serial_number = 200
|
||||
self.doc2.save()
|
||||
|
||||
result = bulk_edit.split(doc_ids, pages, delete_originals=True)
|
||||
self.assertEqual(result, "OK")
|
||||
@@ -768,6 +862,42 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
doc_ids,
|
||||
)
|
||||
|
||||
self.doc2.refresh_from_db()
|
||||
self.assertIsNone(self.doc2.archive_serial_number)
|
||||
|
||||
@mock.patch("documents.bulk_edit.delete.si")
|
||||
@mock.patch("documents.tasks.consume_file.s")
|
||||
@mock.patch("documents.bulk_edit.chord")
|
||||
def test_split_restore_on_failure(
|
||||
self,
|
||||
mock_chord,
|
||||
mock_consume_file,
|
||||
mock_delete_documents,
|
||||
):
|
||||
"""
|
||||
GIVEN:
|
||||
- Existing documents
|
||||
WHEN:
|
||||
- Split action with deleting documents is called with 1 document and 2 page groups
|
||||
- Error occurs when queuing chord task
|
||||
THEN:
|
||||
- Archive serial numbers are restored
|
||||
"""
|
||||
doc_ids = [self.doc2.id]
|
||||
pages = [[1, 2]]
|
||||
self.doc2.archive_serial_number = 222
|
||||
self.doc2.save()
|
||||
|
||||
sig = mock.Mock()
|
||||
sig.apply_async.side_effect = Exception("boom")
|
||||
mock_chord.return_value = sig
|
||||
|
||||
result = bulk_edit.split(doc_ids, pages, delete_originals=True)
|
||||
self.assertEqual(result, "OK")
|
||||
|
||||
self.doc2.refresh_from_db()
|
||||
self.assertEqual(self.doc2.archive_serial_number, 222)
|
||||
|
||||
@mock.patch("documents.tasks.consume_file.delay")
|
||||
@mock.patch("pikepdf.Pdf.save")
|
||||
def test_split_with_errors(self, mock_save_pdf, mock_consume_file):
|
||||
@@ -968,10 +1098,49 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
mock_chord.return_value.delay.return_value = None
|
||||
doc_ids = [self.doc2.id]
|
||||
operations = [{"page": 1}, {"page": 2}]
|
||||
self.doc2.archive_serial_number = 250
|
||||
self.doc2.save()
|
||||
|
||||
result = bulk_edit.edit_pdf(doc_ids, operations, delete_original=True)
|
||||
self.assertEqual(result, "OK")
|
||||
mock_chord.assert_called_once()
|
||||
consume_file_args, _ = mock_consume_file.call_args
|
||||
self.assertEqual(consume_file_args[1].asn, 250)
|
||||
self.doc2.refresh_from_db()
|
||||
self.assertIsNone(self.doc2.archive_serial_number)
|
||||
|
||||
@mock.patch("documents.bulk_edit.delete.si")
|
||||
@mock.patch("documents.tasks.consume_file.s")
|
||||
@mock.patch("documents.bulk_edit.chord")
|
||||
def test_edit_pdf_restore_on_failure(
|
||||
self,
|
||||
mock_chord,
|
||||
mock_consume_file,
|
||||
mock_delete_documents,
|
||||
):
|
||||
"""
|
||||
GIVEN:
|
||||
- Existing document
|
||||
WHEN:
|
||||
- edit_pdf is called with delete_original=True
|
||||
- Error occurs when queuing chord task
|
||||
THEN:
|
||||
- Archive serial numbers are restored
|
||||
"""
|
||||
doc_ids = [self.doc2.id]
|
||||
operations = [{"page": 1}]
|
||||
self.doc2.archive_serial_number = 333
|
||||
self.doc2.save()
|
||||
|
||||
sig = mock.Mock()
|
||||
sig.apply_async.side_effect = Exception("boom")
|
||||
mock_chord.return_value = sig
|
||||
|
||||
with self.assertRaises(Exception):
|
||||
bulk_edit.edit_pdf(doc_ids, operations, delete_original=True)
|
||||
|
||||
self.doc2.refresh_from_db()
|
||||
self.assertEqual(self.doc2.archive_serial_number, 333)
|
||||
|
||||
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
|
||||
def test_edit_pdf_with_update_document(self, mock_update_document):
|
||||
|
||||
@@ -14,6 +14,7 @@ from django.test import override_settings
|
||||
from django.utils import timezone
|
||||
from guardian.core import ObjectPermissionChecker
|
||||
|
||||
from documents.barcodes import BarcodePlugin
|
||||
from documents.consumer import ConsumerError
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
from documents.data_models import DocumentSource
|
||||
@@ -412,14 +413,6 @@ class TestConsumer(
|
||||
self.assertEqual(document.archive_serial_number, 123)
|
||||
self._assert_first_last_send_progress()
|
||||
|
||||
def testMetadataOverridesSkipAsnPropagation(self):
|
||||
overrides = DocumentMetadataOverrides()
|
||||
incoming = DocumentMetadataOverrides(skip_asn=True)
|
||||
|
||||
overrides.update(incoming)
|
||||
|
||||
self.assertTrue(overrides.skip_asn)
|
||||
|
||||
def testOverrideTitlePlaceholders(self):
|
||||
c = Correspondent.objects.create(name="Correspondent Name")
|
||||
dt = DocumentType.objects.create(name="DocType Name")
|
||||
@@ -1240,3 +1233,46 @@ class PostConsumeTestCase(DirectoriesMixin, GetConsumerMixin, TestCase):
|
||||
r"sample\.pdf: Error while executing post-consume script: Command '\[.*\]' returned non-zero exit status \d+\.",
|
||||
):
|
||||
consumer.run_post_consume_script(doc)
|
||||
|
||||
|
||||
class TestMetadataOverrides(TestCase):
|
||||
def test_update_skip_asn_if_exists(self):
|
||||
base = DocumentMetadataOverrides()
|
||||
incoming = DocumentMetadataOverrides(skip_asn_if_exists=True)
|
||||
base.update(incoming)
|
||||
self.assertTrue(base.skip_asn_if_exists)
|
||||
|
||||
|
||||
class TestBarcodeApplyDetectedASN(TestCase):
|
||||
"""
|
||||
GIVEN:
|
||||
- Existing Documents with ASN 123
|
||||
WHEN:
|
||||
- A BarcodePlugin which detected an ASN
|
||||
THEN:
|
||||
- If skip_asn_if_exists is set, and ASN exists, do not set ASN
|
||||
- If skip_asn_if_exists is set, and ASN does not exist, set ASN
|
||||
"""
|
||||
|
||||
def test_apply_detected_asn_skips_existing_when_flag_set(self):
|
||||
doc = Document.objects.create(
|
||||
checksum="X1",
|
||||
title="D1",
|
||||
archive_serial_number=123,
|
||||
)
|
||||
metadata = DocumentMetadataOverrides(skip_asn_if_exists=True)
|
||||
plugin = BarcodePlugin(
|
||||
input_doc=mock.Mock(),
|
||||
metadata=metadata,
|
||||
status_mgr=mock.Mock(),
|
||||
base_tmp_dir=tempfile.gettempdir(),
|
||||
task_id="test-task",
|
||||
)
|
||||
|
||||
plugin._apply_detected_asn(123)
|
||||
self.assertIsNone(plugin.metadata.asn)
|
||||
|
||||
doc.hard_delete()
|
||||
|
||||
plugin._apply_detected_asn(123)
|
||||
self.assertEqual(plugin.metadata.asn, 123)
|
||||
|
||||
@@ -2,7 +2,6 @@ import datetime
|
||||
import json
|
||||
import shutil
|
||||
import socket
|
||||
import tempfile
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
@@ -61,7 +60,6 @@ from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import DummyProgressManager
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
from documents.tests.utils import SampleDirMixin
|
||||
from documents.workflows.actions import execute_password_removal_action
|
||||
from paperless_mail.models import MailAccount
|
||||
from paperless_mail.models import MailRule
|
||||
|
||||
@@ -3612,196 +3610,6 @@ class TestWorkflows(
|
||||
|
||||
mock_post.assert_called_once()
|
||||
|
||||
@mock.patch("documents.bulk_edit.remove_password")
|
||||
def test_password_removal_action_attempts_multiple_passwords(
|
||||
self,
|
||||
mock_remove_password,
|
||||
):
|
||||
"""
|
||||
GIVEN:
|
||||
- Workflow password removal action
|
||||
- Multiple passwords provided
|
||||
WHEN:
|
||||
- Document updated triggering the workflow
|
||||
THEN:
|
||||
- Password removal is attempted until one succeeds
|
||||
"""
|
||||
doc = Document.objects.create(
|
||||
title="Protected",
|
||||
checksum="pw-checksum",
|
||||
)
|
||||
trigger = WorkflowTrigger.objects.create(
|
||||
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
|
||||
)
|
||||
action = WorkflowAction.objects.create(
|
||||
type=WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL,
|
||||
passwords="wrong, right\n extra ",
|
||||
)
|
||||
workflow = Workflow.objects.create(name="Password workflow")
|
||||
workflow.triggers.add(trigger)
|
||||
workflow.actions.add(action)
|
||||
|
||||
mock_remove_password.side_effect = [
|
||||
ValueError("wrong password"),
|
||||
"OK",
|
||||
]
|
||||
|
||||
run_workflows(trigger.type, doc)
|
||||
|
||||
assert mock_remove_password.call_count == 2
|
||||
mock_remove_password.assert_has_calls(
|
||||
[
|
||||
mock.call(
|
||||
[doc.id],
|
||||
password="wrong",
|
||||
update_document=True,
|
||||
user=doc.owner,
|
||||
),
|
||||
mock.call(
|
||||
[doc.id],
|
||||
password="right",
|
||||
update_document=True,
|
||||
user=doc.owner,
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
@mock.patch("documents.bulk_edit.remove_password")
|
||||
def test_password_removal_action_fails_without_correct_password(
|
||||
self,
|
||||
mock_remove_password,
|
||||
):
|
||||
"""
|
||||
GIVEN:
|
||||
- Workflow password removal action
|
||||
- No correct password provided
|
||||
WHEN:
|
||||
- Document updated triggering the workflow
|
||||
THEN:
|
||||
- Password removal is attempted for all passwords and fails
|
||||
"""
|
||||
doc = Document.objects.create(
|
||||
title="Protected",
|
||||
checksum="pw-checksum-2",
|
||||
)
|
||||
trigger = WorkflowTrigger.objects.create(
|
||||
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
|
||||
)
|
||||
action = WorkflowAction.objects.create(
|
||||
type=WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL,
|
||||
passwords=" \n , ",
|
||||
)
|
||||
workflow = Workflow.objects.create(name="Password workflow missing passwords")
|
||||
workflow.triggers.add(trigger)
|
||||
workflow.actions.add(action)
|
||||
|
||||
run_workflows(trigger.type, doc)
|
||||
|
||||
mock_remove_password.assert_not_called()
|
||||
|
||||
@mock.patch("documents.bulk_edit.remove_password")
|
||||
def test_password_removal_action_skips_without_passwords(
|
||||
self,
|
||||
mock_remove_password,
|
||||
):
|
||||
"""
|
||||
GIVEN:
|
||||
- Workflow password removal action with no passwords
|
||||
WHEN:
|
||||
- Workflow is run
|
||||
THEN:
|
||||
- Password removal is not attempted
|
||||
"""
|
||||
doc = Document.objects.create(
|
||||
title="Protected",
|
||||
checksum="pw-checksum-2",
|
||||
)
|
||||
trigger = WorkflowTrigger.objects.create(
|
||||
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
|
||||
)
|
||||
action = WorkflowAction.objects.create(
|
||||
type=WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL,
|
||||
passwords="",
|
||||
)
|
||||
workflow = Workflow.objects.create(name="Password workflow missing passwords")
|
||||
workflow.triggers.add(trigger)
|
||||
workflow.actions.add(action)
|
||||
|
||||
run_workflows(trigger.type, doc)
|
||||
|
||||
mock_remove_password.assert_not_called()
|
||||
|
||||
@mock.patch("documents.bulk_edit.remove_password")
|
||||
def test_password_removal_consumable_document_deferred(
|
||||
self,
|
||||
mock_remove_password,
|
||||
):
|
||||
"""
|
||||
GIVEN:
|
||||
- Workflow password removal action
|
||||
- Simulated consumption trigger (a ConsumableDocument is used)
|
||||
WHEN:
|
||||
- Document consumption is finished
|
||||
THEN:
|
||||
- Password removal is attempted
|
||||
"""
|
||||
action = WorkflowAction.objects.create(
|
||||
type=WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL,
|
||||
passwords="first, second",
|
||||
)
|
||||
|
||||
temp_dir = Path(tempfile.mkdtemp())
|
||||
original_file = temp_dir / "file.pdf"
|
||||
original_file.write_bytes(b"pdf content")
|
||||
consumable = ConsumableDocument(
|
||||
source=DocumentSource.ApiUpload,
|
||||
original_file=original_file,
|
||||
)
|
||||
|
||||
execute_password_removal_action(action, consumable, logging_group=None)
|
||||
|
||||
mock_remove_password.assert_not_called()
|
||||
|
||||
mock_remove_password.side_effect = [
|
||||
ValueError("bad password"),
|
||||
"OK",
|
||||
]
|
||||
|
||||
doc = Document.objects.create(
|
||||
checksum="pw-checksum-consumed",
|
||||
title="Protected",
|
||||
)
|
||||
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__,
|
||||
document=doc,
|
||||
)
|
||||
|
||||
assert mock_remove_password.call_count == 2
|
||||
mock_remove_password.assert_has_calls(
|
||||
[
|
||||
mock.call(
|
||||
[doc.id],
|
||||
password="first",
|
||||
update_document=True,
|
||||
user=doc.owner,
|
||||
),
|
||||
mock.call(
|
||||
[doc.id],
|
||||
password="second",
|
||||
update_document=True,
|
||||
user=doc.owner,
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
# ensure handler disconnected after first run
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__,
|
||||
document=doc,
|
||||
)
|
||||
assert mock_remove_password.call_count == 2
|
||||
|
||||
|
||||
class TestWebhookSend:
|
||||
def test_send_webhook_data_or_json(
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
@@ -15,7 +14,6 @@ from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import WorkflowAction
|
||||
from documents.models import WorkflowTrigger
|
||||
from documents.signals import document_consumption_finished
|
||||
from documents.templating.workflows import parse_w_workflow_placeholders
|
||||
from documents.workflows.webhooks import send_webhook
|
||||
|
||||
@@ -261,74 +259,3 @@ def execute_webhook_action(
|
||||
f"Error occurred sending webhook: {e}",
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
|
||||
|
||||
def execute_password_removal_action(
|
||||
action: WorkflowAction,
|
||||
document: Document | ConsumableDocument,
|
||||
logging_group,
|
||||
) -> None:
|
||||
"""
|
||||
Try to remove a password from a document using the configured list.
|
||||
"""
|
||||
passwords = action.passwords
|
||||
if not passwords:
|
||||
logger.warning(
|
||||
"Password removal action %s has no passwords configured",
|
||||
action.pk,
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
return
|
||||
|
||||
passwords = [
|
||||
password.strip()
|
||||
for password in re.split(r"[,\n]", passwords)
|
||||
if password.strip()
|
||||
]
|
||||
|
||||
if isinstance(document, ConsumableDocument):
|
||||
# hook the consumption-finished signal to attempt password removal later
|
||||
def handler(sender, **kwargs):
|
||||
consumed_document: Document = kwargs.get("document")
|
||||
if consumed_document is not None:
|
||||
execute_password_removal_action(
|
||||
action,
|
||||
consumed_document,
|
||||
logging_group,
|
||||
)
|
||||
document_consumption_finished.disconnect(handler)
|
||||
|
||||
document_consumption_finished.connect(handler, weak=False)
|
||||
return
|
||||
|
||||
# import here to avoid circular dependency
|
||||
from documents.bulk_edit import remove_password
|
||||
|
||||
for password in passwords:
|
||||
try:
|
||||
remove_password(
|
||||
[document.id],
|
||||
password=password,
|
||||
update_document=True,
|
||||
user=document.owner,
|
||||
)
|
||||
logger.info(
|
||||
"Removed password from document %s using workflow action %s",
|
||||
document.pk,
|
||||
action.pk,
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
return
|
||||
except ValueError as e:
|
||||
logger.warning(
|
||||
"Password removal failed for document %s with supplied password: %s",
|
||||
document.pk,
|
||||
e,
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
|
||||
logger.error(
|
||||
"Password removal failed for document %s after trying all provided passwords",
|
||||
document.pk,
|
||||
extra={"group": logging_group},
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user