mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-12-31 13:58:04 -06:00
First, release ASNs before document replacement (and restore if needed)
This commit is contained in:
@@ -7,7 +7,6 @@ from pathlib import Path
|
|||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
from typing import Literal
|
from typing import Literal
|
||||||
|
|
||||||
from celery import chain
|
|
||||||
from celery import chord
|
from celery import chord
|
||||||
from celery import group
|
from celery import group
|
||||||
from celery import shared_task
|
from celery import shared_task
|
||||||
@@ -38,6 +37,42 @@ if TYPE_CHECKING:
|
|||||||
logger: logging.Logger = logging.getLogger("paperless.bulk_edit")
|
logger: logging.Logger = logging.getLogger("paperless.bulk_edit")
|
||||||
|
|
||||||
|
|
||||||
|
@shared_task(bind=True)
|
||||||
|
def restore_archive_serial_numbers_task(
|
||||||
|
self,
|
||||||
|
backup: dict[int, int],
|
||||||
|
*args,
|
||||||
|
**kwargs,
|
||||||
|
) -> None:
|
||||||
|
restore_archive_serial_numbers(backup)
|
||||||
|
|
||||||
|
|
||||||
|
def release_archive_serial_numbers(doc_ids: list[int]) -> dict[int, int]:
|
||||||
|
"""
|
||||||
|
Clears ASNs on documents that are about to be replaced so new documents
|
||||||
|
can be assigned ASNs without uniqueness collisions. Returns a backup map
|
||||||
|
of doc_id -> previous ASN for potential restoration.
|
||||||
|
"""
|
||||||
|
qs = Document.objects.filter(
|
||||||
|
id__in=doc_ids,
|
||||||
|
archive_serial_number__isnull=False,
|
||||||
|
).only("pk", "archive_serial_number")
|
||||||
|
backup = dict(qs.values_list("pk", "archive_serial_number"))
|
||||||
|
qs.update(archive_serial_number=None)
|
||||||
|
logger.info(f"Released archive serial numbers for documents {list(backup.keys())}")
|
||||||
|
return backup
|
||||||
|
|
||||||
|
|
||||||
|
def restore_archive_serial_numbers(backup: dict[int, int]) -> None:
|
||||||
|
"""
|
||||||
|
Restores ASNs using the provided backup map, intended for
|
||||||
|
rollback when replacement consumption fails.
|
||||||
|
"""
|
||||||
|
for doc_id, asn in backup.items():
|
||||||
|
Document.objects.filter(pk=doc_id).update(archive_serial_number=asn)
|
||||||
|
logger.info(f"Restored archive serial numbers for documents {list(backup.keys())}")
|
||||||
|
|
||||||
|
|
||||||
def set_correspondent(
|
def set_correspondent(
|
||||||
doc_ids: list[int],
|
doc_ids: list[int],
|
||||||
correspondent: Correspondent,
|
correspondent: Correspondent,
|
||||||
@@ -433,8 +468,6 @@ def merge(
|
|||||||
|
|
||||||
if user is not None:
|
if user is not None:
|
||||||
overrides.owner_id = user.id
|
overrides.owner_id = user.id
|
||||||
# Avoid copying or detecting ASN from merged PDFs to prevent collision
|
|
||||||
overrides.skip_asn = True
|
|
||||||
|
|
||||||
logger.info("Adding merged document to the task queue.")
|
logger.info("Adding merged document to the task queue.")
|
||||||
|
|
||||||
@@ -447,10 +480,18 @@ def merge(
|
|||||||
)
|
)
|
||||||
|
|
||||||
if delete_originals:
|
if delete_originals:
|
||||||
|
backup = release_archive_serial_numbers(affected_docs)
|
||||||
logger.info(
|
logger.info(
|
||||||
"Queueing removal of original documents after consumption of merged document",
|
"Queueing removal of original documents after consumption of merged document",
|
||||||
)
|
)
|
||||||
chain(consume_task, delete.si(affected_docs)).delay()
|
try:
|
||||||
|
consume_task.apply_async(
|
||||||
|
link=[delete.si(affected_docs)],
|
||||||
|
link_error=[restore_archive_serial_numbers_task.s(backup)],
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
restore_archive_serial_numbers(backup)
|
||||||
|
raise
|
||||||
else:
|
else:
|
||||||
consume_task.delay()
|
consume_task.delay()
|
||||||
|
|
||||||
@@ -508,10 +549,20 @@ def split(
|
|||||||
)
|
)
|
||||||
|
|
||||||
if delete_originals:
|
if delete_originals:
|
||||||
|
backup = release_archive_serial_numbers([doc.id])
|
||||||
logger.info(
|
logger.info(
|
||||||
"Queueing removal of original document after consumption of the split documents",
|
"Queueing removal of original document after consumption of the split documents",
|
||||||
)
|
)
|
||||||
chord(header=consume_tasks, body=delete.si([doc.id])).delay()
|
try:
|
||||||
|
chord(
|
||||||
|
header=consume_tasks,
|
||||||
|
body=delete.si([doc.id]),
|
||||||
|
).apply_async(
|
||||||
|
link_error=[restore_archive_serial_numbers_task.s(backup)],
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
restore_archive_serial_numbers(backup)
|
||||||
|
raise
|
||||||
else:
|
else:
|
||||||
group(consume_tasks).delay()
|
group(consume_tasks).delay()
|
||||||
|
|
||||||
@@ -614,7 +665,6 @@ def edit_pdf(
|
|||||||
)
|
)
|
||||||
if user is not None:
|
if user is not None:
|
||||||
overrides.owner_id = user.id
|
overrides.owner_id = user.id
|
||||||
|
|
||||||
for idx, pdf in enumerate(pdf_docs, start=1):
|
for idx, pdf in enumerate(pdf_docs, start=1):
|
||||||
filepath: Path = (
|
filepath: Path = (
|
||||||
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
|
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
|
||||||
@@ -633,7 +683,17 @@ def edit_pdf(
|
|||||||
)
|
)
|
||||||
|
|
||||||
if delete_original:
|
if delete_original:
|
||||||
chord(header=consume_tasks, body=delete.si([doc.id])).delay()
|
backup = release_archive_serial_numbers([doc.id])
|
||||||
|
try:
|
||||||
|
chord(
|
||||||
|
header=consume_tasks,
|
||||||
|
body=delete.si([doc.id]),
|
||||||
|
).apply_async(
|
||||||
|
link_error=[restore_archive_serial_numbers_task.s(backup)],
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
restore_archive_serial_numbers(backup)
|
||||||
|
raise
|
||||||
else:
|
else:
|
||||||
group(consume_tasks).delay()
|
group(consume_tasks).delay()
|
||||||
|
|
||||||
|
|||||||
@@ -602,23 +602,21 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
|||||||
expected_filename,
|
expected_filename,
|
||||||
)
|
)
|
||||||
self.assertEqual(consume_file_args[1].title, None)
|
self.assertEqual(consume_file_args[1].title, None)
|
||||||
self.assertTrue(consume_file_args[1].skip_asn)
|
self.assertFalse(consume_file_args[1].skip_asn)
|
||||||
|
|
||||||
# With metadata_document_id overrides
|
# With metadata_document_id overrides
|
||||||
result = bulk_edit.merge(doc_ids, metadata_document_id=metadata_document_id)
|
result = bulk_edit.merge(doc_ids, metadata_document_id=metadata_document_id)
|
||||||
consume_file_args, _ = mock_consume_file.call_args
|
consume_file_args, _ = mock_consume_file.call_args
|
||||||
self.assertEqual(consume_file_args[1].title, "B (merged)")
|
self.assertEqual(consume_file_args[1].title, "B (merged)")
|
||||||
self.assertEqual(consume_file_args[1].created, self.doc2.created)
|
self.assertEqual(consume_file_args[1].created, self.doc2.created)
|
||||||
self.assertTrue(consume_file_args[1].skip_asn)
|
self.assertFalse(consume_file_args[1].skip_asn)
|
||||||
|
|
||||||
self.assertEqual(result, "OK")
|
self.assertEqual(result, "OK")
|
||||||
|
|
||||||
@mock.patch("documents.bulk_edit.delete.si")
|
@mock.patch("documents.bulk_edit.delete.si")
|
||||||
@mock.patch("documents.tasks.consume_file.s")
|
@mock.patch("documents.tasks.consume_file.s")
|
||||||
@mock.patch("documents.bulk_edit.chain")
|
|
||||||
def test_merge_and_delete_originals(
|
def test_merge_and_delete_originals(
|
||||||
self,
|
self,
|
||||||
mock_chain,
|
|
||||||
mock_consume_file,
|
mock_consume_file,
|
||||||
mock_delete_documents,
|
mock_delete_documents,
|
||||||
):
|
):
|
||||||
@@ -632,6 +630,12 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
|||||||
- Document deletion task should be called
|
- Document deletion task should be called
|
||||||
"""
|
"""
|
||||||
doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
|
doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
|
||||||
|
self.doc1.archive_serial_number = 101
|
||||||
|
self.doc2.archive_serial_number = 102
|
||||||
|
self.doc3.archive_serial_number = 103
|
||||||
|
self.doc1.save()
|
||||||
|
self.doc2.save()
|
||||||
|
self.doc3.save()
|
||||||
|
|
||||||
result = bulk_edit.merge(doc_ids, delete_originals=True)
|
result = bulk_edit.merge(doc_ids, delete_originals=True)
|
||||||
self.assertEqual(result, "OK")
|
self.assertEqual(result, "OK")
|
||||||
@@ -642,7 +646,8 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
mock_consume_file.assert_called()
|
mock_consume_file.assert_called()
|
||||||
mock_delete_documents.assert_called()
|
mock_delete_documents.assert_called()
|
||||||
mock_chain.assert_called_once()
|
consume_sig = mock_consume_file.return_value
|
||||||
|
consume_sig.apply_async.assert_called_once()
|
||||||
|
|
||||||
consume_file_args, _ = mock_consume_file.call_args
|
consume_file_args, _ = mock_consume_file.call_args
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
@@ -650,7 +655,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
|||||||
expected_filename,
|
expected_filename,
|
||||||
)
|
)
|
||||||
self.assertEqual(consume_file_args[1].title, None)
|
self.assertEqual(consume_file_args[1].title, None)
|
||||||
self.assertTrue(consume_file_args[1].skip_asn)
|
self.assertFalse(consume_file_args[1].skip_asn)
|
||||||
|
|
||||||
delete_documents_args, _ = mock_delete_documents.call_args
|
delete_documents_args, _ = mock_delete_documents.call_args
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
@@ -658,6 +663,13 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
|||||||
doc_ids,
|
doc_ids,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.doc1.refresh_from_db()
|
||||||
|
self.doc2.refresh_from_db()
|
||||||
|
self.doc3.refresh_from_db()
|
||||||
|
self.assertIsNone(self.doc1.archive_serial_number)
|
||||||
|
self.assertIsNone(self.doc2.archive_serial_number)
|
||||||
|
self.assertIsNone(self.doc3.archive_serial_number)
|
||||||
|
|
||||||
@mock.patch("documents.tasks.consume_file.s")
|
@mock.patch("documents.tasks.consume_file.s")
|
||||||
def test_merge_with_archive_fallback(self, mock_consume_file):
|
def test_merge_with_archive_fallback(self, mock_consume_file):
|
||||||
"""
|
"""
|
||||||
@@ -726,6 +738,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
|||||||
self.assertEqual(mock_consume_file.call_count, 2)
|
self.assertEqual(mock_consume_file.call_count, 2)
|
||||||
consume_file_args, _ = mock_consume_file.call_args
|
consume_file_args, _ = mock_consume_file.call_args
|
||||||
self.assertEqual(consume_file_args[1].title, "B (split 2)")
|
self.assertEqual(consume_file_args[1].title, "B (split 2)")
|
||||||
|
self.assertFalse(consume_file_args[1].skip_asn)
|
||||||
|
|
||||||
self.assertEqual(result, "OK")
|
self.assertEqual(result, "OK")
|
||||||
|
|
||||||
@@ -750,6 +763,8 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
doc_ids = [self.doc2.id]
|
doc_ids = [self.doc2.id]
|
||||||
pages = [[1, 2], [3]]
|
pages = [[1, 2], [3]]
|
||||||
|
self.doc2.archive_serial_number = 200
|
||||||
|
self.doc2.save()
|
||||||
|
|
||||||
result = bulk_edit.split(doc_ids, pages, delete_originals=True)
|
result = bulk_edit.split(doc_ids, pages, delete_originals=True)
|
||||||
self.assertEqual(result, "OK")
|
self.assertEqual(result, "OK")
|
||||||
@@ -757,6 +772,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
|||||||
self.assertEqual(mock_consume_file.call_count, 2)
|
self.assertEqual(mock_consume_file.call_count, 2)
|
||||||
consume_file_args, _ = mock_consume_file.call_args
|
consume_file_args, _ = mock_consume_file.call_args
|
||||||
self.assertEqual(consume_file_args[1].title, "B (split 2)")
|
self.assertEqual(consume_file_args[1].title, "B (split 2)")
|
||||||
|
self.assertFalse(consume_file_args[1].skip_asn)
|
||||||
|
|
||||||
mock_delete_documents.assert_called()
|
mock_delete_documents.assert_called()
|
||||||
mock_chord.assert_called_once()
|
mock_chord.assert_called_once()
|
||||||
@@ -767,6 +783,9 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
|||||||
doc_ids,
|
doc_ids,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.doc2.refresh_from_db()
|
||||||
|
self.assertIsNone(self.doc2.archive_serial_number)
|
||||||
|
|
||||||
@mock.patch("documents.tasks.consume_file.delay")
|
@mock.patch("documents.tasks.consume_file.delay")
|
||||||
@mock.patch("pikepdf.Pdf.save")
|
@mock.patch("pikepdf.Pdf.save")
|
||||||
def test_split_with_errors(self, mock_save_pdf, mock_consume_file):
|
def test_split_with_errors(self, mock_save_pdf, mock_consume_file):
|
||||||
@@ -967,10 +986,16 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
|||||||
mock_chord.return_value.delay.return_value = None
|
mock_chord.return_value.delay.return_value = None
|
||||||
doc_ids = [self.doc2.id]
|
doc_ids = [self.doc2.id]
|
||||||
operations = [{"page": 1}, {"page": 2}]
|
operations = [{"page": 1}, {"page": 2}]
|
||||||
|
self.doc2.archive_serial_number = 250
|
||||||
|
self.doc2.save()
|
||||||
|
|
||||||
result = bulk_edit.edit_pdf(doc_ids, operations, delete_original=True)
|
result = bulk_edit.edit_pdf(doc_ids, operations, delete_original=True)
|
||||||
self.assertEqual(result, "OK")
|
self.assertEqual(result, "OK")
|
||||||
mock_chord.assert_called_once()
|
mock_chord.assert_called_once()
|
||||||
|
consume_file_args, _ = mock_consume_file.call_args
|
||||||
|
self.assertFalse(consume_file_args[1].skip_asn)
|
||||||
|
self.doc2.refresh_from_db()
|
||||||
|
self.assertIsNone(self.doc2.archive_serial_number)
|
||||||
|
|
||||||
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
|
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
|
||||||
def test_edit_pdf_with_update_document(self, mock_update_document):
|
def test_edit_pdf_with_update_document(self, mock_update_document):
|
||||||
|
|||||||
Reference in New Issue
Block a user