mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Change: change update content to handle archive disabled (#8315)
This commit is contained in:
parent
d7d3fed833
commit
9c1561adfb
@ -24,7 +24,7 @@ from documents.models import StoragePath
|
||||
from documents.permissions import set_permissions_for_object
|
||||
from documents.tasks import bulk_update_documents
|
||||
from documents.tasks import consume_file
|
||||
from documents.tasks import update_document_archive_file
|
||||
from documents.tasks import update_document_content_maybe_archive_file
|
||||
|
||||
logger: logging.Logger = logging.getLogger("paperless.bulk_edit")
|
||||
|
||||
@ -191,7 +191,7 @@ def delete(doc_ids: list[int]) -> Literal["OK"]:
|
||||
|
||||
def reprocess(doc_ids: list[int]) -> Literal["OK"]:
|
||||
for document_id in doc_ids:
|
||||
update_document_archive_file.delay(
|
||||
update_document_content_maybe_archive_file.delay(
|
||||
document_id=document_id,
|
||||
)
|
||||
|
||||
@ -245,7 +245,7 @@ def rotate(doc_ids: list[int], degrees: int) -> Literal["OK"]:
|
||||
doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
|
||||
doc.save()
|
||||
rotate_tasks.append(
|
||||
update_document_archive_file.s(
|
||||
update_document_content_maybe_archive_file.s(
|
||||
document_id=doc.id,
|
||||
),
|
||||
)
|
||||
@ -423,7 +423,7 @@ def delete_pages(doc_ids: list[int], pages: list[int]) -> Literal["OK"]:
|
||||
if doc.page_count is not None:
|
||||
doc.page_count = doc.page_count - len(pages)
|
||||
doc.save()
|
||||
update_document_archive_file.delay(document_id=doc.id)
|
||||
update_document_content_maybe_archive_file.delay(document_id=doc.id)
|
||||
logger.info(f"Deleted pages {pages} from document {doc.id}")
|
||||
except Exception as e:
|
||||
logger.exception(f"Error deleting pages from document {doc.id}: {e}")
|
||||
|
@ -9,7 +9,7 @@ from django.core.management.base import BaseCommand
|
||||
from documents.management.commands.mixins import MultiProcessMixin
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.models import Document
|
||||
from documents.tasks import update_document_archive_file
|
||||
from documents.tasks import update_document_content_maybe_archive_file
|
||||
|
||||
logger = logging.getLogger("paperless.management.archiver")
|
||||
|
||||
@ -77,13 +77,13 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||
|
||||
if self.process_count == 1:
|
||||
for doc_id in document_ids:
|
||||
update_document_archive_file(doc_id)
|
||||
update_document_content_maybe_archive_file(doc_id)
|
||||
else: # pragma: no cover
|
||||
with multiprocessing.Pool(self.process_count) as pool:
|
||||
list(
|
||||
tqdm.tqdm(
|
||||
pool.imap_unordered(
|
||||
update_document_archive_file,
|
||||
update_document_content_maybe_archive_file,
|
||||
document_ids,
|
||||
),
|
||||
total=len(document_ids),
|
||||
|
@ -206,9 +206,10 @@ def bulk_update_documents(document_ids):
|
||||
|
||||
|
||||
@shared_task
|
||||
def update_document_archive_file(document_id):
|
||||
def update_document_content_maybe_archive_file(document_id):
|
||||
"""
|
||||
Re-creates the archive file of a document, including new OCR content and thumbnail
|
||||
Re-creates OCR content and thumbnail for a document, and archive file if
|
||||
it exists.
|
||||
"""
|
||||
document = Document.objects.get(id=document_id)
|
||||
|
||||
@ -234,8 +235,9 @@ def update_document_archive_file(document_id):
|
||||
document.get_public_filename(),
|
||||
)
|
||||
|
||||
if parser.get_archive_path():
|
||||
with transaction.atomic():
|
||||
with transaction.atomic():
|
||||
oldDocument = Document.objects.get(pk=document.pk)
|
||||
if parser.get_archive_path():
|
||||
with open(parser.get_archive_path(), "rb") as f:
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
# I'm going to save first so that in case the file move
|
||||
@ -246,7 +248,6 @@ def update_document_archive_file(document_id):
|
||||
document,
|
||||
archive_filename=True,
|
||||
)
|
||||
oldDocument = Document.objects.get(pk=document.pk)
|
||||
Document.objects.filter(pk=document.pk).update(
|
||||
archive_checksum=checksum,
|
||||
content=parser.get_text(),
|
||||
@ -268,24 +269,41 @@ def update_document_archive_file(document_id):
|
||||
],
|
||||
},
|
||||
additional_data={
|
||||
"reason": "Update document archive file",
|
||||
"reason": "Update document content",
|
||||
},
|
||||
action=LogEntry.Action.UPDATE,
|
||||
)
|
||||
else:
|
||||
Document.objects.filter(pk=document.pk).update(
|
||||
content=parser.get_text(),
|
||||
)
|
||||
|
||||
if settings.AUDIT_LOG_ENABLED:
|
||||
LogEntry.objects.log_create(
|
||||
instance=oldDocument,
|
||||
changes={
|
||||
"content": [oldDocument.content, parser.get_text()],
|
||||
},
|
||||
additional_data={
|
||||
"reason": "Update document content",
|
||||
},
|
||||
action=LogEntry.Action.UPDATE,
|
||||
)
|
||||
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
if parser.get_archive_path():
|
||||
create_source_path_directory(document.archive_path)
|
||||
shutil.move(parser.get_archive_path(), document.archive_path)
|
||||
shutil.move(thumbnail, document.thumbnail_path)
|
||||
shutil.move(thumbnail, document.thumbnail_path)
|
||||
|
||||
document.refresh_from_db()
|
||||
logger.info(
|
||||
f"Updating index for document {document_id} ({document.archive_checksum})",
|
||||
)
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, document)
|
||||
document.refresh_from_db()
|
||||
logger.info(
|
||||
f"Updating index for document {document_id} ({document.archive_checksum})",
|
||||
)
|
||||
with index.open_index_writer() as writer:
|
||||
index.update_document(writer, document)
|
||||
|
||||
clear_document_caches(document.pk)
|
||||
clear_document_caches(document.pk)
|
||||
|
||||
except Exception:
|
||||
logger.exception(
|
||||
|
@ -607,7 +607,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
mock_consume_file.assert_not_called()
|
||||
|
||||
@mock.patch("documents.tasks.bulk_update_documents.si")
|
||||
@mock.patch("documents.tasks.update_document_archive_file.s")
|
||||
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.s")
|
||||
@mock.patch("celery.chord.delay")
|
||||
def test_rotate(self, mock_chord, mock_update_document, mock_update_documents):
|
||||
"""
|
||||
@ -626,7 +626,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(result, "OK")
|
||||
|
||||
@mock.patch("documents.tasks.bulk_update_documents.si")
|
||||
@mock.patch("documents.tasks.update_document_archive_file.s")
|
||||
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.s")
|
||||
@mock.patch("pikepdf.Pdf.save")
|
||||
def test_rotate_with_error(
|
||||
self,
|
||||
@ -654,7 +654,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
mock_update_archive_file.assert_not_called()
|
||||
|
||||
@mock.patch("documents.tasks.bulk_update_documents.si")
|
||||
@mock.patch("documents.tasks.update_document_archive_file.s")
|
||||
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.s")
|
||||
@mock.patch("celery.chord.delay")
|
||||
def test_rotate_non_pdf(
|
||||
self,
|
||||
@ -680,7 +680,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
mock_chord.assert_called_once()
|
||||
self.assertEqual(result, "OK")
|
||||
|
||||
@mock.patch("documents.tasks.update_document_archive_file.delay")
|
||||
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
|
||||
@mock.patch("pikepdf.Pdf.save")
|
||||
def test_delete_pages(self, mock_pdf_save, mock_update_archive_file):
|
||||
"""
|
||||
@ -705,7 +705,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
self.doc2.refresh_from_db()
|
||||
self.assertEqual(self.doc2.page_count, expected_page_count)
|
||||
|
||||
@mock.patch("documents.tasks.update_document_archive_file.delay")
|
||||
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
|
||||
@mock.patch("pikepdf.Pdf.save")
|
||||
def test_delete_pages_with_error(self, mock_pdf_save, mock_update_archive_file):
|
||||
"""
|
||||
|
@ -13,7 +13,7 @@ from django.test import override_settings
|
||||
|
||||
from documents.file_handling import generate_filename
|
||||
from documents.models import Document
|
||||
from documents.tasks import update_document_archive_file
|
||||
from documents.tasks import update_document_content_maybe_archive_file
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
|
||||
@ -46,7 +46,7 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"),
|
||||
)
|
||||
|
||||
update_document_archive_file(doc.pk)
|
||||
update_document_content_maybe_archive_file(doc.pk)
|
||||
|
||||
doc = Document.objects.get(id=doc.id)
|
||||
|
||||
@ -63,7 +63,7 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
doc.save()
|
||||
shutil.copy(sample_file, doc.source_path)
|
||||
|
||||
update_document_archive_file(doc.pk)
|
||||
update_document_content_maybe_archive_file(doc.pk)
|
||||
|
||||
doc = Document.objects.get(id=doc.id)
|
||||
|
||||
@ -94,8 +94,8 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
os.path.join(self.dirs.originals_dir, "document_01.pdf"),
|
||||
)
|
||||
|
||||
update_document_archive_file(doc2.pk)
|
||||
update_document_archive_file(doc1.pk)
|
||||
update_document_content_maybe_archive_file(doc2.pk)
|
||||
update_document_content_maybe_archive_file(doc1.pk)
|
||||
|
||||
doc1 = Document.objects.get(id=doc1.id)
|
||||
doc2 = Document.objects.get(id=doc2.id)
|
||||
|
@ -1,5 +1,7 @@
|
||||
import os
|
||||
import shutil
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from django.conf import settings
|
||||
@ -184,3 +186,75 @@ class TestEmptyTrashTask(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
|
||||
tasks.empty_trash()
|
||||
self.assertEqual(Document.global_objects.count(), 0)
|
||||
|
||||
|
||||
class TestUpdateContent(DirectoriesMixin, TestCase):
    """
    Tests for tasks.update_document_content_maybe_archive_file, covering
    documents with and without an archive file.
    """

    # Sample PDF shipped with the test suite, used as the original (and, in
    # the archive test, also as the pre-existing archive) file.
    SAMPLE_PDF = (
        Path(__file__).parent / "samples" / "documents" / "originals" / "0000001.pdf"
    )

    # Placeholder values stored on the document before the task runs. The
    # assertions compare against these so they only pass if the task really
    # replaced them.
    INITIAL_CONTENT = "my document"
    INITIAL_CHECKSUM = "wow"

    def test_update_content_maybe_archive_file(self):
        """
        GIVEN:
            - Existing document with archive file
        WHEN:
            - Update content task is called
        THEN:
            - Document is reprocessed, content and checksum are updated
        """
        sample1 = self.dirs.scratch_dir / "sample.pdf"
        shutil.copy(self.SAMPLE_PDF, sample1)
        sample1_archive = self.dirs.archive_dir / "sample_archive.pdf"
        shutil.copy(self.SAMPLE_PDF, sample1_archive)
        doc = Document.objects.create(
            title="test",
            content=self.INITIAL_CONTENT,
            checksum=self.INITIAL_CHECKSUM,
            archive_checksum=self.INITIAL_CHECKSUM,
            filename=sample1,
            mime_type="application/pdf",
            archive_filename=sample1_archive,
        )

        tasks.update_document_content_maybe_archive_file(doc.pk)

        updated = Document.objects.get(pk=doc.pk)
        # Compare against the seeded content (not the title) so the check
        # fails if the task left the content untouched.
        self.assertNotEqual(updated.content, self.INITIAL_CONTENT)
        self.assertNotEqual(updated.archive_checksum, self.INITIAL_CHECKSUM)

    def test_update_content_maybe_archive_file_no_archive(self):
        """
        GIVEN:
            - Existing document without archive file
        WHEN:
            - Update content task is called
        THEN:
            - Document is reprocessed, content is updated
        """
        sample1 = self.dirs.scratch_dir / "sample.pdf"
        shutil.copy(self.SAMPLE_PDF, sample1)
        doc = Document.objects.create(
            title="test",
            content=self.INITIAL_CONTENT,
            checksum=self.INITIAL_CHECKSUM,
            filename=sample1,
            mime_type="application/pdf",
        )

        tasks.update_document_content_maybe_archive_file(doc.pk)

        updated = Document.objects.get(pk=doc.pk)
        # Compare against the seeded content (not the title) so the check
        # fails if the task left the content untouched.
        self.assertNotEqual(updated.content, self.INITIAL_CONTENT)
|
||||
|
Loading…
x
Reference in New Issue
Block a user