Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-04-02 13:45:10 -05:00)
Change: change update content to handle archive disabled (#8315)
commit 9c1561adfb, parent d7d3fed833
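
In short: the Celery task update_document_archive_file is renamed to update_document_content_maybe_archive_file, and every call site and mock target moves with it. Behaviorally, the task now refreshes a document's OCR content and thumbnail even when the parser produces no archive file (for example, when archive generation is disabled), instead of skipping the document entirely. A minimal sketch of the new control flow, paraphrased from the patch below (names come from the diff; parsing, file moves, and error handling are elided):

    # Sketch, not the verbatim patch: content is always refreshed, while the
    # archive checksum and filename are only touched when an archive file exists.
    with transaction.atomic():
        oldDocument = Document.objects.get(pk=document.pk)
        if parser.get_archive_path():
            ...  # update archive_checksum, content, and archive_filename
        else:
            Document.objects.filter(pk=document.pk).update(
                content=parser.get_text(),
            )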
In documents/bulk_edit.py:

@@ -24,7 +24,7 @@ from documents.models import StoragePath
 from documents.permissions import set_permissions_for_object
 from documents.tasks import bulk_update_documents
 from documents.tasks import consume_file
-from documents.tasks import update_document_archive_file
+from documents.tasks import update_document_content_maybe_archive_file
 
 logger: logging.Logger = logging.getLogger("paperless.bulk_edit")
 
@@ -191,7 +191,7 @@ def delete(doc_ids: list[int]) -> Literal["OK"]:
 
 def reprocess(doc_ids: list[int]) -> Literal["OK"]:
     for document_id in doc_ids:
-        update_document_archive_file.delay(
+        update_document_content_maybe_archive_file.delay(
            document_id=document_id,
         )
 
@@ -245,7 +245,7 @@ def rotate(doc_ids: list[int], degrees: int) -> Literal["OK"]:
             doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
             doc.save()
             rotate_tasks.append(
-                update_document_archive_file.s(
+                update_document_content_maybe_archive_file.s(
                     document_id=doc.id,
                 ),
             )
@@ -423,7 +423,7 @@ def delete_pages(doc_ids: list[int], pages: list[int]) -> Literal["OK"]:
         if doc.page_count is not None:
             doc.page_count = doc.page_count - len(pages)
         doc.save()
-        update_document_archive_file.delay(document_id=doc.id)
+        update_document_content_maybe_archive_file.delay(document_id=doc.id)
         logger.info(f"Deleted pages {pages} from document {doc.id}")
     except Exception as e:
         logger.exception(f"Error deleting pages from document {doc.id}: {e}")
In the document archiver management command (logger "paperless.management.archiver"):

@@ -9,7 +9,7 @@ from django.core.management.base import BaseCommand
 from documents.management.commands.mixins import MultiProcessMixin
 from documents.management.commands.mixins import ProgressBarMixin
 from documents.models import Document
-from documents.tasks import update_document_archive_file
+from documents.tasks import update_document_content_maybe_archive_file
 
 logger = logging.getLogger("paperless.management.archiver")
 
@@ -77,13 +77,13 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
 
         if self.process_count == 1:
             for doc_id in document_ids:
-                update_document_archive_file(doc_id)
+                update_document_content_maybe_archive_file(doc_id)
         else:  # pragma: no cover
             with multiprocessing.Pool(self.process_count) as pool:
                 list(
                     tqdm.tqdm(
                         pool.imap_unordered(
-                            update_document_archive_file,
+                            update_document_content_maybe_archive_file,
                             document_ids,
                         ),
                         total=len(document_ids),
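All three invocation styles of the renamed task appear in this patch; a short recap (the document ID 42 is illustrative only):

    from documents.tasks import update_document_content_maybe_archive_file

    # Called synchronously, as the archiver management command does per document:
    update_document_content_maybe_archive_file(42)

    # Queued through Celery, as bulk_edit.reprocess() and delete_pages() do:
    update_document_content_maybe_archive_file.delay(document_id=42)

    # As a Celery signature composed into a chord, as bulk_edit.rotate() does:
    update_document_content_maybe_archive_file.s(document_id=42)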
In documents/tasks.py:

@@ -206,9 +206,10 @@ def bulk_update_documents(document_ids):
 
 
 @shared_task
-def update_document_archive_file(document_id):
+def update_document_content_maybe_archive_file(document_id):
     """
-    Re-creates the archive file of a document, including new OCR content and thumbnail
+    Re-creates OCR content and thumbnail for a document, and archive file if
+    it exists.
     """
     document = Document.objects.get(id=document_id)
 
@@ -234,8 +235,9 @@ def update_document_archive_file(document_id):
             document.get_public_filename(),
         )
 
-        if parser.get_archive_path():
-            with transaction.atomic():
+        with transaction.atomic():
+            oldDocument = Document.objects.get(pk=document.pk)
+            if parser.get_archive_path():
                 with open(parser.get_archive_path(), "rb") as f:
                     checksum = hashlib.md5(f.read()).hexdigest()
                 # I'm going to save first so that in case the file move
@@ -246,7 +248,6 @@ def update_document_archive_file(document_id):
                     document,
                     archive_filename=True,
                 )
-                oldDocument = Document.objects.get(pk=document.pk)
                 Document.objects.filter(pk=document.pk).update(
                     archive_checksum=checksum,
                     content=parser.get_text(),
@@ -268,24 +269,41 @@ def update_document_archive_file(document_id):
                             ],
                         },
                         additional_data={
-                            "reason": "Update document archive file",
+                            "reason": "Update document content",
+                        },
+                        action=LogEntry.Action.UPDATE,
+                    )
+            else:
+                Document.objects.filter(pk=document.pk).update(
+                    content=parser.get_text(),
+                )
+
+                if settings.AUDIT_LOG_ENABLED:
+                    LogEntry.objects.log_create(
+                        instance=oldDocument,
+                        changes={
+                            "content": [oldDocument.content, parser.get_text()],
+                        },
+                        additional_data={
+                            "reason": "Update document content",
                         },
                         action=LogEntry.Action.UPDATE,
                     )
 
         with FileLock(settings.MEDIA_LOCK):
+            if parser.get_archive_path():
                 create_source_path_directory(document.archive_path)
                 shutil.move(parser.get_archive_path(), document.archive_path)
             shutil.move(thumbnail, document.thumbnail_path)
 
         document.refresh_from_db()
         logger.info(
             f"Updating index for document {document_id} ({document.archive_checksum})",
         )
         with index.open_index_writer() as writer:
             index.update_document(writer, document)
 
         clear_document_caches(document.pk)
 
     except Exception:
         logger.exception(
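Because the task's dotted path changed, every mock.patch target that referenced the old name must be updated as well, which is what the test changes below do. The pattern, for reference (the test method name here is a placeholder, not from the patch):

    from unittest import mock

    # Inside a TestCase: patch the task where it is defined, in documents.tasks.
    @mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
    def test_delete_pages_queues_content_update(self, mock_update):
        ...  # exercise the caller and assert mock_update was called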
In the bulk-edit PDF action tests (TestPDFActions):

@@ -607,7 +607,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         mock_consume_file.assert_not_called()
 
     @mock.patch("documents.tasks.bulk_update_documents.si")
-    @mock.patch("documents.tasks.update_document_archive_file.s")
+    @mock.patch("documents.tasks.update_document_content_maybe_archive_file.s")
     @mock.patch("celery.chord.delay")
     def test_rotate(self, mock_chord, mock_update_document, mock_update_documents):
         """
@@ -626,7 +626,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         self.assertEqual(result, "OK")
 
     @mock.patch("documents.tasks.bulk_update_documents.si")
-    @mock.patch("documents.tasks.update_document_archive_file.s")
+    @mock.patch("documents.tasks.update_document_content_maybe_archive_file.s")
     @mock.patch("pikepdf.Pdf.save")
     def test_rotate_with_error(
         self,
@@ -654,7 +654,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         mock_update_archive_file.assert_not_called()
 
     @mock.patch("documents.tasks.bulk_update_documents.si")
-    @mock.patch("documents.tasks.update_document_archive_file.s")
+    @mock.patch("documents.tasks.update_document_content_maybe_archive_file.s")
     @mock.patch("celery.chord.delay")
     def test_rotate_non_pdf(
         self,
@@ -680,7 +680,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         mock_chord.assert_called_once()
         self.assertEqual(result, "OK")
 
-    @mock.patch("documents.tasks.update_document_archive_file.delay")
+    @mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
     @mock.patch("pikepdf.Pdf.save")
     def test_delete_pages(self, mock_pdf_save, mock_update_archive_file):
         """
@@ -705,7 +705,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         self.doc2.refresh_from_db()
         self.assertEqual(self.doc2.page_count, expected_page_count)
 
-    @mock.patch("documents.tasks.update_document_archive_file.delay")
+    @mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
     @mock.patch("pikepdf.Pdf.save")
     def test_delete_pages_with_error(self, mock_pdf_save, mock_update_archive_file):
         """
In the archiver tests (TestArchiver):

@@ -13,7 +13,7 @@ from django.test import override_settings
 
 from documents.file_handling import generate_filename
 from documents.models import Document
-from documents.tasks import update_document_archive_file
+from documents.tasks import update_document_content_maybe_archive_file
 from documents.tests.utils import DirectoriesMixin
 from documents.tests.utils import FileSystemAssertsMixin
 
@@ -46,7 +46,7 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
             os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"),
         )
 
-        update_document_archive_file(doc.pk)
+        update_document_content_maybe_archive_file(doc.pk)
 
         doc = Document.objects.get(id=doc.id)
 
@@ -63,7 +63,7 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         doc.save()
         shutil.copy(sample_file, doc.source_path)
 
-        update_document_archive_file(doc.pk)
+        update_document_content_maybe_archive_file(doc.pk)
 
         doc = Document.objects.get(id=doc.id)
 
@@ -94,8 +94,8 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
             os.path.join(self.dirs.originals_dir, "document_01.pdf"),
         )
 
-        update_document_archive_file(doc2.pk)
-        update_document_archive_file(doc1.pk)
+        update_document_content_maybe_archive_file(doc2.pk)
+        update_document_content_maybe_archive_file(doc1.pk)
 
         doc1 = Document.objects.get(id=doc1.id)
         doc2 = Document.objects.get(id=doc2.id)
In the task tests:

@@ -1,5 +1,7 @@
 import os
+import shutil
 from datetime import timedelta
+from pathlib import Path
 from unittest import mock
 
 from django.conf import settings
@@ -184,3 +186,75 @@ class TestEmptyTrashTask(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
 
         tasks.empty_trash()
         self.assertEqual(Document.global_objects.count(), 0)
+
+
+class TestUpdateContent(DirectoriesMixin, TestCase):
+    def test_update_content_maybe_archive_file(self):
+        """
+        GIVEN:
+            - Existing document with archive file
+        WHEN:
+            - Update content task is called
+        THEN:
+            - Document is reprocessed, content and checksum are updated
+        """
+        sample1 = self.dirs.scratch_dir / "sample.pdf"
+        shutil.copy(
+            Path(__file__).parent
+            / "samples"
+            / "documents"
+            / "originals"
+            / "0000001.pdf",
+            sample1,
+        )
+        sample1_archive = self.dirs.archive_dir / "sample_archive.pdf"
+        shutil.copy(
+            Path(__file__).parent
+            / "samples"
+            / "documents"
+            / "originals"
+            / "0000001.pdf",
+            sample1_archive,
+        )
+        doc = Document.objects.create(
+            title="test",
+            content="my document",
+            checksum="wow",
+            archive_checksum="wow",
+            filename=sample1,
+            mime_type="application/pdf",
+            archive_filename=sample1_archive,
+        )
+
+        tasks.update_document_content_maybe_archive_file(doc.pk)
+        self.assertNotEqual(Document.objects.get(pk=doc.pk).content, "test")
+        self.assertNotEqual(Document.objects.get(pk=doc.pk).archive_checksum, "wow")
+
+    def test_update_content_maybe_archive_file_no_archive(self):
+        """
+        GIVEN:
+            - Existing document without archive file
+        WHEN:
+            - Update content task is called
+        THEN:
+            - Document is reprocessed, content is updated
+        """
+        sample1 = self.dirs.scratch_dir / "sample.pdf"
+        shutil.copy(
+            Path(__file__).parent
+            / "samples"
+            / "documents"
+            / "originals"
+            / "0000001.pdf",
+            sample1,
+        )
+        doc = Document.objects.create(
+            title="test",
+            content="my document",
+            checksum="wow",
+            filename=sample1,
+            mime_type="application/pdf",
+        )
+
+        tasks.update_document_content_maybe_archive_file(doc.pk)
+        self.assertNotEqual(Document.objects.get(pk=doc.pk).content, "test")
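A natural extra assertion for the no-archive test would be that the archive checksum stays unset; a one-line extension of the test above (my addition, not part of the patch, assuming a document created without an archive file keeps archive_checksum as None):

    self.assertIsNone(Document.objects.get(pk=doc.pk).archive_checksum)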