mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Feature: page count (#7750)
--------- Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
@@ -387,6 +387,8 @@ def delete_pages(doc_ids: list[int], pages: list[int]):
|
||||
pdf.remove_unreferenced_resources()
|
||||
pdf.save()
|
||||
doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
|
||||
if doc.page_count is not None:
|
||||
doc.page_count = doc.page_count - len(pages)
|
||||
doc.save()
|
||||
update_document_archive_file.delay(document_id=doc.id)
|
||||
logger.info(f"Deleted pages {pages} from document {doc.id}")
|
||||
|
@@ -586,6 +586,7 @@ class ConsumerPlugin(
|
||||
date = None
|
||||
thumbnail = None
|
||||
archive_path = None
|
||||
page_count = None
|
||||
|
||||
try:
|
||||
self._send_progress(
|
||||
@@ -621,6 +622,7 @@ class ConsumerPlugin(
|
||||
)
|
||||
date = parse_date(self.filename, text)
|
||||
archive_path = document_parser.get_archive_path()
|
||||
page_count = document_parser.get_page_count(self.working_copy, mime_type)
|
||||
|
||||
except ParseError as e:
|
||||
document_parser.cleanup()
|
||||
@@ -662,7 +664,12 @@ class ConsumerPlugin(
|
||||
try:
|
||||
with transaction.atomic():
|
||||
# store the document.
|
||||
document = self._store(text=text, date=date, mime_type=mime_type)
|
||||
document = self._store(
|
||||
text=text,
|
||||
date=date,
|
||||
page_count=page_count,
|
||||
mime_type=mime_type,
|
||||
)
|
||||
|
||||
# If we get here, it was successful. Proceed with post-consume
|
||||
# hooks. If they fail, nothing will get changed.
|
||||
@@ -790,6 +797,7 @@ class ConsumerPlugin(
|
||||
self,
|
||||
text: str,
|
||||
date: Optional[datetime.datetime],
|
||||
page_count: Optional[int],
|
||||
mime_type: str,
|
||||
) -> Document:
|
||||
# If someone gave us the original filename, use it instead of doc.
|
||||
@@ -835,6 +843,7 @@ class ConsumerPlugin(
|
||||
created=create_date,
|
||||
modified=create_date,
|
||||
storage_type=storage_type,
|
||||
page_count=page_count,
|
||||
original_filename=self.filename,
|
||||
)
|
||||
|
||||
|
@@ -80,6 +80,7 @@ def get_schema():
|
||||
has_owner=BOOLEAN(),
|
||||
viewer_id=KEYWORD(commas=True),
|
||||
checksum=TEXT(),
|
||||
page_count=NUMERIC(sortable=True),
|
||||
original_filename=TEXT(sortable=True),
|
||||
is_shared=BOOLEAN(),
|
||||
)
|
||||
@@ -181,6 +182,7 @@ def update_document(writer: AsyncWriter, doc: Document):
|
||||
has_owner=doc.owner is not None,
|
||||
viewer_id=viewer_ids if viewer_ids else None,
|
||||
checksum=doc.checksum,
|
||||
page_count=doc.page_count,
|
||||
original_filename=doc.original_filename,
|
||||
is_shared=len(viewer_ids) > 0,
|
||||
)
|
||||
@@ -247,6 +249,7 @@ class DelayedQuery:
|
||||
"archive_serial_number": "asn",
|
||||
"num_notes": "num_notes",
|
||||
"owner": "owner",
|
||||
"page_count": "page_count",
|
||||
}
|
||||
|
||||
if field.startswith("-"):
|
||||
|
62
src/documents/migrations/1053_document_page_count.py
Normal file
62
src/documents/migrations/1053_document_page_count.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# Generated by Django 4.2.16 on 2024-09-21 15:44
|
||||
from pathlib import Path

import pikepdf
from django.conf import settings
from django.core.validators import MinValueValidator
from django.db import migrations
from django.db import models
from django.utils.termcolors import colorize as colourise
|
||||
|
||||
|
||||
def source_path(self):
    """
    Return the resolved path of a document's original file.

    Builds the path from ``settings.ORIGINALS_DIR`` and the document's
    ``filename``. Returns None when the document has no filename.
    """
    if not self.filename:
        return None

    return Path(settings.ORIGINALS_DIR / str(self.filename)).resolve()
|
||||
|
||||
|
||||
def add_number_of_pages_to_page_count(apps, schema_editor):
    """
    Data migration step: fill in ``page_count`` for stored PDF documents.

    Every document with mime type ``application/pdf`` is opened with pikepdf
    and the number of pages is saved on the document. One progress line is
    printed per document. If reading a file fails, the error is printed and
    the loop continues with the next document.
    """
    Document = apps.get_model("documents", "Document")

    # Nothing to backfill when there are no documents at all.
    if not Document.objects.all().exists():
        return

    pdf_documents = Document.objects.filter(mime_type="application/pdf")
    for doc in pdf_documents:
        bullet = colourise("*", fg="green")
        label = colourise("Calculating number of pages for", fg="white")
        name = colourise(doc.filename, fg="cyan")
        print(" {} {} {}".format(bullet, label, name))

        try:
            with pikepdf.Pdf.open(source_path(doc)) as pdf:
                pages = pdf.pages
                if pages is not None:
                    doc.page_count = len(pages)
                    doc.save()
        except Exception as e:  # pragma: no cover
            # Best effort: a single unreadable file must not abort the migration.
            print(f"Error retrieving number of pages for {doc.filename}: {e}")
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """
    Add the nullable ``page_count`` field to ``Document`` and backfill it.

    The schema change runs first, then the data step computes the page count
    of existing PDF documents.
    """

    dependencies = [
        ("documents", "1052_document_transaction_id"),
    ]

    operations = [
        migrations.AddField(
            model_name="document",
            name="page_count",
            # Mirror every option declared on the model field, including the
            # ones that do not touch the database (verbose name, help text,
            # validators). Django tracks them in migration state; leaving
            # them out makes makemigrations report a pending change.
            field=models.PositiveIntegerField(
                blank=False,
                help_text="The number of pages of the document.",
                null=True,
                unique=False,
                validators=[MinValueValidator(1)],
                verbose_name="page count",
                db_index=False,
            ),
        ),
        # The reverse data step is a no-op: rolling back drops the column.
        migrations.RunPython(
            add_number_of_pages_to_page_count,
            migrations.RunPython.noop,
        ),
    ]
|
@@ -205,6 +205,18 @@ class Document(SoftDeleteModel, ModelWithOwner):
|
||||
help_text=_("The checksum of the archived document."),
|
||||
)
|
||||
|
||||
page_count = models.PositiveIntegerField(
|
||||
_("page count"),
|
||||
blank=False,
|
||||
null=True,
|
||||
unique=False,
|
||||
db_index=False,
|
||||
validators=[MinValueValidator(1)],
|
||||
help_text=_(
|
||||
"The number of pages of the document.",
|
||||
),
|
||||
)
|
||||
|
||||
created = models.DateTimeField(_("created"), default=timezone.now, db_index=True)
|
||||
|
||||
modified = models.DateTimeField(
|
||||
@@ -414,6 +426,7 @@ class SavedView(ModelWithOwner):
|
||||
OWNER = ("owner", _("Owner"))
|
||||
SHARED = ("shared", _("Shared"))
|
||||
ASN = ("asn", _("ASN"))
|
||||
PAGE_COUNT = ("pagecount", _("Pages"))
|
||||
CUSTOM_FIELD = ("custom_field_%d", ("Custom Field"))
|
||||
|
||||
name = models.CharField(_("name"), max_length=128)
|
||||
|
@@ -367,6 +367,9 @@ class DocumentParser(LoggingMixin):
|
||||
def extract_metadata(self, document_path, mime_type):
|
||||
return []
|
||||
|
||||
def get_page_count(self, document_path, mime_type):
    """
    Return the number of pages of the given document.

    This default implementation does not inspect the file and always
    returns None, i.e. "page count unknown".
    """
    return None
|
||||
|
||||
def parse(self, document_path, mime_type, file_name=None):
|
||||
raise NotImplementedError
|
||||
|
||||
|
@@ -750,6 +750,7 @@ class DocumentSerializer(
|
||||
original_file_name = SerializerMethodField()
|
||||
archived_file_name = SerializerMethodField()
|
||||
created_date = serializers.DateField(required=False)
|
||||
page_count = SerializerMethodField()
|
||||
|
||||
custom_fields = CustomFieldInstanceSerializer(
|
||||
many=True,
|
||||
@@ -770,6 +771,9 @@ class DocumentSerializer(
|
||||
required=False,
|
||||
)
|
||||
|
||||
def get_page_count(self, obj):
    """
    Return the ``page_count`` attribute of ``obj`` unchanged.
    """
    return obj.page_count
|
||||
|
||||
def get_original_file_name(self, obj):
|
||||
return obj.original_filename
|
||||
|
||||
@@ -885,6 +889,7 @@ class DocumentSerializer(
|
||||
"notes",
|
||||
"custom_fields",
|
||||
"remove_inbox_tags",
|
||||
"page_count",
|
||||
)
|
||||
list_serializer_class = OwnedObjectListSerializer
|
||||
|
||||
|
@@ -389,6 +389,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
title="B",
|
||||
filename=sample2,
|
||||
mime_type="application/pdf",
|
||||
page_count=8,
|
||||
)
|
||||
self.doc2.archive_filename = sample2_archive
|
||||
self.doc2.save()
|
||||
@@ -681,14 +682,20 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
THEN:
|
||||
- Save should be called once
|
||||
- Archive file should be updated once
|
||||
- The document's page_count should be reduced by the number of deleted pages
|
||||
"""
|
||||
doc_ids = [self.doc2.id]
|
||||
initial_page_count = self.doc2.page_count
|
||||
pages = [1, 3]
|
||||
result = bulk_edit.delete_pages(doc_ids, pages)
|
||||
mock_pdf_save.assert_called_once()
|
||||
mock_update_archive_file.assert_called_once()
|
||||
self.assertEqual(result, "OK")
|
||||
|
||||
expected_page_count = initial_page_count - len(pages)
|
||||
self.doc2.refresh_from_db()
|
||||
self.assertEqual(self.doc2.page_count, expected_page_count)
|
||||
|
||||
@mock.patch("documents.tasks.update_document_archive_file.delay")
|
||||
@mock.patch("pikepdf.Pdf.save")
|
||||
def test_delete_pages_with_error(self, mock_pdf_save, mock_update_archive_file):
|
||||
|
59
src/documents/tests/test_migration_document_pages_count.py
Normal file
59
src/documents/tests/test_migration_document_pages_count.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from documents.tests.utils import TestMigrations
|
||||
|
||||
|
||||
def source_path_before(self):
    """
    Return the path of a document's original file as a plain string.

    Joins ``settings.ORIGINALS_DIR`` with the document's ``filename``.
    Returns None when the document has no filename.
    """
    if not self.filename:
        return None

    return os.path.join(settings.ORIGINALS_DIR, str(self.filename))
|
||||
|
||||
|
||||
class TestMigrateDocumentPageCount(TestMigrations):
    """
    Forward migration 1052 -> 1053 must populate page_count for a stored PDF.
    """

    migrate_from = "1052_document_transaction_id"
    migrate_to = "1053_document_page_count"

    def setUpBeforeMigration(self, apps):
        """
        GIVEN:
            - A PDF document created before the page_count field exists
            - The sample PDF copied to that document's original file path
        """
        Document = apps.get_model("documents", "Document")
        document = Document.objects.create(
            title="test1",
            mime_type="application/pdf",
            filename="file1.pdf",
        )
        self.doc_id = document.id

        sample_pdf = Path(__file__).parent / "samples" / "simple.pdf"
        shutil.copy(sample_pdf, source_path_before(document))

    def testDocumentPageCountMigrated(self):
        """
        THEN:
            - The migrated document has a page count of 1
        """
        Document = self.apps.get_model("documents", "Document")

        migrated = Document.objects.get(id=self.doc_id)
        self.assertEqual(migrated.page_count, 1)
|
||||
|
||||
|
||||
class TestMigrateDocumentPageCountBackwards(TestMigrations):
    """
    Reverse migration 1053 -> 1052 must remove the page_count field.
    """

    migrate_from = "1053_document_page_count"
    migrate_to = "1052_document_transaction_id"

    def setUpBeforeMigration(self, apps):
        """
        GIVEN:
            - A PDF document that already carries a page_count value
        """
        Document = apps.get_model("documents", "Document")
        doc = Document.objects.create(
            title="test1",
            mime_type="application/pdf",
            filename="file1.pdf",
            page_count=8,
        )
        self.doc_id = doc.id

    def test_remove_number_of_pages_to_page_count(self):
        """
        THEN:
            - The Document model no longer has a page_count field
        """
        Document = self.apps.get_model("documents", "Document")
        field_names = [field.name for field in Document._meta.get_fields()]
        # assertNotIn lists the actual field names on failure, whereas
        # assertFalse only reports "True is not false".
        self.assertNotIn("page_count", field_names)
|
@@ -361,6 +361,7 @@ class DocumentViewSet(
|
||||
"archive_serial_number",
|
||||
"num_notes",
|
||||
"owner",
|
||||
"page_count",
|
||||
)
|
||||
|
||||
def get_queryset(self):
|
||||
|
Reference in New Issue
Block a user