diff --git a/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.spec.ts b/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.spec.ts
index cb120bb64..ef36b0806 100644
--- a/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.spec.ts
+++ b/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.spec.ts
@@ -65,6 +65,7 @@ const savedView: SavedView = {
DisplayField.CORRESPONDENT,
DisplayField.DOCUMENT_TYPE,
DisplayField.STORAGE_PATH,
+ DisplayField.PAGE_COUNT,
`${DisplayField.CUSTOM_FIELD}11` as any,
`${DisplayField.CUSTOM_FIELD}15` as any,
],
@@ -344,6 +345,7 @@ describe('SavedViewWidgetComponent', () => {
expect(component.getColumnTitle(DisplayField.STORAGE_PATH)).toEqual(
'Storage path'
)
+ expect(component.getColumnTitle(DisplayField.PAGE_COUNT)).toEqual('Pages')
})
it('should get correct column title for custom field', () => {
diff --git a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html
index 1a8c7df82..f60056c42 100644
--- a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html
+++ b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html
@@ -111,6 +111,12 @@
}
}
+ @if (displayFields.includes(DisplayField.PAGE_COUNT) && document.page_count) {
+
+
+ {document.page_count, plural, =1 {1 page} other {{{document.page_count}} pages}}
+
+ }
@if (displayFields.includes(DisplayField.OWNER) && document.owner && document.owner !== settingsService.currentUser.id) {
{{document.owner | username}}
diff --git a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.spec.ts b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.spec.ts
index a3f047f03..efd5076be 100644
--- a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.spec.ts
+++ b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.spec.ts
@@ -31,6 +31,7 @@ const doc = {
correspondent: 8,
document_type: 10,
storage_path: null,
+ page_count: 8,
notes: [
{
id: 11,
@@ -80,6 +81,7 @@ describe('DocumentCardLargeComponent', () => {
it('should display a document', () => {
expect(fixture.nativeElement.textContent).toContain('Document 10')
expect(fixture.nativeElement.textContent).toContain('Cupcake ipsum')
+ expect(fixture.nativeElement.textContent).toContain('8 pages')
})
it('should show preview on mouseover after delay to preload content', fakeAsync(() => {
diff --git a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html
index 92449214e..26f71ee8b 100644
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html
@@ -88,6 +88,14 @@
}
+ @if (displayFields.includes(DisplayField.PAGE_COUNT) && document.page_count) {
+
+
+
+ {document.page_count, plural, =1 {1 page} other {{{document.page_count}} pages}}
+
+
+ }
@if (displayFields.includes(DisplayField.ASN) && document.archive_serial_number | isNumber) {
diff --git a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts
index fc15453be..b86453a25 100644
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts
@@ -34,6 +34,7 @@ const doc = {
correspondent: 8,
document_type: 10,
storage_path: null,
+ page_count: 12,
notes: [
{
id: 11,
@@ -91,6 +92,10 @@ describe('DocumentCardSmallComponent', () => {
fixture.detectChanges()
})
+ it('should display page count', () => {
+ // The `doc` fixture declared above sets page_count: 12; the card template's
+ // ICU plural renders any value other than 1 as "<n> pages".
+ expect(fixture.nativeElement.textContent).toContain('12 pages')
+ })
+
it('should display a document, limit tags to 5', () => {
expect(fixture.nativeElement.textContent).toContain('Document 10')
expect(
diff --git a/src-ui/src/app/components/document-list/document-list.component.html b/src-ui/src/app/components/document-list/document-list.component.html
index 368515970..8ca8e111d 100644
--- a/src-ui/src/app/components/document-list/document-list.component.html
+++ b/src-ui/src/app/components/document-list/document-list.component.html
@@ -246,6 +246,15 @@
(sort)="onSort($event)"
i18n>Added
}
+ @if (activeDisplayFields.includes(DisplayField.PAGE_COUNT)) {
+
Pages |
+ }
@if (activeDisplayFields.includes(DisplayField.SHARED)) {
Shared
@@ -330,6 +339,11 @@
{{d.added | customDate}}
}
+ @if (activeDisplayFields.includes(DisplayField.PAGE_COUNT)) {
+ |
+ {{ d.page_count }}
+ |
+ }
@if (activeDisplayFields.includes(DisplayField.SHARED)) {
@if (d.is_shared_by_requester) { Yes } @else { No }
diff --git a/src-ui/src/app/components/document-list/document-list.component.spec.ts b/src-ui/src/app/components/document-list/document-list.component.spec.ts
index 26758b3c0..ad85652b8 100644
--- a/src-ui/src/app/components/document-list/document-list.component.spec.ts
+++ b/src-ui/src/app/components/document-list/document-list.component.spec.ts
@@ -602,7 +602,7 @@ describe('DocumentListComponent', () => {
expect(
fixture.debugElement.queryAll(By.directive(SortableDirective))
- ).toHaveLength(9)
+ ).toHaveLength(10)
expect(component.notesEnabled).toBeTruthy()
settingsService.set(SETTINGS_KEYS.NOTES_ENABLED, false)
@@ -610,14 +610,14 @@ describe('DocumentListComponent', () => {
expect(component.notesEnabled).toBeFalsy()
expect(
fixture.debugElement.queryAll(By.directive(SortableDirective))
- ).toHaveLength(8)
+ ).toHaveLength(9)
// insufficient perms
jest.spyOn(permissionService, 'currentUserCan').mockReturnValue(false)
fixture.detectChanges()
expect(
fixture.debugElement.queryAll(By.directive(SortableDirective))
- ).toHaveLength(4)
+ ).toHaveLength(5)
})
it('should support toggle on document objects', () => {
diff --git a/src-ui/src/app/data/document.ts b/src-ui/src/app/data/document.ts
index 1571d2a53..0b630b8cd 100644
--- a/src-ui/src/app/data/document.ts
+++ b/src-ui/src/app/data/document.ts
@@ -26,6 +26,7 @@ export enum DisplayField {
OWNER = 'owner',
SHARED = 'shared',
ASN = 'asn',
+ PAGE_COUNT = 'pagecount',
}
export const DEFAULT_DISPLAY_FIELDS = [
@@ -73,6 +74,10 @@ export const DEFAULT_DISPLAY_FIELDS = [
id: DisplayField.ASN,
name: $localize`ASN`,
},
+ {
+ id: DisplayField.PAGE_COUNT,
+ name: $localize`Pages`,
+ },
]
export const DEFAULT_DASHBOARD_VIEW_PAGE_SIZE = 10
@@ -94,6 +99,7 @@ export const DOCUMENT_SORT_FIELDS = [
{ field: 'modified', name: $localize`Modified` },
{ field: 'num_notes', name: $localize`Notes` },
{ field: 'owner', name: $localize`Owner` },
+ { field: 'page_count', name: $localize`Pages` },
]
export const DOCUMENT_SORT_FIELDS_FULLTEXT = [
@@ -164,4 +170,6 @@ export interface Document extends ObjectWithPermissions {
// write-only field
remove_inbox_tags?: boolean
+
+ page_count?: number
}
diff --git a/src-ui/src/app/services/settings.service.ts b/src-ui/src/app/services/settings.service.ts
index 91d1cc320..c3ea3f856 100644
--- a/src-ui/src/app/services/settings.service.ts
+++ b/src-ui/src/app/services/settings.service.ts
@@ -345,6 +345,7 @@ export class SettingsService {
DisplayField.CREATED,
DisplayField.ADDED,
DisplayField.ASN,
+ DisplayField.PAGE_COUNT,
DisplayField.SHARED,
].includes(field.id)
) {
diff --git a/src/documents/bulk_edit.py b/src/documents/bulk_edit.py
index 1f7a2a403..4481b2b3f 100644
--- a/src/documents/bulk_edit.py
+++ b/src/documents/bulk_edit.py
@@ -387,6 +387,8 @@ def delete_pages(doc_ids: list[int], pages: list[int]):
pdf.remove_unreferenced_resources()
pdf.save()
doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
+ if doc.page_count is not None:
+ doc.page_count = doc.page_count - len(pages)
doc.save()
update_document_archive_file.delay(document_id=doc.id)
logger.info(f"Deleted pages {pages} from document {doc.id}")
diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index d90b88f5a..57277e4a6 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -586,6 +586,7 @@ class ConsumerPlugin(
date = None
thumbnail = None
archive_path = None
+ page_count = None
try:
self._send_progress(
@@ -621,6 +622,7 @@ class ConsumerPlugin(
)
date = parse_date(self.filename, text)
archive_path = document_parser.get_archive_path()
+ page_count = document_parser.get_page_count(self.working_copy, mime_type)
except ParseError as e:
document_parser.cleanup()
@@ -662,7 +664,12 @@ class ConsumerPlugin(
try:
with transaction.atomic():
# store the document.
- document = self._store(text=text, date=date, mime_type=mime_type)
+ document = self._store(
+ text=text,
+ date=date,
+ page_count=page_count,
+ mime_type=mime_type,
+ )
# If we get here, it was successful. Proceed with post-consume
# hooks. If they fail, nothing will get changed.
@@ -790,6 +797,7 @@ class ConsumerPlugin(
self,
text: str,
date: Optional[datetime.datetime],
+ page_count: Optional[int],
mime_type: str,
) -> Document:
# If someone gave us the original filename, use it instead of doc.
@@ -835,6 +843,7 @@ class ConsumerPlugin(
created=create_date,
modified=create_date,
storage_type=storage_type,
+ page_count=page_count,
original_filename=self.filename,
)
diff --git a/src/documents/index.py b/src/documents/index.py
index d95a80213..03b8c4f35 100644
--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -80,6 +80,7 @@ def get_schema():
has_owner=BOOLEAN(),
viewer_id=KEYWORD(commas=True),
checksum=TEXT(),
+ page_count=NUMERIC(sortable=True),
original_filename=TEXT(sortable=True),
is_shared=BOOLEAN(),
)
@@ -181,6 +182,7 @@ def update_document(writer: AsyncWriter, doc: Document):
has_owner=doc.owner is not None,
viewer_id=viewer_ids if viewer_ids else None,
checksum=doc.checksum,
+ page_count=doc.page_count,
original_filename=doc.original_filename,
is_shared=len(viewer_ids) > 0,
)
@@ -247,6 +249,7 @@ class DelayedQuery:
"archive_serial_number": "asn",
"num_notes": "num_notes",
"owner": "owner",
+ "page_count": "page_count",
}
if field.startswith("-"):
diff --git a/src/documents/migrations/1053_document_page_count.py b/src/documents/migrations/1053_document_page_count.py
new file mode 100644
index 000000000..13549e00f
--- /dev/null
+++ b/src/documents/migrations/1053_document_page_count.py
@@ -0,0 +1,62 @@
+# Generated by Django 4.2.16 on 2024-09-21 15:44
+from pathlib import Path
+
+import pikepdf
+from django.conf import settings
+from django.core.validators import MinValueValidator
+from django.db import migrations
+from django.db import models
+from django.utils.termcolors import colorize as colourise
+
+
+def source_path(self):
+    """
+    Resolve the absolute path of a document's original file.
+
+    Written as a free function taking the document row, because the
+    migration works on the model obtained from ``apps.get_model``.
+
+    :param self: document row exposing ``filename``
+    :return: resolved ``Path`` of ``settings.ORIGINALS_DIR / filename``
+    :raises ValueError: if the document has no filename
+    """
+    if not self.filename:
+        # The previous version fell through to an unbound ``fname`` here and
+        # failed with an opaque UnboundLocalError.
+        raise ValueError("document has no filename")
+
+    return Path(settings.ORIGINALS_DIR / str(self.filename)).resolve()
+
+
+def add_number_of_pages_to_page_count(apps, schema_editor):
+    """
+    Backfill ``page_count`` for documents that are already stored as PDF.
+
+    Every document with mime type ``application/pdf`` has its original file
+    opened with pikepdf and the page total written to the new column.  A
+    failure for one file is printed and skipped so that a single unreadable
+    document does not abort the whole migration.
+
+    :param apps: historical app registry supplied by ``RunPython``
+    :param schema_editor: unused, required by the ``RunPython`` signature
+    """
+    Document = apps.get_model("documents", "Document")
+
+    # An empty queryset simply yields no iterations, so no separate
+    # existence query is needed before the loop.
+    for doc in Document.objects.filter(mime_type="application/pdf"):
+        print(
+            " {} {} {}".format(
+                colourise("*", fg="green"),
+                colourise("Calculating number of pages for", fg="white"),
+                colourise(doc.filename, fg="cyan"),
+            ),
+        )
+
+        try:
+            with pikepdf.Pdf.open(source_path(doc)) as pdf:
+                doc.page_count = len(pdf.pages)
+            # Persist only the new column: a plain save() would rewrite every
+            # field of the row (and re-run any auto-updating field logic).
+            doc.save(update_fields=["page_count"])
+        except Exception as e:  # pragma: no cover
+            print(f"Error retrieving number of pages for {doc.filename}: {e}")
+
+
+class Migration(migrations.Migration):
+    """Add the nullable ``Document.page_count`` column and backfill it for PDFs."""
+
+    dependencies = [
+        ("documents", "1052_document_transaction_id"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="document",
+            name="page_count",
+            # Keep this definition in step with the model field (verbose name,
+            # help text and validator included); otherwise the migration state
+            # differs from the model and makemigrations reports a pending change.
+            field=models.PositiveIntegerField(
+                blank=False,
+                db_index=False,
+                help_text="The number of pages of the document.",
+                null=True,
+                unique=False,
+                validators=[MinValueValidator(1)],
+                verbose_name="page count",
+            ),
+        ),
+        # Nothing to undo for the data step: reversing AddField drops the
+        # column together with the backfilled values.
+        migrations.RunPython(
+            add_number_of_pages_to_page_count,
+            migrations.RunPython.noop,
+        ),
+    ]
diff --git a/src/documents/models.py b/src/documents/models.py
index 24e8c2b26..772e10fde 100644
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -205,6 +205,18 @@ class Document(SoftDeleteModel, ModelWithOwner):
help_text=_("The checksum of the archived document."),
)
+ page_count = models.PositiveIntegerField(
+ _("page count"),
+ blank=False,
+ null=True,
+ unique=False,
+ db_index=False,
+ validators=[MinValueValidator(1)],
+ help_text=_(
+ "The number of pages of the document.",
+ ),
+ )
+
created = models.DateTimeField(_("created"), default=timezone.now, db_index=True)
modified = models.DateTimeField(
@@ -414,6 +426,7 @@ class SavedView(ModelWithOwner):
OWNER = ("owner", _("Owner"))
SHARED = ("shared", _("Shared"))
ASN = ("asn", _("ASN"))
+ PAGE_COUNT = ("pagecount", _("Pages"))
CUSTOM_FIELD = ("custom_field_%d", ("Custom Field"))
name = models.CharField(_("name"), max_length=128)
diff --git a/src/documents/parsers.py b/src/documents/parsers.py
index 1297162e2..63599c0c5 100644
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -367,6 +367,9 @@ class DocumentParser(LoggingMixin):
def extract_metadata(self, document_path, mime_type):
return []
+ def get_page_count(self, document_path, mime_type):
+ """
+ Report the number of pages of the document at ``document_path``.
+
+ The base implementation knows nothing about page structure and always
+ returns ``None``; parsers that can count pages override this method.
+ """
+ return None
+
def parse(self, document_path, mime_type, file_name=None):
raise NotImplementedError
diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py
index 5218cbf8a..737d1256f 100644
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -750,6 +750,7 @@ class DocumentSerializer(
original_file_name = SerializerMethodField()
archived_file_name = SerializerMethodField()
created_date = serializers.DateField(required=False)
+ page_count = SerializerMethodField()
custom_fields = CustomFieldInstanceSerializer(
many=True,
@@ -770,6 +771,9 @@ class DocumentSerializer(
required=False,
)
+ def get_page_count(self, obj):
+ """
+ Getter backing the ``page_count`` ``SerializerMethodField``: returns the
+ value stored on the document (may be ``None``).
+ """
+ return obj.page_count
+
def get_original_file_name(self, obj):
return obj.original_filename
@@ -885,6 +889,7 @@ class DocumentSerializer(
"notes",
"custom_fields",
"remove_inbox_tags",
+ "page_count",
)
list_serializer_class = OwnedObjectListSerializer
diff --git a/src/documents/tests/test_bulk_edit.py b/src/documents/tests/test_bulk_edit.py
index fed93cd01..d80116a80 100644
--- a/src/documents/tests/test_bulk_edit.py
+++ b/src/documents/tests/test_bulk_edit.py
@@ -389,6 +389,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
title="B",
filename=sample2,
mime_type="application/pdf",
+ page_count=8,
)
self.doc2.archive_filename = sample2_archive
self.doc2.save()
@@ -681,14 +682,20 @@ class TestPDFActions(DirectoriesMixin, TestCase):
THEN:
- Save should be called once
- Archive file should be updated once
+ - The document's page_count should be reduced by the number of deleted pages
"""
doc_ids = [self.doc2.id]
+ initial_page_count = self.doc2.page_count
pages = [1, 3]
result = bulk_edit.delete_pages(doc_ids, pages)
mock_pdf_save.assert_called_once()
mock_update_archive_file.assert_called_once()
self.assertEqual(result, "OK")
+ expected_page_count = initial_page_count - len(pages)
+ self.doc2.refresh_from_db()
+ self.assertEqual(self.doc2.page_count, expected_page_count)
+
@mock.patch("documents.tasks.update_document_archive_file.delay")
@mock.patch("pikepdf.Pdf.save")
def test_delete_pages_with_error(self, mock_pdf_save, mock_update_archive_file):
diff --git a/src/documents/tests/test_migration_document_pages_count.py b/src/documents/tests/test_migration_document_pages_count.py
new file mode 100644
index 000000000..e656bf1b8
--- /dev/null
+++ b/src/documents/tests/test_migration_document_pages_count.py
@@ -0,0 +1,59 @@
+import os
+import shutil
+from pathlib import Path
+
+from django.conf import settings
+
+from documents.tests.utils import TestMigrations
+
+
+def source_path_before(self):
+    """
+    Build the path of a document's original file inside ``ORIGINALS_DIR``.
+
+    :param self: document row exposing ``filename``
+    :return: ``ORIGINALS_DIR`` joined with the filename, as a string
+    :raises ValueError: if the document has no filename
+    """
+    if not self.filename:
+        # Previously ``fname`` stayed unbound here, which surfaced as an
+        # UnboundLocalError instead of a clear message.
+        raise ValueError("document has no filename")
+
+    return os.path.join(settings.ORIGINALS_DIR, str(self.filename))
+
+
+class TestMigrateDocumentPageCount(TestMigrations):
+    """Forward migration: an existing one-page PDF gets ``page_count`` backfilled."""
+
+    migrate_from = "1052_document_transaction_id"
+    migrate_to = "1053_document_page_count"
+
+    def setUpBeforeMigration(self, apps):
+        # Create the row first, then place a real PDF where the migration
+        # will look for the original file.
+        document_model = apps.get_model("documents", "Document")
+        stored = document_model.objects.create(
+            title="test1",
+            mime_type="application/pdf",
+            filename="file1.pdf",
+        )
+        self.doc_id = stored.id
+
+        sample_pdf = Path(__file__).parent / "samples" / "simple.pdf"
+        destination = source_path_before(stored)
+        shutil.copy(sample_pdf, destination)
+
+    def testDocumentPageCountMigrated(self):
+        document_model = self.apps.get_model("documents", "Document")
+        migrated = document_model.objects.get(id=self.doc_id)
+
+        self.assertEqual(migrated.page_count, 1)
+
+
+class TestMigrateDocumentPageCountBackwards(TestMigrations):
+    """Reverse migration: the ``page_count`` field is gone from the model again."""
+
+    migrate_from = "1053_document_page_count"
+    migrate_to = "1052_document_transaction_id"
+
+    def setUpBeforeMigration(self, apps):
+        # Seed a document that carries a page count before migrating back.
+        Document = apps.get_model("documents", "Document")
+        doc = Document.objects.create(
+            title="test1",
+            mime_type="application/pdf",
+            filename="file1.pdf",
+            page_count=8,
+        )
+        self.doc_id = doc.id
+
+    def test_remove_number_of_pages_to_page_count(self):
+        Document = self.apps.get_model("documents", "Document")
+        # assertNotIn reports the offending container on failure, unlike
+        # assertFalse(... in ...), which only says "True is not false".
+        self.assertNotIn(
+            "page_count",
+            [field.name for field in Document._meta.get_fields()],
+        )
diff --git a/src/documents/views.py b/src/documents/views.py
index a8a5bf97d..723eddd00 100644
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -361,6 +361,7 @@ class DocumentViewSet(
"archive_serial_number",
"num_notes",
"owner",
+ "page_count",
)
def get_queryset(self):
diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py
index 4e92990f1..5731fe037 100644
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -41,6 +41,15 @@ class RasterisedDocumentParser(DocumentParser):
"""
return OcrConfig()
+ def get_page_count(self, document_path, mime_type):
+     """
+     Count the pages of a PDF using pikepdf.
+
+     :param document_path: path of the file to inspect
+     :param mime_type: mime type of the file; only ``application/pdf`` is counted
+     :return: number of pages, or ``None`` for non-PDF input or when the
+         file cannot be read
+     """
+     page_count = None
+     if mime_type == "application/pdf":
+         try:
+             import pikepdf
+
+             with pikepdf.Pdf.open(document_path) as pdf:
+                 page_count = len(pdf.pages)
+         except Exception as e:
+             # The page count is optional metadata (the column is nullable),
+             # so an unreadable PDF must not abort the whole consumption.
+             self.log.warning(
+                 f"Unable to determine PDF page count {document_path}: {e}",
+             )
+     return page_count
+
def extract_metadata(self, document_path, mime_type):
result = []
if mime_type == "application/pdf":
diff --git a/src/paperless_tesseract/tests/test_parser.py b/src/paperless_tesseract/tests/test_parser.py
index d63d965c5..45a5939ab 100644
--- a/src/paperless_tesseract/tests/test_parser.py
+++ b/src/paperless_tesseract/tests/test_parser.py
@@ -57,6 +57,30 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertContainsStrings(text.strip(), ["This is a test document."])
+ def test_get_page_count(self):
+     """
+     GIVEN:
+         - PDF file with a single page
+         - PDF file with multiple pages
+     WHEN:
+         - The number of pages is requested
+     THEN:
+         - The method returns 1 as the expected number of pages
+         - The method returns the correct number of pages (6)
+     """
+     parser = RasterisedDocumentParser(uuid.uuid4())
+
+     # (sample file, expected page total), checked in this order
+     expectations = (
+         ("simple-digital.pdf", 1),
+         ("multi-page-mixed.pdf", 6),
+     )
+     for sample_name, expected_pages in expectations:
+         sample_path = os.path.join(self.SAMPLE_FILES, sample_name)
+         counted = parser.get_page_count(sample_path, "application/pdf")
+         self.assertEqual(counted, expected_pages)
+
def test_thumbnail(self):
parser = RasterisedDocumentParser(uuid.uuid4())
thumb = parser.get_thumbnail(
|