mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Feature: page count (#7750)
--------- Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
This commit is contained in:
@@ -41,6 +41,15 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
"""
|
||||
return OcrConfig()
|
||||
|
||||
def get_page_count(self, document_path, mime_type):
|
||||
page_count = None
|
||||
if mime_type == "application/pdf":
|
||||
import pikepdf
|
||||
|
||||
with pikepdf.Pdf.open(document_path) as pdf:
|
||||
page_count = len(pdf.pages)
|
||||
return page_count
|
||||
|
||||
def extract_metadata(self, document_path, mime_type):
|
||||
result = []
|
||||
if mime_type == "application/pdf":
|
||||
|
@@ -57,6 +57,30 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
|
||||
self.assertContainsStrings(text.strip(), ["This is a test document."])
|
||||
|
||||
def test_get_page_count(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- PDF file with a single page
|
||||
- PDF file with multiple pages
|
||||
WHEN:
|
||||
- The number of pages is requested
|
||||
THEN:
|
||||
- The method returns 1 as the expected number of pages
|
||||
- The method returns the correct number of pages (6)
|
||||
"""
|
||||
parser = RasterisedDocumentParser(uuid.uuid4())
|
||||
page_count = parser.get_page_count(
|
||||
os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
|
||||
"application/pdf",
|
||||
)
|
||||
self.assertEqual(page_count, 1)
|
||||
|
||||
page_count = parser.get_page_count(
|
||||
os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
|
||||
"application/pdf",
|
||||
)
|
||||
self.assertEqual(page_count, 6)
|
||||
|
||||
def test_thumbnail(self):
|
||||
parser = RasterisedDocumentParser(uuid.uuid4())
|
||||
thumb = parser.get_thumbnail(
|
||||
|
Reference in New Issue
Block a user