mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Fix: handle page count exception for pw-protected files (#8240)
This commit is contained in:
parent
c22a80abd3
commit
a6f4c75a72
@@ -43,10 +43,15 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
def get_page_count(self, document_path, mime_type):
|
def get_page_count(self, document_path, mime_type):
|
||||||
page_count = None
|
page_count = None
|
||||||
if mime_type == "application/pdf":
|
if mime_type == "application/pdf":
|
||||||
|
try:
|
||||||
import pikepdf
|
import pikepdf
|
||||||
|
|
||||||
with pikepdf.Pdf.open(document_path) as pdf:
|
with pikepdf.Pdf.open(document_path) as pdf:
|
||||||
page_count = len(pdf.pages)
|
page_count = len(pdf.pages)
|
||||||
|
except Exception as e:
|
||||||
|
self.log.warning(
|
||||||
|
f"Unable to determine PDF page count {document_path}: {e}",
|
||||||
|
)
|
||||||
return page_count
|
return page_count
|
||||||
|
|
||||||
def extract_metadata(self, document_path, mime_type):
|
def extract_metadata(self, document_path, mime_type):
|
||||||
|
@@ -81,6 +81,24 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
)
|
)
|
||||||
self.assertEqual(page_count, 6)
|
self.assertEqual(page_count, 6)
|
||||||
|
|
||||||
|
def test_get_page_count_password_protected(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Password protected PDF file
|
||||||
|
WHEN:
|
||||||
|
- The number of pages is requested
|
||||||
|
THEN:
|
||||||
|
- The method returns None
|
||||||
|
"""
|
||||||
|
parser = RasterisedDocumentParser(uuid.uuid4())
|
||||||
|
with self.assertLogs("paperless.parsing.tesseract", level="WARNING") as cm:
|
||||||
|
page_count = parser.get_page_count(
|
||||||
|
os.path.join(self.SAMPLE_FILES, "password-protected.pdf"),
|
||||||
|
"application/pdf",
|
||||||
|
)
|
||||||
|
self.assertEqual(page_count, None)
|
||||||
|
self.assertIn("Unable to determine PDF page count", cm.output[0])
|
||||||
|
|
||||||
def test_thumbnail(self):
|
def test_thumbnail(self):
|
||||||
parser = RasterisedDocumentParser(uuid.uuid4())
|
parser = RasterisedDocumentParser(uuid.uuid4())
|
||||||
thumb = parser.get_thumbnail(
|
thumb = parser.get_thumbnail(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user