mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 03:16:10 -06:00 
			
		
		
		
	Fix: handle page count exception for pw-protected files (#8240)
This commit is contained in:
		@@ -43,10 +43,15 @@ class RasterisedDocumentParser(DocumentParser):
 | 
			
		||||
    def get_page_count(self, document_path, mime_type):
        """
        Return the number of pages in the document, if it can be determined.

        Only PDF documents ("application/pdf") are inspected; any other
        mime type yields None.

        Determining the count is best-effort: if pikepdf cannot be imported
        or the file cannot be opened (for example a password protected
        PDF), a warning is logged and None is returned instead of letting
        the exception propagate to the caller.
        """
        page_count = None
        if mime_type == "application/pdf":
            try:
                # Imported lazily and inside the try so that an import
                # failure is handled the same way as an unreadable file.
                import pikepdf

                with pikepdf.Pdf.open(document_path) as pdf:
                    page_count = len(pdf.pages)
            except Exception as e:
                # Deliberately broad: the page count is optional metadata
                # and must never abort parsing of the document.
                self.log.warning(
                    f"Unable to determine PDF page count {document_path}: {e}",
                )
        return page_count
 | 
			
		||||
 | 
			
		||||
    def extract_metadata(self, document_path, mime_type):
 | 
			
		||||
 
 | 
			
		||||
@@ -81,6 +81,24 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
 | 
			
		||||
        )
 | 
			
		||||
        self.assertEqual(page_count, 6)
 | 
			
		||||
 | 
			
		||||
    def test_get_page_count_password_protected(self):
        """
        GIVEN:
            - Password protected PDF file
        WHEN:
            - The number of pages is requested
        THEN:
            - The method returns None
            - A warning about the undetermined page count is logged
        """
        parser = RasterisedDocumentParser(uuid.uuid4())
        protected_pdf = os.path.join(self.SAMPLE_FILES, "password-protected.pdf")

        with self.assertLogs("paperless.parsing.tesseract", level="WARNING") as logs:
            result = parser.get_page_count(protected_pdf, "application/pdf")

            # Checked inside the context so the captured output is inspected
            # at the same point as before.
            self.assertIsNone(result)
            self.assertIn("Unable to determine PDF page count", logs.output[0])
 | 
			
		||||
 | 
			
		||||
    def test_thumbnail(self):
 | 
			
		||||
        parser = RasterisedDocumentParser(uuid.uuid4())
 | 
			
		||||
        thumb = parser.get_thumbnail(
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user