mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Reverts the change around skip_noarchive to align with how it is documented to work
This commit is contained in:

committed by
Trenton H

parent
53e8d84af2
commit
d1aa08850d
@@ -249,16 +249,22 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
|
||||
if mime_type == "application/pdf":
|
||||
text_original = self.extract_text(None, document_path)
|
||||
original_has_text = text_original and len(text_original) > 50
|
||||
original_has_text = text_original is not None and len(text_original) > 50
|
||||
else:
|
||||
text_original = None
|
||||
original_has_text = False
|
||||
|
||||
# If the original has text, and the user doesn't want an archive,
|
||||
# we're done here
|
||||
if settings.OCR_MODE == "skip_noarchive" and original_has_text:
|
||||
self.log("debug", "Document has text, skipping OCRmyPDF entirely.")
|
||||
self.text = text_original
|
||||
return
|
||||
|
||||
# Either no text was in the original or there should be an archive
|
||||
# file created, so OCR the file and create an archive with any
|
||||
# test located via OCR
|
||||
|
||||
import ocrmypdf
|
||||
from ocrmypdf import InputFileError, EncryptedPdfError
|
||||
|
||||
@@ -276,9 +282,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
self.log("debug", f"Calling OCRmyPDF with args: {args}")
|
||||
ocrmypdf.ocr(**args)
|
||||
|
||||
# Only create archive file if archiving isn't being skipped
|
||||
if settings.OCR_MODE != "skip_noarchive":
|
||||
self.archive_path = archive_path
|
||||
self.archive_path = archive_path
|
||||
|
||||
self.text = self.extract_text(sidecar_file, archive_path)
|
||||
|
||||
|
Reference in New Issue
Block a user