Add a setting to disable creating an archive file

This commit is contained in:
Brandon Rothweiler
2023-02-22 15:27:17 -05:00
parent 782db3f324
commit 93a6391f96
4 changed files with 65 additions and 5 deletions

View File

@@ -192,7 +192,7 @@ class RasterisedDocumentParser(DocumentParser):
if settings.OCR_MODE == "force" or safe_fallback:
ocrmypdf_args["force_ocr"] = True
elif settings.OCR_MODE in ["skip", "skip_noarchive"]:
elif settings.OCR_MODE in ["skip", "skip_noarchive", "skip_neverarchive"]:
ocrmypdf_args["skip_text"] = True
elif settings.OCR_MODE == "redo":
ocrmypdf_args["redo_ocr"] = True
@@ -294,7 +294,10 @@ class RasterisedDocumentParser(DocumentParser):
# If the original has text, and the user doesn't want an archive,
# we're done here
if settings.OCR_MODE == "skip_noarchive" and original_has_text:
if (
settings.OCR_MODE in ["skip_noarchive", "skip_neverarchive"]
and original_has_text
):
self.log("debug", "Document has text, skipping OCRmyPDF entirely.")
self.text = text_original
return
@@ -320,7 +323,9 @@ class RasterisedDocumentParser(DocumentParser):
self.log("debug", f"Calling OCRmyPDF with args: {args}")
ocrmypdf.ocr(**args)
self.archive_path = archive_path
# Only record the archive path if archiving isn't being skipped
if settings.OCR_MODE != "skip_neverarchive":
self.archive_path = archive_path
self.text = self.extract_text(sidecar_file, archive_path)