Merge pull request #421 from ddddavidmartin/clarify_forgiving_ocr_handling

Clarify forgiving ocr handling
2026-02-03 23:22:42 -06:00 · 2018-10-08 09:35:57 +00:00
parent 8dc355a66f 818780a191
commit bd95804fbf
2 changed files with 9 additions and 1 deletion
--- a/paperless.conf.example
+++ b/paperless.conf.example
@@ -188,6 +188,11 @@ PAPERLESS_DEBUG="false"
 #PAPERLESS_CONSUMER_LOOP_TIME=10
+# By default Paperless stops consuming a document if no language can be detected.
+# Set to true to consume documents even if the language detection fails.
+#PAPERLESS_FORGIVING_OCR="false"
 ###############################################################################
 ####                            Interface                                  ####
 ###############################################################################
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -153,7 +153,10 @@ class RasterisedDocumentParser(DocumentParser):
                )
                raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
                return raw_text
-            raise OCRError("Language detection failed")
+            error_msg = ("Language detection failed. Set "
+                         "PAPERLESS_FORGIVING_OCR in config file to continue "
+                         "anyway.")
+            raise OCRError(error_msg)
        if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
            raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)