From b0afa37ec14913819614d245e93d947198f40dbe Mon Sep 17 00:00:00 2001 From: David Martin Date: Mon, 8 Oct 2018 19:37:05 +1100 Subject: [PATCH 1/2] Mention FORGIVING_OCR config option when language detection fails. It is not obvious that the PAPERLESS_FORGIVING_OCR option allows document consumption to proceed even if no language can be detected. Mentioning it in the actual error message in the log seems like the best way to make it clear. --- src/paperless_tesseract/parsers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index dc5dbd637..ffa2727e5 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -153,7 +153,10 @@ class RasterisedDocumentParser(DocumentParser): ) raw_text = self._assemble_ocr_sections(imgs, middle, raw_text) return raw_text - raise OCRError("Language detection failed") + error_msg = ("Language detection failed. Set " + "PAPERLESS_FORGIVING_OCR in config file to continue " + "anyway.") + raise OCRError(error_msg) if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE: raw_text = self._assemble_ocr_sections(imgs, middle, raw_text) From 8cf32d2a5a4d51b68f39970443d8aece6270d499 Mon Sep 17 00:00:00 2001 From: David Martin Date: Mon, 8 Oct 2018 19:38:38 +1100 Subject: [PATCH 2/2] Add PAPERLESS_FORGIVING_OCR option to example config. Having it in the example config makes it clearer that the option exists. --- paperless.conf.example | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/paperless.conf.example b/paperless.conf.example index 3604505cb..11e6d905b 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -188,6 +188,11 @@ PAPERLESS_DEBUG="false" #PAPERLESS_CONSUMER_LOOP_TIME=10 +# By default Paperless stops consuming a document if no language can be detected. +# Set to true to consume documents even if the language detection fails. 
+#PAPERLESS_FORGIVING_OCR="false" + + ############################################################################### #### Interface #### ###############################################################################