Merge pull request #421 from ddddavidmartin/clarify_forgiving_ocr_handling

Clarify forgiving ocr handling
This commit is contained in:
Daniel Quinn 2018-10-08 09:35:57 +00:00 committed by GitHub
commit bd95804fbf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 1 deletions

View File

@ -188,6 +188,11 @@ PAPERLESS_DEBUG="false"
#PAPERLESS_CONSUMER_LOOP_TIME=10
# By default Paperless stops consuming a document if no language can be detected.
# Set to "true" to consume documents even if language detection fails.
#PAPERLESS_FORGIVING_OCR="false"
###############################################################################
#### Interface ####
###############################################################################

View File

@ -153,7 +153,10 @@ class RasterisedDocumentParser(DocumentParser):
)
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
return raw_text
raise OCRError("Language detection failed")
error_msg = ("Language detection failed. Set "
"PAPERLESS_FORGIVING_OCR in config file to continue "
"anyway.")
raise OCRError(error_msg)
if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)