From fad466477b3ccf5e6f433871e2fd89a840b738eb Mon Sep 17 00:00:00 2001
From: Daniel Quinn
Date: Thu, 3 Mar 2016 18:18:48 +0000
Subject: [PATCH] More verbose error logging

---
 src/documents/consumer.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index f3d5b71cb..5617ed550 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -123,9 +123,9 @@ class Consumer(object):
             try:
                 text = self._get_ocr(pngs)
                 self._store(text, doc)
-            except OCRError:
+            except OCRError as e:
                 self._ignore.append(doc)
-                self.log("error", "OCR FAILURE: {}".format(doc))
+                self.log("error", "OCR FAILURE for {}: {}".format(doc, e))
                 self._cleanup_tempdir(tempdir)
                 continue
             else:
@@ -165,7 +165,7 @@ class Consumer(object):
         """
 
         if not pngs:
-            raise OCRError
+            raise OCRError("No images found")
 
         self.log("info", "OCRing the document")
 
@@ -186,7 +186,7 @@ class Consumer(object):
                 )
                 raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
                 return raw_text
-            raise OCRError
+            raise OCRError("Language detection failed")
 
         if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
             raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
@@ -205,7 +205,10 @@ class Consumer(object):
                 )
                 raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
                 return raw_text
-            raise OCRError
+            raise OCRError(
+                "The guessed language is not available in this instance of "
+                "Tesseract."
+            )
 
     def _assemble_ocr_sections(self, pngs, middle, text):
         """