From fad466477b3ccf5e6f433871e2fd89a840b738eb Mon Sep 17 00:00:00 2001
From: Daniel Quinn <code@danielquinn.org>
Date: Thu, 3 Mar 2016 18:18:48 +0000
Subject: [PATCH] More verbose error logging

---
 src/documents/consumer.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index f3d5b71cb..5617ed550 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -123,9 +123,9 @@ class Consumer(object):
             try:
                 text = self._get_ocr(pngs)
                 self._store(text, doc)
-            except OCRError:
+            except OCRError as e:
                 self._ignore.append(doc)
-                self.log("error", "OCR FAILURE: {}".format(doc))
+                self.log("error", "OCR FAILURE for {}: {}".format(doc, e))
                 self._cleanup_tempdir(tempdir)
                 continue
             else:
@@ -165,7 +165,7 @@ class Consumer(object):
         """
 
         if not pngs:
-            raise OCRError
+            raise OCRError("No images found")
 
         self.log("info", "OCRing the document")
 
@@ -186,7 +186,7 @@ class Consumer(object):
                 )
                 raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
                 return raw_text
-            raise OCRError
+            raise OCRError("Language detection failed")
 
         if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
             raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
@@ -205,7 +205,10 @@ class Consumer(object):
                 )
                 raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
                 return raw_text
-            raise OCRError
+            raise OCRError(
+                "The guessed language is not available in this instance of "
+                "Tesseract."
+            )
 
     def _assemble_ocr_sections(self, pngs, middle, text):
         """