More verbose error logging

This commit is contained in:
Daniel Quinn 2016-03-03 18:18:48 +00:00
parent 55dcbcc47f
commit fad466477b

View File

@@ -123,9 +123,9 @@ class Consumer(object):
try: try:
text = self._get_ocr(pngs) text = self._get_ocr(pngs)
self._store(text, doc) self._store(text, doc)
except OCRError: except OCRError as e:
self._ignore.append(doc) self._ignore.append(doc)
self.log("error", "OCR FAILURE: {}".format(doc)) self.log("error", "OCR FAILURE for {}: {}".format(doc, e))
self._cleanup_tempdir(tempdir) self._cleanup_tempdir(tempdir)
continue continue
else: else:
@@ -165,7 +165,7 @@ class Consumer(object):
""" """
if not pngs: if not pngs:
raise OCRError raise OCRError("No images found")
self.log("info", "OCRing the document") self.log("info", "OCRing the document")
@@ -186,7 +186,7 @@ class Consumer(object):
) )
raw_text = self._assemble_ocr_sections(pngs, middle, raw_text) raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
return raw_text return raw_text
raise OCRError raise OCRError("Language detection failed")
if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE: if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
raw_text = self._assemble_ocr_sections(pngs, middle, raw_text) raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
@@ -205,7 +205,10 @@ class Consumer(object):
) )
raw_text = self._assemble_ocr_sections(pngs, middle, raw_text) raw_text = self._assemble_ocr_sections(pngs, middle, raw_text)
return raw_text return raw_text
raise OCRError raise OCRError(
"The guessed language is not available in this instance of "
"Tesseract."
)
def _assemble_ocr_sections(self, pngs, middle, text): def _assemble_ocr_sections(self, pngs, middle, text):
""" """