From 3a7923e32dba6d76949788cecd361e6f19df04d4 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 21 Feb 2016 02:24:05 +0000 Subject: [PATCH] Moved pyocr.get_available_tools() into a method --- .travis.yml | 3 ++- src/documents/consumer.py | 27 +++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6d3d5d217..dcaaeab8d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,8 @@ matrix: env: TOXENV=py34 - python: 3.5 env: TOXENV=py35 - - env: TOXENV=pep8 + - python: 3.5 + env: TOXENV=pep8 install: - pip install --requirement requirements.txt diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 6cf3b3d9d..2bd47c6da 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -26,18 +26,6 @@ from .models import Sender, Tag, Document from .languages import ISO639 -def image_to_string(args): - self, png, lang = args - with Image.open(os.path.join(self.SCRATCH, png)) as f: - if self.OCR.can_detect_orientation(): - try: - orientation = self.OCR.detect_orientation(f, lang=lang) - f = f.rotate(orientation["angle"], expand=1) - except TesseractError: - pass - return self.OCR.image_to_string(f, lang=lang) - - class OCRError(Exception): pass @@ -61,7 +49,6 @@ class Consumer(object): CONSUME = settings.CONSUMPTION_DIR THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None - OCR = pyocr.get_available_tools()[0] DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE REGEX_TITLE = re.compile( @@ -239,12 +226,24 @@ class Consumer(object): with Pool(processes=self.THREADS) as pool: r = pool.map( - image_to_string, itertools.product([self], pngs, [lang])) + self.image_to_string, itertools.product(pngs, [lang])) r = " ".join(r) # Strip out excess white space to allow matching to go smoother return re.sub(r"\s+", " ", r) + def image_to_string(self, args): + png, lang = args + ocr = pyocr.get_available_tools()[0] + with Image.open(os.path.join(self.SCRATCH, png)) as f: + if ocr.can_detect_orientation(): + try: + orientation = ocr.detect_orientation(f, lang=lang) + f = f.rotate(orientation["angle"], expand=1) + except TesseractError: + pass + return ocr.image_to_string(f, lang=lang) + def _guess_attributes_from_name(self, parseable): """ We use a crude naming convention to make handling the sender, title,