Moved pyocr.get_available_tools() into a method

2026-02-24 00:59:35 -06:00 · 2016-02-21 02:24:05 +00:00
parent 5f0962bc3e
commit 3a7923e32d
2 changed files with 15 additions and 15 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,7 +8,8 @@ matrix:
          env: TOXENV=py34
        - python: 3.5
          env: TOXENV=py35
-        - env: TOXENV=pep8
+        - python: 3.5
+          env: TOXENV=pep8

 install:
    - pip install --requirement requirements.txt
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -26,18 +26,6 @@ from .models import Sender, Tag, Document
 from .languages import ISO639


-def image_to_string(args):
-    self, png, lang = args
-    with Image.open(os.path.join(self.SCRATCH, png)) as f:
-        if self.OCR.can_detect_orientation():
-            try:
-                orientation = self.OCR.detect_orientation(f, lang=lang)
-                f = f.rotate(orientation["angle"], expand=1)
-            except TesseractError:
-                pass
-        return self.OCR.image_to_string(f, lang=lang)
-
-
 class OCRError(Exception):
    pass

@@ -61,7 +49,6 @@ class Consumer(object):
    CONSUME = settings.CONSUMPTION_DIR
    THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None

-    OCR = pyocr.get_available_tools()[0]
    DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE

    REGEX_TITLE = re.compile(
@@ -239,12 +226,24 @@ class Consumer(object):

        with Pool(processes=self.THREADS) as pool:
            r = pool.map(
-                image_to_string, itertools.product([self], pngs, [lang]))
+                self.image_to_string, itertools.product(pngs, [lang]))
            r = " ".join(r)

        # Strip out excess white space to allow matching to go smoother
        return re.sub(r"\s+", " ", r)

+    def image_to_string(self, args):
+        png, lang = args
+        ocr = pyocr.get_available_tools()[0]
+        with Image.open(os.path.join(self.SCRATCH, png)) as f:
+            if ocr.can_detect_orientation():
+                try:
+                    orientation = ocr.detect_orientation(f, lang=lang)
+                    f = f.rotate(orientation["angle"], expand=1)
+                except TesseractError:
+                    pass
+            return ocr.image_to_string(f, lang=lang)
+
    def _guess_attributes_from_name(self, parseable):
        """
        We use a crude naming convention to make handling the sender, title,