Style and removal of Python 2.7 stuff

2026-02-16 00:19:32 -06:00 · 2018-02-18 15:55:55 +00:00
parent 997a869124
commit caf44146db
2 changed files with 4 additions and 6 deletions
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -22,7 +22,7 @@ class ConsumerError(Exception):
    pass


-class Consumer(object):
+class Consumer:
    """
    Loop over every file found in CONSUMPTION_DIR and:
      1. Convert it to a greyscale pnm
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -52,15 +52,13 @@ class RasterisedDocumentParser(DocumentParser):
        return os.path.join(self.tempdir, "convert-0000.png")

    def _is_ocred(self):
+
        # Extract text from PDF using pdftotext
        text = get_text_from_pdf(self.document_path)

        # We assume that a PDF with more than 50 characters contains text
        # (so no OCR is required)
-        if len(text) > 50:
-            return True
-
-        return False
+        return len(text) > 50

    def get_text(self):
        if self.TEXT_CACHE is not None:
@@ -74,7 +72,6 @@ class RasterisedDocumentParser(DocumentParser):
        images = self._get_greyscale()

        try:
-
            self.TEXT_CACHE = self._get_ocr(images)
            return self.TEXT_CACHE
        except OCRError as e:
@@ -262,6 +259,7 @@ def image_to_string(args):


 def get_text_from_pdf(pdf_file):
+
    with open(pdf_file, "rb") as f:
        try:
            pdf = pdftotext.PDF(f)