mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Moved pyocr.get_available_tools() into a method
This commit is contained in:
parent
5f0962bc3e
commit
3a7923e32d
@ -8,7 +8,8 @@ matrix:
|
||||
env: TOXENV=py34
|
||||
- python: 3.5
|
||||
env: TOXENV=py35
|
||||
- env: TOXENV=pep8
|
||||
- python: 3.5
|
||||
env: TOXENV=pep8
|
||||
|
||||
install:
|
||||
- pip install --requirement requirements.txt
|
||||
|
@ -26,18 +26,6 @@ from .models import Sender, Tag, Document
|
||||
from .languages import ISO639
|
||||
|
||||
|
||||
def image_to_string(args):
|
||||
self, png, lang = args
|
||||
with Image.open(os.path.join(self.SCRATCH, png)) as f:
|
||||
if self.OCR.can_detect_orientation():
|
||||
try:
|
||||
orientation = self.OCR.detect_orientation(f, lang=lang)
|
||||
f = f.rotate(orientation["angle"], expand=1)
|
||||
except TesseractError:
|
||||
pass
|
||||
return self.OCR.image_to_string(f, lang=lang)
|
||||
|
||||
|
||||
class OCRError(Exception):
|
||||
pass
|
||||
|
||||
@ -61,7 +49,6 @@ class Consumer(object):
|
||||
CONSUME = settings.CONSUMPTION_DIR
|
||||
THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
|
||||
|
||||
OCR = pyocr.get_available_tools()[0]
|
||||
DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
|
||||
|
||||
REGEX_TITLE = re.compile(
|
||||
@ -239,12 +226,24 @@ class Consumer(object):
|
||||
|
||||
with Pool(processes=self.THREADS) as pool:
|
||||
r = pool.map(
|
||||
image_to_string, itertools.product([self], pngs, [lang]))
|
||||
self.image_to_string, itertools.product(pngs, [lang]))
|
||||
r = " ".join(r)
|
||||
|
||||
# Strip out excess white space to allow matching to go smoother
|
||||
return re.sub(r"\s+", " ", r)
|
||||
|
||||
def image_to_string(self, args):
|
||||
png, lang = args
|
||||
ocr = pyocr.get_available_tools()[0]
|
||||
with Image.open(os.path.join(self.SCRATCH, png)) as f:
|
||||
if ocr.can_detect_orientation():
|
||||
try:
|
||||
orientation = ocr.detect_orientation(f, lang=lang)
|
||||
f = f.rotate(orientation["angle"], expand=1)
|
||||
except TesseractError:
|
||||
pass
|
||||
return ocr.image_to_string(f, lang=lang)
|
||||
|
||||
def _guess_attributes_from_name(self, parseable):
|
||||
"""
|
||||
We use a crude naming convention to make handling the sender, title,
|
||||
|
Loading…
x
Reference in New Issue
Block a user