Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-10-30 03:56:23 -05:00)
			
		
		
		
	Moved pyocr.get_available_tools() into a method
This commit is contained in:
		| @@ -8,7 +8,8 @@ matrix: | |||||||
|           env: TOXENV=py34 |           env: TOXENV=py34 | ||||||
|         - python: 3.5 |         - python: 3.5 | ||||||
|           env: TOXENV=py35 |           env: TOXENV=py35 | ||||||
|         - env: TOXENV=pep8 |         - python: 3.5 | ||||||
|  |           env: TOXENV=pep8 | ||||||
|  |  | ||||||
| install: | install: | ||||||
|     - pip install --requirement requirements.txt |     - pip install --requirement requirements.txt | ||||||
|   | |||||||
| @@ -26,18 +26,6 @@ from .models import Sender, Tag, Document | |||||||
| from .languages import ISO639 | from .languages import ISO639 | ||||||
|  |  | ||||||
|  |  | ||||||
| def image_to_string(args): |  | ||||||
|     self, png, lang = args |  | ||||||
|     with Image.open(os.path.join(self.SCRATCH, png)) as f: |  | ||||||
|         if self.OCR.can_detect_orientation(): |  | ||||||
|             try: |  | ||||||
|                 orientation = self.OCR.detect_orientation(f, lang=lang) |  | ||||||
|                 f = f.rotate(orientation["angle"], expand=1) |  | ||||||
|             except TesseractError: |  | ||||||
|                 pass |  | ||||||
|         return self.OCR.image_to_string(f, lang=lang) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class OCRError(Exception): | class OCRError(Exception): | ||||||
|     pass |     pass | ||||||
|  |  | ||||||
| @@ -61,7 +49,6 @@ class Consumer(object): | |||||||
|     CONSUME = settings.CONSUMPTION_DIR |     CONSUME = settings.CONSUMPTION_DIR | ||||||
|     THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None |     THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None | ||||||
|  |  | ||||||
|     OCR = pyocr.get_available_tools()[0] |  | ||||||
|     DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE |     DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE | ||||||
|  |  | ||||||
|     REGEX_TITLE = re.compile( |     REGEX_TITLE = re.compile( | ||||||
| @@ -239,12 +226,24 @@ class Consumer(object): | |||||||
|  |  | ||||||
|         with Pool(processes=self.THREADS) as pool: |         with Pool(processes=self.THREADS) as pool: | ||||||
|             r = pool.map( |             r = pool.map( | ||||||
|                 image_to_string, itertools.product([self], pngs, [lang])) |                 self.image_to_string, itertools.product(pngs, [lang])) | ||||||
|             r = " ".join(r) |             r = " ".join(r) | ||||||
|  |  | ||||||
|         # Strip out excess white space to allow matching to go smoother |         # Strip out excess white space to allow matching to go smoother | ||||||
|         return re.sub(r"\s+", " ", r) |         return re.sub(r"\s+", " ", r) | ||||||
|  |  | ||||||
|  |     def image_to_string(self, args): | ||||||
|  |         png, lang = args | ||||||
|  |         ocr = pyocr.get_available_tools()[0] | ||||||
|  |         with Image.open(os.path.join(self.SCRATCH, png)) as f: | ||||||
|  |             if ocr.can_detect_orientation(): | ||||||
|  |                 try: | ||||||
|  |                     orientation = ocr.detect_orientation(f, lang=lang) | ||||||
|  |                     f = f.rotate(orientation["angle"], expand=1) | ||||||
|  |                 except TesseractError: | ||||||
|  |                     pass | ||||||
|  |             return ocr.image_to_string(f, lang=lang) | ||||||
|  |  | ||||||
|     def _guess_attributes_from_name(self, parseable): |     def _guess_attributes_from_name(self, parseable): | ||||||
|         """ |         """ | ||||||
|         We use a crude naming convention to make handling the sender, title, |         We use a crude naming convention to make handling the sender, title, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Daniel Quinn
					Daniel Quinn