Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-10-30 03:56:23 -05:00)
			
		
		
		
	Style and removal of Python 2.7 stuff
This commit is contained in:
		| @@ -22,7 +22,7 @@ class ConsumerError(Exception): | |||||||
|     pass |     pass | ||||||
|  |  | ||||||
|  |  | ||||||
| class Consumer(object): | class Consumer: | ||||||
|     """ |     """ | ||||||
|     Loop over every file found in CONSUMPTION_DIR and: |     Loop over every file found in CONSUMPTION_DIR and: | ||||||
|       1. Convert it to a greyscale pnm |       1. Convert it to a greyscale pnm | ||||||
|   | |||||||
| @@ -52,15 +52,13 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|         return os.path.join(self.tempdir, "convert-0000.png") |         return os.path.join(self.tempdir, "convert-0000.png") | ||||||
|  |  | ||||||
|     def _is_ocred(self): |     def _is_ocred(self): | ||||||
|  |  | ||||||
|         # Extract text from PDF using pdftotext |         # Extract text from PDF using pdftotext | ||||||
|         text = get_text_from_pdf(self.document_path) |         text = get_text_from_pdf(self.document_path) | ||||||
|  |  | ||||||
|         # We assume, that a PDF with at least 50 characters contains text |         # We assume, that a PDF with at least 50 characters contains text | ||||||
|         # (so no OCR required) |         # (so no OCR required) | ||||||
|         if len(text) > 50: |         return len(text) > 50 | ||||||
|             return True |  | ||||||
|  |  | ||||||
|         return False |  | ||||||
|  |  | ||||||
|     def get_text(self): |     def get_text(self): | ||||||
|         if self.TEXT_CACHE is not None: |         if self.TEXT_CACHE is not None: | ||||||
| @@ -74,7 +72,6 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|         images = self._get_greyscale() |         images = self._get_greyscale() | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|  |  | ||||||
|             self.TEXT_CACHE = self._get_ocr(images) |             self.TEXT_CACHE = self._get_ocr(images) | ||||||
|             return self.TEXT_CACHE |             return self.TEXT_CACHE | ||||||
|         except OCRError as e: |         except OCRError as e: | ||||||
| @@ -262,6 +259,7 @@ def image_to_string(args): | |||||||
|  |  | ||||||
|  |  | ||||||
| def get_text_from_pdf(pdf_file): | def get_text_from_pdf(pdf_file): | ||||||
|  |  | ||||||
|     with open(pdf_file, "rb") as f: |     with open(pdf_file, "rb") as f: | ||||||
|         try: |         try: | ||||||
|             pdf = pdftotext.PDF(f) |             pdf = pdftotext.PDF(f) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Daniel Quinn
					Daniel Quinn