mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	add TIFF barcode support
Signed-off-by: Florian Brandes <florian.brandes@posteo.de>
This commit is contained in:
		| @@ -626,6 +626,12 @@ PAPERLESS_CONSUMER_ENABLE_BARCODES=<bool> | ||||
|  | ||||
|     Defaults to false. | ||||
|  | ||||
| PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT=<bool> | ||||
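|     Whether TIFF image files should be scanned for barcodes. | ||||
|     This only takes effect when PAPERLESS_CONSUMER_ENABLE_BARCODES is enabled. | ||||
|     Any TIFF file (single- or multi-page) is automatically converted to a PDF | ||||
|     for later processing, and the original TIFF file is deleted once the | ||||
|     conversion has succeeded. | ||||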
|  | ||||
|     Defaults to false. | ||||
|  | ||||
| PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT | ||||
|   Defines the string to be detected as a separator barcode. | ||||
|   | ||||
| @@ -22,6 +22,8 @@ from documents.models import Tag | ||||
| from documents.sanity_checker import SanityCheckFailedException | ||||
| from pdf2image import convert_from_path | ||||
| from pikepdf import Pdf | ||||
| from PIL import Image | ||||
| from PIL import ImageSequence | ||||
| from pyzbar import pyzbar | ||||
| from whoosh.writing import AsyncWriter | ||||
|  | ||||
| @@ -93,6 +95,41 @@ def barcode_reader(image) -> List[str]: | ||||
|     return barcodes | ||||
|  | ||||
|  | ||||
def convert_from_tiff_to_pdf(filepath: str) -> str:
    """
    Converts a given TIFF image file to pdf.

    The pdf is written next to the source file, re-using its name with the
    extension swapped for ".pdf". Every frame of a multi-page TIFF becomes
    one pdf page. After a successful conversion the original TIFF is deleted.

    :param filepath: path to a file ending in ".tif" or ".tiff"
        (matched case-insensitively).
    :return: path of the new pdf file, or "" if the file extension is not
        supported or the pdf could not be written.
    :raises OSError: if the source file cannot be opened as an image.
    """
    root, file_extension = os.path.splitext(filepath)
    if file_extension.lower() not in (".tif", ".tiff"):
        logger.warning(f"Cannot convert from {file_extension} to pdf.")
        return ""
    # Swap only the trailing extension. A plain str.replace() would also
    # rewrite ".tif" inside directory names or earlier in the file name.
    newpath = root + ".pdf"

    # The context manager guarantees the image handle is released even if
    # converting or saving a page raises.
    with Image.open(filepath) as image:
        pages = [page.convert("RGB") for page in ImageSequence.Iterator(image)]
        try:
            if len(pages) == 1:
                pages[0].save(newpath)
            else:
                pages[0].save(newpath, save_all=True, append_images=pages[1:])
        except OSError as e:
            logger.warning(
                f"Could not save the file as pdf. "
                f"The original image file was not deleted. Error: "
                f"{str(e)}",
            )
            # Do not hand back the path of a pdf that was never written.
            return ""

    # Delete the source only after the handle is closed, and report a failed
    # delete separately so it is not mistaken for a failed conversion.
    try:
        os.unlink(filepath)
    except OSError as e:
        logger.warning(
            f"Could not delete the original image file {filepath}. Error: {e}",
        )
    return newpath
|  | ||||
|  | ||||
| def scan_file_for_separating_barcodes(filepath: str) -> List[int]: | ||||
|     """ | ||||
|     Scan the provided file for page separating barcodes | ||||
| @@ -195,42 +232,56 @@ def consume_file( | ||||
|     if settings.CONSUMER_ENABLE_BARCODES: | ||||
|         separators = [] | ||||
|         document_list = [] | ||||
|         separators = scan_file_for_separating_barcodes(path) | ||||
|         if separators: | ||||
|             logger.debug(f"Pages with separators found in: {str(path)}") | ||||
|             document_list = separate_pages(path, separators) | ||||
|         if document_list: | ||||
|             for n, document in enumerate(document_list): | ||||
|                 # save to consumption dir | ||||
|                 # rename it to the original filename  with number prefix | ||||
|                 if override_filename: | ||||
|                     newname = f"{str(n)}_" + override_filename | ||||
|                 else: | ||||
|                     newname = None | ||||
|                 save_to_dir(document, newname=newname) | ||||
|             # if we got here, the document was successfully split | ||||
|             # and can safely be deleted | ||||
|             logger.debug("Deleting file {}".format(path)) | ||||
|             os.unlink(path) | ||||
|             # notify the sender, otherwise the progress bar | ||||
|             # in the UI stays stuck | ||||
|             payload = { | ||||
|                 "filename": override_filename, | ||||
|                 "task_id": task_id, | ||||
|                 "current_progress": 100, | ||||
|                 "max_progress": 100, | ||||
|                 "status": "SUCCESS", | ||||
|                 "message": "finished", | ||||
|             } | ||||
|             try: | ||||
|                 async_to_sync(get_channel_layer().group_send)( | ||||
|                     "status_updates", | ||||
|                     {"type": "status_update", "data": payload}, | ||||
|                 ) | ||||
|             except OSError as e: | ||||
|                 logger.warning("OSError. It could be, the broker cannot be reached.") | ||||
|                 logger.warning(str(e)) | ||||
|             return "File successfully split" | ||||
|         if settings.CONSUMER_BARCODE_TIFF_SUPPORT: | ||||
|             supported_extensions = [".pdf", ".tiff", ".tif"] | ||||
|         else: | ||||
|             supported_extensions = [".pdf"] | ||||
|         file_extension = os.path.splitext(os.path.basename(path))[1] | ||||
|         if file_extension not in supported_extensions: | ||||
|             logger.warning( | ||||
|                 f"Unsupported file format for barcode reader: {str(file_extension)}", | ||||
|             ) | ||||
|         else: | ||||
|             if file_extension == ".tif" or file_extension == ".tiff": | ||||
|                 path = convert_from_tiff_to_pdf(path) | ||||
|             separators = scan_file_for_separating_barcodes(path) | ||||
|             if separators: | ||||
|                 logger.debug(f"Pages with separators found in: {str(path)}") | ||||
|                 document_list = separate_pages(path, separators) | ||||
|             if document_list: | ||||
|                 for n, document in enumerate(document_list): | ||||
|                     # save to consumption dir | ||||
|                     # rename it to the original filename  with number prefix | ||||
|                     if override_filename: | ||||
|                         newname = f"{str(n)}_" + override_filename | ||||
|                     else: | ||||
|                         newname = None | ||||
|                     save_to_dir(document, newname=newname) | ||||
|                 # if we got here, the document was successfully split | ||||
|                 # and can safely be deleted | ||||
|                 logger.debug("Deleting file {}".format(path)) | ||||
|                 os.unlink(path) | ||||
|                 # notify the sender, otherwise the progress bar | ||||
|                 # in the UI stays stuck | ||||
|                 payload = { | ||||
|                     "filename": override_filename, | ||||
|                     "task_id": task_id, | ||||
|                     "current_progress": 100, | ||||
|                     "max_progress": 100, | ||||
|                     "status": "SUCCESS", | ||||
|                     "message": "finished", | ||||
|                 } | ||||
|                 try: | ||||
|                     async_to_sync(get_channel_layer().group_send)( | ||||
|                         "status_updates", | ||||
|                         {"type": "status_update", "data": payload}, | ||||
|                     ) | ||||
|                 except OSError as e: | ||||
|                     logger.warning( | ||||
|                         "OSError. It could be, the broker cannot be reached.", | ||||
|                     ) | ||||
|                     logger.warning(str(e)) | ||||
|                 return "File successfully split" | ||||
|  | ||||
|     # continue with consumption if no barcode was found | ||||
|     document = Consumer().try_consume_file( | ||||
|   | ||||
							
								
								
									
										
											BIN
										
									
								
								src/documents/tests/samples/barcodes/patch-code-t-middle.tiff
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								src/documents/tests/samples/barcodes/patch-code-t-middle.tiff
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								src/documents/tests/samples/simple.tiff
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								src/documents/tests/samples/simple.tiff
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| @@ -204,6 +204,30 @@ class TestTasks(DirectoriesMixin, TestCase): | ||||
|         img = Image.open(test_file) | ||||
|         self.assertEqual(tasks.barcode_reader(img), ["CUSTOM BARCODE"]) | ||||
|  | ||||
|     def test_convert_from_tiff_to_pdf(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "simple.tiff", | ||||
|         ) | ||||
|         dst = os.path.join(settings.SCRATCH_DIR, "simple.tiff") | ||||
|         shutil.copy(test_file, dst) | ||||
|         target_file = tasks.convert_from_tiff_to_pdf(dst) | ||||
|         file_extension = os.path.splitext(os.path.basename(target_file))[1] | ||||
|         self.assertTrue(os.path.isfile(target_file)) | ||||
|         self.assertEqual(file_extension, ".pdf") | ||||
|  | ||||
|     def test_convert_error_from_pdf_to_pdf(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "simple.pdf", | ||||
|         ) | ||||
|         dst = os.path.join(settings.SCRATCH_DIR, "simple.pdf") | ||||
|         shutil.copy(test_file, dst) | ||||
|         target_file = tasks.convert_from_tiff_to_pdf(dst) | ||||
|         self.assertFalse(os.path.isfile(target_file)) | ||||
|  | ||||
|     def test_scan_file_for_separating_barcodes(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
| @@ -400,7 +424,23 @@ class TestTasks(DirectoriesMixin, TestCase): | ||||
|             "barcodes", | ||||
|             "patch-code-t-middle.pdf", | ||||
|         ) | ||||
|         dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pd") | ||||
|         dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pdf") | ||||
|         shutil.copy(test_file, dst) | ||||
|  | ||||
|         self.assertEqual(tasks.consume_file(dst), "File successfully split") | ||||
|  | ||||
|     @override_settings( | ||||
|         CONSUMER_ENABLE_BARCODES=True, | ||||
|         CONSUMER_BARCODE_TIFF_SUPPORT=True, | ||||
|     ) | ||||
|     def test_consume_barcode_tiff_file(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             "patch-code-t-middle.tiff", | ||||
|         ) | ||||
|         dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.tiff") | ||||
|         shutil.copy(test_file, dst) | ||||
|  | ||||
|         self.assertEqual(tasks.consume_file(dst), "File successfully split") | ||||
|   | ||||
| @@ -502,6 +502,10 @@ CONSUMER_ENABLE_BARCODES = __get_boolean( | ||||
|     "PAPERLESS_CONSUMER_ENABLE_BARCODES", | ||||
| ) | ||||
|  | ||||
| CONSUMER_BARCODE_TIFF_SUPPORT = __get_boolean( | ||||
|     "PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT", | ||||
| ) | ||||
|  | ||||
| CONSUMER_BARCODE_STRING = os.getenv("PAPERLESS_CONSUMER_BARCODE_STRING", "PATCHT") | ||||
|  | ||||
| OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true") | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Florian Brandes
					Florian Brandes