mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-26 03:36:08 -05:00 
			
		
		
		
	Updates how barcodes are detected, using pikepdf images, instead of converting each page to an image
This commit is contained in:
		 Trenton Holmes
					Trenton Holmes
				
			
				
					committed by
					
						 Trenton H
						Trenton H
					
				
			
			
				
	
			
			
			 Trenton H
						Trenton H
					
				
			
						parent
						
							d9b345ffd9
						
					
				
				
					commit
					b21f64de8a
				
			
							
								
								
									
										1
									
								
								Pipfile
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								Pipfile
									
									
									
									
									
								
							| @@ -53,7 +53,6 @@ concurrent-log-handler = "*" | ||||
| "importlib-resources" = {version = "*", markers = "python_version < '3.9'"} | ||||
| zipp = {version = "*", markers = "python_version < '3.9'"} | ||||
| pyzbar = "*" | ||||
| pdf2image = "*" | ||||
| mysqlclient = "*" | ||||
| setproctitle = "*" | ||||
|  | ||||
|   | ||||
							
								
								
									
										24
									
								
								Pipfile.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										24
									
								
								Pipfile.lock
									
									
									
										generated
									
									
									
								
							| @@ -1,7 +1,7 @@ | ||||
| { | ||||
|     "_meta": { | ||||
|         "hash": { | ||||
|             "sha256": "6f46be21b67938add11dbf0ecea4f722836f161f58fa5e47dec3f92edb346371" | ||||
|             "sha256": "896665b8ff6d8a99af44b729c581033add1ba5cbd927723ef275649491c92a4f" | ||||
|         }, | ||||
|         "pipfile-spec": 6, | ||||
|         "requires": {}, | ||||
| @@ -788,14 +788,6 @@ | ||||
|             "index": "pypi", | ||||
|             "version": "==2.5.2" | ||||
|         }, | ||||
|         "pdf2image": { | ||||
|             "hashes": [ | ||||
|                 "sha256:84f79f2b8fad943e36323ea4e937fcb05f26ded0caa0a01181df66049e42fb65", | ||||
|                 "sha256:d58ed94d978a70c73c2bb7fdf8acbaf2a7089c29ff8141be5f45433c0c4293bb" | ||||
|             ], | ||||
|             "index": "pypi", | ||||
|             "version": "==1.16.0" | ||||
|         }, | ||||
|         "pdfminer.six": { | ||||
|             "hashes": [ | ||||
|                 "sha256:5a64c924410ac48501d6060b21638bf401db69f5b1bd57207df7fbc070ac8ae2", | ||||
| @@ -1055,6 +1047,7 @@ | ||||
|         }, | ||||
|         "pyyaml": { | ||||
|             "hashes": [ | ||||
|                 "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf", | ||||
|                 "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", | ||||
|                 "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b", | ||||
|                 "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57", | ||||
| @@ -1066,26 +1059,32 @@ | ||||
|                 "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287", | ||||
|                 "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513", | ||||
|                 "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0", | ||||
|                 "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782", | ||||
|                 "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0", | ||||
|                 "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92", | ||||
|                 "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f", | ||||
|                 "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2", | ||||
|                 "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc", | ||||
|                 "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1", | ||||
|                 "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c", | ||||
|                 "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86", | ||||
|                 "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4", | ||||
|                 "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c", | ||||
|                 "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34", | ||||
|                 "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b", | ||||
|                 "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d", | ||||
|                 "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c", | ||||
|                 "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb", | ||||
|                 "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7", | ||||
|                 "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737", | ||||
|                 "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3", | ||||
|                 "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d", | ||||
|                 "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358", | ||||
|                 "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53", | ||||
|                 "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78", | ||||
|                 "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803", | ||||
|                 "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a", | ||||
|                 "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f", | ||||
|                 "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", | ||||
|                 "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" | ||||
|             ], | ||||
| @@ -2261,6 +2260,7 @@ | ||||
|         }, | ||||
|         "pyyaml": { | ||||
|             "hashes": [ | ||||
|                 "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf", | ||||
|                 "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", | ||||
|                 "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b", | ||||
|                 "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57", | ||||
| @@ -2272,26 +2272,32 @@ | ||||
|                 "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287", | ||||
|                 "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513", | ||||
|                 "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0", | ||||
|                 "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782", | ||||
|                 "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0", | ||||
|                 "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92", | ||||
|                 "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f", | ||||
|                 "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2", | ||||
|                 "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc", | ||||
|                 "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1", | ||||
|                 "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c", | ||||
|                 "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86", | ||||
|                 "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4", | ||||
|                 "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c", | ||||
|                 "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34", | ||||
|                 "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b", | ||||
|                 "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d", | ||||
|                 "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c", | ||||
|                 "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb", | ||||
|                 "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7", | ||||
|                 "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737", | ||||
|                 "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3", | ||||
|                 "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d", | ||||
|                 "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358", | ||||
|                 "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53", | ||||
|                 "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78", | ||||
|                 "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803", | ||||
|                 "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a", | ||||
|                 "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f", | ||||
|                 "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", | ||||
|                 "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" | ||||
|             ], | ||||
|   | ||||
| @@ -3,13 +3,15 @@ import os | ||||
| import shutil | ||||
| import tempfile | ||||
| from functools import lru_cache | ||||
| from typing import List  # for type hinting. Can be removed, if only Python >3.8 is used | ||||
| from typing import List | ||||
| from typing import Optional | ||||
| from typing import Tuple | ||||
|  | ||||
| import magic | ||||
| from django.conf import settings | ||||
| from pdf2image import convert_from_path | ||||
| from pikepdf import Page | ||||
| from pikepdf import Pdf | ||||
| from pikepdf import PdfImage | ||||
| from PIL import Image | ||||
| from PIL import ImageSequence | ||||
| from pyzbar import pyzbar | ||||
| @@ -32,7 +34,7 @@ def supported_file_type(mime_type) -> bool: | ||||
|     return mime_type in supported_mime | ||||
|  | ||||
|  | ||||
| def barcode_reader(image) -> List[str]: | ||||
| def barcode_reader(image: Image) -> List[str]: | ||||
|     """ | ||||
|     Read any barcodes contained in image | ||||
|     Returns a list containing all found barcodes | ||||
| @@ -99,21 +101,39 @@ def convert_from_tiff_to_pdf(filepath: str) -> str: | ||||
|     return newpath | ||||
|  | ||||
|  | ||||
| def scan_file_for_separating_barcodes(filepath: str) -> List[int]: | ||||
| def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], List[int]]: | ||||
|     """ | ||||
|     Scan the provided pdf file for page separating barcodes | ||||
|     Returns a list of pagenumbers, which separate the file | ||||
|     Returns the PDF filepath and a list of pagenumbers, | ||||
|     which separate the file into new files | ||||
|     """ | ||||
|  | ||||
|     separator_page_numbers = [] | ||||
|     separator_barcode = str(settings.CONSUMER_BARCODE_STRING) | ||||
|     # use a temporary directory in case the file is too big to handle in memory | ||||
|     with tempfile.TemporaryDirectory() as path: | ||||
|         pages_from_path = convert_from_path(filepath, output_folder=path) | ||||
|         for current_page_number, page in enumerate(pages_from_path): | ||||
|             current_barcodes = barcode_reader(page) | ||||
|             if separator_barcode in current_barcodes: | ||||
|                 separator_page_numbers.append(current_page_number) | ||||
|     return separator_page_numbers | ||||
|     pdf_filepath = None | ||||
|  | ||||
|     mime_type = get_file_mime_type(filepath) | ||||
|  | ||||
|     if supported_file_type(mime_type): | ||||
|         pdf_filepath = filepath | ||||
|         if mime_type == "image/tiff": | ||||
|             pdf_filepath = convert_from_tiff_to_pdf(filepath) | ||||
|  | ||||
|         pdf = Pdf.open(pdf_filepath) | ||||
|  | ||||
|         for page_num, page in enumerate(pdf.pages): | ||||
|             for image_key in page.images: | ||||
|                 pdfimage = PdfImage(page.images[image_key]) | ||||
|                 pillow_img = pdfimage.as_pil_image() | ||||
|  | ||||
|                 detected_barcodes = barcode_reader(pillow_img) | ||||
|  | ||||
|                 if settings.CONSUMER_BARCODE_STRING in detected_barcodes: | ||||
|                     separator_page_numbers.append(page_num) | ||||
|     else: | ||||
|         logger.warning( | ||||
|             f"Unsupported file format for barcode reader: {str(mime_type)}", | ||||
|         ) | ||||
|     return pdf_filepath, separator_page_numbers | ||||
|  | ||||
|  | ||||
| def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]: | ||||
|   | ||||
| @@ -96,29 +96,13 @@ def consume_file( | ||||
|     # check for separators in current document | ||||
|     if settings.CONSUMER_ENABLE_BARCODES: | ||||
|  | ||||
|         mime_type = barcodes.get_file_mime_type(path) | ||||
|  | ||||
|         if not barcodes.supported_file_type(mime_type): | ||||
|             # if not supported, skip this routine | ||||
|             logger.warning( | ||||
|                 f"Unsupported file format for barcode reader: {str(mime_type)}", | ||||
|             ) | ||||
|         else: | ||||
|             separators = [] | ||||
|             document_list = [] | ||||
|  | ||||
|             if mime_type == "image/tiff": | ||||
|                 file_to_process = barcodes.convert_from_tiff_to_pdf(path) | ||||
|             else: | ||||
|                 file_to_process = path | ||||
|  | ||||
|             separators = barcodes.scan_file_for_separating_barcodes(file_to_process) | ||||
|         pdf_filepath, separators = barcodes.scan_file_for_separating_barcodes(path) | ||||
|  | ||||
|         if separators: | ||||
|             logger.debug( | ||||
|                 f"Pages with separators found in: {str(path)}", | ||||
|             ) | ||||
|                 document_list = barcodes.separate_pages(file_to_process, separators) | ||||
|             document_list = barcodes.separate_pages(pdf_filepath, separators) | ||||
|  | ||||
|             if document_list: | ||||
|                 for n, document in enumerate(document_list): | ||||
| @@ -134,13 +118,11 @@ def consume_file( | ||||
|                         target_dir=path.parent, | ||||
|                     ) | ||||
|  | ||||
|                 # if we got here, the document was successfully split | ||||
|                 # and can safely be deleted | ||||
|                 if mime_type == "image/tiff": | ||||
|                     # Remove the TIFF converted to PDF file | ||||
|                     logger.debug(f"Deleting file {file_to_process}") | ||||
|                     os.unlink(file_to_process) | ||||
|                 # Remove the original file (new file is saved above) | ||||
|                 # Delete the PDF file which was split | ||||
|                 os.remove(pdf_filepath) | ||||
|  | ||||
|                 # If the original was a TIFF, remove the original file as well | ||||
|                 if str(pdf_filepath) != str(path): | ||||
|                     logger.debug(f"Deleting file {path}") | ||||
|                     os.unlink(path) | ||||
|  | ||||
|   | ||||
| @@ -13,22 +13,23 @@ from PIL import Image | ||||
|  | ||||
|  | ||||
| class TestBarcode(DirectoriesMixin, TestCase): | ||||
|     def test_barcode_reader(self): | ||||
|         test_file = os.path.join( | ||||
|  | ||||
|     SAMPLE_DIR = os.path.join( | ||||
|         os.path.dirname(__file__), | ||||
|         "samples", | ||||
|             "barcodes", | ||||
|             "barcode-39-PATCHT.png", | ||||
|     ) | ||||
|  | ||||
|     BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes") | ||||
|  | ||||
|     def test_barcode_reader(self): | ||||
|         test_file = os.path.join(self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT.png") | ||||
|         img = Image.open(test_file) | ||||
|         separator_barcode = str(settings.CONSUMER_BARCODE_STRING) | ||||
|         self.assertEqual(barcodes.barcode_reader(img), [separator_barcode]) | ||||
|  | ||||
|     def test_barcode_reader2(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "patch-code-t.pbm", | ||||
|         ) | ||||
|         img = Image.open(test_file) | ||||
| @@ -37,9 +38,7 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_barcode_reader_distorsion(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "barcode-39-PATCHT-distorsion.png", | ||||
|         ) | ||||
|         img = Image.open(test_file) | ||||
| @@ -48,9 +47,7 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_barcode_reader_distorsion2(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "barcode-39-PATCHT-distorsion2.png", | ||||
|         ) | ||||
|         img = Image.open(test_file) | ||||
| @@ -59,9 +56,7 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_barcode_reader_unreadable(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "barcode-39-PATCHT-unreadable.png", | ||||
|         ) | ||||
|         img = Image.open(test_file) | ||||
| @@ -69,9 +64,7 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_barcode_reader_qr(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "qr-code-PATCHT.png", | ||||
|         ) | ||||
|         img = Image.open(test_file) | ||||
| @@ -80,9 +73,7 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_barcode_reader_128(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "barcode-128-PATCHT.png", | ||||
|         ) | ||||
|         img = Image.open(test_file) | ||||
| @@ -90,15 +81,13 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|         self.assertEqual(barcodes.barcode_reader(img), [separator_barcode]) | ||||
|  | ||||
|     def test_barcode_reader_no_barcode(self): | ||||
|         test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png") | ||||
|         test_file = os.path.join(self.SAMPLE_DIR, "simple.png") | ||||
|         img = Image.open(test_file) | ||||
|         self.assertEqual(barcodes.barcode_reader(img), []) | ||||
|  | ||||
|     def test_barcode_reader_custom_separator(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "barcode-39-custom.png", | ||||
|         ) | ||||
|         img = Image.open(test_file) | ||||
| @@ -106,9 +95,7 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_barcode_reader_custom_qr_separator(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "barcode-qr-custom.png", | ||||
|         ) | ||||
|         img = Image.open(test_file) | ||||
| @@ -116,9 +103,7 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_barcode_reader_custom_128_separator(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "barcode-128-custom.png", | ||||
|         ) | ||||
|         img = Image.open(test_file) | ||||
| @@ -126,19 +111,15 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_get_mime_type(self): | ||||
|         tiff_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             self.SAMPLE_DIR, | ||||
|             "simple.tiff", | ||||
|         ) | ||||
|         pdf_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             self.SAMPLE_DIR, | ||||
|             "simple.pdf", | ||||
|         ) | ||||
|         png_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "barcode-128-custom.png", | ||||
|         ) | ||||
|         tiff_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile1") | ||||
| @@ -173,8 +154,7 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_convert_error_from_pdf_to_pdf(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             self.SAMPLE_DIR, | ||||
|             "simple.pdf", | ||||
|         ) | ||||
|         dst = os.path.join(settings.SCRATCH_DIR, "simple.pdf") | ||||
| @@ -183,107 +163,127 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_scan_file_for_separating_barcodes(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "patch-code-t.pdf", | ||||
|         ) | ||||
|         pages = barcodes.scan_file_for_separating_barcodes(test_file) | ||||
|         self.assertEqual(pages, [0]) | ||||
|         pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( | ||||
|             test_file, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(pdf_file, test_file) | ||||
|         self.assertListEqual(separator_page_numbers, [0]) | ||||
|  | ||||
|     def test_scan_file_for_separating_barcodes2(self): | ||||
|         test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") | ||||
|         pages = barcodes.scan_file_for_separating_barcodes(test_file) | ||||
|         self.assertEqual(pages, []) | ||||
|         test_file = os.path.join(self.SAMPLE_DIR, "simple.pdf") | ||||
|         pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( | ||||
|             test_file, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(pdf_file, test_file) | ||||
|         self.assertListEqual(separator_page_numbers, []) | ||||
|  | ||||
|     def test_scan_file_for_separating_barcodes3(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "patch-code-t-middle.pdf", | ||||
|         ) | ||||
|         pages = barcodes.scan_file_for_separating_barcodes(test_file) | ||||
|         self.assertEqual(pages, [1]) | ||||
|         pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( | ||||
|             test_file, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(pdf_file, test_file) | ||||
|         self.assertListEqual(separator_page_numbers, [1]) | ||||
|  | ||||
|     def test_scan_file_for_separating_barcodes4(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "several-patcht-codes.pdf", | ||||
|         ) | ||||
|         pages = barcodes.scan_file_for_separating_barcodes(test_file) | ||||
|         self.assertEqual(pages, [2, 5]) | ||||
|         pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( | ||||
|             test_file, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(pdf_file, test_file) | ||||
|         self.assertListEqual(separator_page_numbers, [2, 5]) | ||||
|  | ||||
|     def test_scan_file_for_separating_barcodes_upsidedown(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "patch-code-t-middle_reverse.pdf", | ||||
|         ) | ||||
|         pages = barcodes.scan_file_for_separating_barcodes(test_file) | ||||
|         self.assertEqual(pages, [1]) | ||||
|         pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( | ||||
|             test_file, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(pdf_file, test_file) | ||||
|         self.assertListEqual(separator_page_numbers, [1]) | ||||
|  | ||||
|     def test_scan_file_for_separating_qr_barcodes(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "patch-code-t-qr.pdf", | ||||
|         ) | ||||
|         pages = barcodes.scan_file_for_separating_barcodes(test_file) | ||||
|         self.assertEqual(pages, [0]) | ||||
|         pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( | ||||
|             test_file, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(pdf_file, test_file) | ||||
|         self.assertListEqual(separator_page_numbers, [0]) | ||||
|  | ||||
|     @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE") | ||||
|     def test_scan_file_for_separating_custom_barcodes(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "barcode-39-custom.pdf", | ||||
|         ) | ||||
|         pages = barcodes.scan_file_for_separating_barcodes(test_file) | ||||
|         self.assertEqual(pages, [0]) | ||||
|         pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( | ||||
|             test_file, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(pdf_file, test_file) | ||||
|         self.assertListEqual(separator_page_numbers, [0]) | ||||
|  | ||||
|     @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE") | ||||
|     def test_scan_file_for_separating_custom_qr_barcodes(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "barcode-qr-custom.pdf", | ||||
|         ) | ||||
|         pages = barcodes.scan_file_for_separating_barcodes(test_file) | ||||
|         self.assertEqual(pages, [0]) | ||||
|         pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( | ||||
|             test_file, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(pdf_file, test_file) | ||||
|         self.assertListEqual(separator_page_numbers, [0]) | ||||
|  | ||||
|     @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE") | ||||
|     def test_scan_file_for_separating_custom_128_barcodes(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "barcode-128-custom.pdf", | ||||
|         ) | ||||
|         pages = barcodes.scan_file_for_separating_barcodes(test_file) | ||||
|         self.assertEqual(pages, [0]) | ||||
|         pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( | ||||
|             test_file, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(pdf_file, test_file) | ||||
|         self.assertListEqual(separator_page_numbers, [0]) | ||||
|  | ||||
|     def test_scan_file_for_separating_wrong_qr_barcodes(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "barcode-39-custom.pdf", | ||||
|         ) | ||||
|         pages = barcodes.scan_file_for_separating_barcodes(test_file) | ||||
|         self.assertEqual(pages, []) | ||||
|         pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( | ||||
|             test_file, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(pdf_file, test_file) | ||||
|         self.assertListEqual(separator_page_numbers, []) | ||||
|  | ||||
|     def test_separate_pages(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "patch-code-t-middle.pdf", | ||||
|         ) | ||||
|         pages = barcodes.separate_pages(test_file, [1]) | ||||
| @@ -311,9 +311,7 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_separate_pages_no_list(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "patch-code-t-middle.pdf", | ||||
|         ) | ||||
|         with self.assertLogs("paperless.barcodes", level="WARNING") as cm: | ||||
| @@ -328,9 +326,7 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_save_to_dir(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "patch-code-t.pdf", | ||||
|         ) | ||||
|         tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) | ||||
| @@ -340,9 +336,7 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_save_to_dir2(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "patch-code-t.pdf", | ||||
|         ) | ||||
|         nonexistingdir = "/nowhere" | ||||
| @@ -360,9 +354,7 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_save_to_dir3(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "patch-code-t.pdf", | ||||
|         ) | ||||
|         tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) | ||||
| @@ -372,31 +364,36 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def test_barcode_splitter(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "patch-code-t-middle.pdf", | ||||
|         ) | ||||
|         tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) | ||||
|         separators = barcodes.scan_file_for_separating_barcodes(test_file) | ||||
|         self.assertTrue(separators) | ||||
|         document_list = barcodes.separate_pages(test_file, separators) | ||||
|  | ||||
|         pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( | ||||
|             test_file, | ||||
|         ) | ||||
|  | ||||
|         self.assertEqual(test_file, pdf_file) | ||||
|         self.assertTrue(len(separator_page_numbers) > 0) | ||||
|  | ||||
|         document_list = barcodes.separate_pages(test_file, separator_page_numbers) | ||||
|         self.assertTrue(document_list) | ||||
|         for document in document_list: | ||||
|             barcodes.save_to_dir(document, target_dir=tempdir) | ||||
|  | ||||
|         target_file1 = os.path.join(tempdir, "patch-code-t-middle_document_0.pdf") | ||||
|         target_file2 = os.path.join(tempdir, "patch-code-t-middle_document_1.pdf") | ||||
|  | ||||
|         self.assertTrue(os.path.isfile(target_file1)) | ||||
|         self.assertTrue(os.path.isfile(target_file2)) | ||||
|  | ||||
|     @override_settings(CONSUMER_ENABLE_BARCODES=True) | ||||
|     def test_consume_barcode_file(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "patch-code-t-middle.pdf", | ||||
|         ) | ||||
|  | ||||
|         dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pdf") | ||||
|         shutil.copy(test_file, dst) | ||||
|  | ||||
| @@ -408,9 +405,7 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|     ) | ||||
|     def test_consume_barcode_tiff_file(self): | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "patch-code-t-middle.tiff", | ||||
|         ) | ||||
|         dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.tiff") | ||||
| @@ -432,18 +427,17 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|         and continue archiving the file as is. | ||||
|         """ | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             self.SAMPLE_DIR, | ||||
|             "simple.jpg", | ||||
|         ) | ||||
|         dst = os.path.join(settings.SCRATCH_DIR, "simple.jpg") | ||||
|         shutil.copy(test_file, dst) | ||||
|         with self.assertLogs("paperless.tasks", level="WARNING") as cm: | ||||
|         with self.assertLogs("paperless.barcodes", level="WARNING") as cm: | ||||
|             self.assertIn("Success", tasks.consume_file(dst)) | ||||
|         self.assertListEqual( | ||||
|             cm.output, | ||||
|             [ | ||||
|                 "WARNING:paperless.tasks:Unsupported file format for barcode reader: image/jpeg", | ||||
|                 "WARNING:paperless.barcodes:Unsupported file format for barcode reader: image/jpeg", | ||||
|             ], | ||||
|         ) | ||||
|         m.assert_called_once() | ||||
| @@ -465,9 +459,7 @@ class TestBarcode(DirectoriesMixin, TestCase): | ||||
|         the user uploads a supported image file, but without extension | ||||
|         """ | ||||
|         test_file = os.path.join( | ||||
|             os.path.dirname(__file__), | ||||
|             "samples", | ||||
|             "barcodes", | ||||
|             self.BARCODE_SAMPLE_DIR, | ||||
|             "patch-code-t-middle.tiff", | ||||
|         ) | ||||
|         dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle") | ||||
|   | ||||
		Reference in New Issue
	
	Block a user