From 7aa0e5650b290cbc39e37418508863043f0de008 Mon Sep 17 00:00:00 2001 From: Trenton Holmes Date: Wed, 14 Sep 2022 11:49:22 -0700 Subject: [PATCH] Updates how barcodes are detected, using pikepdf images, instead of converting each page to an image --- Pipfile | 1 - Pipfile.lock | 24 ++- src/documents/barcodes.py | 48 ++++-- src/documents/tasks.py | 42 ++--- src/documents/tests/test_barcodes.py | 242 +++++++++++++-------------- 5 files changed, 178 insertions(+), 179 deletions(-) diff --git a/Pipfile b/Pipfile index 98655ee78..ef5212f50 100644 --- a/Pipfile +++ b/Pipfile @@ -53,7 +53,6 @@ concurrent-log-handler = "*" "importlib-resources" = {version = "*", markers = "python_version < '3.9'"} zipp = {version = "*", markers = "python_version < '3.9'"} pyzbar = "*" -pdf2image = "*" mysqlclient = "*" setproctitle = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 8497d06d1..98499df9e 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "6f46be21b67938add11dbf0ecea4f722836f161f58fa5e47dec3f92edb346371" + "sha256": "896665b8ff6d8a99af44b729c581033add1ba5cbd927723ef275649491c92a4f" }, "pipfile-spec": 6, "requires": {}, @@ -788,14 +788,6 @@ "index": "pypi", "version": "==2.5.2" }, - "pdf2image": { - "hashes": [ - "sha256:84f79f2b8fad943e36323ea4e937fcb05f26ded0caa0a01181df66049e42fb65", - "sha256:d58ed94d978a70c73c2bb7fdf8acbaf2a7089c29ff8141be5f45433c0c4293bb" - ], - "index": "pypi", - "version": "==1.16.0" - }, "pdfminer.six": { "hashes": [ "sha256:5a64c924410ac48501d6060b21638bf401db69f5b1bd57207df7fbc070ac8ae2", @@ -1055,6 +1047,7 @@ }, "pyyaml": { "hashes": [ + "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf", "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b", "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57", @@ -1066,26 +1059,32 @@ 
"sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287", "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513", "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0", + "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782", "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0", "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92", "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f", "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2", "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc", + "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1", "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c", "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86", "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4", "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c", "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34", "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b", + "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d", "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c", "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb", + "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7", "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737", "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3", "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d", + "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358", "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53", "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78", 
"sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803", "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a", + "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f", "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" ], @@ -2261,6 +2260,7 @@ }, "pyyaml": { "hashes": [ + "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf", "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b", "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57", @@ -2272,26 +2272,32 @@ "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287", "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513", "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0", + "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782", "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0", "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92", "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f", "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2", "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc", + "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1", "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c", "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86", "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4", "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c", "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34", "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b", + 
"sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d", "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c", "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb", + "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7", "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737", "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3", "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d", + "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358", "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53", "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78", "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803", "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a", + "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f", "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" ], diff --git a/src/documents/barcodes.py b/src/documents/barcodes.py index d8a73e277..e473ce938 100644 --- a/src/documents/barcodes.py +++ b/src/documents/barcodes.py @@ -3,13 +3,15 @@ import os import shutil import tempfile from functools import lru_cache -from typing import List # for type hinting. 
Can be removed, if only Python >3.8 is used +from typing import List +from typing import Optional +from typing import Tuple import magic from django.conf import settings -from pdf2image import convert_from_path from pikepdf import Page from pikepdf import Pdf +from pikepdf import PdfImage from PIL import Image from PIL import ImageSequence from pyzbar import pyzbar @@ -32,7 +34,7 @@ def supported_file_type(mime_type) -> bool: return mime_type in supported_mime -def barcode_reader(image) -> List[str]: +def barcode_reader(image: Image) -> List[str]: """ Read any barcodes contained in image Returns a list containing all found barcodes @@ -99,21 +101,39 @@ def convert_from_tiff_to_pdf(filepath: str) -> str: return newpath -def scan_file_for_separating_barcodes(filepath: str) -> List[int]: +def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], List[int]]: """ Scan the provided pdf file for page separating barcodes - Returns a list of pagenumbers, which separate the file + Returns the PDF filepath and a list of page numbers, + which separate the file into new files """ + separator_page_numbers = [] - separator_barcode = str(settings.CONSUMER_BARCODE_STRING) - # use a temporary directory in case the file os too big to handle in memory - with tempfile.TemporaryDirectory() as path: - pages_from_path = convert_from_path(filepath, output_folder=path) - for current_page_number, page in enumerate(pages_from_path): - current_barcodes = barcode_reader(page) - if separator_barcode in current_barcodes: - separator_page_numbers.append(current_page_number) - return separator_page_numbers + pdf_filepath = None + + mime_type = get_file_mime_type(filepath) + + if supported_file_type(mime_type): + pdf_filepath = filepath + if mime_type == "image/tiff": + pdf_filepath = convert_from_tiff_to_pdf(filepath) + + pdf = Pdf.open(pdf_filepath) + + for page_num, page in enumerate(pdf.pages): + for image_key in page.images: + pdfimage = PdfImage(page.images[image_key]) + 
pillow_img = pdfimage.as_pil_image() + + detected_barcodes = barcode_reader(pillow_img) + + if settings.CONSUMER_BARCODE_STRING in detected_barcodes: + separator_page_numbers.append(page_num) + else: + logger.warning( + f"Unsupported file format for barcode reader: {str(mime_type)}", + ) + return pdf_filepath, separator_page_numbers def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]: diff --git a/src/documents/tasks.py b/src/documents/tasks.py index b1793e760..94b849456 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -96,29 +96,13 @@ def consume_file( # check for separators in current document if settings.CONSUMER_ENABLE_BARCODES: - mime_type = barcodes.get_file_mime_type(path) + pdf_filepath, separators = barcodes.scan_file_for_separating_barcodes(path) - if not barcodes.supported_file_type(mime_type): - # if not supported, skip this routine - logger.warning( - f"Unsupported file format for barcode reader: {str(mime_type)}", + if separators: + logger.debug( + f"Pages with separators found in: {str(path)}", ) - else: - separators = [] - document_list = [] - - if mime_type == "image/tiff": - file_to_process = barcodes.convert_from_tiff_to_pdf(path) - else: - file_to_process = path - - separators = barcodes.scan_file_for_separating_barcodes(file_to_process) - - if separators: - logger.debug( - f"Pages with separators found in: {str(path)}", - ) - document_list = barcodes.separate_pages(file_to_process, separators) + document_list = barcodes.separate_pages(pdf_filepath, separators) if document_list: for n, document in enumerate(document_list): @@ -134,15 +118,13 @@ def consume_file( target_dir=path.parent, ) - # if we got here, the document was successfully split - # and can safely be deleted - if mime_type == "image/tiff": - # Remove the TIFF converted to PDF file - logger.debug(f"Deleting file {file_to_process}") - os.unlink(file_to_process) - # Remove the original file (new file is saved above) - logger.debug(f"Deleting 
file {path}") - os.unlink(path) + # Delete the PDF file which was split + os.remove(pdf_filepath) + + # If the original was a TIFF, remove the original file as well + if str(pdf_filepath) != str(path): + logger.debug(f"Deleting file {path}") + os.unlink(path) # notify the sender, otherwise the progress bar # in the UI stays stuck diff --git a/src/documents/tests/test_barcodes.py b/src/documents/tests/test_barcodes.py index 3ffd5d753..c58596a1f 100644 --- a/src/documents/tests/test_barcodes.py +++ b/src/documents/tests/test_barcodes.py @@ -13,22 +13,23 @@ from PIL import Image class TestBarcode(DirectoriesMixin, TestCase): + + SAMPLE_DIR = os.path.join( + os.path.dirname(__file__), + "samples", + ) + + BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes") + def test_barcode_reader(self): - test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", - "barcode-39-PATCHT.png", - ) + test_file = os.path.join(self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT.png") img = Image.open(test_file) separator_barcode = str(settings.CONSUMER_BARCODE_STRING) self.assertEqual(barcodes.barcode_reader(img), [separator_barcode]) def test_barcode_reader2(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "patch-code-t.pbm", ) img = Image.open(test_file) @@ -37,9 +38,7 @@ class TestBarcode(DirectoriesMixin, TestCase): def test_barcode_reader_distorsion(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT-distorsion.png", ) img = Image.open(test_file) @@ -48,9 +47,7 @@ class TestBarcode(DirectoriesMixin, TestCase): def test_barcode_reader_distorsion2(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT-distorsion2.png", ) img = Image.open(test_file) @@ -59,9 +56,7 @@ class TestBarcode(DirectoriesMixin, TestCase): def 
test_barcode_reader_unreadable(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT-unreadable.png", ) img = Image.open(test_file) @@ -69,9 +64,7 @@ class TestBarcode(DirectoriesMixin, TestCase): def test_barcode_reader_qr(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "qr-code-PATCHT.png", ) img = Image.open(test_file) @@ -80,9 +73,7 @@ class TestBarcode(DirectoriesMixin, TestCase): def test_barcode_reader_128(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "barcode-128-PATCHT.png", ) img = Image.open(test_file) @@ -90,15 +81,13 @@ class TestBarcode(DirectoriesMixin, TestCase): self.assertEqual(barcodes.barcode_reader(img), [separator_barcode]) def test_barcode_reader_no_barcode(self): - test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png") + test_file = os.path.join(self.SAMPLE_DIR, "simple.png") img = Image.open(test_file) self.assertEqual(barcodes.barcode_reader(img), []) def test_barcode_reader_custom_separator(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "barcode-39-custom.png", ) img = Image.open(test_file) @@ -106,9 +95,7 @@ class TestBarcode(DirectoriesMixin, TestCase): def test_barcode_reader_custom_qr_separator(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "barcode-qr-custom.png", ) img = Image.open(test_file) @@ -116,9 +103,7 @@ class TestBarcode(DirectoriesMixin, TestCase): def test_barcode_reader_custom_128_separator(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "barcode-128-custom.png", ) img = Image.open(test_file) @@ -126,19 +111,15 @@ class TestBarcode(DirectoriesMixin, TestCase): def 
test_get_mime_type(self): tiff_file = os.path.join( - os.path.dirname(__file__), - "samples", + self.SAMPLE_DIR, "simple.tiff", ) pdf_file = os.path.join( - os.path.dirname(__file__), - "samples", + self.SAMPLE_DIR, "simple.pdf", ) png_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "barcode-128-custom.png", ) tiff_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile1") @@ -173,8 +154,7 @@ class TestBarcode(DirectoriesMixin, TestCase): def test_convert_error_from_pdf_to_pdf(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", + self.SAMPLE_DIR, "simple.pdf", ) dst = os.path.join(settings.SCRATCH_DIR, "simple.pdf") @@ -183,107 +163,127 @@ class TestBarcode(DirectoriesMixin, TestCase): def test_scan_file_for_separating_barcodes(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "patch-code-t.pdf", ) - pages = barcodes.scan_file_for_separating_barcodes(test_file) - self.assertEqual(pages, [0]) + pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( + test_file, + ) + + self.assertEqual(pdf_file, test_file) + self.assertListEqual(separator_page_numbers, [0]) def test_scan_file_for_separating_barcodes2(self): - test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") - pages = barcodes.scan_file_for_separating_barcodes(test_file) - self.assertEqual(pages, []) + test_file = os.path.join(self.SAMPLE_DIR, "simple.pdf") + pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( + test_file, + ) + + self.assertEqual(pdf_file, test_file) + self.assertListEqual(separator_page_numbers, []) def test_scan_file_for_separating_barcodes3(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "patch-code-t-middle.pdf", ) - pages = barcodes.scan_file_for_separating_barcodes(test_file) - 
self.assertEqual(pages, [1]) + pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( + test_file, + ) + + self.assertEqual(pdf_file, test_file) + self.assertListEqual(separator_page_numbers, [1]) def test_scan_file_for_separating_barcodes4(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "several-patcht-codes.pdf", ) - pages = barcodes.scan_file_for_separating_barcodes(test_file) - self.assertEqual(pages, [2, 5]) + pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( + test_file, + ) + + self.assertEqual(pdf_file, test_file) + self.assertListEqual(separator_page_numbers, [2, 5]) def test_scan_file_for_separating_barcodes_upsidedown(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "patch-code-t-middle_reverse.pdf", ) - pages = barcodes.scan_file_for_separating_barcodes(test_file) - self.assertEqual(pages, [1]) + pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( + test_file, + ) + + self.assertEqual(pdf_file, test_file) + self.assertListEqual(separator_page_numbers, [1]) def test_scan_file_for_separating_qr_barcodes(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "patch-code-t-qr.pdf", ) - pages = barcodes.scan_file_for_separating_barcodes(test_file) - self.assertEqual(pages, [0]) + pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( + test_file, + ) + + self.assertEqual(pdf_file, test_file) + self.assertListEqual(separator_page_numbers, [0]) @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE") def test_scan_file_for_separating_custom_barcodes(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "barcode-39-custom.pdf", ) - pages = barcodes.scan_file_for_separating_barcodes(test_file) - 
self.assertEqual(pages, [0]) + pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( + test_file, + ) + + self.assertEqual(pdf_file, test_file) + self.assertListEqual(separator_page_numbers, [0]) @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE") def test_scan_file_for_separating_custom_qr_barcodes(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "barcode-qr-custom.pdf", ) - pages = barcodes.scan_file_for_separating_barcodes(test_file) - self.assertEqual(pages, [0]) + pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( + test_file, + ) + + self.assertEqual(pdf_file, test_file) + self.assertListEqual(separator_page_numbers, [0]) @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE") def test_scan_file_for_separating_custom_128_barcodes(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "barcode-128-custom.pdf", ) - pages = barcodes.scan_file_for_separating_barcodes(test_file) - self.assertEqual(pages, [0]) + pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( + test_file, + ) + + self.assertEqual(pdf_file, test_file) + self.assertListEqual(separator_page_numbers, [0]) def test_scan_file_for_separating_wrong_qr_barcodes(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "barcode-39-custom.pdf", ) - pages = barcodes.scan_file_for_separating_barcodes(test_file) - self.assertEqual(pages, []) + pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( + test_file, + ) + + self.assertEqual(pdf_file, test_file) + self.assertListEqual(separator_page_numbers, []) def test_separate_pages(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "patch-code-t-middle.pdf", ) pages = 
barcodes.separate_pages(test_file, [1]) @@ -311,9 +311,7 @@ class TestBarcode(DirectoriesMixin, TestCase): def test_separate_pages_no_list(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "patch-code-t-middle.pdf", ) with self.assertLogs("paperless.barcodes", level="WARNING") as cm: @@ -328,9 +326,7 @@ class TestBarcode(DirectoriesMixin, TestCase): def test_save_to_dir(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "patch-code-t.pdf", ) tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) @@ -340,9 +336,7 @@ class TestBarcode(DirectoriesMixin, TestCase): def test_save_to_dir2(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "patch-code-t.pdf", ) nonexistingdir = "/nowhere" @@ -360,9 +354,7 @@ class TestBarcode(DirectoriesMixin, TestCase): def test_save_to_dir3(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "patch-code-t.pdf", ) tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) @@ -372,31 +364,36 @@ class TestBarcode(DirectoriesMixin, TestCase): def test_barcode_splitter(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "patch-code-t-middle.pdf", ) tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) - separators = barcodes.scan_file_for_separating_barcodes(test_file) - self.assertTrue(separators) - document_list = barcodes.separate_pages(test_file, separators) + + pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes( + test_file, + ) + + self.assertEqual(test_file, pdf_file) + self.assertTrue(len(separator_page_numbers) > 0) + + document_list = barcodes.separate_pages(test_file, separator_page_numbers) self.assertTrue(document_list) for 
document in document_list: barcodes.save_to_dir(document, target_dir=tempdir) + target_file1 = os.path.join(tempdir, "patch-code-t-middle_document_0.pdf") target_file2 = os.path.join(tempdir, "patch-code-t-middle_document_1.pdf") + self.assertTrue(os.path.isfile(target_file1)) self.assertTrue(os.path.isfile(target_file2)) @override_settings(CONSUMER_ENABLE_BARCODES=True) def test_consume_barcode_file(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "patch-code-t-middle.pdf", ) + dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pdf") shutil.copy(test_file, dst) @@ -408,9 +405,7 @@ class TestBarcode(DirectoriesMixin, TestCase): ) def test_consume_barcode_tiff_file(self): test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "patch-code-t-middle.tiff", ) dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.tiff") @@ -432,18 +427,17 @@ class TestBarcode(DirectoriesMixin, TestCase): and continue archiving the file as is. 
""" test_file = os.path.join( - os.path.dirname(__file__), - "samples", + self.SAMPLE_DIR, "simple.jpg", ) dst = os.path.join(settings.SCRATCH_DIR, "simple.jpg") shutil.copy(test_file, dst) - with self.assertLogs("paperless.tasks", level="WARNING") as cm: + with self.assertLogs("paperless.barcodes", level="WARNING") as cm: self.assertIn("Success", tasks.consume_file(dst)) self.assertListEqual( cm.output, [ - "WARNING:paperless.tasks:Unsupported file format for barcode reader: image/jpeg", + "WARNING:paperless.barcodes:Unsupported file format for barcode reader: image/jpeg", ], ) m.assert_called_once() @@ -465,9 +459,7 @@ class TestBarcode(DirectoriesMixin, TestCase): the user uploads a supported image file, but without extension """ test_file = os.path.join( - os.path.dirname(__file__), - "samples", - "barcodes", + self.BARCODE_SAMPLE_DIR, "patch-code-t-middle.tiff", ) dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle")