Updates how barcodes are detected, using pikepdf images, instead of converting each page to an image

This commit is contained in:
Trenton Holmes 2022-09-14 11:49:22 -07:00 committed by Trenton H
parent f9a0adc64e
commit 7aa0e5650b
5 changed files with 178 additions and 179 deletions

View File

@ -53,7 +53,6 @@ concurrent-log-handler = "*"
"importlib-resources" = {version = "*", markers = "python_version < '3.9'"} "importlib-resources" = {version = "*", markers = "python_version < '3.9'"}
zipp = {version = "*", markers = "python_version < '3.9'"} zipp = {version = "*", markers = "python_version < '3.9'"}
pyzbar = "*" pyzbar = "*"
pdf2image = "*"
mysqlclient = "*" mysqlclient = "*"
setproctitle = "*" setproctitle = "*"

24
Pipfile.lock generated
View File

@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "6f46be21b67938add11dbf0ecea4f722836f161f58fa5e47dec3f92edb346371" "sha256": "896665b8ff6d8a99af44b729c581033add1ba5cbd927723ef275649491c92a4f"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": {}, "requires": {},
@ -788,14 +788,6 @@
"index": "pypi", "index": "pypi",
"version": "==2.5.2" "version": "==2.5.2"
}, },
"pdf2image": {
"hashes": [
"sha256:84f79f2b8fad943e36323ea4e937fcb05f26ded0caa0a01181df66049e42fb65",
"sha256:d58ed94d978a70c73c2bb7fdf8acbaf2a7089c29ff8141be5f45433c0c4293bb"
],
"index": "pypi",
"version": "==1.16.0"
},
"pdfminer.six": { "pdfminer.six": {
"hashes": [ "hashes": [
"sha256:5a64c924410ac48501d6060b21638bf401db69f5b1bd57207df7fbc070ac8ae2", "sha256:5a64c924410ac48501d6060b21638bf401db69f5b1bd57207df7fbc070ac8ae2",
@ -1055,6 +1047,7 @@
}, },
"pyyaml": { "pyyaml": {
"hashes": [ "hashes": [
"sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf",
"sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293",
"sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b", "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b",
"sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57", "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57",
@ -1066,26 +1059,32 @@
"sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287", "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287",
"sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513", "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513",
"sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0", "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0",
"sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782",
"sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0", "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0",
"sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92", "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92",
"sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f", "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f",
"sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2", "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2",
"sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc", "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc",
"sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1",
"sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c", "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c",
"sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86", "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86",
"sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4", "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4",
"sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c", "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c",
"sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34", "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34",
"sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b", "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b",
"sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d",
"sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c", "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c",
"sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb", "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb",
"sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7",
"sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737", "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737",
"sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3", "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3",
"sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d", "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d",
"sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358",
"sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53", "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53",
"sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78", "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78",
"sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803", "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803",
"sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a", "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a",
"sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f",
"sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174",
"sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"
], ],
@ -2261,6 +2260,7 @@
}, },
"pyyaml": { "pyyaml": {
"hashes": [ "hashes": [
"sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf",
"sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293",
"sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b", "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b",
"sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57", "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57",
@ -2272,26 +2272,32 @@
"sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287", "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287",
"sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513", "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513",
"sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0", "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0",
"sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782",
"sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0", "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0",
"sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92", "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92",
"sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f", "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f",
"sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2", "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2",
"sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc", "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc",
"sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1",
"sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c", "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c",
"sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86", "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86",
"sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4", "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4",
"sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c", "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c",
"sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34", "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34",
"sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b", "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b",
"sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d",
"sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c", "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c",
"sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb", "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb",
"sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7",
"sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737", "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737",
"sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3", "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3",
"sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d", "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d",
"sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358",
"sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53", "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53",
"sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78", "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78",
"sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803", "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803",
"sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a", "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a",
"sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f",
"sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174",
"sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"
], ],

View File

@ -3,13 +3,15 @@ import os
import shutil import shutil
import tempfile import tempfile
from functools import lru_cache from functools import lru_cache
from typing import List # for type hinting. Can be removed, if only Python >3.8 is used from typing import List
from typing import Optional
from typing import Tuple
import magic import magic
from django.conf import settings from django.conf import settings
from pdf2image import convert_from_path
from pikepdf import Page from pikepdf import Page
from pikepdf import Pdf from pikepdf import Pdf
from pikepdf import PdfImage
from PIL import Image from PIL import Image
from PIL import ImageSequence from PIL import ImageSequence
from pyzbar import pyzbar from pyzbar import pyzbar
@ -32,7 +34,7 @@ def supported_file_type(mime_type) -> bool:
return mime_type in supported_mime return mime_type in supported_mime
def barcode_reader(image) -> List[str]: def barcode_reader(image: Image) -> List[str]:
""" """
Read any barcodes contained in image Read any barcodes contained in image
Returns a list containing all found barcodes Returns a list containing all found barcodes
@ -99,21 +101,39 @@ def convert_from_tiff_to_pdf(filepath: str) -> str:
return newpath return newpath
def scan_file_for_separating_barcodes(filepath: str) -> List[int]: def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], List[int]]:
""" """
Scan the provided pdf file for page separating barcodes Scan the provided pdf file for page separating barcodes
Returns a list of pagenumbers, which separate the file Returns the PDF filepath and a list of pagenumbers,
which separate the file into new files
""" """
separator_page_numbers = [] separator_page_numbers = []
separator_barcode = str(settings.CONSUMER_BARCODE_STRING) pdf_filepath = None
# use a temporary directory in case the file is too big to handle in memory
with tempfile.TemporaryDirectory() as path: mime_type = get_file_mime_type(filepath)
pages_from_path = convert_from_path(filepath, output_folder=path)
for current_page_number, page in enumerate(pages_from_path): if supported_file_type(mime_type):
current_barcodes = barcode_reader(page) pdf_filepath = filepath
if separator_barcode in current_barcodes: if mime_type == "image/tiff":
separator_page_numbers.append(current_page_number) pdf_filepath = convert_from_tiff_to_pdf(filepath)
return separator_page_numbers
pdf = Pdf.open(pdf_filepath)
for page_num, page in enumerate(pdf.pages):
for image_key in page.images:
pdfimage = PdfImage(page.images[image_key])
pillow_img = pdfimage.as_pil_image()
detected_barcodes = barcode_reader(pillow_img)
if settings.CONSUMER_BARCODE_STRING in detected_barcodes:
separator_page_numbers.append(page_num)
else:
logger.warning(
f"Unsupported file format for barcode reader: {str(mime_type)}",
)
return pdf_filepath, separator_page_numbers
def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]: def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]:

View File

@ -96,29 +96,13 @@ def consume_file(
# check for separators in current document # check for separators in current document
if settings.CONSUMER_ENABLE_BARCODES: if settings.CONSUMER_ENABLE_BARCODES:
mime_type = barcodes.get_file_mime_type(path) pdf_filepath, separators = barcodes.scan_file_for_separating_barcodes(path)
if not barcodes.supported_file_type(mime_type): if separators:
# if not supported, skip this routine logger.debug(
logger.warning( f"Pages with separators found in: {str(path)}",
f"Unsupported file format for barcode reader: {str(mime_type)}",
) )
else: document_list = barcodes.separate_pages(pdf_filepath, separators)
separators = []
document_list = []
if mime_type == "image/tiff":
file_to_process = barcodes.convert_from_tiff_to_pdf(path)
else:
file_to_process = path
separators = barcodes.scan_file_for_separating_barcodes(file_to_process)
if separators:
logger.debug(
f"Pages with separators found in: {str(path)}",
)
document_list = barcodes.separate_pages(file_to_process, separators)
if document_list: if document_list:
for n, document in enumerate(document_list): for n, document in enumerate(document_list):
@ -134,15 +118,13 @@ def consume_file(
target_dir=path.parent, target_dir=path.parent,
) )
# if we got here, the document was successfully split # Delete the PDF file which was split
# and can safely be deleted os.remove(pdf_filepath)
if mime_type == "image/tiff":
# Remove the TIFF converted to PDF file # If the original was a TIFF, remove the original file as well
logger.debug(f"Deleting file {file_to_process}") if str(pdf_filepath) != str(path):
os.unlink(file_to_process) logger.debug(f"Deleting file {path}")
# Remove the original file (new file is saved above) os.unlink(path)
logger.debug(f"Deleting file {path}")
os.unlink(path)
# notify the sender, otherwise the progress bar # notify the sender, otherwise the progress bar
# in the UI stays stuck # in the UI stays stuck

View File

@ -13,22 +13,23 @@ from PIL import Image
class TestBarcode(DirectoriesMixin, TestCase): class TestBarcode(DirectoriesMixin, TestCase):
SAMPLE_DIR = os.path.join(
os.path.dirname(__file__),
"samples",
)
BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes")
def test_barcode_reader(self): def test_barcode_reader(self):
test_file = os.path.join( test_file = os.path.join(self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT.png")
os.path.dirname(__file__),
"samples",
"barcodes",
"barcode-39-PATCHT.png",
)
img = Image.open(test_file) img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING) separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode]) self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader2(self): def test_barcode_reader2(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"patch-code-t.pbm", "patch-code-t.pbm",
) )
img = Image.open(test_file) img = Image.open(test_file)
@ -37,9 +38,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_distorsion(self): def test_barcode_reader_distorsion(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"barcode-39-PATCHT-distorsion.png", "barcode-39-PATCHT-distorsion.png",
) )
img = Image.open(test_file) img = Image.open(test_file)
@ -48,9 +47,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_distorsion2(self): def test_barcode_reader_distorsion2(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"barcode-39-PATCHT-distorsion2.png", "barcode-39-PATCHT-distorsion2.png",
) )
img = Image.open(test_file) img = Image.open(test_file)
@ -59,9 +56,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_unreadable(self): def test_barcode_reader_unreadable(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"barcode-39-PATCHT-unreadable.png", "barcode-39-PATCHT-unreadable.png",
) )
img = Image.open(test_file) img = Image.open(test_file)
@ -69,9 +64,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_qr(self): def test_barcode_reader_qr(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"qr-code-PATCHT.png", "qr-code-PATCHT.png",
) )
img = Image.open(test_file) img = Image.open(test_file)
@ -80,9 +73,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_128(self): def test_barcode_reader_128(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"barcode-128-PATCHT.png", "barcode-128-PATCHT.png",
) )
img = Image.open(test_file) img = Image.open(test_file)
@ -90,15 +81,13 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode]) self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader_no_barcode(self): def test_barcode_reader_no_barcode(self):
test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png") test_file = os.path.join(self.SAMPLE_DIR, "simple.png")
img = Image.open(test_file) img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), []) self.assertEqual(barcodes.barcode_reader(img), [])
def test_barcode_reader_custom_separator(self): def test_barcode_reader_custom_separator(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"barcode-39-custom.png", "barcode-39-custom.png",
) )
img = Image.open(test_file) img = Image.open(test_file)
@ -106,9 +95,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_custom_qr_separator(self): def test_barcode_reader_custom_qr_separator(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"barcode-qr-custom.png", "barcode-qr-custom.png",
) )
img = Image.open(test_file) img = Image.open(test_file)
@ -116,9 +103,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_custom_128_separator(self): def test_barcode_reader_custom_128_separator(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"barcode-128-custom.png", "barcode-128-custom.png",
) )
img = Image.open(test_file) img = Image.open(test_file)
@ -126,19 +111,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_get_mime_type(self): def test_get_mime_type(self):
tiff_file = os.path.join( tiff_file = os.path.join(
os.path.dirname(__file__), self.SAMPLE_DIR,
"samples",
"simple.tiff", "simple.tiff",
) )
pdf_file = os.path.join( pdf_file = os.path.join(
os.path.dirname(__file__), self.SAMPLE_DIR,
"samples",
"simple.pdf", "simple.pdf",
) )
png_file = os.path.join( png_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"barcode-128-custom.png", "barcode-128-custom.png",
) )
tiff_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile1") tiff_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile1")
@ -173,8 +154,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_convert_error_from_pdf_to_pdf(self): def test_convert_error_from_pdf_to_pdf(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.SAMPLE_DIR,
"samples",
"simple.pdf", "simple.pdf",
) )
dst = os.path.join(settings.SCRATCH_DIR, "simple.pdf") dst = os.path.join(settings.SCRATCH_DIR, "simple.pdf")
@ -183,107 +163,127 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_scan_file_for_separating_barcodes(self): def test_scan_file_for_separating_barcodes(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"patch-code-t.pdf", "patch-code-t.pdf",
) )
pages = barcodes.scan_file_for_separating_barcodes(test_file) pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
self.assertEqual(pages, [0]) test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [0])
def test_scan_file_for_separating_barcodes2(self): def test_scan_file_for_separating_barcodes2(self):
test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") test_file = os.path.join(self.SAMPLE_DIR, "simple.pdf")
pages = barcodes.scan_file_for_separating_barcodes(test_file) pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
self.assertEqual(pages, []) test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [])
def test_scan_file_for_separating_barcodes3(self): def test_scan_file_for_separating_barcodes3(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"patch-code-t-middle.pdf", "patch-code-t-middle.pdf",
) )
pages = barcodes.scan_file_for_separating_barcodes(test_file) pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
self.assertEqual(pages, [1]) test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [1])
def test_scan_file_for_separating_barcodes4(self): def test_scan_file_for_separating_barcodes4(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"several-patcht-codes.pdf", "several-patcht-codes.pdf",
) )
pages = barcodes.scan_file_for_separating_barcodes(test_file) pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
self.assertEqual(pages, [2, 5]) test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [2, 5])
def test_scan_file_for_separating_barcodes_upsidedown(self): def test_scan_file_for_separating_barcodes_upsidedown(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"patch-code-t-middle_reverse.pdf", "patch-code-t-middle_reverse.pdf",
) )
pages = barcodes.scan_file_for_separating_barcodes(test_file) pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
self.assertEqual(pages, [1]) test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [1])
def test_scan_file_for_separating_qr_barcodes(self): def test_scan_file_for_separating_qr_barcodes(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"patch-code-t-qr.pdf", "patch-code-t-qr.pdf",
) )
pages = barcodes.scan_file_for_separating_barcodes(test_file) pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
self.assertEqual(pages, [0]) test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [0])
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE") @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
def test_scan_file_for_separating_custom_barcodes(self): def test_scan_file_for_separating_custom_barcodes(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"barcode-39-custom.pdf", "barcode-39-custom.pdf",
) )
pages = barcodes.scan_file_for_separating_barcodes(test_file) pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
self.assertEqual(pages, [0]) test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [0])
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE") @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
def test_scan_file_for_separating_custom_qr_barcodes(self): def test_scan_file_for_separating_custom_qr_barcodes(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"barcode-qr-custom.pdf", "barcode-qr-custom.pdf",
) )
pages = barcodes.scan_file_for_separating_barcodes(test_file) pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
self.assertEqual(pages, [0]) test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [0])
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE") @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
def test_scan_file_for_separating_custom_128_barcodes(self): def test_scan_file_for_separating_custom_128_barcodes(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"barcode-128-custom.pdf", "barcode-128-custom.pdf",
) )
pages = barcodes.scan_file_for_separating_barcodes(test_file) pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
self.assertEqual(pages, [0]) test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [0])
def test_scan_file_for_separating_wrong_qr_barcodes(self): def test_scan_file_for_separating_wrong_qr_barcodes(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"barcode-39-custom.pdf", "barcode-39-custom.pdf",
) )
pages = barcodes.scan_file_for_separating_barcodes(test_file) pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
self.assertEqual(pages, []) test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [])
def test_separate_pages(self): def test_separate_pages(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"patch-code-t-middle.pdf", "patch-code-t-middle.pdf",
) )
pages = barcodes.separate_pages(test_file, [1]) pages = barcodes.separate_pages(test_file, [1])
@ -311,9 +311,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_separate_pages_no_list(self): def test_separate_pages_no_list(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"patch-code-t-middle.pdf", "patch-code-t-middle.pdf",
) )
with self.assertLogs("paperless.barcodes", level="WARNING") as cm: with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
@ -328,9 +326,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_save_to_dir(self): def test_save_to_dir(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"patch-code-t.pdf", "patch-code-t.pdf",
) )
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
@ -340,9 +336,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_save_to_dir2(self): def test_save_to_dir2(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"patch-code-t.pdf", "patch-code-t.pdf",
) )
nonexistingdir = "/nowhere" nonexistingdir = "/nowhere"
@ -360,9 +354,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_save_to_dir3(self): def test_save_to_dir3(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"patch-code-t.pdf", "patch-code-t.pdf",
) )
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
@ -372,31 +364,36 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_splitter(self): def test_barcode_splitter(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"patch-code-t-middle.pdf", "patch-code-t-middle.pdf",
) )
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR) tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
separators = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertTrue(separators) pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
document_list = barcodes.separate_pages(test_file, separators) test_file,
)
self.assertEqual(test_file, pdf_file)
self.assertTrue(len(separator_page_numbers) > 0)
document_list = barcodes.separate_pages(test_file, separator_page_numbers)
self.assertTrue(document_list) self.assertTrue(document_list)
for document in document_list: for document in document_list:
barcodes.save_to_dir(document, target_dir=tempdir) barcodes.save_to_dir(document, target_dir=tempdir)
target_file1 = os.path.join(tempdir, "patch-code-t-middle_document_0.pdf") target_file1 = os.path.join(tempdir, "patch-code-t-middle_document_0.pdf")
target_file2 = os.path.join(tempdir, "patch-code-t-middle_document_1.pdf") target_file2 = os.path.join(tempdir, "patch-code-t-middle_document_1.pdf")
self.assertTrue(os.path.isfile(target_file1)) self.assertTrue(os.path.isfile(target_file1))
self.assertTrue(os.path.isfile(target_file2)) self.assertTrue(os.path.isfile(target_file2))
@override_settings(CONSUMER_ENABLE_BARCODES=True) @override_settings(CONSUMER_ENABLE_BARCODES=True)
def test_consume_barcode_file(self): def test_consume_barcode_file(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"patch-code-t-middle.pdf", "patch-code-t-middle.pdf",
) )
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pdf") dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pdf")
shutil.copy(test_file, dst) shutil.copy(test_file, dst)
@ -408,9 +405,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
) )
def test_consume_barcode_tiff_file(self): def test_consume_barcode_tiff_file(self):
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"patch-code-t-middle.tiff", "patch-code-t-middle.tiff",
) )
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.tiff") dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.tiff")
@ -432,18 +427,17 @@ class TestBarcode(DirectoriesMixin, TestCase):
and continue archiving the file as is. and continue archiving the file as is.
""" """
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.SAMPLE_DIR,
"samples",
"simple.jpg", "simple.jpg",
) )
dst = os.path.join(settings.SCRATCH_DIR, "simple.jpg") dst = os.path.join(settings.SCRATCH_DIR, "simple.jpg")
shutil.copy(test_file, dst) shutil.copy(test_file, dst)
with self.assertLogs("paperless.tasks", level="WARNING") as cm: with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
self.assertIn("Success", tasks.consume_file(dst)) self.assertIn("Success", tasks.consume_file(dst))
self.assertListEqual( self.assertListEqual(
cm.output, cm.output,
[ [
"WARNING:paperless.tasks:Unsupported file format for barcode reader: image/jpeg", "WARNING:paperless.barcodes:Unsupported file format for barcode reader: image/jpeg",
], ],
) )
m.assert_called_once() m.assert_called_once()
@ -465,9 +459,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
the user uploads a supported image file, but without extension the user uploads a supported image file, but without extension
""" """
test_file = os.path.join( test_file = os.path.join(
os.path.dirname(__file__), self.BARCODE_SAMPLE_DIR,
"samples",
"barcodes",
"patch-code-t-middle.tiff", "patch-code-t-middle.tiff",
) )
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle") dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle")