mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Updates how barcodes are detected, using pikepdf images, instead of converting each page to an image
This commit is contained in:
parent
f9a0adc64e
commit
7aa0e5650b
1
Pipfile
1
Pipfile
@ -53,7 +53,6 @@ concurrent-log-handler = "*"
|
||||
"importlib-resources" = {version = "*", markers = "python_version < '3.9'"}
|
||||
zipp = {version = "*", markers = "python_version < '3.9'"}
|
||||
pyzbar = "*"
|
||||
pdf2image = "*"
|
||||
mysqlclient = "*"
|
||||
setproctitle = "*"
|
||||
|
||||
|
24
Pipfile.lock
generated
24
Pipfile.lock
generated
@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "6f46be21b67938add11dbf0ecea4f722836f161f58fa5e47dec3f92edb346371"
|
||||
"sha256": "896665b8ff6d8a99af44b729c581033add1ba5cbd927723ef275649491c92a4f"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {},
|
||||
@ -788,14 +788,6 @@
|
||||
"index": "pypi",
|
||||
"version": "==2.5.2"
|
||||
},
|
||||
"pdf2image": {
|
||||
"hashes": [
|
||||
"sha256:84f79f2b8fad943e36323ea4e937fcb05f26ded0caa0a01181df66049e42fb65",
|
||||
"sha256:d58ed94d978a70c73c2bb7fdf8acbaf2a7089c29ff8141be5f45433c0c4293bb"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.16.0"
|
||||
},
|
||||
"pdfminer.six": {
|
||||
"hashes": [
|
||||
"sha256:5a64c924410ac48501d6060b21638bf401db69f5b1bd57207df7fbc070ac8ae2",
|
||||
@ -1055,6 +1047,7 @@
|
||||
},
|
||||
"pyyaml": {
|
||||
"hashes": [
|
||||
"sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf",
|
||||
"sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293",
|
||||
"sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b",
|
||||
"sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57",
|
||||
@ -1066,26 +1059,32 @@
|
||||
"sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287",
|
||||
"sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513",
|
||||
"sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0",
|
||||
"sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782",
|
||||
"sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0",
|
||||
"sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92",
|
||||
"sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f",
|
||||
"sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2",
|
||||
"sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc",
|
||||
"sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1",
|
||||
"sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c",
|
||||
"sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86",
|
||||
"sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4",
|
||||
"sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c",
|
||||
"sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34",
|
||||
"sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b",
|
||||
"sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d",
|
||||
"sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c",
|
||||
"sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb",
|
||||
"sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7",
|
||||
"sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737",
|
||||
"sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3",
|
||||
"sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d",
|
||||
"sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358",
|
||||
"sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53",
|
||||
"sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78",
|
||||
"sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803",
|
||||
"sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a",
|
||||
"sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f",
|
||||
"sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174",
|
||||
"sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"
|
||||
],
|
||||
@ -2261,6 +2260,7 @@
|
||||
},
|
||||
"pyyaml": {
|
||||
"hashes": [
|
||||
"sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf",
|
||||
"sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293",
|
||||
"sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b",
|
||||
"sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57",
|
||||
@ -2272,26 +2272,32 @@
|
||||
"sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287",
|
||||
"sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513",
|
||||
"sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0",
|
||||
"sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782",
|
||||
"sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0",
|
||||
"sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92",
|
||||
"sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f",
|
||||
"sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2",
|
||||
"sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc",
|
||||
"sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1",
|
||||
"sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c",
|
||||
"sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86",
|
||||
"sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4",
|
||||
"sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c",
|
||||
"sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34",
|
||||
"sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b",
|
||||
"sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d",
|
||||
"sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c",
|
||||
"sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb",
|
||||
"sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7",
|
||||
"sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737",
|
||||
"sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3",
|
||||
"sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d",
|
||||
"sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358",
|
||||
"sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53",
|
||||
"sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78",
|
||||
"sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803",
|
||||
"sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a",
|
||||
"sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f",
|
||||
"sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174",
|
||||
"sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"
|
||||
],
|
||||
|
@ -3,13 +3,15 @@ import os
|
||||
import shutil
|
||||
import tempfile
|
||||
from functools import lru_cache
|
||||
from typing import List # for type hinting. Can be removed, if only Python >3.8 is used
|
||||
from typing import List
|
||||
from typing import Optional
|
||||
from typing import Tuple
|
||||
|
||||
import magic
|
||||
from django.conf import settings
|
||||
from pdf2image import convert_from_path
|
||||
from pikepdf import Page
|
||||
from pikepdf import Pdf
|
||||
from pikepdf import PdfImage
|
||||
from PIL import Image
|
||||
from PIL import ImageSequence
|
||||
from pyzbar import pyzbar
|
||||
@ -32,7 +34,7 @@ def supported_file_type(mime_type) -> bool:
|
||||
return mime_type in supported_mime
|
||||
|
||||
|
||||
def barcode_reader(image) -> List[str]:
|
||||
def barcode_reader(image: Image) -> List[str]:
|
||||
"""
|
||||
Read any barcodes contained in image
|
||||
Returns a list containing all found barcodes
|
||||
@ -99,21 +101,39 @@ def convert_from_tiff_to_pdf(filepath: str) -> str:
|
||||
return newpath
|
||||
|
||||
|
||||
def scan_file_for_separating_barcodes(filepath: str) -> List[int]:
|
||||
def scan_file_for_separating_barcodes(filepath: str) -> Tuple[Optional[str], List[int]]:
|
||||
"""
|
||||
Scan the provided pdf file for page separating barcodes
|
||||
Returns a list of pagenumbers, which separate the file
|
||||
Returns the PDF filepath and a list of pagenumbers,
|
||||
which separate the file into new files
|
||||
"""
|
||||
|
||||
separator_page_numbers = []
|
||||
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
|
||||
# use a temporary directory in case the file is too big to handle in memory
|
||||
with tempfile.TemporaryDirectory() as path:
|
||||
pages_from_path = convert_from_path(filepath, output_folder=path)
|
||||
for current_page_number, page in enumerate(pages_from_path):
|
||||
current_barcodes = barcode_reader(page)
|
||||
if separator_barcode in current_barcodes:
|
||||
separator_page_numbers.append(current_page_number)
|
||||
return separator_page_numbers
|
||||
pdf_filepath = None
|
||||
|
||||
mime_type = get_file_mime_type(filepath)
|
||||
|
||||
if supported_file_type(mime_type):
|
||||
pdf_filepath = filepath
|
||||
if mime_type == "image/tiff":
|
||||
pdf_filepath = convert_from_tiff_to_pdf(filepath)
|
||||
|
||||
pdf = Pdf.open(pdf_filepath)
|
||||
|
||||
for page_num, page in enumerate(pdf.pages):
|
||||
for image_key in page.images:
|
||||
pdfimage = PdfImage(page.images[image_key])
|
||||
pillow_img = pdfimage.as_pil_image()
|
||||
|
||||
detected_barcodes = barcode_reader(pillow_img)
|
||||
|
||||
if settings.CONSUMER_BARCODE_STRING in detected_barcodes:
|
||||
separator_page_numbers.append(page_num)
|
||||
else:
|
||||
logger.warning(
|
||||
f"Unsupported file format for barcode reader: {str(mime_type)}",
|
||||
)
|
||||
return pdf_filepath, separator_page_numbers
|
||||
|
||||
|
||||
def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]:
|
||||
|
@ -96,29 +96,13 @@ def consume_file(
|
||||
# check for separators in current document
|
||||
if settings.CONSUMER_ENABLE_BARCODES:
|
||||
|
||||
mime_type = barcodes.get_file_mime_type(path)
|
||||
pdf_filepath, separators = barcodes.scan_file_for_separating_barcodes(path)
|
||||
|
||||
if not barcodes.supported_file_type(mime_type):
|
||||
# if not supported, skip this routine
|
||||
logger.warning(
|
||||
f"Unsupported file format for barcode reader: {str(mime_type)}",
|
||||
if separators:
|
||||
logger.debug(
|
||||
f"Pages with separators found in: {str(path)}",
|
||||
)
|
||||
else:
|
||||
separators = []
|
||||
document_list = []
|
||||
|
||||
if mime_type == "image/tiff":
|
||||
file_to_process = barcodes.convert_from_tiff_to_pdf(path)
|
||||
else:
|
||||
file_to_process = path
|
||||
|
||||
separators = barcodes.scan_file_for_separating_barcodes(file_to_process)
|
||||
|
||||
if separators:
|
||||
logger.debug(
|
||||
f"Pages with separators found in: {str(path)}",
|
||||
)
|
||||
document_list = barcodes.separate_pages(file_to_process, separators)
|
||||
document_list = barcodes.separate_pages(pdf_filepath, separators)
|
||||
|
||||
if document_list:
|
||||
for n, document in enumerate(document_list):
|
||||
@ -134,15 +118,13 @@ def consume_file(
|
||||
target_dir=path.parent,
|
||||
)
|
||||
|
||||
# if we got here, the document was successfully split
|
||||
# and can safely be deleted
|
||||
if mime_type == "image/tiff":
|
||||
# Remove the TIFF converted to PDF file
|
||||
logger.debug(f"Deleting file {file_to_process}")
|
||||
os.unlink(file_to_process)
|
||||
# Remove the original file (new file is saved above)
|
||||
logger.debug(f"Deleting file {path}")
|
||||
os.unlink(path)
|
||||
# Delete the PDF file which was split
|
||||
os.remove(pdf_filepath)
|
||||
|
||||
# If the original was a TIFF, remove the original file as well
|
||||
if str(pdf_filepath) != str(path):
|
||||
logger.debug(f"Deleting file {path}")
|
||||
os.unlink(path)
|
||||
|
||||
# notify the sender, otherwise the progress bar
|
||||
# in the UI stays stuck
|
||||
|
@ -13,22 +13,23 @@ from PIL import Image
|
||||
|
||||
|
||||
class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
SAMPLE_DIR = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
)
|
||||
|
||||
BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes")
|
||||
|
||||
def test_barcode_reader(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
"barcode-39-PATCHT.png",
|
||||
)
|
||||
test_file = os.path.join(self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT.png")
|
||||
img = Image.open(test_file)
|
||||
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
|
||||
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
|
||||
|
||||
def test_barcode_reader2(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t.pbm",
|
||||
)
|
||||
img = Image.open(test_file)
|
||||
@ -37,9 +38,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_barcode_reader_distorsion(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-39-PATCHT-distorsion.png",
|
||||
)
|
||||
img = Image.open(test_file)
|
||||
@ -48,9 +47,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_barcode_reader_distorsion2(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-39-PATCHT-distorsion2.png",
|
||||
)
|
||||
img = Image.open(test_file)
|
||||
@ -59,9 +56,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_barcode_reader_unreadable(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-39-PATCHT-unreadable.png",
|
||||
)
|
||||
img = Image.open(test_file)
|
||||
@ -69,9 +64,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_barcode_reader_qr(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"qr-code-PATCHT.png",
|
||||
)
|
||||
img = Image.open(test_file)
|
||||
@ -80,9 +73,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_barcode_reader_128(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-128-PATCHT.png",
|
||||
)
|
||||
img = Image.open(test_file)
|
||||
@ -90,15 +81,13 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
|
||||
|
||||
def test_barcode_reader_no_barcode(self):
|
||||
test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png")
|
||||
test_file = os.path.join(self.SAMPLE_DIR, "simple.png")
|
||||
img = Image.open(test_file)
|
||||
self.assertEqual(barcodes.barcode_reader(img), [])
|
||||
|
||||
def test_barcode_reader_custom_separator(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-39-custom.png",
|
||||
)
|
||||
img = Image.open(test_file)
|
||||
@ -106,9 +95,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_barcode_reader_custom_qr_separator(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-qr-custom.png",
|
||||
)
|
||||
img = Image.open(test_file)
|
||||
@ -116,9 +103,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_barcode_reader_custom_128_separator(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-128-custom.png",
|
||||
)
|
||||
img = Image.open(test_file)
|
||||
@ -126,19 +111,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_get_mime_type(self):
|
||||
tiff_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
self.SAMPLE_DIR,
|
||||
"simple.tiff",
|
||||
)
|
||||
pdf_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
self.SAMPLE_DIR,
|
||||
"simple.pdf",
|
||||
)
|
||||
png_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-128-custom.png",
|
||||
)
|
||||
tiff_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile1")
|
||||
@ -173,8 +154,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_convert_error_from_pdf_to_pdf(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
self.SAMPLE_DIR,
|
||||
"simple.pdf",
|
||||
)
|
||||
dst = os.path.join(settings.SCRATCH_DIR, "simple.pdf")
|
||||
@ -183,107 +163,127 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_scan_file_for_separating_barcodes(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t.pdf",
|
||||
)
|
||||
pages = barcodes.scan_file_for_separating_barcodes(test_file)
|
||||
self.assertEqual(pages, [0])
|
||||
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
|
||||
test_file,
|
||||
)
|
||||
|
||||
self.assertEqual(pdf_file, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [0])
|
||||
|
||||
def test_scan_file_for_separating_barcodes2(self):
|
||||
test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
||||
pages = barcodes.scan_file_for_separating_barcodes(test_file)
|
||||
self.assertEqual(pages, [])
|
||||
test_file = os.path.join(self.SAMPLE_DIR, "simple.pdf")
|
||||
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
|
||||
test_file,
|
||||
)
|
||||
|
||||
self.assertEqual(pdf_file, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [])
|
||||
|
||||
def test_scan_file_for_separating_barcodes3(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle.pdf",
|
||||
)
|
||||
pages = barcodes.scan_file_for_separating_barcodes(test_file)
|
||||
self.assertEqual(pages, [1])
|
||||
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
|
||||
test_file,
|
||||
)
|
||||
|
||||
self.assertEqual(pdf_file, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [1])
|
||||
|
||||
def test_scan_file_for_separating_barcodes4(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"several-patcht-codes.pdf",
|
||||
)
|
||||
pages = barcodes.scan_file_for_separating_barcodes(test_file)
|
||||
self.assertEqual(pages, [2, 5])
|
||||
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
|
||||
test_file,
|
||||
)
|
||||
|
||||
self.assertEqual(pdf_file, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [2, 5])
|
||||
|
||||
def test_scan_file_for_separating_barcodes_upsidedown(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle_reverse.pdf",
|
||||
)
|
||||
pages = barcodes.scan_file_for_separating_barcodes(test_file)
|
||||
self.assertEqual(pages, [1])
|
||||
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
|
||||
test_file,
|
||||
)
|
||||
|
||||
self.assertEqual(pdf_file, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [1])
|
||||
|
||||
def test_scan_file_for_separating_qr_barcodes(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-qr.pdf",
|
||||
)
|
||||
pages = barcodes.scan_file_for_separating_barcodes(test_file)
|
||||
self.assertEqual(pages, [0])
|
||||
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
|
||||
test_file,
|
||||
)
|
||||
|
||||
self.assertEqual(pdf_file, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [0])
|
||||
|
||||
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
|
||||
def test_scan_file_for_separating_custom_barcodes(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-39-custom.pdf",
|
||||
)
|
||||
pages = barcodes.scan_file_for_separating_barcodes(test_file)
|
||||
self.assertEqual(pages, [0])
|
||||
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
|
||||
test_file,
|
||||
)
|
||||
|
||||
self.assertEqual(pdf_file, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [0])
|
||||
|
||||
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
|
||||
def test_scan_file_for_separating_custom_qr_barcodes(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-qr-custom.pdf",
|
||||
)
|
||||
pages = barcodes.scan_file_for_separating_barcodes(test_file)
|
||||
self.assertEqual(pages, [0])
|
||||
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
|
||||
test_file,
|
||||
)
|
||||
|
||||
self.assertEqual(pdf_file, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [0])
|
||||
|
||||
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
|
||||
def test_scan_file_for_separating_custom_128_barcodes(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-128-custom.pdf",
|
||||
)
|
||||
pages = barcodes.scan_file_for_separating_barcodes(test_file)
|
||||
self.assertEqual(pages, [0])
|
||||
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
|
||||
test_file,
|
||||
)
|
||||
|
||||
self.assertEqual(pdf_file, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [0])
|
||||
|
||||
def test_scan_file_for_separating_wrong_qr_barcodes(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-39-custom.pdf",
|
||||
)
|
||||
pages = barcodes.scan_file_for_separating_barcodes(test_file)
|
||||
self.assertEqual(pages, [])
|
||||
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
|
||||
test_file,
|
||||
)
|
||||
|
||||
self.assertEqual(pdf_file, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [])
|
||||
|
||||
def test_separate_pages(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle.pdf",
|
||||
)
|
||||
pages = barcodes.separate_pages(test_file, [1])
|
||||
@ -311,9 +311,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_separate_pages_no_list(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle.pdf",
|
||||
)
|
||||
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
||||
@ -328,9 +326,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_save_to_dir(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t.pdf",
|
||||
)
|
||||
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
|
||||
@ -340,9 +336,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_save_to_dir2(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t.pdf",
|
||||
)
|
||||
nonexistingdir = "/nowhere"
|
||||
@ -360,9 +354,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_save_to_dir3(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t.pdf",
|
||||
)
|
||||
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
|
||||
@ -372,31 +364,36 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_barcode_splitter(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle.pdf",
|
||||
)
|
||||
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
|
||||
separators = barcodes.scan_file_for_separating_barcodes(test_file)
|
||||
self.assertTrue(separators)
|
||||
document_list = barcodes.separate_pages(test_file, separators)
|
||||
|
||||
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
|
||||
test_file,
|
||||
)
|
||||
|
||||
self.assertEqual(test_file, pdf_file)
|
||||
self.assertTrue(len(separator_page_numbers) > 0)
|
||||
|
||||
document_list = barcodes.separate_pages(test_file, separator_page_numbers)
|
||||
self.assertTrue(document_list)
|
||||
for document in document_list:
|
||||
barcodes.save_to_dir(document, target_dir=tempdir)
|
||||
|
||||
target_file1 = os.path.join(tempdir, "patch-code-t-middle_document_0.pdf")
|
||||
target_file2 = os.path.join(tempdir, "patch-code-t-middle_document_1.pdf")
|
||||
|
||||
self.assertTrue(os.path.isfile(target_file1))
|
||||
self.assertTrue(os.path.isfile(target_file2))
|
||||
|
||||
@override_settings(CONSUMER_ENABLE_BARCODES=True)
|
||||
def test_consume_barcode_file(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle.pdf",
|
||||
)
|
||||
|
||||
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pdf")
|
||||
shutil.copy(test_file, dst)
|
||||
|
||||
@ -408,9 +405,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
def test_consume_barcode_tiff_file(self):
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle.tiff",
|
||||
)
|
||||
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.tiff")
|
||||
@ -432,18 +427,17 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
and continue archiving the file as is.
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
self.SAMPLE_DIR,
|
||||
"simple.jpg",
|
||||
)
|
||||
dst = os.path.join(settings.SCRATCH_DIR, "simple.jpg")
|
||||
shutil.copy(test_file, dst)
|
||||
with self.assertLogs("paperless.tasks", level="WARNING") as cm:
|
||||
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
||||
self.assertIn("Success", tasks.consume_file(dst))
|
||||
self.assertListEqual(
|
||||
cm.output,
|
||||
[
|
||||
"WARNING:paperless.tasks:Unsupported file format for barcode reader: image/jpeg",
|
||||
"WARNING:paperless.barcodes:Unsupported file format for barcode reader: image/jpeg",
|
||||
],
|
||||
)
|
||||
m.assert_called_once()
|
||||
@ -465,9 +459,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
the user uploads a supported image file, but without extension
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle.tiff",
|
||||
)
|
||||
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle")
|
||||
|
Loading…
x
Reference in New Issue
Block a user