Updates how barcodes are detected, using pikepdf images, instead of converting each page to an image

This commit is contained in:
Trenton Holmes
2022-09-14 11:49:22 -07:00
committed by Trenton H
parent d9b345ffd9
commit b21f64de8a
5 changed files with 178 additions and 179 deletions

View File

@@ -13,22 +13,23 @@ from PIL import Image
class TestBarcode(DirectoriesMixin, TestCase):
SAMPLE_DIR = os.path.join(
os.path.dirname(__file__),
"samples",
)
BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes")
def test_barcode_reader(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
"barcode-39-PATCHT.png",
)
test_file = os.path.join(self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT.png")
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader2(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pbm",
)
img = Image.open(test_file)
@@ -37,9 +38,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_distorsion(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-39-PATCHT-distorsion.png",
)
img = Image.open(test_file)
@@ -48,9 +47,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_distorsion2(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-39-PATCHT-distorsion2.png",
)
img = Image.open(test_file)
@@ -59,9 +56,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_unreadable(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-39-PATCHT-unreadable.png",
)
img = Image.open(test_file)
@@ -69,9 +64,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_qr(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"qr-code-PATCHT.png",
)
img = Image.open(test_file)
@@ -80,9 +73,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_128(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-128-PATCHT.png",
)
img = Image.open(test_file)
@@ -90,15 +81,13 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader_no_barcode(self):
test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png")
test_file = os.path.join(self.SAMPLE_DIR, "simple.png")
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), [])
def test_barcode_reader_custom_separator(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-39-custom.png",
)
img = Image.open(test_file)
@@ -106,9 +95,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_custom_qr_separator(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-qr-custom.png",
)
img = Image.open(test_file)
@@ -116,9 +103,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_reader_custom_128_separator(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-128-custom.png",
)
img = Image.open(test_file)
@@ -126,19 +111,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_get_mime_type(self):
tiff_file = os.path.join(
os.path.dirname(__file__),
"samples",
self.SAMPLE_DIR,
"simple.tiff",
)
pdf_file = os.path.join(
os.path.dirname(__file__),
"samples",
self.SAMPLE_DIR,
"simple.pdf",
)
png_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-128-custom.png",
)
tiff_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile1")
@@ -173,8 +154,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_convert_error_from_pdf_to_pdf(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
self.SAMPLE_DIR,
"simple.pdf",
)
dst = os.path.join(settings.SCRATCH_DIR, "simple.pdf")
@@ -183,107 +163,127 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_scan_file_for_separating_barcodes(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [0])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [0])
def test_scan_file_for_separating_barcodes2(self):
test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [])
test_file = os.path.join(self.SAMPLE_DIR, "simple.pdf")
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [])
def test_scan_file_for_separating_barcodes3(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [1])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [1])
def test_scan_file_for_separating_barcodes4(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"several-patcht-codes.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [2, 5])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [2, 5])
def test_scan_file_for_separating_barcodes_upsidedown(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle_reverse.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [1])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [1])
def test_scan_file_for_separating_qr_barcodes(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-qr.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [0])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [0])
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
def test_scan_file_for_separating_custom_barcodes(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-39-custom.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [0])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [0])
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
def test_scan_file_for_separating_custom_qr_barcodes(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-qr-custom.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [0])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [0])
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
def test_scan_file_for_separating_custom_128_barcodes(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-128-custom.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [0])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [0])
def test_scan_file_for_separating_wrong_qr_barcodes(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"barcode-39-custom.pdf",
)
pages = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertEqual(pages, [])
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(pdf_file, test_file)
self.assertListEqual(separator_page_numbers, [])
def test_separate_pages(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
pages = barcodes.separate_pages(test_file, [1])
@@ -311,9 +311,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_separate_pages_no_list(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
@@ -328,9 +326,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_save_to_dir(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
@@ -340,9 +336,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_save_to_dir2(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
nonexistingdir = "/nowhere"
@@ -360,9 +354,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_save_to_dir3(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
@@ -372,31 +364,36 @@ class TestBarcode(DirectoriesMixin, TestCase):
def test_barcode_splitter(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
separators = barcodes.scan_file_for_separating_barcodes(test_file)
self.assertTrue(separators)
document_list = barcodes.separate_pages(test_file, separators)
pdf_file, separator_page_numbers = barcodes.scan_file_for_separating_barcodes(
test_file,
)
self.assertEqual(test_file, pdf_file)
self.assertTrue(len(separator_page_numbers) > 0)
document_list = barcodes.separate_pages(test_file, separator_page_numbers)
self.assertTrue(document_list)
for document in document_list:
barcodes.save_to_dir(document, target_dir=tempdir)
target_file1 = os.path.join(tempdir, "patch-code-t-middle_document_0.pdf")
target_file2 = os.path.join(tempdir, "patch-code-t-middle_document_1.pdf")
self.assertTrue(os.path.isfile(target_file1))
self.assertTrue(os.path.isfile(target_file2))
@override_settings(CONSUMER_ENABLE_BARCODES=True)
def test_consume_barcode_file(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.pdf")
shutil.copy(test_file, dst)
@@ -408,9 +405,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
)
def test_consume_barcode_tiff_file(self):
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.tiff",
)
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle.tiff")
@@ -432,18 +427,17 @@ class TestBarcode(DirectoriesMixin, TestCase):
and continue archiving the file as is.
"""
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
self.SAMPLE_DIR,
"simple.jpg",
)
dst = os.path.join(settings.SCRATCH_DIR, "simple.jpg")
shutil.copy(test_file, dst)
with self.assertLogs("paperless.tasks", level="WARNING") as cm:
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
self.assertIn("Success", tasks.consume_file(dst))
self.assertListEqual(
cm.output,
[
"WARNING:paperless.tasks:Unsupported file format for barcode reader: image/jpeg",
"WARNING:paperless.barcodes:Unsupported file format for barcode reader: image/jpeg",
],
)
m.assert_called_once()
@@ -465,9 +459,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
the user uploads a supported image file, but without extension
"""
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.tiff",
)
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle")