Removes pikepdf based scanning, fixes up unit testing (+ commenting)

This commit is contained in:
Trenton H 2023-01-27 08:34:00 -08:00
parent 94ad290e14
commit 2ab77fbaf7
4 changed files with 352 additions and 169 deletions

View File

@ -4,7 +4,6 @@ import shutil
import tempfile
from dataclasses import dataclass
from functools import lru_cache
from math import ceil
from pathlib import Path
from typing import List
from typing import Optional
@ -12,10 +11,9 @@ from typing import Optional
import magic
from django.conf import settings
from pdf2image import convert_from_path
from pdf2image.exceptions import PDFPageCountError
from pikepdf import Page
from pikepdf import PasswordError
from pikepdf import Pdf
from pikepdf import PdfImage
from PIL import Image
from PIL import ImageSequence
from pyzbar import pyzbar
@ -154,52 +152,15 @@ def scan_file_for_barcodes(
(page_number, barcode_text) tuples
"""
def _pikepdf_barcode_scan(pdf_filepath: str) -> List[Barcode]:
detected_barcodes = []
with Pdf.open(pdf_filepath) as pdf:
for page_num, page in enumerate(pdf.pages):
for image_key in page.images:
pdfimage = PdfImage(page.images[image_key])
# This type is known to have issues:
# https://github.com/pikepdf/pikepdf/issues/401
if "/CCITTFaxDecode" in pdfimage.filters:
raise BarcodeImageFormatError(
"Unable to decode CCITTFaxDecode images",
)
# Not all images can be transcoded to a PIL image, which
# is what pyzbar expects to receive, so this may
# raise an exception, triggering fallback
pillow_img = pdfimage.as_pil_image()
# Scale the image down
# See: https://github.com/paperless-ngx/paperless-ngx/issues/2385
# TLDR: zbar has issues with larger images
width, height = pillow_img.size
if width > 1024:
scaler = ceil(width / 1024)
new_width = int(width / scaler)
new_height = int(height / scaler)
pillow_img = pillow_img.resize((new_width, new_height))
width, height = pillow_img.size
if height > 2048:
scaler = ceil(height / 2048)
new_width = int(width / scaler)
new_height = int(height / scaler)
pillow_img = pillow_img.resize((new_width, new_height))
for barcode_value in barcode_reader(pillow_img):
detected_barcodes.append(Barcode(page_num, barcode_value))
return detected_barcodes
def _pdf2image_barcode_scan(pdf_filepath: str) -> List[Barcode]:
detected_barcodes = []
# use a temporary directory in case the file is too big to handle in memory
with tempfile.TemporaryDirectory() as path:
pages_from_path = convert_from_path(pdf_filepath, output_folder=path)
pages_from_path = convert_from_path(
pdf_filepath,
dpi=300,
output_folder=path,
)
for current_page_number, page in enumerate(pages_from_path):
for barcode_value in barcode_reader(page):
detected_barcodes.append(
@ -219,27 +180,19 @@ def scan_file_for_barcodes(
# pikepdf based scanning has been removed; each page is rendered to an
# image with pdf2image and then scanned for barcodes
try:
barcodes = _pikepdf_barcode_scan(pdf_filepath)
barcodes = _pdf2image_barcode_scan(pdf_filepath)
# Password protected files can't be checked
except PasswordError as e:
# This is the exception raised for those
except PDFPageCountError as e:
logger.warning(
f"File is likely password protected, not checking for barcodes: {e}",
)
# Handle pikepdf related image decoding issues with a fallback to page
# by page conversion to images in a temporary directory
except Exception as e:
# This file is really borked, allow the consumption to continue
# but it may fail further on
except Exception as e: # pragma: no cover
logger.warning(
f"Falling back to pdf2image because: {e}",
f"Exception during barcode scanning: {e}",
)
try:
barcodes = _pdf2image_barcode_scan(pdf_filepath)
# This file is really borked, allow the consumption to continue
# but it may fail further on
except Exception as e: # pragma: no cover
logger.warning(
f"Exception during barcode scanning: {e}",
)
else:
logger.warning(
f"Unsupported file format for barcode reader: {str(mime_type)}",

View File

@ -3,7 +3,6 @@ import shutil
import tempfile
from unittest import mock
import pikepdf
from django.conf import settings
from django.test import override_settings
from django.test import TestCase
@ -23,13 +22,29 @@ class TestBarcode(DirectoriesMixin, TestCase):
BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes")
def test_barcode_reader(self):
def test_barcode_reader_png(self):
"""
GIVEN:
- PNG file with separator barcode
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = os.path.join(self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT.png")
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
separator_barcode = settings.CONSUMER_BARCODE_STRING
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader2(self):
def test_barcode_reader_pbm(self):
"""
GIVEN:
- Netpbm bitmap file with separator barcode
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pbm",
@ -38,25 +53,49 @@ class TestBarcode(DirectoriesMixin, TestCase):
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader_distorsion(self):
def test_barcode_reader_distortion_scratchy(self):
"""
GIVEN:
- Image containing high noise
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-PATCHT-distorsion.png",
"barcode-39-PATCHT-distortion.png",
)
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader_distorsion2(self):
def test_barcode_reader_distortion_stretched(self):
"""
GIVEN:
- Image with a stretched barcode
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-PATCHT-distorsion2.png",
"barcode-39-PATCHT-distortion2.png",
)
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader_unreadable(self):
"""
GIVEN:
- Image with a truly unreadable barcode
WHEN:
- Image is scanned for codes
THEN:
- No barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-PATCHT-unreadable.png",
@ -65,6 +104,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(barcodes.barcode_reader(img), [])
def test_barcode_reader_qr(self):
"""
GIVEN:
- Image file with QR separator barcode
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"qr-code-PATCHT.png",
@ -74,6 +121,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader_128(self):
"""
GIVEN:
- Image file with 128 style separator barcode
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-128-PATCHT.png",
@ -83,11 +138,27 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader_no_barcode(self):
"""
GIVEN:
- Image file with no barcode
WHEN:
- Image is scanned for codes
THEN:
- No barcode is detected
"""
test_file = os.path.join(self.SAMPLE_DIR, "simple.png")
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), [])
self.assertListEqual(barcodes.barcode_reader(img), [])
def test_barcode_reader_custom_separator(self):
"""
GIVEN:
- Image file with custom separator barcode value
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-custom.png",
@ -96,6 +167,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
def test_barcode_reader_custom_qr_separator(self):
"""
GIVEN:
- Image file with custom separator barcode value as a QR code
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-qr-custom.png",
@ -104,6 +183,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
def test_barcode_reader_custom_128_separator(self):
"""
GIVEN:
- Image file with custom separator 128 barcode value
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-128-custom.png",
@ -164,6 +251,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM-PREFIX-00123"])
def test_get_mime_type(self):
"""
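GIVEN:
- Sample files of different types (e.g. TIFF, PNG)
WHEN:
- The mime type of each file is determined
THEN:
- The expected mime type is reported for each file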
"""
tiff_file = os.path.join(
self.SAMPLE_DIR,
"simple.tiff",
@ -194,6 +289,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(barcodes.get_file_mime_type(png_file), "image/png")
def test_convert_from_tiff_to_pdf(self):
"""
GIVEN:
- Sample file to be converted
WHEN:
- The file is converted to PDF
THEN:
- The resulting file has a .pdf extension
"""
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
@ -207,6 +310,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(file_extension, ".pdf")
def test_convert_error_from_pdf_to_pdf(self):
"""
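GIVEN:
- PDF file, which is not a TIFF image
WHEN:
- The file is passed to the TIFF to PDF conversion
THEN:
- No converted file is returned (the result is None)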
"""
test_file = os.path.join(
self.SAMPLE_DIR,
"simple.pdf",
@ -216,6 +327,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertIsNone(barcodes.convert_from_tiff_to_pdf(dst))
def test_scan_file_for_separating_barcodes(self):
"""
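GIVEN:
- PDF file containing a separator on page 0 (zero indexed)
WHEN:
- File is scanned for barcodes
THEN:
- Barcode is detected on page 0 (zero indexed)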
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
@ -231,6 +350,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertListEqual(separator_page_numbers, [0])
def test_scan_file_for_separating_barcodes_none_present(self):
"""
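GIVEN:
- PDF file containing no separator barcodes
WHEN:
- File is scanned for barcodes
THEN:
- No separator pages are detected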
"""
test_file = os.path.join(self.SAMPLE_DIR, "simple.pdf")
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
@ -242,7 +369,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(doc_barcode_info.pdf_path, test_file)
self.assertListEqual(separator_page_numbers, [])
def test_scan_file_for_separating_barcodes3(self):
def test_scan_file_for_separating_barcodes_middle_page(self):
"""
GIVEN:
- PDF file containing a separator on page 1 (zero indexed)
WHEN:
- File is scanned for barcodes
THEN:
- Barcode is detected on page 1 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
@ -257,7 +392,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(doc_barcode_info.pdf_path, test_file)
self.assertListEqual(separator_page_numbers, [1])
def test_scan_file_for_separating_barcodes4(self):
def test_scan_file_for_separating_barcodes_multiple_pages(self):
"""
GIVEN:
- PDF file containing a separator on pages 2 and 5 (zero indexed)
WHEN:
- File is scanned for barcodes
THEN:
- Barcode is detected on pages 2 and 5 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"several-patcht-codes.pdf",
@ -272,7 +415,16 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(doc_barcode_info.pdf_path, test_file)
self.assertListEqual(separator_page_numbers, [2, 5])
def test_scan_file_for_separating_barcodes_upsidedown(self):
def test_scan_file_for_separating_barcodes_upside_down(self):
"""
GIVEN:
- PDF file containing a separator on page 1 (zero indexed)
- The barcode is upside down
WHEN:
- File is scanned for barcodes
THEN:
- Barcode is detected on page 1 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle_reverse.pdf",
@ -287,66 +439,6 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(doc_barcode_info.pdf_path, test_file)
self.assertListEqual(separator_page_numbers, [1])
def test_scan_file_for_barcodes_pillow_transcode_error(self):
"""
GIVEN:
- A PDF containing an image which cannot be transcoded to a PIL image
WHEN:
- The image tries to be transcoded to a PIL image, but fails
THEN:
- The barcode reader is still called
"""
def _build_device_n_pdf(self, save_path: str):
# Based on the pikepdf tests
# https://github.com/pikepdf/pikepdf/blob/abb35ebe17d579d76abe08265e00cf8890a12a95/tests/test_image_access.py
pdf = pikepdf.new()
pdf.add_blank_page(page_size=(72, 72))
imobj = pikepdf.Stream(
pdf,
bytes(range(0, 256)),
BitsPerComponent=8,
ColorSpace=pikepdf.Array(
[
pikepdf.Name.DeviceN,
pikepdf.Array([pikepdf.Name.Black]),
pikepdf.Name.DeviceCMYK,
pikepdf.Stream(
pdf,
b"{0 0 0 4 -1 roll}", # Colorspace conversion function
FunctionType=4,
Domain=[0.0, 1.0],
Range=[0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
),
],
),
Width=16,
Height=16,
Type=pikepdf.Name.XObject,
Subtype=pikepdf.Name.Image,
)
pim = pikepdf.PdfImage(imobj)
self.assertEqual(pim.mode, "DeviceN")
self.assertTrue(pim.is_device_n)
pdf.pages[0].Contents = pikepdf.Stream(pdf, b"72 0 0 72 0 0 cm /Im0 Do")
pdf.pages[0].Resources = pikepdf.Dictionary(
XObject=pikepdf.Dictionary(Im0=imobj),
)
pdf.save(save_path)
with tempfile.NamedTemporaryFile(suffix="pdf") as device_n_pdf:
# Build an offending file
_build_device_n_pdf(self, str(device_n_pdf.name))
with mock.patch("documents.barcodes.barcode_reader") as reader:
reader.return_value = list()
_ = barcodes.scan_file_for_barcodes(
str(device_n_pdf.name),
)
reader.assert_called()
def test_scan_file_for_separating_barcodes_fax_decode(self):
"""
GIVEN:
@ -371,6 +463,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertListEqual(separator_page_numbers, [1])
def test_scan_file_for_separating_qr_barcodes(self):
"""
GIVEN:
- PDF file containing a separator on page 0 (zero indexed)
- The barcode is a QR code
WHEN:
- File is scanned for barcodes
THEN:
- Barcode is detected on page 0 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-qr.pdf",
@ -387,6 +488,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
def test_scan_file_for_separating_custom_barcodes(self):
"""
GIVEN:
- PDF file containing a separator on page 0 (zero indexed)
- The barcode separation value is customized
WHEN:
- File is scanned for barcodes
THEN:
- Barcode is detected on page 0 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-custom.pdf",
@ -403,6 +513,16 @@ class TestBarcode(DirectoriesMixin, TestCase):
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
def test_scan_file_for_separating_custom_qr_barcodes(self):
"""
GIVEN:
- PDF file containing a separator on page 0 (zero indexed)
- The barcode separation value is customized
- The barcode is a QR code
WHEN:
- File is scanned for barcodes
THEN:
- Barcode is detected on page 0 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-qr-custom.pdf",
@ -419,6 +539,16 @@ class TestBarcode(DirectoriesMixin, TestCase):
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
def test_scan_file_for_separating_custom_128_barcodes(self):
"""
GIVEN:
- PDF file containing a separator on page 0 (zero indexed)
- The barcode separation value is customized
- The barcode is a 128 code
WHEN:
- File is scanned for barcodes
THEN:
- Barcode is detected on page 0 (zero indexed)
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-128-custom.pdf",
@ -434,6 +564,16 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertListEqual(separator_page_numbers, [0])
def test_scan_file_for_separating_wrong_qr_barcodes(self):
"""
GIVEN:
- PDF file containing a separator on page 0 (zero indexed)
- The barcode value is customized
- The separation value is NOT customized
WHEN:
- File is scanned for barcodes
THEN:
- No split pages are detected
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"barcode-39-custom.pdf",
@ -474,13 +614,21 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertListEqual(separator_page_numbers, [1])
def test_separate_pages(self):
"""
GIVEN:
- Input PDF with a separator on page 1 (zero indexed)
WHEN:
- The input file is separated at the barcode
THEN:
- Two new documents are produced
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
pages = barcodes.separate_pages(test_file, [1])
documents = barcodes.separate_pages(test_file, [1])
self.assertEqual(len(pages), 2)
self.assertEqual(len(documents), 2)
def test_separate_pages_double_code(self):
"""
@ -493,8 +641,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
"""
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
self.BARCODE_SAMPLE_DIR,
"patch-code-t-double.pdf",
)
pages = barcodes.separate_pages(test_file, [1, 2])
@ -502,6 +649,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertEqual(len(pages), 2)
def test_separate_pages_no_list(self):
"""
GIVEN:
- Input file to separate
WHEN:
- No separation pages are provided
THEN:
- No new documents are produced
- A warning is logged
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
@ -517,16 +673,32 @@ class TestBarcode(DirectoriesMixin, TestCase):
)
def test_save_to_dir(self):
"""
GIVEN:
- File to save to a directory
WHEN:
- The file is saved
THEN:
- The file exists
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
barcodes.save_to_dir(test_file, target_dir=tempdir)
target_file = os.path.join(tempdir, "patch-code-t.pdf")
barcodes.save_to_dir(test_file, target_dir=settings.SCRATCH_DIR)
target_file = os.path.join(settings.SCRATCH_DIR, "patch-code-t.pdf")
self.assertTrue(os.path.isfile(target_file))
def test_save_to_dir2(self):
def test_save_to_dir_not_existing(self):
"""
GIVEN:
- File to save to a directory
- The directory doesn't exist
WHEN:
- The file is saved
THEN:
- The file exists
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
@ -534,32 +706,51 @@ class TestBarcode(DirectoriesMixin, TestCase):
nonexistingdir = "/nowhere"
if os.path.isdir(nonexistingdir):
self.fail("non-existing dir exists")
else:
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
barcodes.save_to_dir(test_file, target_dir=nonexistingdir)
self.assertEqual(
cm.output,
[
f"WARNING:paperless.barcodes:{str(test_file)} or {str(nonexistingdir)} don't exist.",
],
)
def test_save_to_dir3(self):
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
barcodes.save_to_dir(test_file, target_dir=nonexistingdir)
self.assertEqual(
cm.output,
[
f"WARNING:paperless.barcodes:{str(test_file)} or {str(nonexistingdir)} don't exist.",
],
)
def test_save_to_dir_given_name(self):
"""
GIVEN:
- File to save to a directory
- There is a name override
WHEN:
- The file is saved
THEN:
- The file exists
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t.pdf",
)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
barcodes.save_to_dir(test_file, newname="newname.pdf", target_dir=tempdir)
target_file = os.path.join(tempdir, "newname.pdf")
barcodes.save_to_dir(
test_file,
newname="newname.pdf",
target_dir=settings.SCRATCH_DIR,
)
target_file = os.path.join(settings.SCRATCH_DIR, "newname.pdf")
self.assertTrue(os.path.isfile(target_file))
def test_barcode_splitter(self):
"""
GIVEN:
- Input file containing barcodes
WHEN:
- Input file is split on barcodes
THEN:
- Correct number of files produced
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
@ -572,18 +763,33 @@ class TestBarcode(DirectoriesMixin, TestCase):
self.assertTrue(len(separator_page_numbers) > 0)
document_list = barcodes.separate_pages(test_file, separator_page_numbers)
self.assertTrue(document_list)
for document in document_list:
barcodes.save_to_dir(document, target_dir=tempdir)
self.assertGreater(len(document_list), 0)
target_file1 = os.path.join(tempdir, "patch-code-t-middle_document_0.pdf")
target_file2 = os.path.join(tempdir, "patch-code-t-middle_document_1.pdf")
for document in document_list:
barcodes.save_to_dir(document, target_dir=settings.SCRATCH_DIR)
target_file1 = os.path.join(
settings.SCRATCH_DIR,
"patch-code-t-middle_document_0.pdf",
)
target_file2 = os.path.join(
settings.SCRATCH_DIR,
"patch-code-t-middle_document_1.pdf",
)
self.assertTrue(os.path.isfile(target_file1))
self.assertTrue(os.path.isfile(target_file2))
@override_settings(CONSUMER_ENABLE_BARCODES=True)
def test_consume_barcode_file(self):
"""
GIVEN:
- Input file with barcodes given to consume task
WHEN:
- Consume task returns
THEN:
- The file was split
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.pdf",
@ -600,6 +806,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
CONSUMER_BARCODE_TIFF_SUPPORT=True,
)
def test_consume_barcode_tiff_file(self):
"""
GIVEN:
- TIFF image containing barcodes
WHEN:
- Consume task returns
THEN:
- The file was split
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
"patch-code-t-middle.tiff",
@ -617,11 +831,13 @@ class TestBarcode(DirectoriesMixin, TestCase):
@mock.patch("documents.consumer.Consumer.try_consume_file")
def test_consume_barcode_unsupported_jpg_file(self, m):
"""
This test assumes barcode and TIFF support are enabled and
the user uploads an unsupported image file (e.g. jpg)
The function shouldn't try to scan for separating barcodes
and continue archiving the file as is.
GIVEN:
- JPEG image as input
WHEN:
- Consume task returns
THEN:
- Barcode reader reported warning
- Consumption continued with the file
"""
test_file = os.path.join(
self.SAMPLE_DIR,
@ -629,8 +845,10 @@ class TestBarcode(DirectoriesMixin, TestCase):
)
dst = os.path.join(settings.SCRATCH_DIR, "simple.jpg")
shutil.copy(test_file, dst)
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
self.assertIn("Success", tasks.consume_file(dst))
self.assertListEqual(
cm.output,
[
@ -652,8 +870,13 @@ class TestBarcode(DirectoriesMixin, TestCase):
)
def test_consume_barcode_supported_no_extension_file(self):
"""
This test assumes barcode and TIFF support are enabled and
the user uploads a supported image file, but without extension
GIVEN:
- TIFF image containing barcodes
- TIFF file is given without extension
WHEN:
- Consume task returns
THEN:
- The file was split
"""
test_file = os.path.join(
self.BARCODE_SAMPLE_DIR,
@ -669,11 +892,10 @@ class TestBarcode(DirectoriesMixin, TestCase):
"""
GIVEN:
- Password protected PDF
- pikepdf based scanning
WHEN:
- File is scanned for barcode
THEN:
- Scanning handles the exception without exception
- Scanning handles the exception without crashing
"""
test_file = os.path.join(self.SAMPLE_DIR, "password-is-test.pdf")
doc_barcode_info = barcodes.scan_file_for_barcodes(
@ -808,7 +1030,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
@override_settings(CONSUMER_ENABLE_ASN_BARCODE=True)
def test_asn_too_large(self):
"""
GIVEN:
- ASN from barcode enabled
- Barcode contains too large an ASN value
WHEN:
- ASN from barcode checked for correctness
THEN:
- Exception is raised regarding size limits
"""
src = os.path.join(
os.path.dirname(__file__),
"samples",