mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Removes pikepdf based scanning, fixes up unit testing (+ commenting)
This commit is contained in:
parent
94ad290e14
commit
2ab77fbaf7
@ -4,7 +4,6 @@ import shutil
|
||||
import tempfile
|
||||
from dataclasses import dataclass
|
||||
from functools import lru_cache
|
||||
from math import ceil
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from typing import Optional
|
||||
@ -12,10 +11,9 @@ from typing import Optional
|
||||
import magic
|
||||
from django.conf import settings
|
||||
from pdf2image import convert_from_path
|
||||
from pdf2image.exceptions import PDFPageCountError
|
||||
from pikepdf import Page
|
||||
from pikepdf import PasswordError
|
||||
from pikepdf import Pdf
|
||||
from pikepdf import PdfImage
|
||||
from PIL import Image
|
||||
from PIL import ImageSequence
|
||||
from pyzbar import pyzbar
|
||||
@ -154,52 +152,15 @@ def scan_file_for_barcodes(
|
||||
(page_number, barcode_text) tuples
|
||||
"""
|
||||
|
||||
def _pikepdf_barcode_scan(pdf_filepath: str) -> List[Barcode]:
|
||||
detected_barcodes = []
|
||||
with Pdf.open(pdf_filepath) as pdf:
|
||||
for page_num, page in enumerate(pdf.pages):
|
||||
for image_key in page.images:
|
||||
pdfimage = PdfImage(page.images[image_key])
|
||||
|
||||
# This type is known to have issues:
|
||||
# https://github.com/pikepdf/pikepdf/issues/401
|
||||
if "/CCITTFaxDecode" in pdfimage.filters:
|
||||
raise BarcodeImageFormatError(
|
||||
"Unable to decode CCITTFaxDecode images",
|
||||
)
|
||||
|
||||
# Not all images can be transcoded to a PIL image, which
|
||||
# is what pyzbar expects to receive, so this may
|
||||
# raise an exception, triggering fallback
|
||||
pillow_img = pdfimage.as_pil_image()
|
||||
|
||||
# Scale the image down
|
||||
# See: https://github.com/paperless-ngx/paperless-ngx/issues/2385
|
||||
# TLDR: zbar has issues with larger images
|
||||
width, height = pillow_img.size
|
||||
if width > 1024:
|
||||
scaler = ceil(width / 1024)
|
||||
new_width = int(width / scaler)
|
||||
new_height = int(height / scaler)
|
||||
pillow_img = pillow_img.resize((new_width, new_height))
|
||||
|
||||
width, height = pillow_img.size
|
||||
if height > 2048:
|
||||
scaler = ceil(height / 2048)
|
||||
new_width = int(width / scaler)
|
||||
new_height = int(height / scaler)
|
||||
pillow_img = pillow_img.resize((new_width, new_height))
|
||||
|
||||
for barcode_value in barcode_reader(pillow_img):
|
||||
detected_barcodes.append(Barcode(page_num, barcode_value))
|
||||
|
||||
return detected_barcodes
|
||||
|
||||
def _pdf2image_barcode_scan(pdf_filepath: str) -> List[Barcode]:
|
||||
detected_barcodes = []
|
||||
# use a temporary directory in case the file is too big to handle in memory
|
||||
with tempfile.TemporaryDirectory() as path:
|
||||
pages_from_path = convert_from_path(pdf_filepath, output_folder=path)
|
||||
pages_from_path = convert_from_path(
|
||||
pdf_filepath,
|
||||
dpi=300,
|
||||
output_folder=path,
|
||||
)
|
||||
for current_page_number, page in enumerate(pages_from_path):
|
||||
for barcode_value in barcode_reader(page):
|
||||
detected_barcodes.append(
|
||||
@ -219,27 +180,19 @@ def scan_file_for_barcodes(
|
||||
# Scan by rendering each page to an image with pdf2image,
|
||||
# the pikepdf based scanning has been removed
|
||||
try:
|
||||
barcodes = _pikepdf_barcode_scan(pdf_filepath)
|
||||
barcodes = _pdf2image_barcode_scan(pdf_filepath)
|
||||
# Password protected files can't be checked
|
||||
except PasswordError as e:
|
||||
# This is the exception raised for those
|
||||
except PDFPageCountError as e:
|
||||
logger.warning(
|
||||
f"File is likely password protected, not checking for barcodes: {e}",
|
||||
)
|
||||
# Handle pikepdf related image decoding issues with a fallback to page
|
||||
# by page conversion to images in a temporary directory
|
||||
except Exception as e:
|
||||
# This file is really borked, allow the consumption to continue
|
||||
# but it may fail further on
|
||||
except Exception as e: # pragma: no cover
|
||||
logger.warning(
|
||||
f"Falling back to pdf2image because: {e}",
|
||||
f"Exception during barcode scanning: {e}",
|
||||
)
|
||||
try:
|
||||
barcodes = _pdf2image_barcode_scan(pdf_filepath)
|
||||
# This file is really borked, allow the consumption to continue
|
||||
# but it may fail further on
|
||||
except Exception as e: # pragma: no cover
|
||||
logger.warning(
|
||||
f"Exception during barcode scanning: {e}",
|
||||
)
|
||||
|
||||
else:
|
||||
logger.warning(
|
||||
f"Unsupported file format for barcode reader: {str(mime_type)}",
|
||||
|
Before Width: | Height: | Size: 33 KiB After Width: | Height: | Size: 33 KiB |
Before Width: | Height: | Size: 39 KiB After Width: | Height: | Size: 39 KiB |
@ -3,7 +3,6 @@ import shutil
|
||||
import tempfile
|
||||
from unittest import mock
|
||||
|
||||
import pikepdf
|
||||
from django.conf import settings
|
||||
from django.test import override_settings
|
||||
from django.test import TestCase
|
||||
@ -23,13 +22,29 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
BARCODE_SAMPLE_DIR = os.path.join(SAMPLE_DIR, "barcodes")
|
||||
|
||||
def test_barcode_reader(self):
|
||||
def test_barcode_reader_png(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- PNG file with separator barcode
|
||||
WHEN:
|
||||
- Image is scanned for codes
|
||||
THEN:
|
||||
- The barcode is detected
|
||||
"""
|
||||
test_file = os.path.join(self.BARCODE_SAMPLE_DIR, "barcode-39-PATCHT.png")
|
||||
img = Image.open(test_file)
|
||||
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
|
||||
separator_barcode = settings.CONSUMER_BARCODE_STRING
|
||||
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
|
||||
|
||||
def test_barcode_reader2(self):
|
||||
def test_barcode_reader_pbm(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Netpbm bitmap file with separator barcode
|
||||
WHEN:
|
||||
- Image is scanned for codes
|
||||
THEN:
|
||||
- The barcode is detected
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t.pbm",
|
||||
@ -38,25 +53,49 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
|
||||
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
|
||||
|
||||
def test_barcode_reader_distorsion(self):
|
||||
def test_barcode_reader_distortion_scratchy(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Image containing high noise
|
||||
WHEN:
|
||||
- Image is scanned for codes
|
||||
THEN:
|
||||
- The barcode is detected
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-39-PATCHT-distorsion.png",
|
||||
"barcode-39-PATCHT-distortion.png",
|
||||
)
|
||||
img = Image.open(test_file)
|
||||
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
|
||||
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
|
||||
|
||||
def test_barcode_reader_distorsion2(self):
|
||||
def test_barcode_reader_distortion_stretched(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Image with a stretched barcode
|
||||
WHEN:
|
||||
- Image is scanned for codes
|
||||
THEN:
|
||||
- The barcode is detected
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-39-PATCHT-distorsion2.png",
|
||||
"barcode-39-PATCHT-distortion2.png",
|
||||
)
|
||||
img = Image.open(test_file)
|
||||
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
|
||||
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
|
||||
|
||||
def test_barcode_reader_unreadable(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Image with a truly unreadable barcode
|
||||
WHEN:
|
||||
- Image is scanned for codes
|
||||
THEN:
|
||||
- No barcode is detected
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-39-PATCHT-unreadable.png",
|
||||
@ -65,6 +104,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(barcodes.barcode_reader(img), [])
|
||||
|
||||
def test_barcode_reader_qr(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Image file with QR separator barcode
|
||||
WHEN:
|
||||
- Image is scanned for codes
|
||||
THEN:
|
||||
- The barcode is detected
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"qr-code-PATCHT.png",
|
||||
@ -74,6 +121,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
|
||||
|
||||
def test_barcode_reader_128(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Image file with 128 style separator barcode
|
||||
WHEN:
|
||||
- Image is scanned for codes
|
||||
THEN:
|
||||
- The barcode is detected
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-128-PATCHT.png",
|
||||
@ -83,11 +138,27 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
|
||||
|
||||
def test_barcode_reader_no_barcode(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Image file with no barcode
|
||||
WHEN:
|
||||
- Image is scanned for codes
|
||||
THEN:
|
||||
- No barcode is detected
|
||||
"""
|
||||
test_file = os.path.join(self.SAMPLE_DIR, "simple.png")
|
||||
img = Image.open(test_file)
|
||||
self.assertEqual(barcodes.barcode_reader(img), [])
|
||||
self.assertListEqual(barcodes.barcode_reader(img), [])
|
||||
|
||||
def test_barcode_reader_custom_separator(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Image file with custom separator barcode value
|
||||
WHEN:
|
||||
- Image is scanned for codes
|
||||
THEN:
|
||||
- The barcode is detected
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-39-custom.png",
|
||||
@ -96,6 +167,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
|
||||
|
||||
def test_barcode_reader_custom_qr_separator(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Image file with custom separator barcode value as a QR code
|
||||
WHEN:
|
||||
- Image is scanned for codes
|
||||
THEN:
|
||||
- The barcode is detected
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-qr-custom.png",
|
||||
@ -104,6 +183,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
|
||||
|
||||
def test_barcode_reader_custom_128_separator(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Image file with custom separator 128 barcode value
|
||||
WHEN:
|
||||
- Image is scanned for codes
|
||||
THEN:
|
||||
- The barcode is detected
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-128-custom.png",
|
||||
@ -164,6 +251,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM-PREFIX-00123"])
|
||||
|
||||
def test_get_mime_type(self):
|
||||
"""
|
||||
GIVEN:
|
||||
-
|
||||
WHEN:
|
||||
-
|
||||
THEN:
|
||||
-
|
||||
"""
|
||||
tiff_file = os.path.join(
|
||||
self.SAMPLE_DIR,
|
||||
"simple.tiff",
|
||||
@ -194,6 +289,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(barcodes.get_file_mime_type(png_file), "image/png")
|
||||
|
||||
def test_convert_from_tiff_to_pdf(self):
|
||||
"""
|
||||
GIVEN:
|
||||
-
|
||||
WHEN:
|
||||
-
|
||||
THEN:
|
||||
-
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
@ -207,6 +310,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(file_extension, ".pdf")
|
||||
|
||||
def test_convert_error_from_pdf_to_pdf(self):
|
||||
"""
|
||||
GIVEN:
|
||||
-
|
||||
WHEN:
|
||||
-
|
||||
THEN:
|
||||
-
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.SAMPLE_DIR,
|
||||
"simple.pdf",
|
||||
@ -216,6 +327,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertIsNone(barcodes.convert_from_tiff_to_pdf(dst))
|
||||
|
||||
def test_scan_file_for_separating_barcodes(self):
|
||||
"""
|
||||
GIVEN:
|
||||
-
|
||||
WHEN:
|
||||
-
|
||||
THEN:
|
||||
-
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t.pdf",
|
||||
@ -231,6 +350,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertListEqual(separator_page_numbers, [0])
|
||||
|
||||
def test_scan_file_for_separating_barcodes_none_present(self):
|
||||
"""
|
||||
GIVEN:
|
||||
-
|
||||
WHEN:
|
||||
-
|
||||
THEN:
|
||||
-
|
||||
"""
|
||||
test_file = os.path.join(self.SAMPLE_DIR, "simple.pdf")
|
||||
doc_barcode_info = barcodes.scan_file_for_barcodes(
|
||||
test_file,
|
||||
@ -242,7 +369,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [])
|
||||
|
||||
def test_scan_file_for_separating_barcodes3(self):
|
||||
def test_scan_file_for_separating_barcodes_middle_page(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- PDF file containing a separator on page 1 (zero indexed)
|
||||
WHEN:
|
||||
- File is scanned for barcodes
|
||||
THEN:
|
||||
- Barcode is detected on page 1 (zero indexed)
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle.pdf",
|
||||
@ -257,7 +392,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [1])
|
||||
|
||||
def test_scan_file_for_separating_barcodes4(self):
|
||||
def test_scan_file_for_separating_barcodes_multiple_pages(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- PDF file containing a separator on pages 2 and 5 (zero indexed)
|
||||
WHEN:
|
||||
- File is scanned for barcodes
|
||||
THEN:
|
||||
- Barcode is detected on pages 2 and 5 (zero indexed)
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"several-patcht-codes.pdf",
|
||||
@ -272,7 +415,16 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [2, 5])
|
||||
|
||||
def test_scan_file_for_separating_barcodes_upsidedown(self):
|
||||
def test_scan_file_for_separating_barcodes_upside_down(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- PDF file containing a separator on page 1 (zero indexed)
|
||||
- The barcode is upside down
|
||||
WHEN:
|
||||
- File is scanned for barcodes
|
||||
THEN:
|
||||
- Barcode is detected on page 1 (zero indexed)
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle_reverse.pdf",
|
||||
@ -287,66 +439,6 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [1])
|
||||
|
||||
def test_scan_file_for_barcodes_pillow_transcode_error(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- A PDF containing an image which cannot be transcoded to a PIL image
|
||||
WHEN:
|
||||
- The image tries to be transcoded to a PIL image, but fails
|
||||
THEN:
|
||||
- The barcode reader is still called
|
||||
"""
|
||||
|
||||
def _build_device_n_pdf(self, save_path: str):
|
||||
# Based on the pikepdf tests
|
||||
# https://github.com/pikepdf/pikepdf/blob/abb35ebe17d579d76abe08265e00cf8890a12a95/tests/test_image_access.py
|
||||
pdf = pikepdf.new()
|
||||
pdf.add_blank_page(page_size=(72, 72))
|
||||
imobj = pikepdf.Stream(
|
||||
pdf,
|
||||
bytes(range(0, 256)),
|
||||
BitsPerComponent=8,
|
||||
ColorSpace=pikepdf.Array(
|
||||
[
|
||||
pikepdf.Name.DeviceN,
|
||||
pikepdf.Array([pikepdf.Name.Black]),
|
||||
pikepdf.Name.DeviceCMYK,
|
||||
pikepdf.Stream(
|
||||
pdf,
|
||||
b"{0 0 0 4 -1 roll}", # Colorspace conversion function
|
||||
FunctionType=4,
|
||||
Domain=[0.0, 1.0],
|
||||
Range=[0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
|
||||
),
|
||||
],
|
||||
),
|
||||
Width=16,
|
||||
Height=16,
|
||||
Type=pikepdf.Name.XObject,
|
||||
Subtype=pikepdf.Name.Image,
|
||||
)
|
||||
pim = pikepdf.PdfImage(imobj)
|
||||
self.assertEqual(pim.mode, "DeviceN")
|
||||
self.assertTrue(pim.is_device_n)
|
||||
|
||||
pdf.pages[0].Contents = pikepdf.Stream(pdf, b"72 0 0 72 0 0 cm /Im0 Do")
|
||||
pdf.pages[0].Resources = pikepdf.Dictionary(
|
||||
XObject=pikepdf.Dictionary(Im0=imobj),
|
||||
)
|
||||
pdf.save(save_path)
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix="pdf") as device_n_pdf:
|
||||
# Build an offending file
|
||||
_build_device_n_pdf(self, str(device_n_pdf.name))
|
||||
with mock.patch("documents.barcodes.barcode_reader") as reader:
|
||||
reader.return_value = list()
|
||||
|
||||
_ = barcodes.scan_file_for_barcodes(
|
||||
str(device_n_pdf.name),
|
||||
)
|
||||
|
||||
reader.assert_called()
|
||||
|
||||
def test_scan_file_for_separating_barcodes_fax_decode(self):
|
||||
"""
|
||||
GIVEN:
|
||||
@ -371,6 +463,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertListEqual(separator_page_numbers, [1])
|
||||
|
||||
def test_scan_file_for_separating_qr_barcodes(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- PDF file containing a separator on page 0 (zero indexed)
|
||||
- The barcode is a QR code
|
||||
WHEN:
|
||||
- File is scanned for barcodes
|
||||
THEN:
|
||||
- Barcode is detected on page 0 (zero indexed)
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-qr.pdf",
|
||||
@ -387,6 +488,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
|
||||
def test_scan_file_for_separating_custom_barcodes(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- PDF file containing a separator on page 0 (zero indexed)
|
||||
- The barcode separation value is customized
|
||||
WHEN:
|
||||
- File is scanned for barcodes
|
||||
THEN:
|
||||
- Barcode is detected on page 0 (zero indexed)
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-39-custom.pdf",
|
||||
@ -403,6 +513,16 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
|
||||
def test_scan_file_for_separating_custom_qr_barcodes(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- PDF file containing a separator on page 0 (zero indexed)
|
||||
- The barcode separation value is customized
|
||||
- The barcode is a QR code
|
||||
WHEN:
|
||||
- File is scanned for barcodes
|
||||
THEN:
|
||||
- Barcode is detected on page 0 (zero indexed)
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-qr-custom.pdf",
|
||||
@ -419,6 +539,16 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
|
||||
def test_scan_file_for_separating_custom_128_barcodes(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- PDF file containing a separator on page 0 (zero indexed)
|
||||
- The barcode separation value is customized
|
||||
- The barcode is a 128 code
|
||||
WHEN:
|
||||
- File is scanned for barcodes
|
||||
THEN:
|
||||
- Barcode is detected on page 0 (zero indexed)
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-128-custom.pdf",
|
||||
@ -434,6 +564,16 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertListEqual(separator_page_numbers, [0])
|
||||
|
||||
def test_scan_file_for_separating_wrong_qr_barcodes(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- PDF file containing a separator on page 0 (zero indexed)
|
||||
- The barcode value is customized
|
||||
- The separation value is NOT customized
|
||||
WHEN:
|
||||
- File is scanned for barcodes
|
||||
THEN:
|
||||
- No split pages are detected
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"barcode-39-custom.pdf",
|
||||
@ -474,13 +614,21 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertListEqual(separator_page_numbers, [1])
|
||||
|
||||
def test_separate_pages(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Input PDF which yields 2 documents after separation
|
||||
WHEN:
|
||||
- The input file is separated at the barcode
|
||||
THEN:
|
||||
- Two new documents are produced
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle.pdf",
|
||||
)
|
||||
pages = barcodes.separate_pages(test_file, [1])
|
||||
documents = barcodes.separate_pages(test_file, [1])
|
||||
|
||||
self.assertEqual(len(pages), 2)
|
||||
self.assertEqual(len(documents), 2)
|
||||
|
||||
def test_separate_pages_double_code(self):
|
||||
"""
|
||||
@ -493,8 +641,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
"barcodes",
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-double.pdf",
|
||||
)
|
||||
pages = barcodes.separate_pages(test_file, [1, 2])
|
||||
@ -502,6 +649,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(len(pages), 2)
|
||||
|
||||
def test_separate_pages_no_list(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Input file to separate
|
||||
WHEN:
|
||||
- No separation pages are provided
|
||||
THEN:
|
||||
- No new documents are produced
|
||||
- A warning is logged
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle.pdf",
|
||||
@ -517,16 +673,32 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
|
||||
def test_save_to_dir(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- File to save to a directory
|
||||
WHEN:
|
||||
- The file is saved
|
||||
THEN:
|
||||
- The file exists
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t.pdf",
|
||||
)
|
||||
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
|
||||
barcodes.save_to_dir(test_file, target_dir=tempdir)
|
||||
target_file = os.path.join(tempdir, "patch-code-t.pdf")
|
||||
barcodes.save_to_dir(test_file, target_dir=settings.SCRATCH_DIR)
|
||||
target_file = os.path.join(settings.SCRATCH_DIR, "patch-code-t.pdf")
|
||||
self.assertTrue(os.path.isfile(target_file))
|
||||
|
||||
def test_save_to_dir2(self):
|
||||
def test_save_to_dir_not_existing(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- File to save to a directory
|
||||
- The directory doesn't exist
|
||||
WHEN:
|
||||
- The file is saved
|
||||
THEN:
|
||||
- The file exists
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t.pdf",
|
||||
@ -534,32 +706,51 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
nonexistingdir = "/nowhere"
|
||||
if os.path.isdir(nonexistingdir):
|
||||
self.fail("non-existing dir exists")
|
||||
else:
|
||||
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
||||
barcodes.save_to_dir(test_file, target_dir=nonexistingdir)
|
||||
self.assertEqual(
|
||||
cm.output,
|
||||
[
|
||||
f"WARNING:paperless.barcodes:{str(test_file)} or {str(nonexistingdir)} don't exist.",
|
||||
],
|
||||
)
|
||||
|
||||
def test_save_to_dir3(self):
|
||||
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
||||
barcodes.save_to_dir(test_file, target_dir=nonexistingdir)
|
||||
self.assertEqual(
|
||||
cm.output,
|
||||
[
|
||||
f"WARNING:paperless.barcodes:{str(test_file)} or {str(nonexistingdir)} don't exist.",
|
||||
],
|
||||
)
|
||||
|
||||
def test_save_to_dir_given_name(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- File to save to a directory
|
||||
- There is a name override
|
||||
WHEN:
|
||||
- The file is saved
|
||||
THEN:
|
||||
- The file exists
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t.pdf",
|
||||
)
|
||||
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
|
||||
barcodes.save_to_dir(test_file, newname="newname.pdf", target_dir=tempdir)
|
||||
target_file = os.path.join(tempdir, "newname.pdf")
|
||||
barcodes.save_to_dir(
|
||||
test_file,
|
||||
newname="newname.pdf",
|
||||
target_dir=settings.SCRATCH_DIR,
|
||||
)
|
||||
target_file = os.path.join(settings.SCRATCH_DIR, "newname.pdf")
|
||||
self.assertTrue(os.path.isfile(target_file))
|
||||
|
||||
def test_barcode_splitter(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Input file containing barcodes
|
||||
WHEN:
|
||||
- Input file is split on barcodes
|
||||
THEN:
|
||||
- Correct number of files produced
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle.pdf",
|
||||
)
|
||||
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
|
||||
|
||||
doc_barcode_info = barcodes.scan_file_for_barcodes(
|
||||
test_file,
|
||||
@ -572,18 +763,33 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.assertTrue(len(separator_page_numbers) > 0)
|
||||
|
||||
document_list = barcodes.separate_pages(test_file, separator_page_numbers)
|
||||
self.assertTrue(document_list)
|
||||
for document in document_list:
|
||||
barcodes.save_to_dir(document, target_dir=tempdir)
|
||||
self.assertGreater(len(document_list), 0)
|
||||
|
||||
target_file1 = os.path.join(tempdir, "patch-code-t-middle_document_0.pdf")
|
||||
target_file2 = os.path.join(tempdir, "patch-code-t-middle_document_1.pdf")
|
||||
for document in document_list:
|
||||
barcodes.save_to_dir(document, target_dir=settings.SCRATCH_DIR)
|
||||
|
||||
target_file1 = os.path.join(
|
||||
settings.SCRATCH_DIR,
|
||||
"patch-code-t-middle_document_0.pdf",
|
||||
)
|
||||
target_file2 = os.path.join(
|
||||
settings.SCRATCH_DIR,
|
||||
"patch-code-t-middle_document_1.pdf",
|
||||
)
|
||||
|
||||
self.assertTrue(os.path.isfile(target_file1))
|
||||
self.assertTrue(os.path.isfile(target_file2))
|
||||
|
||||
@override_settings(CONSUMER_ENABLE_BARCODES=True)
|
||||
def test_consume_barcode_file(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- Input file with barcodes given to consume task
|
||||
WHEN:
|
||||
- Consume task returns
|
||||
THEN:
|
||||
- The file was split
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle.pdf",
|
||||
@ -600,6 +806,14 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
CONSUMER_BARCODE_TIFF_SUPPORT=True,
|
||||
)
|
||||
def test_consume_barcode_tiff_file(self):
|
||||
"""
|
||||
GIVEN:
|
||||
- TIFF image containing barcodes
|
||||
WHEN:
|
||||
- Consume task returns
|
||||
THEN:
|
||||
- The file was split
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle.tiff",
|
||||
@ -617,11 +831,13 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
@mock.patch("documents.consumer.Consumer.try_consume_file")
|
||||
def test_consume_barcode_unsupported_jpg_file(self, m):
|
||||
"""
|
||||
This test assumes barcode and TIFF support are enabled and
|
||||
the user uploads an unsupported image file (e.g. jpg)
|
||||
|
||||
The function shouldn't try to scan for separating barcodes
|
||||
and continue archiving the file as is.
|
||||
GIVEN:
|
||||
- JPEG image as input
|
||||
WHEN:
|
||||
- Consume task returns
|
||||
THEN:
|
||||
- Barcode reader reported a warning
|
||||
- Consumption continued with the file
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.SAMPLE_DIR,
|
||||
@ -629,8 +845,10 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
dst = os.path.join(settings.SCRATCH_DIR, "simple.jpg")
|
||||
shutil.copy(test_file, dst)
|
||||
|
||||
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
||||
self.assertIn("Success", tasks.consume_file(dst))
|
||||
|
||||
self.assertListEqual(
|
||||
cm.output,
|
||||
[
|
||||
@ -652,8 +870,13 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
def test_consume_barcode_supported_no_extension_file(self):
|
||||
"""
|
||||
This test assumes barcode and TIFF support are enabled and
|
||||
the user uploads a supported image file, but without extension
|
||||
GIVEN:
|
||||
- TIFF image containing barcodes
|
||||
- TIFF file is given without extension
|
||||
WHEN:
|
||||
- Consume task returns
|
||||
THEN:
|
||||
- The file was split
|
||||
"""
|
||||
test_file = os.path.join(
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
@ -669,11 +892,10 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
"""
|
||||
GIVEN:
|
||||
- Password protected PDF
|
||||
- pikepdf based scanning
|
||||
WHEN:
|
||||
- File is scanned for barcode
|
||||
THEN:
|
||||
- Scanning handles the exception without exception
|
||||
- Scanning handles the exception without crashing
|
||||
"""
|
||||
test_file = os.path.join(self.SAMPLE_DIR, "password-is-test.pdf")
|
||||
doc_barcode_info = barcodes.scan_file_for_barcodes(
|
||||
@ -808,7 +1030,15 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
|
||||
@override_settings(CONSUMER_ENABLE_ASN_BARCODE=True)
|
||||
def test_asn_too_large(self):
|
||||
|
||||
"""
|
||||
GIVEN:
|
||||
- ASN from barcode enabled
|
||||
- Barcode contains too large an ASN value
|
||||
WHEN:
|
||||
- ASN from barcode checked for correctness
|
||||
THEN:
|
||||
- Exception is raised regarding size limits
|
||||
"""
|
||||
src = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"samples",
|
||||
|
Loading…
x
Reference in New Issue
Block a user