Updates handling of barcodes to encapsulate logic, moving it out of tasks and into barcodes

This commit is contained in:
Trenton H 2023-05-19 09:59:57 -07:00
parent 58f95c1891
commit 07e07fc7e8
21 changed files with 589 additions and 827 deletions

View File

@ -1,12 +1,11 @@
import logging import logging
import os
import shutil import shutil
import tempfile import tempfile
from dataclasses import dataclass from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path from pathlib import Path
from subprocess import run from subprocess import run
from typing import Dict from typing import Dict
from typing import Final
from typing import List from typing import List
from typing import Optional from typing import Optional
@ -18,13 +17,11 @@ from pikepdf import Page
from pikepdf import Pdf from pikepdf import Pdf
from PIL import Image from PIL import Image
from documents.data_models import DocumentSource
logger = logging.getLogger("paperless.barcodes") logger = logging.getLogger("paperless.barcodes")
class BarcodeImageFormatError(Exception):
pass
@dataclass(frozen=True) @dataclass(frozen=True)
class Barcode: class Barcode:
""" """
@ -51,176 +48,38 @@ class Barcode:
return self.value.startswith(settings.CONSUMER_ASN_BARCODE_PREFIX) return self.value.startswith(settings.CONSUMER_ASN_BARCODE_PREFIX)
@dataclass class BarcodeReader:
class DocumentBarcodeInfo: def __init__(self, filepath: Path, mime_type: str) -> None:
""" self.file: Final[Path] = filepath
Describes a single document's barcode status self.mime: Final[str] = mime_type
""" self.pdf_file: Path = self.file
self.barcodes: List[Barcode] = []
self.temp_dir: Optional[Path] = None
pdf_path: Path
barcodes: List[Barcode]
@lru_cache(maxsize=8)
def supported_file_type(mime_type: str) -> bool:
"""
Determines if the file is valid for barcode
processing, based on MIME type and settings
:return: True if the file is supported, False otherwise
"""
supported_mime = ["application/pdf"]
if settings.CONSUMER_BARCODE_TIFF_SUPPORT: if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
supported_mime += ["image/tiff"] self.SUPPORTED_FILE_MIMES = {"application/pdf", "image/tiff"}
return mime_type in supported_mime
def barcode_reader(image: Image) -> List[str]:
"""
Read any barcodes contained in image
Returns a list containing all found barcodes
"""
barcodes = []
if settings.CONSUMER_BARCODE_SCANNER == "PYZBAR":
logger.debug("Scanning for barcodes using PYZBAR")
from pyzbar import pyzbar
# Decode the barcode image
detected_barcodes = pyzbar.decode(image)
if detected_barcodes:
# Traverse through all the detected barcodes in image
for barcode in detected_barcodes:
if barcode.data:
decoded_barcode = barcode.data.decode("utf-8")
barcodes.append(decoded_barcode)
logger.debug(
f"Barcode of type {str(barcode.type)} found: {decoded_barcode}",
)
elif settings.CONSUMER_BARCODE_SCANNER == "ZXING":
logger.debug("Scanning for barcodes using ZXING")
import zxingcpp
detected_barcodes = zxingcpp.read_barcodes(image)
for barcode in detected_barcodes:
if barcode.text:
barcodes.append(barcode.text)
logger.debug(
f"Barcode of type {str(barcode.format)} found: {barcode.text}",
)
return barcodes
def convert_from_tiff_to_pdf(filepath: Path) -> Path:
"""
converts a given TIFF image file to pdf into a temporary directory.
Returns the new pdf file.
"""
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
# use old file name with pdf extension
newpath = Path(tempdir) / Path(filepath.name).with_suffix(".pdf")
with Image.open(filepath) as im:
has_alpha_layer = im.mode in ("RGBA", "LA")
if has_alpha_layer:
run(
[
settings.CONVERT_BINARY,
"-alpha",
"off",
filepath,
filepath,
],
)
with filepath.open("rb") as img_file, newpath.open("wb") as pdf_file:
pdf_file.write(img2pdf.convert(img_file))
return newpath
def scan_file_for_barcodes(
filepath: Path,
mime_type: str,
) -> DocumentBarcodeInfo:
"""
Scan the provided pdf file for any barcodes
Returns a PDF filepath and a list of
(page_number, barcode_text) tuples
"""
def _pdf2image_barcode_scan(pdf_filepath: str) -> List[Barcode]:
detected_barcodes = []
# use a temporary directory in case the file is too big to handle in memory
with tempfile.TemporaryDirectory() as path:
pages_from_path = convert_from_path(
pdf_filepath,
dpi=300,
output_folder=path,
)
for current_page_number, page in enumerate(pages_from_path):
for barcode_value in barcode_reader(page):
detected_barcodes.append(
Barcode(current_page_number, barcode_value),
)
return detected_barcodes
pdf_filepath = None
barcodes = []
if supported_file_type(mime_type):
pdf_filepath = filepath
if mime_type == "image/tiff":
pdf_filepath = convert_from_tiff_to_pdf(filepath)
# Always try pikepdf first, it's usually fine, faster and
# uses less memory
try:
barcodes = _pdf2image_barcode_scan(pdf_filepath)
# Password protected files can't be checked
# This is the exception raised for those
except PDFPageCountError as e:
logger.warning(
f"File is likely password protected, not checking for barcodes: {e}",
)
# This file is really borked, allow the consumption to continue
# but it may fail further on
except Exception as e: # pragma: no cover
logger.warning(
f"Exception during barcode scanning: {e}",
)
else: else:
logger.warning( self.SUPPORTED_FILE_MIMES = {"application/pdf"}
f"Unsupported file format for barcode reader: {str(mime_type)}",
)
return DocumentBarcodeInfo(pdf_filepath, barcodes) def __enter__(self):
if self.supported_mime_type:
self.temp_dir = tempfile.TemporaryDirectory(prefix="paperless-barcodes")
return self
def __exit__(self, exc_type, exc_val, exc_tb):
if self.temp_dir is not None:
self.temp_dir.cleanup()
self.temp_dir = None
def get_separating_barcodes(barcodes: List[Barcode]) -> Dict[int, bool]: @property
def supported_mime_type(self) -> bool:
""" """
Search the parsed barcodes for separators Return True if the given mime type is supported for barcodes, false otherwise
and returns a dict of page numbers, which
separate the file into new files, together
with the information whether to keep the page.
""" """
# filter all barcodes for the separator string return self.mime in self.SUPPORTED_FILE_MIMES
# get the page numbers of the separating barcodes
separator_pages = {bc.page: False for bc in barcodes if bc.is_separator}
if not settings.CONSUMER_ENABLE_ASN_BARCODE:
return separator_pages
# add the page numbers of the ASN barcodes @property
# (except for first page, that might lead to infinite loops). def asn(self) -> Optional[int]:
return {
**separator_pages,
**{bc.page: True for bc in barcodes if bc.is_asn and bc.page != 0},
}
def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
""" """
Search the parsed barcodes for any ASNs. Search the parsed barcodes for any ASNs.
The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
@ -229,9 +88,12 @@ def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
""" """
asn = None asn = None
# Ensure the barcodes have been read
self.detect()
# get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX # get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
asn_text = next( asn_text = next(
(x.value for x in barcodes if x.is_asn), (x.value for x in self.barcodes if x.is_asn),
None, None,
) )
@ -248,8 +110,139 @@ def get_asn_from_barcodes(barcodes: List[Barcode]) -> Optional[int]:
return asn return asn
@staticmethod
def read_barcodes_zxing(image: Image) -> List[str]:
barcodes = []
def separate_pages(filepath: Path, pages_to_split_on: Dict[int, bool]) -> List[Path]: import zxingcpp
detected_barcodes = zxingcpp.read_barcodes(image)
for barcode in detected_barcodes:
if barcode.text:
barcodes.append(barcode.text)
logger.debug(
f"Barcode of type {str(barcode.format)} found: {barcode.text}",
)
return barcodes
@staticmethod
def read_barcodes_pyzbar(image: Image) -> List[str]:
    """
    Decode the barcodes pyzbar detects in the image.

    Returns the decoded payloads as UTF-8 text, in the order pyzbar
    reported them. Detections with an empty payload are skipped.
    """
    # Function-scope import: pyzbar is only imported when this reader runs
    from pyzbar import pyzbar

    values: List[str] = []

    for barcode in pyzbar.decode(image):
        # Nothing to record for a detection without data
        if not barcode.data:
            continue
        decoded_barcode = barcode.data.decode("utf-8")
        values.append(decoded_barcode)
        logger.debug(
            f"Barcode of type {str(barcode.type)} found: {decoded_barcode}",
        )

    return values
def convert_from_tiff_to_pdf(self):
    """
    Write a PDF rendition of a TIFF input into the temporary directory
    and point pdf_file at it.

    For any other mime type this is a no-op and pdf_file is left as is.
    """
    if self.mime != "image/tiff":
        # pdf_file already refers to the right file, nothing to convert
        return

    with Image.open(self.file) as im:
        has_alpha_layer = im.mode in ("RGBA", "LA")

    # The input itself is read unless an alpha layer has to be dropped
    # first; it is never modified in place
    scratch_image = self.file
    if has_alpha_layer:
        # Run the configured convert binary with "-alpha off", saving the
        # result into the temp folder so as not to trigger a new consume
        scratch_image = Path(self.temp_dir.name) / Path(self.file.name)
        run(
            [settings.CONVERT_BINARY, "-alpha", "off", self.file, scratch_image],
        )

    # Same file name as the input, with a .pdf suffix, in the temp folder
    pdf_name = Path(self.file.name).with_suffix(".pdf")
    self.pdf_file = Path(self.temp_dir.name) / pdf_name

    with scratch_image.open("rb") as img_file, self.pdf_file.open("wb") as pdf_file:
        pdf_file.write(img2pdf.convert(img_file))
def detect(self) -> None:
    """
    Scan all pages of the PDF as images, recording every barcode found
    together with the zero-based index of the page it was found on.

    Does nothing when barcodes have already been collected, or when the
    mime type is not supported. A TIFF input which has not been converted
    yet is converted to a PDF first, so the scan also works when detect()
    is reached without separate() having run (for example through the
    asn property).

    Problems during conversion or scanning are logged as warnings and
    never raised, so the consumption can continue.
    """
    # Bail if barcodes already exist
    if self.barcodes:
        return

    # __enter__ only creates the temporary directory for supported types,
    # so there is nowhere to render pages to and nothing to scan
    if not self.supported_mime_type:
        logger.debug(
            f"Not scanning for barcodes, unsupported file format: {self.mime}",
        )
        return

    # Choose the library for reading
    if settings.CONSUMER_BARCODE_SCANNER == "PYZBAR":
        reader = self.read_barcodes_pyzbar
        logger.debug("Scanning for barcodes using PYZBAR")
    else:
        reader = self.read_barcodes_zxing
        logger.debug("Scanning for barcodes using ZXING")

    try:
        # pdf_file starts out equal to the input file and is re-pointed by
        # the conversion, so equality means the TIFF is not a PDF yet
        if self.mime == "image/tiff" and self.pdf_file == self.file:
            self.convert_from_tiff_to_pdf()

        pages_from_path = convert_from_path(
            self.pdf_file,
            dpi=300,
            output_folder=self.temp_dir.name,
        )

        for current_page_number, page in enumerate(pages_from_path):
            for barcode_value in reader(page):
                self.barcodes.append(
                    Barcode(current_page_number, barcode_value),
                )

    # Password protected files can't be checked
    # This is the exception raised for those
    except PDFPageCountError as e:
        logger.warning(
            f"File is likely password protected, not checking for barcodes: {e}",
        )
    # This file is really borked, allow the consumption to continue
    # but it may fail further on
    except Exception as e:  # pragma: no cover
        logger.warning(
            f"Exception during barcode scanning: {e}",
        )
def get_separation_pages(self) -> Dict[int, bool]:
    """
    Map every page the document is to be split on to whether that page
    is kept.

    Pages carrying a separator barcode map to False. If ASN barcodes are
    enabled, pages carrying an ASN barcode map to True, which replaces a
    separator entry for the same page. The first page is never added on
    account of an ASN barcode.
    """
    pages: Dict[int, bool] = {}

    # Separator barcodes first
    for bc in self.barcodes:
        if bc.is_separator:
            pages[bc.page] = False

    if settings.CONSUMER_ENABLE_ASN_BARCODE:
        # ASN barcodes next, except for the first page, that might lead
        # to infinite loops
        for bc in self.barcodes:
            if bc.is_asn and bc.page != 0:
                pages[bc.page] = True

    return pages
def separate_pages(self, pages_to_split_on: Dict[int, bool]) -> List[Path]:
""" """
Separate the provided pdf file on the pages_to_split_on. Separate the provided pdf file on the pages_to_split_on.
The pages which are defined by the keys in page_numbers The pages which are defined by the keys in page_numbers
@ -259,22 +252,14 @@ def separate_pages(filepath: Path, pages_to_split_on: Dict[int, bool]) -> List[P
""" """
document_paths = [] document_paths = []
fname = self.file.with_suffix("").name
if not pages_to_split_on: with Pdf.open(self.pdf_file) as input_pdf:
logger.warning("No pages to split on!")
return document_paths
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
tempdir = Path(tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR))
fname = filepath.with_suffix("").name
pdf = Pdf.open(filepath)
# Start with an empty document # Start with an empty document
current_document: List[Page] = [] current_document: List[Page] = []
# A list of documents, ie a list of lists of pages # A list of documents, ie a list of lists of pages
documents: List[List[Page]] = [current_document] documents: List[List[Page]] = [current_document]
for idx, page in enumerate(pdf.pages): for idx, page in enumerate(input_pdf.pages):
# Keep building the new PDF as long as it is not a # Keep building the new PDF as long as it is not a
# separator index # separator index
if idx not in pages_to_split_on: if idx not in pages_to_split_on:
@ -304,28 +289,63 @@ def separate_pages(filepath: Path, pages_to_split_on: Dict[int, bool]) -> List[P
output_filename = f"{fname}_document_{doc_idx}.pdf" output_filename = f"{fname}_document_{doc_idx}.pdf"
logger.debug(f"pdf no:{doc_idx} has {len(dst.pages)} pages") logger.debug(f"pdf no:{doc_idx} has {len(dst.pages)} pages")
savepath = tempdir / output_filename savepath = Path(self.temp_dir.name) / output_filename
with open(savepath, "wb") as out: with open(savepath, "wb") as out:
dst.save(out) dst.save(out)
document_paths.append(savepath) document_paths.append(savepath)
return document_paths return document_paths
def separate(
self,
source: DocumentSource,
override_name: Optional[str] = None,
) -> bool:
"""
Separates the document, based on barcodes and configuration, creating new
documents as required in the appropriate location.
def save_to_dir( Returns True if a split happened, False otherwise
filepath: Path,
newname: str = None,
target_dir: Path = settings.CONSUMPTION_DIR,
):
""" """
Copies filepath to target_dir. # Do nothing
Optionally rename the file. if not self.supported_mime_type:
""" logger.warning(f"Unsupported file format for barcode reader: {self.mime}")
if filepath.is_file() and target_dir.is_dir(): return False
dest = target_dir
if newname is not None: # Does nothing unless needed
dest = dest / newname self.convert_from_tiff_to_pdf()
shutil.copy(filepath, dest)
logging.debug(f"saved {str(filepath)} to {str(dest)}") # Actually read the codes, if any
self.detect()
separator_pages = self.get_separation_pages()
# Also do nothing
if not separator_pages:
logger.warning("No pages to split on!")
return False
# Create the split documents
doc_paths = self.separate_pages(separator_pages)
# Save the new documents to correct folder
if source != DocumentSource.ConsumeFolder:
# The given file is somewhere in SCRATCH_DIR,
# and new documents must be moved to the CONSUMPTION_DIR
# for the consumer to notice them
save_to_dir = settings.CONSUMPTION_DIR
else: else:
logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.") # The given file is somewhere in CONSUMPTION_DIR,
# and may be some levels down for recursive tagging
# so use the file's parent to preserve any metadata
save_to_dir = self.file.parent
for idx, document_path in enumerate(doc_paths):
if override_name is not None:
newname = f"{str(idx)}_{override_name}"
dest = save_to_dir / newname
else:
dest = save_to_dir
logger.info(f"Saving {document_path} to {dest}")
shutil.copy2(document_path, dest)
return True

View File

@ -16,16 +16,15 @@ from filelock import FileLock
from redis.exceptions import ConnectionError from redis.exceptions import ConnectionError
from whoosh.writing import AsyncWriter from whoosh.writing import AsyncWriter
from documents import barcodes
from documents import index from documents import index
from documents import sanity_checker from documents import sanity_checker
from documents.barcodes import BarcodeReader
from documents.classifier import DocumentClassifier from documents.classifier import DocumentClassifier
from documents.classifier import load_classifier from documents.classifier import load_classifier
from documents.consumer import Consumer from documents.consumer import Consumer
from documents.consumer import ConsumerError from documents.consumer import ConsumerError
from documents.data_models import ConsumableDocument from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.file_handling import create_source_path_directory from documents.file_handling import create_source_path_directory
from documents.file_handling import generate_unique_filename from documents.file_handling import generate_unique_filename
from documents.models import Correspondent from documents.models import Correspondent
@ -96,69 +95,11 @@ def consume_file(
# read all barcodes in the current document # read all barcodes in the current document
if settings.CONSUMER_ENABLE_BARCODES or settings.CONSUMER_ENABLE_ASN_BARCODE: if settings.CONSUMER_ENABLE_BARCODES or settings.CONSUMER_ENABLE_ASN_BARCODE:
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(input_doc.original_file, input_doc.mime_type) as reader:
input_doc.original_file, if settings.CONSUMER_ENABLE_BARCODES and reader.separate(
input_doc.mime_type, input_doc.source,
) overrides.filename,
):
# split document by separator pages, if enabled
if settings.CONSUMER_ENABLE_BARCODES:
separators = barcodes.get_separating_barcodes(doc_barcode_info.barcodes)
if len(separators) > 0:
logger.debug(
f"Pages with separators found in: {input_doc.original_file}",
)
document_list = barcodes.separate_pages(
doc_barcode_info.pdf_path,
separators,
)
if document_list:
# If the file is an upload, it's in the scratch directory
# Move it to consume directory to be picked up
# Otherwise, use the current parent to keep possible tags
# from subdirectories
if input_doc.source != DocumentSource.ConsumeFolder:
save_to_dir = settings.CONSUMPTION_DIR
else:
# Note this uses the original file, because it's in the
# consume folder already and may include additional path
# components for tagging
# the .path is somewhere in scratch in this case
save_to_dir = input_doc.original_file.parent
for n, document in enumerate(document_list):
# save to consumption dir
# rename it to the original filename with number prefix
if overrides.filename is not None:
newname = f"{str(n)}_{overrides.filename}"
else:
newname = None
barcodes.save_to_dir(
document,
newname=newname,
target_dir=save_to_dir,
)
# Split file has been copied safely, remove it
document.unlink()
# And clean up the directory as well, now it's empty
shutil.rmtree(document_list[0].parent)
# This file has been split into multiple files without issue
# remove the original and working copy
input_doc.original_file.unlink()
# If the original file was a TIFF, remove the PDF generated from it
if input_doc.mime_type == "image/tiff":
logger.debug(
f"Deleting file {doc_barcode_info.pdf_path}",
)
doc_barcode_info.pdf_path.unlink()
# notify the sender, otherwise the progress bar # notify the sender, otherwise the progress bar
# in the UI stays stuck # in the UI stays stuck
payload = { payload = {
@ -178,11 +119,13 @@ def consume_file(
logger.warning(f"ConnectionError on status send: {str(e)}") logger.warning(f"ConnectionError on status send: {str(e)}")
# consuming stops here, since the original document with # consuming stops here, since the original document with
# the barcodes has been split and will be consumed separately # the barcodes has been split and will be consumed separately
input_doc.original_file.unlink()
return "File successfully split" return "File successfully split"
# try reading the ASN from barcode # try reading the ASN from barcode
if settings.CONSUMER_ENABLE_ASN_BARCODE: if settings.CONSUMER_ENABLE_ASN_BARCODE:
overrides.asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes) overrides.asn = reader.asn
if overrides.asn: if overrides.asn:
logger.info(f"Found ASN in barcode: {overrides.asn}") logger.info(f"Found ASN in barcode: {overrides.asn}")

Binary file not shown.

Before

Width:  |  Height:  |  Size: 836 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 39 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 891 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 337 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.4 KiB

View File

@ -6,10 +6,9 @@ import pytest
from django.conf import settings from django.conf import settings
from django.test import TestCase from django.test import TestCase
from django.test import override_settings from django.test import override_settings
from PIL import Image
from documents import barcodes
from documents import tasks from documents import tasks
from documents.barcodes import BarcodeReader
from documents.consumer import ConsumerError from documents.consumer import ConsumerError
from documents.data_models import ConsumableDocument from documents.data_models import ConsumableDocument
from documents.data_models import DocumentSource from documents.data_models import DocumentSource
@ -30,178 +29,6 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes" BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
def test_barcode_reader_png(self):
"""
GIVEN:
- PNG file with separator barcode
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT.png"
img = Image.open(test_file)
separator_barcode = settings.CONSUMER_BARCODE_STRING
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader_pbm(self):
"""
GIVEN:
- Netpbm bitmap file with separator barcode
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pbm"
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader_distortion_scratchy(self):
"""
GIVEN:
- Image containing high noise
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT-distortion.png"
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader_distortion_stretched(self):
"""
GIVEN:
- Image with a stretched barcode
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT-distortion2.png"
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader_unreadable(self):
"""
GIVEN:
- Image with a truly unreadable barcode
WHEN:
- Image is scanned for codes
THEN:
- No barcode is detected
"""
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-PATCHT-unreadable.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), [])
def test_barcode_reader_qr(self):
"""
GIVEN:
- Image file with QR separator barcode
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = self.BARCODE_SAMPLE_DIR / "qr-code-PATCHT.png"
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader_128(self):
"""
GIVEN:
- Image file with 128 style separator barcode
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-PATCHT.png"
img = Image.open(test_file)
separator_barcode = str(settings.CONSUMER_BARCODE_STRING)
self.assertEqual(barcodes.barcode_reader(img), [separator_barcode])
def test_barcode_reader_no_barcode(self):
"""
GIVEN:
- Image file with no barcode
WHEN:
- Image is scanned for codes
THEN:
- No barcode is detected
"""
test_file = self.SAMPLE_DIR / "simple.png"
img = Image.open(test_file)
self.assertListEqual(barcodes.barcode_reader(img), [])
def test_barcode_reader_custom_separator(self):
"""
GIVEN:
- Image file with custom separator barcode value
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
def test_barcode_reader_custom_qr_separator(self):
"""
GIVEN:
- Image file with custom separator barcode value as a QR code
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
def test_barcode_reader_custom_128_separator(self):
"""
GIVEN:
- Image file with custom separator 128 barcode value
WHEN:
- Image is scanned for codes
THEN:
- The barcode is detected
"""
test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
def test_convert_from_tiff_to_pdf(self):
"""
GIVEN:
- Multi-page TIFF image
WHEN:
- Conversion to PDF
THEN:
- The file converts without error
"""
test_file = self.SAMPLE_DIR / "simple.tiff"
dst = settings.SCRATCH_DIR / "simple.tiff"
shutil.copy(test_file, dst)
target_file = barcodes.convert_from_tiff_to_pdf(dst)
self.assertIsFile(target_file)
self.assertEqual(target_file.suffix, ".pdf")
def test_scan_file_for_separating_barcodes(self): def test_scan_file_for_separating_barcodes(self):
""" """
GIVEN: GIVEN:
@ -213,15 +40,11 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf" test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, reader.detect()
"application/pdf", separator_page_numbers = reader.get_separation_pages()
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
)
self.assertEqual(doc_barcode_info.pdf_path, test_file) self.assertEqual(reader.pdf_file, test_file)
self.assertDictEqual(separator_page_numbers, {0: False}) self.assertDictEqual(separator_page_numbers, {0: False})
def test_scan_file_for_separating_barcodes_none_present(self): def test_scan_file_for_separating_barcodes_none_present(self):
@ -235,15 +58,11 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
- No pages to split on - No pages to split on
""" """
test_file = self.SAMPLE_DIR / "simple.pdf" test_file = self.SAMPLE_DIR / "simple.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, reader.detect()
"application/pdf", separator_page_numbers = reader.get_separation_pages()
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
)
self.assertEqual(doc_barcode_info.pdf_path, test_file) self.assertEqual(reader.pdf_file, test_file)
self.assertDictEqual(separator_page_numbers, {}) self.assertDictEqual(separator_page_numbers, {})
def test_scan_file_for_separating_barcodes_middle_page(self): def test_scan_file_for_separating_barcodes_middle_page(self):
@ -257,15 +76,11 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf" test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, reader.detect()
"application/pdf", separator_page_numbers = reader.get_separation_pages()
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
)
self.assertEqual(doc_barcode_info.pdf_path, test_file) self.assertEqual(reader.pdf_file, test_file)
self.assertDictEqual(separator_page_numbers, {1: False}) self.assertDictEqual(separator_page_numbers, {1: False})
def test_scan_file_for_separating_barcodes_multiple_pages(self): def test_scan_file_for_separating_barcodes_multiple_pages(self):
@ -279,40 +94,57 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
test_file = self.BARCODE_SAMPLE_DIR / "several-patcht-codes.pdf" test_file = self.BARCODE_SAMPLE_DIR / "several-patcht-codes.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, reader.detect()
"application/pdf", separator_page_numbers = reader.get_separation_pages()
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
)
self.assertEqual(doc_barcode_info.pdf_path, test_file) self.assertEqual(reader.pdf_file, test_file)
self.assertDictEqual(separator_page_numbers, {2: False, 5: False}) self.assertDictEqual(separator_page_numbers, {2: False, 5: False})
def test_scan_file_for_separating_barcodes_upside_down(self): def test_scan_file_for_separating_barcodes_hard_to_detect(self):
""" """
GIVEN: GIVEN:
- PDF file containing a separator on page 1 (zero indexed) - PDF file containing a separator on page 1 (zero indexed)
- The barcode is upside down - The barcode is upside down, fuzzy or distorted
WHEN: WHEN:
- File is scanned for barcodes - File is scanned for barcodes
THEN: THEN:
- Barcode is detected on page 1 (zero indexed) - Barcode is detected on page 1 (zero indexed)
""" """
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle_reverse.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes( for test_file in [
test_file, "patch-code-t-middle-reverse.pdf",
"application/pdf", "patch-code-t-middle-distorted.pdf",
) "patch-code-t-middle-fuzzy.pdf",
separator_page_numbers = barcodes.get_separating_barcodes( ]:
doc_barcode_info.barcodes, test_file = self.BARCODE_SAMPLE_DIR / test_file
)
self.assertEqual(doc_barcode_info.pdf_path, test_file) with BarcodeReader(test_file, "application/pdf") as reader:
reader.detect()
separator_page_numbers = reader.get_separation_pages()
self.assertEqual(reader.pdf_file, test_file)
self.assertDictEqual(separator_page_numbers, {1: False}) self.assertDictEqual(separator_page_numbers, {1: False})
def test_scan_file_for_separating_barcodes_unreadable(self):
"""
GIVEN:
- PDF file containing a separator on page 1 (zero indexed)
- The barcode is not readable
WHEN:
- File is scanned for barcodes
THEN:
- No barcode is detected, so there are no pages to split on
"""
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-unreadable.pdf"
with BarcodeReader(test_file, "application/pdf") as reader:
reader.detect()
separator_page_numbers = reader.get_separation_pages()
self.assertEqual(reader.pdf_file, test_file)
self.assertDictEqual(separator_page_numbers, {})
def test_scan_file_for_separating_barcodes_fax_decode(self): def test_scan_file_for_separating_barcodes_fax_decode(self):
""" """
GIVEN: GIVEN:
@ -324,15 +156,11 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
test_file = self.BARCODE_SAMPLE_DIR / "barcode-fax-image.pdf" test_file = self.BARCODE_SAMPLE_DIR / "barcode-fax-image.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, reader.detect()
"application/pdf", separator_page_numbers = reader.get_separation_pages()
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
)
self.assertEqual(doc_barcode_info.pdf_path, test_file) self.assertEqual(reader.pdf_file, test_file)
self.assertDictEqual(separator_page_numbers, {1: False}) self.assertDictEqual(separator_page_numbers, {1: False})
def test_scan_file_for_separating_qr_barcodes(self): def test_scan_file_for_separating_qr_barcodes(self):
@ -347,15 +175,11 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-qr.pdf" test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-qr.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, reader.detect()
"application/pdf", separator_page_numbers = reader.get_separation_pages()
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
)
self.assertEqual(doc_barcode_info.pdf_path, test_file) self.assertEqual(reader.pdf_file, test_file)
self.assertDictEqual(separator_page_numbers, {0: False}) self.assertDictEqual(separator_page_numbers, {0: False})
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE") @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
@ -371,15 +195,11 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf" test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, reader.detect()
"application/pdf", separator_page_numbers = reader.get_separation_pages()
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
)
self.assertEqual(doc_barcode_info.pdf_path, test_file) self.assertEqual(reader.pdf_file, test_file)
self.assertDictEqual(separator_page_numbers, {0: False}) self.assertDictEqual(separator_page_numbers, {0: False})
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE") @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
@ -396,15 +216,11 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.pdf" test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, reader.detect()
"application/pdf", separator_page_numbers = reader.get_separation_pages()
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
)
self.assertEqual(doc_barcode_info.pdf_path, test_file) self.assertEqual(reader.pdf_file, test_file)
self.assertDictEqual(separator_page_numbers, {0: False}) self.assertDictEqual(separator_page_numbers, {0: False})
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE") @override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
@ -421,15 +237,11 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.pdf" test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, reader.detect()
"application/pdf", separator_page_numbers = reader.get_separation_pages()
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
)
self.assertEqual(doc_barcode_info.pdf_path, test_file) self.assertEqual(reader.pdf_file, test_file)
self.assertDictEqual(separator_page_numbers, {0: False}) self.assertDictEqual(separator_page_numbers, {0: False})
def test_scan_file_for_separating_wrong_qr_barcodes(self): def test_scan_file_for_separating_wrong_qr_barcodes(self):
@ -445,15 +257,11 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf" test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, reader.detect()
"application/pdf", separator_page_numbers = reader.get_separation_pages()
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
)
self.assertEqual(doc_barcode_info.pdf_path, test_file) self.assertEqual(reader.pdf_file, test_file)
self.assertDictEqual(separator_page_numbers, {}) self.assertDictEqual(separator_page_numbers, {})
@override_settings(CONSUMER_BARCODE_STRING="ADAR-NEXTDOC") @override_settings(CONSUMER_BARCODE_STRING="ADAR-NEXTDOC")
@ -468,15 +276,12 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
test_file = self.BARCODE_SAMPLE_DIR / "many-qr-codes.pdf" test_file = self.BARCODE_SAMPLE_DIR / "many-qr-codes.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, reader.detect()
"application/pdf", separator_page_numbers = reader.get_separation_pages()
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
)
self.assertGreater(len(doc_barcode_info.barcodes), 0) self.assertEqual(reader.pdf_file, test_file)
self.assertGreater(len(reader.barcodes), 0)
self.assertDictEqual(separator_page_numbers, {1: False}) self.assertDictEqual(separator_page_numbers, {1: False})
def test_separate_pages(self): def test_separate_pages(self):
@ -490,8 +295,10 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf" test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
documents = barcodes.separate_pages(test_file, {1: False}) with BarcodeReader(test_file, "application/pdf") as reader:
documents = reader.separate_pages({1: False})
self.assertEqual(reader.pdf_file, test_file)
self.assertEqual(len(documents), 2) self.assertEqual(len(documents), 2)
def test_separate_pages_double_code(self): def test_separate_pages_double_code(self):
@ -505,9 +312,10 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-double.pdf" test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-double.pdf"
pages = barcodes.separate_pages(test_file, {1: False, 2: False}) with BarcodeReader(test_file, "application/pdf") as reader:
documents = reader.separate_pages({1: False, 2: False})
self.assertEqual(len(pages), 2) self.assertEqual(len(documents), 2)
def test_separate_pages_no_list(self): def test_separate_pages_no_list(self):
""" """
@ -519,11 +327,12 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
- No new documents are produced - No new documents are produced
- A warning is logged - A warning is logged
""" """
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf" test_file = self.SAMPLE_DIR / "simple.pdf"
with self.assertLogs("paperless.barcodes", level="WARNING") as cm: with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
pages = barcodes.separate_pages(test_file, {}) with BarcodeReader(test_file, "application/pdf") as reader:
self.assertEqual(pages, []) success = reader.separate(DocumentSource.ApiUpload)
self.assertFalse(success)
self.assertEqual( self.assertEqual(
cm.output, cm.output,
[ [
@ -531,45 +340,6 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
], ],
) )
def test_save_to_dir(self):
"""
GIVEN:
- File to save to a directory
WHEN:
- The file is saved
THEN:
- The file exists
"""
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
barcodes.save_to_dir(test_file, target_dir=settings.SCRATCH_DIR)
target_file = settings.SCRATCH_DIR / "patch-code-t.pdf"
self.assertIsFile(target_file)
def test_save_to_dir_not_existing(self):
"""
GIVEN:
- File to save to a directory
- The directory doesn't exist
WHEN:
- The file is saved
THEN:
- The file exists
"""
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
nonexistingdir = Path("/nowhere")
self.assertIsNotDir(nonexistingdir)
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
barcodes.save_to_dir(test_file, target_dir=nonexistingdir)
self.assertEqual(
cm.output,
[
f"WARNING:paperless.barcodes:{str(test_file)} or {str(nonexistingdir)} don't exist.",
],
)
def test_save_to_dir_given_name(self): def test_save_to_dir_given_name(self):
""" """
GIVEN: GIVEN:
@ -580,17 +350,17 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN: THEN:
- The file exists - The file exists
""" """
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf" test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
with BarcodeReader(test_file, "application/pdf") as reader:
reader.separate(DocumentSource.ApiUpload, "newname.pdf")
barcodes.save_to_dir( self.assertEqual(reader.pdf_file, test_file)
test_file, target_file1 = settings.CONSUMPTION_DIR / "0_newname.pdf"
newname="newname.pdf", target_file2 = settings.CONSUMPTION_DIR / "1_newname.pdf"
target_dir=settings.SCRATCH_DIR, self.assertIsFile(target_file1)
) self.assertIsFile(target_file2)
target_file = settings.SCRATCH_DIR / "newname.pdf"
self.assertIsFile(target_file)
def test_barcode_splitter(self): def test_barcode_splitter_api_upload(self):
""" """
GIVEN: GIVEN:
- Input file containing barcodes - Input file containing barcodes
@ -599,28 +369,92 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
THEN: THEN:
- Correct number of files produced - Correct number of files produced
""" """
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf" sample_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
test_file = settings.SCRATCH_DIR / "patch-code-t-middle.pdf"
shutil.copy(sample_file, test_file)
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, reader.separate(DocumentSource.ApiUpload)
"application/pdf",
) self.assertEqual(reader.pdf_file, test_file)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes, target_file1 = (
settings.CONSUMPTION_DIR / "patch-code-t-middle_document_0.pdf"
) )
self.assertEqual(test_file, doc_barcode_info.pdf_path) target_file2 = (
self.assertTrue(len(separator_page_numbers) > 0) settings.CONSUMPTION_DIR / "patch-code-t-middle_document_1.pdf"
)
document_list = barcodes.separate_pages(test_file, separator_page_numbers) self.assertIsFile(target_file1)
self.assertGreater(len(document_list), 0) self.assertIsFile(target_file2)
for document in document_list: def test_barcode_splitter_consume_dir(self):
barcodes.save_to_dir(document, target_dir=settings.SCRATCH_DIR) """
GIVEN:
- Input file containing barcodes
WHEN:
- Input file is split on barcodes
THEN:
- Correct number of files produced
"""
sample_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
test_file = settings.CONSUMPTION_DIR / "patch-code-t-middle.pdf"
shutil.copy(sample_file, test_file)
target_file1 = settings.SCRATCH_DIR / "patch-code-t-middle_document_0.pdf" with BarcodeReader(test_file, "application/pdf") as reader:
reader.detect()
reader.separate(DocumentSource.ConsumeFolder)
target_file2 = settings.SCRATCH_DIR / "patch-code-t-middle_document_1.pdf" self.assertEqual(reader.pdf_file, test_file)
target_file1 = (
settings.CONSUMPTION_DIR / "patch-code-t-middle_document_0.pdf"
)
target_file2 = (
settings.CONSUMPTION_DIR / "patch-code-t-middle_document_1.pdf"
)
self.assertIsFile(target_file1)
self.assertIsFile(target_file2)
def test_barcode_splitter_consume_dir_recursive(self):
"""
GIVEN:
- Input file containing barcodes
- Input file is within a directory structure of the consume folder
WHEN:
- Input file is split on barcodes
THEN:
- Correct number of files produced
- Output files are within the same directory structure
"""
sample_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
test_file = (
settings.CONSUMPTION_DIR / "tag1" / "tag2" / "patch-code-t-middle.pdf"
)
test_file.parent.mkdir(parents=True)
shutil.copy(sample_file, test_file)
with BarcodeReader(test_file, "application/pdf") as reader:
reader.separate(DocumentSource.ConsumeFolder)
self.assertEqual(reader.pdf_file, test_file)
target_file1 = (
settings.CONSUMPTION_DIR
/ "tag1"
/ "tag2"
/ "patch-code-t-middle_document_0.pdf"
)
target_file2 = (
settings.CONSUMPTION_DIR
/ "tag1"
/ "tag2"
/ "patch-code-t-middle_document_1.pdf"
)
self.assertIsFile(target_file1) self.assertIsFile(target_file1)
self.assertIsFile(target_file2) self.assertIsFile(target_file2)
@ -681,7 +515,39 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
), ),
"File successfully split", "File successfully split",
) )
self.assertFalse(dst.exists()) self.assertIsNotFile(dst)
@override_settings(
CONSUMER_ENABLE_BARCODES=True,
CONSUMER_BARCODE_TIFF_SUPPORT=True,
)
def test_consume_barcode_tiff_file_with_alpha(self):
"""
GIVEN:
- TIFF image containing barcodes
- TIFF image has an alpha layer
WHEN:
- Consume task handles the alpha layer and returns
THEN:
- The file was split without issue
"""
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-alpha.tiff"
dst = settings.SCRATCH_DIR / "patch-code-t-middle.tiff"
shutil.copy(test_file, dst)
with mock.patch("documents.tasks.async_to_sync"):
self.assertEqual(
tasks.consume_file(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=dst,
),
None,
),
"File successfully split",
)
self.assertIsNotFile(dst)
@override_settings( @override_settings(
CONSUMER_ENABLE_BARCODES=True, CONSUMER_ENABLE_BARCODES=True,
@ -760,7 +626,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
), ),
"File successfully split", "File successfully split",
) )
self.assertFalse(dst.exists()) self.assertIsNotFile(dst)
def test_scan_file_for_separating_barcodes_password(self): def test_scan_file_for_separating_barcodes_password(self):
""" """
@ -773,19 +639,15 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
test_file = self.SAMPLE_DIR / "password-is-test.pdf" test_file = self.SAMPLE_DIR / "password-is-test.pdf"
with self.assertLogs("paperless.barcodes", level="WARNING") as cm: with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, reader.detect()
"application/pdf",
)
warning = cm.output[0] warning = cm.output[0]
expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes" expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes"
self.assertTrue(warning.startswith(expected_str)) self.assertTrue(warning.startswith(expected_str))
separator_page_numbers = barcodes.get_separating_barcodes( separator_page_numbers = reader.get_separation_pages()
doc_barcode_info.barcodes,
)
self.assertEqual(doc_barcode_info.pdf_path, test_file) self.assertEqual(reader.pdf_file, test_file)
self.assertDictEqual(separator_page_numbers, {}) self.assertDictEqual(separator_page_numbers, {})
@override_settings( @override_settings(
@ -803,15 +665,14 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf" test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, reader.detect()
"application/pdf", separator_page_numbers = reader.get_separation_pages()
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
)
self.assertEqual(test_file, doc_barcode_info.pdf_path) self.assertEqual(
reader.pdf_file,
test_file,
)
self.assertDictEqual( self.assertDictEqual(
separator_page_numbers, separator_page_numbers,
{ {
@ -823,7 +684,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
}, },
) )
document_list = barcodes.separate_pages(test_file, separator_page_numbers) document_list = reader.separate_pages(separator_page_numbers)
self.assertEqual(len(document_list), 6) self.assertEqual(len(document_list), 6)
@override_settings( @override_settings(
@ -841,15 +702,11 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf" test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, reader.detect()
"application/pdf", separator_page_numbers = reader.get_separation_pages()
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
)
self.assertEqual(test_file, doc_barcode_info.pdf_path) self.assertEqual(reader.pdf_file, test_file)
self.assertDictEqual( self.assertDictEqual(
separator_page_numbers, separator_page_numbers,
{ {
@ -860,7 +717,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
}, },
) )
document_list = barcodes.separate_pages(test_file, separator_page_numbers) document_list = reader.separate_pages(separator_page_numbers)
self.assertEqual(len(document_list), 5) self.assertEqual(len(document_list), 5)
@ -869,52 +726,6 @@ class TestAsnBarcode(DirectoriesMixin, TestCase):
BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes" BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
def test_barcode_reader_asn_normal(self):
"""
GIVEN:
- Image containing standard ASNxxxxx barcode
WHEN:
- Image is scanned for barcodes
THEN:
- The barcode is located
- The barcode value is correct
"""
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["ASN00123"])
def test_barcode_reader_asn_invalid(self):
"""
GIVEN:
- Image containing invalid ASNxxxxx barcode
- The number portion of the ASN is not a number
WHEN:
- Image is scanned for barcodes
THEN:
- The barcode is located
- The barcode value is correct
"""
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["ASNXYZXYZ"])
def test_barcode_reader_asn_custom_prefix(self):
"""
GIVEN:
- Image containing custom prefix barcode
WHEN:
- Image is scanned for barcodes
THEN:
- The barcode is located
- The barcode value is correct
"""
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.png"
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM-PREFIX-00123"])
@override_settings(CONSUMER_ASN_BARCODE_PREFIX="CUSTOM-PREFIX-") @override_settings(CONSUMER_ASN_BARCODE_PREFIX="CUSTOM-PREFIX-")
def test_scan_file_for_asn_custom_prefix(self): def test_scan_file_for_asn_custom_prefix(self):
""" """
@ -928,16 +739,48 @@ class TestAsnBarcode(DirectoriesMixin, TestCase):
- The ASN integer value is correct - The ASN integer value is correct
""" """
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf" test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
with BarcodeReader(test_file, "application/pdf") as reader:
asn = reader.asn
doc_barcode_info = barcodes.scan_file_for_barcodes( self.assertEqual(reader.pdf_file, test_file)
test_file,
"application/pdf",
)
asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
self.assertEqual(doc_barcode_info.pdf_path, test_file)
self.assertEqual(asn, 123) self.assertEqual(asn, 123)
def test_scan_file_for_asn_barcode(self):
"""
GIVEN:
- PDF containing an ASN barcode
- The ASN value is 123
WHEN:
- File is scanned for barcodes
THEN:
- The ASN is located
- The ASN integer value is correct
"""
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
with BarcodeReader(test_file, "application/pdf") as reader:
asn = reader.asn
self.assertEqual(reader.pdf_file, test_file)
self.assertEqual(asn, 123)
def test_scan_file_for_asn_not_existing(self):
"""
GIVEN:
- PDF without an ASN barcode
WHEN:
- File is scanned for barcodes
THEN:
- No ASN is retrieved from the document
"""
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
with BarcodeReader(test_file, "application/pdf") as reader:
asn = reader.asn
self.assertEqual(reader.pdf_file, test_file)
self.assertEqual(asn, None)
def test_scan_file_for_asn_barcode_invalid(self): def test_scan_file_for_asn_barcode_invalid(self):
""" """
GIVEN: GIVEN:
@ -951,14 +794,12 @@ class TestAsnBarcode(DirectoriesMixin, TestCase):
""" """
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.pdf" test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes( with BarcodeReader(test_file, "application/pdf") as reader:
test_file, asn = reader.asn
"application/pdf",
)
asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes) self.assertEqual(reader.pdf_file, test_file)
self.assertEqual(doc_barcode_info.pdf_path, test_file) self.assertEqual(reader.pdf_file, test_file)
self.assertEqual(asn, None) self.assertEqual(asn, None)
@override_settings(CONSUMER_ENABLE_ASN_BARCODE=True) @override_settings(CONSUMER_ENABLE_ASN_BARCODE=True)
@ -992,48 +833,6 @@ class TestAsnBarcode(DirectoriesMixin, TestCase):
self.assertEqual(kwargs["override_asn"], 123) self.assertEqual(kwargs["override_asn"], 123)
def test_scan_file_for_asn_barcode(self):
"""
GIVEN:
- PDF containing an ASN barcode
- The ASN value is 123
WHEN:
- File is scanned for barcodes
THEN:
- The ASN is located
- The ASN integer value is correct
"""
test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
self.assertEqual(doc_barcode_info.pdf_path, test_file)
self.assertEqual(asn, 123)
def test_scan_file_for_asn_not_existing(self):
"""
GIVEN:
- PDF without an ASN barcode
WHEN:
- File is scanned for barcodes
THEN:
- No ASN is retrieved from the document
"""
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
self.assertEqual(doc_barcode_info.pdf_path, test_file)
self.assertEqual(asn, None)
@override_settings(CONSUMER_ENABLE_ASN_BARCODE=True) @override_settings(CONSUMER_ENABLE_ASN_BARCODE=True)
def test_asn_too_large(self): def test_asn_too_large(self):
""" """