mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Fix: Document metadata is lost during barcode splitting (#4982)
* Fixes barcode splitting dropping metadata that might be needed for the round 2
This commit is contained in:
parent
be2de4f15d
commit
122e4141b0
@ -14,6 +14,8 @@ from pikepdf import Pdf
|
|||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
from documents.converters import convert_from_tiff_to_pdf
|
from documents.converters import convert_from_tiff_to_pdf
|
||||||
|
from documents.data_models import ConsumableDocument
|
||||||
|
from documents.data_models import DocumentMetadataOverrides
|
||||||
from documents.data_models import DocumentSource
|
from documents.data_models import DocumentSource
|
||||||
from documents.utils import copy_basic_file_stats
|
from documents.utils import copy_basic_file_stats
|
||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
@ -53,6 +55,7 @@ class BarcodeReader:
|
|||||||
self.mime: Final[str] = mime_type
|
self.mime: Final[str] = mime_type
|
||||||
self.pdf_file: Path = self.file
|
self.pdf_file: Path = self.file
|
||||||
self.barcodes: list[Barcode] = []
|
self.barcodes: list[Barcode] = []
|
||||||
|
self._tiff_conversion_done = False
|
||||||
self.temp_dir: Optional[tempfile.TemporaryDirectory] = None
|
self.temp_dir: Optional[tempfile.TemporaryDirectory] = None
|
||||||
|
|
||||||
if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
|
if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
|
||||||
@ -150,12 +153,14 @@ class BarcodeReader:
|
|||||||
|
|
||||||
def convert_from_tiff_to_pdf(self):
|
def convert_from_tiff_to_pdf(self):
|
||||||
"""
|
"""
|
||||||
May convert a TIFF image into a PDF, if the input is a TIFF
|
May convert a TIFF image into a PDF, if the input is a TIFF and
|
||||||
|
the TIFF has not been made into a PDF
|
||||||
"""
|
"""
|
||||||
# Nothing to do, pdf_file is already assigned correctly
|
# Nothing to do, pdf_file is already assigned correctly
|
||||||
if self.mime != "image/tiff":
|
if self.mime != "image/tiff" or self._tiff_conversion_done:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
self._tiff_conversion_done = True
|
||||||
self.pdf_file = convert_from_tiff_to_pdf(self.file, Path(self.temp_dir.name))
|
self.pdf_file = convert_from_tiff_to_pdf(self.file, Path(self.temp_dir.name))
|
||||||
|
|
||||||
def detect(self) -> None:
|
def detect(self) -> None:
|
||||||
@ -167,6 +172,9 @@ class BarcodeReader:
|
|||||||
if self.barcodes:
|
if self.barcodes:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# No op if not a TIFF
|
||||||
|
self.convert_from_tiff_to_pdf()
|
||||||
|
|
||||||
# Choose the library for reading
|
# Choose the library for reading
|
||||||
if settings.CONSUMER_BARCODE_SCANNER == "PYZBAR":
|
if settings.CONSUMER_BARCODE_SCANNER == "PYZBAR":
|
||||||
reader = self.read_barcodes_pyzbar
|
reader = self.read_barcodes_pyzbar
|
||||||
@ -240,7 +248,7 @@ class BarcodeReader:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
document_paths = []
|
document_paths = []
|
||||||
fname = self.file.with_suffix("").name
|
fname = self.file.stem
|
||||||
with Pdf.open(self.pdf_file) as input_pdf:
|
with Pdf.open(self.pdf_file) as input_pdf:
|
||||||
# Start with an empty document
|
# Start with an empty document
|
||||||
current_document: list[Page] = []
|
current_document: list[Page] = []
|
||||||
@ -290,7 +298,7 @@ class BarcodeReader:
|
|||||||
def separate(
|
def separate(
|
||||||
self,
|
self,
|
||||||
source: DocumentSource,
|
source: DocumentSource,
|
||||||
override_name: Optional[str] = None,
|
overrides: DocumentMetadataOverrides,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
"""
|
"""
|
||||||
Separates the document, based on barcodes and configuration, creating new
|
Separates the document, based on barcodes and configuration, creating new
|
||||||
@ -316,27 +324,23 @@ class BarcodeReader:
|
|||||||
logger.warning("No pages to split on!")
|
logger.warning("No pages to split on!")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Create the split documents
|
tmp_dir = Path(tempfile.mkdtemp(prefix="paperless-barcode-split-")).resolve()
|
||||||
doc_paths = self.separate_pages(separator_pages)
|
|
||||||
|
|
||||||
# Save the new documents to correct folder
|
from documents import tasks
|
||||||
if source != DocumentSource.ConsumeFolder:
|
|
||||||
# The given file is somewhere in SCRATCH_DIR,
|
|
||||||
# and new documents must be moved to the CONSUMPTION_DIR
|
|
||||||
# for the consumer to notice them
|
|
||||||
save_to_dir = settings.CONSUMPTION_DIR
|
|
||||||
else:
|
|
||||||
# The given file is somewhere in CONSUMPTION_DIR,
|
|
||||||
# and may be some levels down for recursive tagging
|
|
||||||
# so use the file's parent to preserve any metadata
|
|
||||||
save_to_dir = self.file.parent
|
|
||||||
|
|
||||||
for idx, document_path in enumerate(doc_paths):
|
# Create the split document tasks
|
||||||
if override_name is not None:
|
for new_document in self.separate_pages(separator_pages):
|
||||||
newname = f"{idx}_{override_name}"
|
copy_file_with_basic_stats(new_document, tmp_dir / new_document.name)
|
||||||
dest = save_to_dir / newname
|
|
||||||
else:
|
tasks.consume_file.delay(
|
||||||
dest = save_to_dir
|
ConsumableDocument(
|
||||||
logger.info(f"Saving {document_path} to {dest}")
|
# Same source, for templates
|
||||||
copy_file_with_basic_stats(document_path, dest)
|
source=source,
|
||||||
|
# Can't use same folder or the consume might grab it again
|
||||||
|
original_file=(tmp_dir / new_document.name).resolve(),
|
||||||
|
),
|
||||||
|
# All the same metadata
|
||||||
|
overrides,
|
||||||
|
)
|
||||||
|
logger.info("Barcode splitting complete!")
|
||||||
return True
|
return True
|
||||||
|
@ -140,7 +140,7 @@ def consume_file(
|
|||||||
with BarcodeReader(input_doc.original_file, input_doc.mime_type) as reader:
|
with BarcodeReader(input_doc.original_file, input_doc.mime_type) as reader:
|
||||||
if settings.CONSUMER_ENABLE_BARCODES and reader.separate(
|
if settings.CONSUMER_ENABLE_BARCODES and reader.separate(
|
||||||
input_doc.source,
|
input_doc.source,
|
||||||
overrides.filename,
|
overrides,
|
||||||
):
|
):
|
||||||
# notify the sender, otherwise the progress bar
|
# notify the sender, otherwise the progress bar
|
||||||
# in the UI stays stuck
|
# in the UI stays stuck
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@ -11,10 +10,13 @@ from documents import tasks
|
|||||||
from documents.barcodes import BarcodeReader
|
from documents.barcodes import BarcodeReader
|
||||||
from documents.consumer import ConsumerError
|
from documents.consumer import ConsumerError
|
||||||
from documents.data_models import ConsumableDocument
|
from documents.data_models import ConsumableDocument
|
||||||
|
from documents.data_models import DocumentMetadataOverrides
|
||||||
from documents.data_models import DocumentSource
|
from documents.data_models import DocumentSource
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.tests.utils import DirectoriesMixin
|
from documents.tests.utils import DirectoriesMixin
|
||||||
|
from documents.tests.utils import DocumentConsumeDelayMixin
|
||||||
from documents.tests.utils import FileSystemAssertsMixin
|
from documents.tests.utils import FileSystemAssertsMixin
|
||||||
|
from documents.tests.utils import SampleDirMixin
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import zxingcpp # noqa: F401
|
import zxingcpp # noqa: F401
|
||||||
@ -25,11 +27,7 @@ except ImportError:
|
|||||||
|
|
||||||
|
|
||||||
@override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
|
@override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
|
||||||
class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, TestCase):
|
||||||
SAMPLE_DIR = Path(__file__).parent / "samples"
|
|
||||||
|
|
||||||
BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
|
|
||||||
|
|
||||||
def test_scan_file_for_separating_barcodes(self):
|
def test_scan_file_for_separating_barcodes(self):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@ -48,6 +46,46 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
self.assertEqual(reader.pdf_file, test_file)
|
self.assertEqual(reader.pdf_file, test_file)
|
||||||
self.assertDictEqual(separator_page_numbers, {0: False})
|
self.assertDictEqual(separator_page_numbers, {0: False})
|
||||||
|
|
||||||
|
@override_settings(
|
||||||
|
CONSUMER_BARCODE_TIFF_SUPPORT=True,
|
||||||
|
)
|
||||||
|
def test_scan_tiff_for_separating_barcodes(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- TIFF image containing barcodes
|
||||||
|
WHEN:
|
||||||
|
- Consume task returns
|
||||||
|
THEN:
|
||||||
|
- The file was split
|
||||||
|
"""
|
||||||
|
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff"
|
||||||
|
|
||||||
|
with BarcodeReader(test_file, "image/tiff") as reader:
|
||||||
|
reader.detect()
|
||||||
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
|
self.assertDictEqual(separator_page_numbers, {1: False})
|
||||||
|
|
||||||
|
@override_settings(
|
||||||
|
CONSUMER_BARCODE_TIFF_SUPPORT=True,
|
||||||
|
)
|
||||||
|
def test_scan_tiff_with_alpha_for_separating_barcodes(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- TIFF image containing barcodes
|
||||||
|
WHEN:
|
||||||
|
- Consume task returns
|
||||||
|
THEN:
|
||||||
|
- The file was split
|
||||||
|
"""
|
||||||
|
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-alpha.tiff"
|
||||||
|
|
||||||
|
with BarcodeReader(test_file, "image/tiff") as reader:
|
||||||
|
reader.detect()
|
||||||
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
|
self.assertDictEqual(separator_page_numbers, {1: False})
|
||||||
|
|
||||||
def test_scan_file_for_separating_barcodes_none_present(self):
|
def test_scan_file_for_separating_barcodes_none_present(self):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@ -285,6 +323,28 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
self.assertGreater(len(reader.barcodes), 0)
|
self.assertGreater(len(reader.barcodes), 0)
|
||||||
self.assertDictEqual(separator_page_numbers, {1: False})
|
self.assertDictEqual(separator_page_numbers, {1: False})
|
||||||
|
|
||||||
|
def test_scan_file_for_separating_barcodes_password(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Password protected PDF
|
||||||
|
WHEN:
|
||||||
|
- File is scanned for barcode
|
||||||
|
THEN:
|
||||||
|
- Scanning handles the exception without crashing
|
||||||
|
"""
|
||||||
|
test_file = self.SAMPLE_DIR / "password-is-test.pdf"
|
||||||
|
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
||||||
|
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||||
|
reader.detect()
|
||||||
|
warning = cm.output[0]
|
||||||
|
expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes"
|
||||||
|
self.assertTrue(warning.startswith(expected_str))
|
||||||
|
|
||||||
|
separator_page_numbers = reader.get_separation_pages()
|
||||||
|
|
||||||
|
self.assertEqual(reader.pdf_file, test_file)
|
||||||
|
self.assertDictEqual(separator_page_numbers, {})
|
||||||
|
|
||||||
def test_separate_pages(self):
|
def test_separate_pages(self):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
@ -332,8 +392,12 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
|
|
||||||
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
with BarcodeReader(test_file, "application/pdf") as reader:
|
||||||
success = reader.separate(DocumentSource.ApiUpload)
|
self.assertFalse(
|
||||||
self.assertFalse(success)
|
reader.separate(
|
||||||
|
DocumentSource.ApiUpload,
|
||||||
|
DocumentMetadataOverrides(),
|
||||||
|
),
|
||||||
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
cm.output,
|
cm.output,
|
||||||
[
|
[
|
||||||
@ -341,215 +405,6 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_save_to_dir_given_name(self):
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- File to save to a directory
|
|
||||||
- There is a name override
|
|
||||||
WHEN:
|
|
||||||
- The file is saved
|
|
||||||
THEN:
|
|
||||||
- The file exists
|
|
||||||
"""
|
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
|
||||||
reader.separate(DocumentSource.ApiUpload, "newname.pdf")
|
|
||||||
|
|
||||||
self.assertEqual(reader.pdf_file, test_file)
|
|
||||||
target_file1 = settings.CONSUMPTION_DIR / "0_newname.pdf"
|
|
||||||
target_file2 = settings.CONSUMPTION_DIR / "1_newname.pdf"
|
|
||||||
self.assertIsFile(target_file1)
|
|
||||||
self.assertIsFile(target_file2)
|
|
||||||
|
|
||||||
def test_barcode_splitter_api_upload(self):
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Input file containing barcodes
|
|
||||||
WHEN:
|
|
||||||
- Input file is split on barcodes
|
|
||||||
THEN:
|
|
||||||
- Correct number of files produced
|
|
||||||
"""
|
|
||||||
sample_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
|
|
||||||
test_file = settings.SCRATCH_DIR / "patch-code-t-middle.pdf"
|
|
||||||
shutil.copy(sample_file, test_file)
|
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
|
||||||
reader.separate(DocumentSource.ApiUpload)
|
|
||||||
|
|
||||||
self.assertEqual(reader.pdf_file, test_file)
|
|
||||||
|
|
||||||
target_file1 = (
|
|
||||||
settings.CONSUMPTION_DIR / "patch-code-t-middle_document_0.pdf"
|
|
||||||
)
|
|
||||||
|
|
||||||
target_file2 = (
|
|
||||||
settings.CONSUMPTION_DIR / "patch-code-t-middle_document_1.pdf"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertIsFile(target_file1)
|
|
||||||
self.assertIsFile(target_file2)
|
|
||||||
|
|
||||||
def test_barcode_splitter_consume_dir(self):
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Input file containing barcodes
|
|
||||||
WHEN:
|
|
||||||
- Input file is split on barcodes
|
|
||||||
THEN:
|
|
||||||
- Correct number of files produced
|
|
||||||
"""
|
|
||||||
sample_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
|
|
||||||
test_file = settings.CONSUMPTION_DIR / "patch-code-t-middle.pdf"
|
|
||||||
shutil.copy(sample_file, test_file)
|
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
|
||||||
reader.detect()
|
|
||||||
reader.separate(DocumentSource.ConsumeFolder)
|
|
||||||
|
|
||||||
self.assertEqual(reader.pdf_file, test_file)
|
|
||||||
|
|
||||||
target_file1 = (
|
|
||||||
settings.CONSUMPTION_DIR / "patch-code-t-middle_document_0.pdf"
|
|
||||||
)
|
|
||||||
|
|
||||||
target_file2 = (
|
|
||||||
settings.CONSUMPTION_DIR / "patch-code-t-middle_document_1.pdf"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertIsFile(target_file1)
|
|
||||||
self.assertIsFile(target_file2)
|
|
||||||
|
|
||||||
def test_barcode_splitter_consume_dir_recursive(self):
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Input file containing barcodes
|
|
||||||
- Input file is within a directory structure of the consume folder
|
|
||||||
WHEN:
|
|
||||||
- Input file is split on barcodes
|
|
||||||
THEN:
|
|
||||||
- Correct number of files produced
|
|
||||||
- Output files are within the same directory structure
|
|
||||||
"""
|
|
||||||
sample_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
|
|
||||||
test_file = (
|
|
||||||
settings.CONSUMPTION_DIR / "tag1" / "tag2" / "patch-code-t-middle.pdf"
|
|
||||||
)
|
|
||||||
test_file.parent.mkdir(parents=True)
|
|
||||||
shutil.copy(sample_file, test_file)
|
|
||||||
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
|
||||||
reader.separate(DocumentSource.ConsumeFolder)
|
|
||||||
|
|
||||||
self.assertEqual(reader.pdf_file, test_file)
|
|
||||||
|
|
||||||
target_file1 = (
|
|
||||||
settings.CONSUMPTION_DIR
|
|
||||||
/ "tag1"
|
|
||||||
/ "tag2"
|
|
||||||
/ "patch-code-t-middle_document_0.pdf"
|
|
||||||
)
|
|
||||||
|
|
||||||
target_file2 = (
|
|
||||||
settings.CONSUMPTION_DIR
|
|
||||||
/ "tag1"
|
|
||||||
/ "tag2"
|
|
||||||
/ "patch-code-t-middle_document_1.pdf"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertIsFile(target_file1)
|
|
||||||
self.assertIsFile(target_file2)
|
|
||||||
|
|
||||||
@override_settings(CONSUMER_ENABLE_BARCODES=True)
|
|
||||||
def test_consume_barcode_file(self):
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Input file with barcodes given to consume task
|
|
||||||
WHEN:
|
|
||||||
- Consume task returns
|
|
||||||
THEN:
|
|
||||||
- The file was split
|
|
||||||
"""
|
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
|
|
||||||
|
|
||||||
dst = settings.SCRATCH_DIR / "patch-code-t-middle.pdf"
|
|
||||||
shutil.copy(test_file, dst)
|
|
||||||
|
|
||||||
with mock.patch("documents.tasks.async_to_sync"):
|
|
||||||
self.assertEqual(
|
|
||||||
tasks.consume_file(
|
|
||||||
ConsumableDocument(
|
|
||||||
source=DocumentSource.ConsumeFolder,
|
|
||||||
original_file=dst,
|
|
||||||
),
|
|
||||||
None,
|
|
||||||
),
|
|
||||||
"File successfully split",
|
|
||||||
)
|
|
||||||
|
|
||||||
@override_settings(
|
|
||||||
CONSUMER_ENABLE_BARCODES=True,
|
|
||||||
CONSUMER_BARCODE_TIFF_SUPPORT=True,
|
|
||||||
)
|
|
||||||
def test_consume_barcode_tiff_file(self):
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- TIFF image containing barcodes
|
|
||||||
WHEN:
|
|
||||||
- Consume task returns
|
|
||||||
THEN:
|
|
||||||
- The file was split
|
|
||||||
"""
|
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff"
|
|
||||||
|
|
||||||
dst = settings.SCRATCH_DIR / "patch-code-t-middle.tiff"
|
|
||||||
shutil.copy(test_file, dst)
|
|
||||||
|
|
||||||
with mock.patch("documents.tasks.async_to_sync"):
|
|
||||||
self.assertEqual(
|
|
||||||
tasks.consume_file(
|
|
||||||
ConsumableDocument(
|
|
||||||
source=DocumentSource.ConsumeFolder,
|
|
||||||
original_file=dst,
|
|
||||||
),
|
|
||||||
None,
|
|
||||||
),
|
|
||||||
"File successfully split",
|
|
||||||
)
|
|
||||||
self.assertIsNotFile(dst)
|
|
||||||
|
|
||||||
@override_settings(
|
|
||||||
CONSUMER_ENABLE_BARCODES=True,
|
|
||||||
CONSUMER_BARCODE_TIFF_SUPPORT=True,
|
|
||||||
)
|
|
||||||
def test_consume_barcode_tiff_file_with_alpha(self):
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- TIFF image containing barcodes
|
|
||||||
- TIFF image has an alpha layer
|
|
||||||
WHEN:
|
|
||||||
- Consume task handles the alpha layer and returns
|
|
||||||
THEN:
|
|
||||||
- The file was split without issue
|
|
||||||
"""
|
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-alpha.tiff"
|
|
||||||
|
|
||||||
dst = settings.SCRATCH_DIR / "patch-code-t-middle.tiff"
|
|
||||||
shutil.copy(test_file, dst)
|
|
||||||
|
|
||||||
with mock.patch("documents.tasks.async_to_sync"):
|
|
||||||
self.assertEqual(
|
|
||||||
tasks.consume_file(
|
|
||||||
ConsumableDocument(
|
|
||||||
source=DocumentSource.ConsumeFolder,
|
|
||||||
original_file=dst,
|
|
||||||
),
|
|
||||||
None,
|
|
||||||
),
|
|
||||||
"File successfully split",
|
|
||||||
)
|
|
||||||
self.assertIsNotFile(dst)
|
|
||||||
|
|
||||||
@override_settings(
|
@override_settings(
|
||||||
CONSUMER_ENABLE_BARCODES=True,
|
CONSUMER_ENABLE_BARCODES=True,
|
||||||
CONSUMER_BARCODE_TIFF_SUPPORT=True,
|
CONSUMER_BARCODE_TIFF_SUPPORT=True,
|
||||||
@ -597,60 +452,6 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
self.assertIsNone(kwargs["override_document_type_id"])
|
self.assertIsNone(kwargs["override_document_type_id"])
|
||||||
self.assertIsNone(kwargs["override_tag_ids"])
|
self.assertIsNone(kwargs["override_tag_ids"])
|
||||||
|
|
||||||
@override_settings(
|
|
||||||
CONSUMER_ENABLE_BARCODES=True,
|
|
||||||
CONSUMER_BARCODE_TIFF_SUPPORT=True,
|
|
||||||
)
|
|
||||||
def test_consume_barcode_supported_no_extension_file(self):
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- TIFF image containing barcodes
|
|
||||||
- TIFF file is given without extension
|
|
||||||
WHEN:
|
|
||||||
- Consume task returns
|
|
||||||
THEN:
|
|
||||||
- The file was split
|
|
||||||
"""
|
|
||||||
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff"
|
|
||||||
|
|
||||||
dst = settings.SCRATCH_DIR / "patch-code-t-middle"
|
|
||||||
shutil.copy(test_file, dst)
|
|
||||||
|
|
||||||
with mock.patch("documents.tasks.async_to_sync"):
|
|
||||||
self.assertEqual(
|
|
||||||
tasks.consume_file(
|
|
||||||
ConsumableDocument(
|
|
||||||
source=DocumentSource.ConsumeFolder,
|
|
||||||
original_file=dst,
|
|
||||||
),
|
|
||||||
None,
|
|
||||||
),
|
|
||||||
"File successfully split",
|
|
||||||
)
|
|
||||||
self.assertIsNotFile(dst)
|
|
||||||
|
|
||||||
def test_scan_file_for_separating_barcodes_password(self):
|
|
||||||
"""
|
|
||||||
GIVEN:
|
|
||||||
- Password protected PDF
|
|
||||||
WHEN:
|
|
||||||
- File is scanned for barcode
|
|
||||||
THEN:
|
|
||||||
- Scanning handles the exception without crashing
|
|
||||||
"""
|
|
||||||
test_file = self.SAMPLE_DIR / "password-is-test.pdf"
|
|
||||||
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
|
||||||
with BarcodeReader(test_file, "application/pdf") as reader:
|
|
||||||
reader.detect()
|
|
||||||
warning = cm.output[0]
|
|
||||||
expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes"
|
|
||||||
self.assertTrue(warning.startswith(expected_str))
|
|
||||||
|
|
||||||
separator_page_numbers = reader.get_separation_pages()
|
|
||||||
|
|
||||||
self.assertEqual(reader.pdf_file, test_file)
|
|
||||||
self.assertDictEqual(separator_page_numbers, {})
|
|
||||||
|
|
||||||
@override_settings(
|
@override_settings(
|
||||||
CONSUMER_ENABLE_BARCODES=True,
|
CONSUMER_ENABLE_BARCODES=True,
|
||||||
CONSUMER_ENABLE_ASN_BARCODE=True,
|
CONSUMER_ENABLE_ASN_BARCODE=True,
|
||||||
@ -722,11 +523,64 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
self.assertEqual(len(document_list), 5)
|
self.assertEqual(len(document_list), 5)
|
||||||
|
|
||||||
|
|
||||||
class TestAsnBarcode(DirectoriesMixin, TestCase):
|
@override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
|
||||||
SAMPLE_DIR = Path(__file__).parent / "samples"
|
class TestBarcodeNewConsume(
|
||||||
|
DirectoriesMixin,
|
||||||
|
FileSystemAssertsMixin,
|
||||||
|
SampleDirMixin,
|
||||||
|
DocumentConsumeDelayMixin,
|
||||||
|
TestCase,
|
||||||
|
):
|
||||||
|
@override_settings(CONSUMER_ENABLE_BARCODES=True)
|
||||||
|
def test_consume_barcode_file(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Incoming file with at 1 barcode producing 2 documents
|
||||||
|
- Document includes metadata override information
|
||||||
|
WHEN:
|
||||||
|
- The document is split
|
||||||
|
THEN:
|
||||||
|
- Two new consume tasks are created
|
||||||
|
- Metadata overrides are preserved for the new consume
|
||||||
|
- The document source is unchanged (for consume templates)
|
||||||
|
"""
|
||||||
|
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
|
||||||
|
temp_copy = self.dirs.scratch_dir / test_file.name
|
||||||
|
shutil.copy(test_file, temp_copy)
|
||||||
|
|
||||||
BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
|
overrides = DocumentMetadataOverrides(tag_ids=[1, 2, 9])
|
||||||
|
|
||||||
|
with mock.patch("documents.tasks.async_to_sync") as progress_mocker:
|
||||||
|
self.assertEqual(
|
||||||
|
tasks.consume_file(
|
||||||
|
ConsumableDocument(
|
||||||
|
source=DocumentSource.ConsumeFolder,
|
||||||
|
original_file=temp_copy,
|
||||||
|
),
|
||||||
|
overrides,
|
||||||
|
),
|
||||||
|
"File successfully split",
|
||||||
|
)
|
||||||
|
# We let the consumer know progress is done
|
||||||
|
progress_mocker.assert_called_once()
|
||||||
|
# 2 new document consume tasks created
|
||||||
|
self.assertEqual(self.consume_file_mock.call_count, 2)
|
||||||
|
|
||||||
|
self.assertIsNotFile(temp_copy)
|
||||||
|
|
||||||
|
# Check the split files exist
|
||||||
|
# Check the source is unchanged
|
||||||
|
# Check the overrides are unchanged
|
||||||
|
for (
|
||||||
|
new_input_doc,
|
||||||
|
new_doc_overrides,
|
||||||
|
) in self.get_all_consume_delay_call_args():
|
||||||
|
self.assertEqual(new_input_doc.source, DocumentSource.ConsumeFolder)
|
||||||
|
self.assertIsFile(new_input_doc.original_file)
|
||||||
|
self.assertEqual(overrides, new_doc_overrides)
|
||||||
|
|
||||||
|
|
||||||
|
class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
|
||||||
@override_settings(CONSUMER_ASN_BARCODE_PREFIX="CUSTOM-PREFIX-")
|
@override_settings(CONSUMER_ASN_BARCODE_PREFIX="CUSTOM-PREFIX-")
|
||||||
def test_scan_file_for_asn_custom_prefix(self):
|
def test_scan_file_for_asn_custom_prefix(self):
|
||||||
"""
|
"""
|
||||||
|
@ -235,8 +235,10 @@ class DocumentConsumeDelayMixin:
|
|||||||
"""
|
"""
|
||||||
Iterates over all calls to the async task and returns the arguments
|
Iterates over all calls to the async task and returns the arguments
|
||||||
"""
|
"""
|
||||||
|
# Must be at least 1 call
|
||||||
|
self.consume_file_mock.assert_called()
|
||||||
|
|
||||||
for args, _ in self.consume_file_mock.call_args_list:
|
for args, kwargs in self.consume_file_mock.call_args_list:
|
||||||
input_doc, overrides = args
|
input_doc, overrides = args
|
||||||
|
|
||||||
yield (input_doc, overrides)
|
yield (input_doc, overrides)
|
||||||
@ -244,7 +246,7 @@ class DocumentConsumeDelayMixin:
|
|||||||
def get_specific_consume_delay_call_args(
|
def get_specific_consume_delay_call_args(
|
||||||
self,
|
self,
|
||||||
index: int,
|
index: int,
|
||||||
) -> Iterator[tuple[ConsumableDocument, DocumentMetadataOverrides]]:
|
) -> tuple[ConsumableDocument, DocumentMetadataOverrides]:
|
||||||
"""
|
"""
|
||||||
Returns the arguments of a specific call to the async task
|
Returns the arguments of a specific call to the async task
|
||||||
"""
|
"""
|
||||||
@ -299,3 +301,9 @@ class TestMigrations(TransactionTestCase):
|
|||||||
|
|
||||||
def setUpBeforeMigration(self, apps):
|
def setUpBeforeMigration(self, apps):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class SampleDirMixin:
|
||||||
|
SAMPLE_DIR = Path(__file__).parent / "samples"
|
||||||
|
|
||||||
|
BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user