Creates a data model for document consumption, allowing stronger typing of arguments and letting some information about the file be set only once

This commit is contained in:
Trenton H
2023-01-23 15:55:49 -08:00
parent fa60251c18
commit 3c2bbf244d
14 changed files with 596 additions and 433 deletions

View File

@@ -10,6 +10,9 @@ from django.test import TestCase
from documents import barcodes
from documents import tasks
from documents.consumer import ConsumerError
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from PIL import Image
@@ -183,46 +186,14 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
def test_get_mime_type(self):
"""
GIVEN:
- TIFF, PDF and PNG sample files
- Copies of the TIFF and PDF files saved under names without an extension
WHEN:
- The MIME type of each file is requested
THEN:
- The expected MIME type is returned for every file, with or without
  an extension in its name
"""
tiff_file = self.SAMPLE_DIR / "simple.tiff"
pdf_file = self.SAMPLE_DIR / "simple.pdf"
png_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.png"
# Target names carry no suffix, so the file name gives no hint of the type
tiff_file_no_extension = settings.SCRATCH_DIR / "testfile1"
pdf_file_no_extension = settings.SCRATCH_DIR / "testfile2"
shutil.copy(tiff_file, tiff_file_no_extension)
shutil.copy(pdf_file, pdf_file_no_extension)
# Files with a regular extension
self.assertEqual(barcodes.get_file_mime_type(tiff_file), "image/tiff")
self.assertEqual(barcodes.get_file_mime_type(pdf_file), "application/pdf")
# The extension-less copies must yield the same types as their originals
self.assertEqual(
barcodes.get_file_mime_type(tiff_file_no_extension),
"image/tiff",
)
self.assertEqual(
barcodes.get_file_mime_type(pdf_file_no_extension),
"application/pdf",
)
self.assertEqual(barcodes.get_file_mime_type(png_file), "image/png")
def test_convert_from_tiff_to_pdf(self):
"""
GIVEN:
-
- Multi-page TIFF image
WHEN:
-
- Conversion to PDF
THEN:
-
- The file converts without error
"""
test_file = self.SAMPLE_DIR / "simple.tiff"
@@ -233,34 +204,20 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertIsFile(target_file)
self.assertEqual(target_file.suffix, ".pdf")
def test_convert_error_from_pdf_to_pdf(self):
"""
GIVEN:
- A copy of a PDF sample file in the scratch directory
WHEN:
- The TIFF to PDF conversion is called with the PDF file
THEN:
- The conversion returns None
"""
test_file = self.SAMPLE_DIR / "simple.pdf"
dst = settings.SCRATCH_DIR / "simple.pdf"
# Work on a copy in the scratch directory rather than on the sample itself
shutil.copy(test_file, dst)
self.assertIsNone(barcodes.convert_from_tiff_to_pdf(dst))
def test_scan_file_for_separating_barcodes(self):
"""
GIVEN:
-
- PDF containing barcodes
WHEN:
-
- File is scanned for barcodes
THEN:
-
- Correct page index located
"""
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -272,15 +229,17 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_scan_file_for_separating_barcodes_none_present(self):
"""
GIVEN:
-
- File with no barcodes
WHEN:
-
- File is scanned
THEN:
-
- No barcodes detected
- No pages to split on
"""
test_file = self.SAMPLE_DIR / "simple.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -302,6 +261,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -323,6 +283,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -345,6 +306,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -366,6 +328,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -388,6 +351,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -411,6 +375,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -435,6 +400,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -459,6 +425,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -482,6 +449,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -504,6 +472,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -636,6 +605,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -673,7 +643,16 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
shutil.copy(test_file, dst)
with mock.patch("documents.tasks.async_to_sync"):
self.assertEqual(tasks.consume_file(dst), "File successfully split")
self.assertEqual(
tasks.consume_file(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=dst,
),
None,
),
"File successfully split",
)
@override_settings(
CONSUMER_ENABLE_BARCODES=True,
@@ -694,7 +673,17 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
shutil.copy(test_file, dst)
with mock.patch("documents.tasks.async_to_sync"):
self.assertEqual(tasks.consume_file(dst), "File successfully split")
self.assertEqual(
tasks.consume_file(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=dst,
),
None,
),
"File successfully split",
)
self.assertFalse(dst.exists())
@override_settings(
CONSUMER_ENABLE_BARCODES=True,
@@ -717,7 +706,16 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
shutil.copy(test_file, dst)
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
self.assertIn("Success", tasks.consume_file(dst))
self.assertIn(
"Success",
tasks.consume_file(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=dst,
),
None,
),
)
self.assertListEqual(
cm.output,
@@ -754,7 +752,17 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
shutil.copy(test_file, dst)
with mock.patch("documents.tasks.async_to_sync"):
self.assertEqual(tasks.consume_file(dst), "File successfully split")
self.assertEqual(
tasks.consume_file(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=dst,
),
None,
),
"File successfully split",
)
self.assertFalse(dst.exists())
def test_scan_file_for_separating_barcodes_password(self):
"""
@@ -769,6 +777,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
warning = cm.output[0]
expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes"
@@ -798,6 +807,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -835,6 +845,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -855,7 +866,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertEqual(len(document_list), 5)
class TestAsnBarcodes(DirectoriesMixin, TestCase):
class TestAsnBarcode(DirectoriesMixin, TestCase):
SAMPLE_DIR = Path(__file__).parent / "samples"
@@ -923,6 +934,7 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
@@ -944,6 +956,7 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
@@ -970,7 +983,13 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
shutil.copy(test_file, dst)
with mock.patch("documents.consumer.Consumer.try_consume_file") as mocked_call:
tasks.consume_file(dst)
tasks.consume_file(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=dst,
),
None,
)
args, kwargs = mocked_call.call_args
@@ -991,6 +1010,7 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
@@ -1010,6 +1030,7 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
@@ -1032,12 +1053,17 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
dst = self.dirs.scratch_dir / "barcode-128-asn-too-large.pdf"
shutil.copy(src, dst)
input_doc = ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=dst,
)
with mock.patch("documents.consumer.Consumer._send_progress"):
self.assertRaisesMessage(
ConsumerError,
"Given ASN 4294967296 is out of range [0, 4,294,967,295]",
tasks.consume_file,
dst,
input_doc,
)
@@ -1055,5 +1081,5 @@ class TestBarcodeZxing(TestBarcode):
reason="No zxingcpp",
)
@override_settings(CONSUMER_BARCODE_SCANNER="ZXING")
class TestAsnBarcodesZxing(TestAsnBarcodes):
class TestAsnBarcodesZxing(TestAsnBarcode):
pass