Creates a data model for document consumption, allowing stronger typing of arguments and letting some information about the file be set only once

This commit is contained in:
Trenton H
2023-01-23 15:55:49 -08:00
parent 80be254441
commit 36a6df0bae
14 changed files with 596 additions and 433 deletions

View File

@@ -32,6 +32,7 @@ from documents import bulk_edit
from documents import index
from documents.models import Correspondent
from documents.models import Document
from documents.tests.utils import DocumentConsumeDelayMixin
from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import PaperlessTask
@@ -45,7 +46,7 @@ from rest_framework.test import APITestCase
from whoosh.writing import AsyncWriter
class TestDocumentApi(DirectoriesMixin, APITestCase):
class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
def setUp(self):
super().setUp()
@@ -1085,10 +1086,11 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.data["documents_inbox"], None)
self.assertEqual(response.data["inbox_tag"], None)
@mock.patch("documents.views.consume_file.delay")
def test_upload(self, m):
def test_upload(self):
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
self.consume_file_mock.return_value = celery.result.AsyncResult(
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -1101,21 +1103,22 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = m.call_args
file_path = Path(args[0])
self.assertEqual(file_path.name, "simple.pdf")
self.assertIn(Path(settings.SCRATCH_DIR), file_path.parents)
self.assertIsNone(kwargs["override_title"])
self.assertIsNone(kwargs["override_correspondent_id"])
self.assertIsNone(kwargs["override_document_type_id"])
self.assertIsNone(kwargs["override_tag_ids"])
input_doc, overrides = self.get_last_consume_delay_call_args()
@mock.patch("documents.views.consume_file.delay")
def test_upload_empty_metadata(self, m):
self.assertEqual(input_doc.original_file.name, "simple.pdf")
self.assertIn(Path(settings.SCRATCH_DIR), input_doc.original_file.parents)
self.assertIsNone(overrides.title)
self.assertIsNone(overrides.correspondent_id)
self.assertIsNone(overrides.document_type_id)
self.assertIsNone(overrides.tag_ids)
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
def test_upload_empty_metadata(self):
self.consume_file_mock.return_value = celery.result.AsyncResult(
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -1128,21 +1131,22 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = m.call_args
file_path = Path(args[0])
self.assertEqual(file_path.name, "simple.pdf")
self.assertIn(Path(settings.SCRATCH_DIR), file_path.parents)
self.assertIsNone(kwargs["override_title"])
self.assertIsNone(kwargs["override_correspondent_id"])
self.assertIsNone(kwargs["override_document_type_id"])
self.assertIsNone(kwargs["override_tag_ids"])
input_doc, overrides = self.get_last_consume_delay_call_args()
@mock.patch("documents.views.consume_file.delay")
def test_upload_invalid_form(self, m):
self.assertEqual(input_doc.original_file.name, "simple.pdf")
self.assertIn(Path(settings.SCRATCH_DIR), input_doc.original_file.parents)
self.assertIsNone(overrides.title)
self.assertIsNone(overrides.correspondent_id)
self.assertIsNone(overrides.document_type_id)
self.assertIsNone(overrides.tag_ids)
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
def test_upload_invalid_form(self):
self.consume_file_mock.return_value = celery.result.AsyncResult(
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -1153,12 +1157,13 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
{"documenst": f},
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
m.assert_not_called()
self.consume_file_mock.assert_not_called()
@mock.patch("documents.views.consume_file.delay")
def test_upload_invalid_file(self, m):
def test_upload_invalid_file(self):
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
self.consume_file_mock.return_value = celery.result.AsyncResult(
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.zip"),
@@ -1169,12 +1174,13 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
{"document": f},
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
m.assert_not_called()
self.consume_file_mock.assert_not_called()
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_title(self, async_task):
def test_upload_with_title(self):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
self.consume_file_mock.return_value = celery.result.AsyncResult(
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -1186,16 +1192,20 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
async_task.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = async_task.call_args
_, overrides = self.get_last_consume_delay_call_args()
self.assertEqual(kwargs["override_title"], "my custom title")
self.assertEqual(overrides.title, "my custom title")
self.assertIsNone(overrides.correspondent_id)
self.assertIsNone(overrides.document_type_id)
self.assertIsNone(overrides.tag_ids)
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_correspondent(self, async_task):
def test_upload_with_correspondent(self):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
self.consume_file_mock.return_value = celery.result.AsyncResult(
id=str(uuid.uuid4()),
)
c = Correspondent.objects.create(name="test-corres")
with open(
@@ -1208,16 +1218,20 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
async_task.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = async_task.call_args
_, overrides = self.get_last_consume_delay_call_args()
self.assertEqual(kwargs["override_correspondent_id"], c.id)
self.assertEqual(overrides.correspondent_id, c.id)
self.assertIsNone(overrides.title)
self.assertIsNone(overrides.document_type_id)
self.assertIsNone(overrides.tag_ids)
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_correspondent(self, async_task):
def test_upload_with_invalid_correspondent(self):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
self.consume_file_mock.return_value = celery.result.AsyncResult(
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -1229,12 +1243,13 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
async_task.assert_not_called()
self.consume_file_mock.assert_not_called()
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_document_type(self, async_task):
def test_upload_with_document_type(self):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
self.consume_file_mock.return_value = celery.result.AsyncResult(
id=str(uuid.uuid4()),
)
dt = DocumentType.objects.create(name="invoice")
with open(
@@ -1247,16 +1262,20 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
async_task.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = async_task.call_args
_, overrides = self.get_last_consume_delay_call_args()
self.assertEqual(kwargs["override_document_type_id"], dt.id)
self.assertEqual(overrides.document_type_id, dt.id)
self.assertIsNone(overrides.correspondent_id)
self.assertIsNone(overrides.title)
self.assertIsNone(overrides.tag_ids)
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_document_type(self, async_task):
def test_upload_with_invalid_document_type(self):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
self.consume_file_mock.return_value = celery.result.AsyncResult(
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -1268,12 +1287,13 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
async_task.assert_not_called()
self.consume_file_mock.assert_not_called()
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_tags(self, async_task):
def test_upload_with_tags(self):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
self.consume_file_mock.return_value = celery.result.AsyncResult(
id=str(uuid.uuid4()),
)
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
@@ -1287,16 +1307,20 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
async_task.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = async_task.call_args
_, overrides = self.get_last_consume_delay_call_args()
self.assertCountEqual(kwargs["override_tag_ids"], [t1.id, t2.id])
self.assertCountEqual(overrides.tag_ids, [t1.id, t2.id])
self.assertIsNone(overrides.document_type_id)
self.assertIsNone(overrides.correspondent_id)
self.assertIsNone(overrides.title)
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_invalid_tags(self, async_task):
def test_upload_with_invalid_tags(self):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
self.consume_file_mock.return_value = celery.result.AsyncResult(
id=str(uuid.uuid4()),
)
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
@@ -1310,12 +1334,13 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
async_task.assert_not_called()
self.consume_file_mock.assert_not_called()
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_created(self, async_task):
def test_upload_with_created(self):
async_task.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
self.consume_file_mock.return_value = celery.result.AsyncResult(
id=str(uuid.uuid4()),
)
created = datetime.datetime(
2022,
@@ -1337,16 +1362,17 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
async_task.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = async_task.call_args
_, overrides = self.get_last_consume_delay_call_args()
self.assertEqual(kwargs["override_created"], created)
self.assertEqual(overrides.created, created)
@mock.patch("documents.views.consume_file.delay")
def test_upload_with_asn(self, m):
def test_upload_with_asn(self):
m.return_value = celery.result.AsyncResult(id=str(uuid.uuid4()))
self.consume_file_mock.return_value = celery.result.AsyncResult(
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
@@ -1359,17 +1385,16 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = m.call_args
file_path = Path(args[0])
self.assertEqual(file_path.name, "simple.pdf")
self.assertIn(Path(settings.SCRATCH_DIR), file_path.parents)
self.assertIsNone(kwargs["override_title"])
self.assertIsNone(kwargs["override_correspondent_id"])
self.assertIsNone(kwargs["override_document_type_id"])
self.assertIsNone(kwargs["override_tag_ids"])
self.assertEqual(500, kwargs["override_archive_serial_num"])
input_doc, overrides = self.get_last_consume_delay_call_args()
self.assertEqual(input_doc.original_file.name, "simple.pdf")
self.assertEqual(overrides.filename, "simple.pdf")
self.assertIsNone(overrides.correspondent_id)
self.assertIsNone(overrides.document_type_id)
self.assertIsNone(overrides.tag_ids)
self.assertEqual(500, overrides.asn)
def test_get_metadata(self):
doc = Document.objects.create(

View File

@@ -10,6 +10,9 @@ from django.test import TestCase
from documents import barcodes
from documents import tasks
from documents.consumer import ConsumerError
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from PIL import Image
@@ -183,46 +186,14 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
img = Image.open(test_file)
self.assertEqual(barcodes.barcode_reader(img), ["CUSTOM BARCODE"])
def test_get_mime_type(self):
"""
GIVEN:
-
WHEN:
-
THEN:
-
"""
tiff_file = self.SAMPLE_DIR / "simple.tiff"
pdf_file = self.SAMPLE_DIR / "simple.pdf"
png_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.png"
tiff_file_no_extension = settings.SCRATCH_DIR / "testfile1"
pdf_file_no_extension = settings.SCRATCH_DIR / "testfile2"
shutil.copy(tiff_file, tiff_file_no_extension)
shutil.copy(pdf_file, pdf_file_no_extension)
self.assertEqual(barcodes.get_file_mime_type(tiff_file), "image/tiff")
self.assertEqual(barcodes.get_file_mime_type(pdf_file), "application/pdf")
self.assertEqual(
barcodes.get_file_mime_type(tiff_file_no_extension),
"image/tiff",
)
self.assertEqual(
barcodes.get_file_mime_type(pdf_file_no_extension),
"application/pdf",
)
self.assertEqual(barcodes.get_file_mime_type(png_file), "image/png")
def test_convert_from_tiff_to_pdf(self):
"""
GIVEN:
-
- Multi-page TIFF image
WHEN:
-
- Conversion to PDF
THEN:
-
- The file converts without error
"""
test_file = self.SAMPLE_DIR / "simple.tiff"
@@ -233,34 +204,20 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertIsFile(target_file)
self.assertEqual(target_file.suffix, ".pdf")
def test_convert_error_from_pdf_to_pdf(self):
"""
GIVEN:
-
WHEN:
-
THEN:
-
"""
test_file = self.SAMPLE_DIR / "simple.pdf"
dst = settings.SCRATCH_DIR / "simple.pdf"
shutil.copy(test_file, dst)
self.assertIsNone(barcodes.convert_from_tiff_to_pdf(dst))
def test_scan_file_for_separating_barcodes(self):
"""
GIVEN:
-
- PDF containing barcodes
WHEN:
-
- File is scanned for barcodes
THEN:
-
- Correct page index located
"""
test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -272,15 +229,17 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_scan_file_for_separating_barcodes_none_present(self):
"""
GIVEN:
-
- File with no barcodes
WHEN:
-
- File is scanned
THEN:
-
- No barcodes detected
- No pages to split on
"""
test_file = self.SAMPLE_DIR / "simple.pdf"
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -302,6 +261,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -323,6 +283,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -345,6 +306,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -366,6 +328,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -388,6 +351,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -411,6 +375,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -435,6 +400,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -459,6 +425,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -482,6 +449,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -504,6 +472,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -636,6 +605,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -673,7 +643,16 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
shutil.copy(test_file, dst)
with mock.patch("documents.tasks.async_to_sync"):
self.assertEqual(tasks.consume_file(dst), "File successfully split")
self.assertEqual(
tasks.consume_file(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=dst,
),
None,
),
"File successfully split",
)
@override_settings(
CONSUMER_ENABLE_BARCODES=True,
@@ -694,7 +673,17 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
shutil.copy(test_file, dst)
with mock.patch("documents.tasks.async_to_sync"):
self.assertEqual(tasks.consume_file(dst), "File successfully split")
self.assertEqual(
tasks.consume_file(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=dst,
),
None,
),
"File successfully split",
)
self.assertFalse(dst.exists())
@override_settings(
CONSUMER_ENABLE_BARCODES=True,
@@ -717,7 +706,16 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
shutil.copy(test_file, dst)
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
self.assertIn("Success", tasks.consume_file(dst))
self.assertIn(
"Success",
tasks.consume_file(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=dst,
),
None,
),
)
self.assertListEqual(
cm.output,
@@ -754,7 +752,17 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
shutil.copy(test_file, dst)
with mock.patch("documents.tasks.async_to_sync"):
self.assertEqual(tasks.consume_file(dst), "File successfully split")
self.assertEqual(
tasks.consume_file(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=dst,
),
None,
),
"File successfully split",
)
self.assertFalse(dst.exists())
def test_scan_file_for_separating_barcodes_password(self):
"""
@@ -769,6 +777,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
warning = cm.output[0]
expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes"
@@ -798,6 +807,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -835,6 +845,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
separator_page_numbers = barcodes.get_separating_barcodes(
doc_barcode_info.barcodes,
@@ -855,7 +866,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertEqual(len(document_list), 5)
class TestAsnBarcodes(DirectoriesMixin, TestCase):
class TestAsnBarcode(DirectoriesMixin, TestCase):
SAMPLE_DIR = Path(__file__).parent / "samples"
@@ -923,6 +934,7 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
@@ -944,6 +956,7 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
@@ -970,7 +983,13 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
shutil.copy(test_file, dst)
with mock.patch("documents.consumer.Consumer.try_consume_file") as mocked_call:
tasks.consume_file(dst)
tasks.consume_file(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=dst,
),
None,
)
args, kwargs = mocked_call.call_args
@@ -991,6 +1010,7 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
@@ -1010,6 +1030,7 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
doc_barcode_info = barcodes.scan_file_for_barcodes(
test_file,
"application/pdf",
)
asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
@@ -1032,12 +1053,17 @@ class TestAsnBarcodes(DirectoriesMixin, TestCase):
dst = self.dirs.scratch_dir / "barcode-128-asn-too-large.pdf"
shutil.copy(src, dst)
input_doc = ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=dst,
)
with mock.patch("documents.consumer.Consumer._send_progress"):
self.assertRaisesMessage(
ConsumerError,
"Given ASN 4294967296 is out of range [0, 4,294,967,295]",
tasks.consume_file,
dst,
input_doc,
)
@@ -1055,5 +1081,5 @@ class TestBarcodeZxing(TestBarcode):
reason="No zxingcpp",
)
@override_settings(CONSUMER_BARCODE_SCANNER="ZXING")
class TestAsnBarcodesZxing(TestAsnBarcodes):
class TestAsnBarcodesZxing(TestAsnBarcode):
pass

View File

@@ -1,6 +1,7 @@
import filecmp
import os
import shutil
from pathlib import Path
from threading import Thread
from time import sleep
from unittest import mock
@@ -11,9 +12,12 @@ from django.core.management import CommandError
from django.test import override_settings
from django.test import TransactionTestCase
from documents.consumer import ConsumerError
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.management.commands import document_consumer
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import DocumentConsumeDelayMixin
class ConsumerThread(Thread):
@@ -35,18 +39,19 @@ def chunked(size, source):
yield source[i : i + size]
class ConsumerMixin:
class ConsumerThreadMixin(DocumentConsumeDelayMixin):
"""
Provides a thread which runs the consumer management command at setUp
and stops it at tearDown
"""
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
sample_file: Path = (
Path(__file__).parent / Path("samples") / Path("simple.pdf")
).resolve()
def setUp(self) -> None:
super().setUp()
self.t = None
patcher = mock.patch(
"documents.tasks.consume_file.delay",
)
self.task_mock = patcher.start()
self.addCleanup(patcher.stop)
def t_start(self):
self.t = ConsumerThread()
@@ -67,7 +72,7 @@ class ConsumerMixin:
def wait_for_task_mock_call(self, expected_call_count=1):
n = 0
while n < 50:
if self.task_mock.call_count >= expected_call_count:
if self.consume_file_mock.call_count >= expected_call_count:
# give task_mock some time to finish and raise errors
sleep(1)
return
@@ -76,8 +81,12 @@ class ConsumerMixin:
# A bogus async_task that will simply check the file for
# completeness and raise an exception otherwise.
def bogus_task(self, filename, **kwargs):
eq = filecmp.cmp(filename, self.sample_file, shallow=False)
def bogus_task(
self,
input_doc: ConsumableDocument,
overrides=None,
):
eq = filecmp.cmp(input_doc.original_file, self.sample_file, shallow=False)
if not eq:
print("Consumed an INVALID file.")
raise ConsumerError("Incomplete File READ FAILED")
@@ -103,19 +112,20 @@ class ConsumerMixin:
@override_settings(
CONSUMER_INOTIFY_DELAY=0.01,
)
class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
class TestConsumer(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCase):
def test_consume_file(self):
self.t_start()
f = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
f = Path(os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
shutil.copy(self.sample_file, f)
self.wait_for_task_mock_call()
self.task_mock.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[0], f)
input_doc, _ = self.get_last_consume_delay_call_args()
self.assertEqual(input_doc.original_file, f)
def test_consume_file_invalid_ext(self):
self.t_start()
@@ -125,26 +135,27 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.wait_for_task_mock_call()
self.task_mock.assert_not_called()
self.consume_file_mock.assert_not_called()
def test_consume_existing_file(self):
f = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
f = Path(os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
shutil.copy(self.sample_file, f)
self.t_start()
self.task_mock.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[0], f)
input_doc, _ = self.get_last_consume_delay_call_args()
self.assertEqual(input_doc.original_file, f)
@mock.patch("documents.management.commands.document_consumer.logger.error")
def test_slow_write_pdf(self, error_logger):
self.task_mock.side_effect = self.bogus_task
self.consume_file_mock.side_effect = self.bogus_task
self.t_start()
fname = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
fname = Path(os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
self.slow_write_file(fname)
@@ -152,48 +163,52 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
error_logger.assert_not_called()
self.task_mock.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[0], fname)
input_doc, _ = self.get_last_consume_delay_call_args()
self.assertEqual(input_doc.original_file, fname)
@mock.patch("documents.management.commands.document_consumer.logger.error")
def test_slow_write_and_move(self, error_logger):
self.task_mock.side_effect = self.bogus_task
self.consume_file_mock.side_effect = self.bogus_task
self.t_start()
fname = os.path.join(self.dirs.consumption_dir, "my_file.~df")
fname2 = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
fname = Path(os.path.join(self.dirs.consumption_dir, "my_file.~df"))
fname2 = Path(os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
self.slow_write_file(fname)
shutil.move(fname, fname2)
self.wait_for_task_mock_call()
self.task_mock.assert_called_once()
self.consume_file_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[0], fname2)
input_doc, _ = self.get_last_consume_delay_call_args()
self.assertEqual(input_doc.original_file, fname2)
error_logger.assert_not_called()
@mock.patch("documents.management.commands.document_consumer.logger.error")
def test_slow_write_incomplete(self, error_logger):
self.task_mock.side_effect = self.bogus_task
self.consume_file_mock.side_effect = self.bogus_task
self.t_start()
fname = os.path.join(self.dirs.consumption_dir, "my_file.pdf")
fname = Path(os.path.join(self.dirs.consumption_dir, "my_file.pdf"))
self.slow_write_file(fname, incomplete=True)
self.wait_for_task_mock_call()
self.task_mock.assert_called_once()
args, kwargs = self.task_mock.call_args
self.assertEqual(args[0], fname)
self.consume_file_mock.assert_called_once()
input_doc, _ = self.get_last_consume_delay_call_args()
self.assertEqual(input_doc.original_file, fname)
# assert that we have an error logged with this invalid file.
error_logger.assert_called_once()
@@ -209,7 +224,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.assertRaises(CommandError, call_command, "document_consumer", "--oneshot")
def test_mac_write(self):
self.task_mock.side_effect = self.bogus_task
self.consume_file_mock.side_effect = self.bogus_task
self.t_start()
@@ -238,12 +253,13 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.wait_for_task_mock_call(expected_call_count=2)
self.assertEqual(2, self.task_mock.call_count)
self.assertEqual(2, self.consume_file_mock.call_count)
fnames = [
os.path.basename(args[0]) for args, _ in self.task_mock.call_args_list
]
self.assertCountEqual(fnames, ["my_file.pdf", "my_second_file.pdf"])
consumed_files = []
for input_doc, _ in self.get_all_consume_delay_call_args():
consumed_files.append(input_doc.original_file.name)
self.assertCountEqual(consumed_files, ["my_file.pdf", "my_second_file.pdf"])
def test_is_ignored(self):
test_paths = [
@@ -341,7 +357,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.wait_for_task_mock_call()
self.task_mock.assert_not_called()
self.consume_file_mock.assert_not_called()
@override_settings(
@@ -373,7 +389,7 @@ class TestConsumerRecursivePolling(TestConsumer):
pass
class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
class TestConsumerTags(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCase):
@override_settings(CONSUMER_RECURSIVE=True, CONSUMER_SUBDIRS_AS_TAGS=True)
def test_consume_file_with_path_tags(self):
@@ -387,7 +403,7 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
path = os.path.join(self.dirs.consumption_dir, *tag_names)
os.makedirs(path, exist_ok=True)
f = os.path.join(path, "my_file.pdf")
f = Path(os.path.join(path, "my_file.pdf"))
# Wait at least inotify read_delay for recursive watchers
# to be created for the new directories
sleep(1)
@@ -395,18 +411,19 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.wait_for_task_mock_call()
self.task_mock.assert_called_once()
self.consume_file_mock.assert_called_once()
# Add the pk of the Tag created by _consume()
tag_ids.append(Tag.objects.get(name=tag_names[1]).pk)
args, kwargs = self.task_mock.call_args
self.assertEqual(args[0], f)
input_doc, overrides = self.get_last_consume_delay_call_args()
self.assertEqual(input_doc.original_file, f)
# assertCountEqual has a bad name, but test that the first
# sequence contains the same elements as second, regardless of
# their order.
self.assertCountEqual(kwargs["override_tag_ids"], tag_ids)
self.assertCountEqual(overrides.tag_ids, tag_ids)
@override_settings(
CONSUMER_POLLING=1,

View File

@@ -1,76 +1,21 @@
import uuid
from unittest import mock
import celery
from django.test import TestCase
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.models import PaperlessTask
from documents.signals.handlers import before_task_publish_handler
from documents.signals.handlers import task_postrun_handler
from documents.signals.handlers import task_prerun_handler
from documents.tests.test_consumer import fake_magic_from_file
from documents.tests.utils import DirectoriesMixin
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
class TestTaskSignalHandler(DirectoriesMixin, TestCase):
# Message headers used as `headers_to_use` when exercising the task signal
# handlers for a "documents.tasks.consume_file" task on a file under /consume.
# "argsrepr" / "kwargsrepr" are the string renderings of the args and kwargs
# carried in BODY_CONSUME.
HEADERS_CONSUME = {
"lang": "py",
"task": "documents.tasks.consume_file",
"id": "52d31e24-9dcc-4c32-9e16-76007e9add5e",
"shadow": None,
"eta": None,
"expires": None,
"group": None,
"group_index": None,
"retries": 0,
"timelimit": [None, None],
"root_id": "52d31e24-9dcc-4c32-9e16-76007e9add5e",
"parent_id": None,
"argsrepr": "('/consume/hello-999.pdf',)",
"kwargsrepr": "{'override_tag_ids': None}",
"origin": "gen260@paperless-ngx-dev-webserver",
"ignore_result": False,
}
# Message body paired with HEADERS_CONSUME: a 3-tuple of the task's positional
# args, its keyword args, and a dict of callbacks/errbacks/chain/chord options.
BODY_CONSUME = (
# args
("/consume/hello-999.pdf",),
# kwargs
{"override_tag_ids": None},
# callbacks / errbacks / chain / chord options (all unset here)
{"callbacks": None, "errbacks": None, "chain": None, "chord": None},
)
# Message headers for a "documents.tasks.consume_file" task whose file is a
# temporary upload path and whose real filename ("statement.pdf") is supplied
# through the override_filename kwarg. "argsrepr" / "kwargsrepr" are the string
# renderings of the args and kwargs carried in BODY_WEB_UI.
HEADERS_WEB_UI = {
"lang": "py",
"task": "documents.tasks.consume_file",
"id": "6e88a41c-e5f8-4631-9972-68c314512498",
"shadow": None,
"eta": None,
"expires": None,
"group": None,
"group_index": None,
"retries": 0,
"timelimit": [None, None],
"root_id": "6e88a41c-e5f8-4631-9972-68c314512498",
"parent_id": None,
"argsrepr": "('/tmp/paperless/paperless-upload-st9lmbvx',)",
"kwargsrepr": "{'override_filename': 'statement.pdf', 'override_title': None, 'override_correspondent_id': None, 'override_document_type_id': None, 'override_tag_ids': None, 'task_id': 'f5622ca9-3707-4ed0-b418-9680b912572f', 'override_created': None}",
"origin": "gen342@paperless-ngx-dev-webserver",
"ignore_result": False,
}
# Message body paired with HEADERS_WEB_UI: a 3-tuple of the task's positional
# args, its keyword args, and a dict of callbacks/errbacks/chain/chord options.
BODY_WEB_UI = (
# args
("/tmp/paperless/paperless-upload-st9lmbvx",),
# kwargs
{
# The upload is stored under a temporary name; this carries the name
# the test later expects in task_file_name.
"override_filename": "statement.pdf",
"override_title": None,
"override_correspondent_id": None,
"override_document_type_id": None,
"override_tag_ids": None,
"task_id": "f5622ca9-3707-4ed0-b418-9680b912572f",
"override_created": None,
},
# callbacks / errbacks / chain / chord options (all unset here)
{"callbacks": None, "errbacks": None, "chain": None, "chord": None},
)
def util_call_before_task_publish_handler(self, headers_to_use, body_to_use):
"""
Simple utility to call the before-publish handler and ensure it created a single task
@@ -91,41 +36,36 @@ class TestTaskSignalHandler(DirectoriesMixin, TestCase):
THEN:
- The task is created and marked as pending
"""
headers = {
"id": str(uuid.uuid4()),
"task": "documents.tasks.consume_file",
}
body = (
# args
(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file="/consume/hello-999.pdf",
),
None,
),
# kwargs
{},
# celery stuff
{"callbacks": None, "errbacks": None, "chain": None, "chord": None},
)
self.util_call_before_task_publish_handler(
headers_to_use=self.HEADERS_CONSUME,
body_to_use=self.BODY_CONSUME,
headers_to_use=headers,
body_to_use=body,
)
task = PaperlessTask.objects.get()
self.assertIsNotNone(task)
self.assertEqual(self.HEADERS_CONSUME["id"], task.task_id)
self.assertEqual(headers["id"], task.task_id)
self.assertEqual("hello-999.pdf", task.task_file_name)
self.assertEqual("documents.tasks.consume_file", task.task_name)
self.assertEqual(celery.states.PENDING, task.status)
def test_before_task_publish_handler_webui(self):
    """
    GIVEN:
        - A celery consume task submitted through the web UI
    WHEN:
        - The before-publish signal handler is invoked for it
    THEN:
        - A task record is created and left in the pending state
    """
    web_ui_headers = self.HEADERS_WEB_UI
    self.util_call_before_task_publish_handler(
        body_to_use=self.BODY_WEB_UI,
        headers_to_use=web_ui_headers,
    )

    # get() raises unless exactly one PaperlessTask row exists
    pending_task = PaperlessTask.objects.get()

    self.assertIsNotNone(pending_task)
    self.assertEqual(web_ui_headers["id"], pending_task.task_id)
    # The name comes from the override_filename kwarg, not the temp path
    self.assertEqual("statement.pdf", pending_task.task_file_name)
    self.assertEqual("documents.tasks.consume_file", pending_task.task_name)
    self.assertEqual(celery.states.PENDING, pending_task.status)
def test_task_prerun_handler(self):
"""
GIVEN:
@@ -135,12 +75,32 @@ class TestTaskSignalHandler(DirectoriesMixin, TestCase):
THEN:
- The task is marked as started
"""
self.util_call_before_task_publish_handler(
headers_to_use=self.HEADERS_CONSUME,
body_to_use=self.BODY_CONSUME,
headers = {
"id": str(uuid.uuid4()),
"task": "documents.tasks.consume_file",
}
body = (
# args
(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file="/consume/hello-99.pdf",
),
None,
),
# kwargs
{},
# celery stuff
{"callbacks": None, "errbacks": None, "chain": None, "chord": None},
)
task_prerun_handler(task_id=self.HEADERS_CONSUME["id"])
self.util_call_before_task_publish_handler(
headers_to_use=headers,
body_to_use=body,
)
task_prerun_handler(task_id=headers["id"])
task = PaperlessTask.objects.get()
@@ -155,13 +115,31 @@ class TestTaskSignalHandler(DirectoriesMixin, TestCase):
THEN:
- The task is marked as started
"""
headers = {
"id": str(uuid.uuid4()),
"task": "documents.tasks.consume_file",
}
body = (
# args
(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file="/consume/hello-9.pdf",
),
None,
),
# kwargs
{},
# celery stuff
{"callbacks": None, "errbacks": None, "chain": None, "chord": None},
)
self.util_call_before_task_publish_handler(
headers_to_use=self.HEADERS_CONSUME,
body_to_use=self.BODY_CONSUME,
headers_to_use=headers,
body_to_use=body,
)
task_postrun_handler(
task_id=self.HEADERS_CONSUME["id"],
task_id=headers["id"],
retval="Success. New document id 1 created",
state=celery.states.SUCCESS,
)

View File

@@ -4,6 +4,8 @@ from collections import namedtuple
from contextlib import contextmanager
from os import PathLike
from pathlib import Path
from typing import Iterator
from typing import Tuple
from typing import Union
from unittest import mock
@@ -12,6 +14,8 @@ from django.db import connection
from django.db.migrations.executor import MigrationExecutor
from django.test import override_settings
from django.test import TransactionTestCase
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
def setup_directories():
@@ -116,6 +120,11 @@ class ConsumerProgressMixin:
class DocumentConsumeDelayMixin:
"""
Provides mocking of the consume_file asynchronous task and useful utilities
for decoding its arguments
"""
def setUp(self) -> None:
self.consume_file_patcher = mock.patch("documents.tasks.consume_file.delay")
self.consume_file_mock = self.consume_file_patcher.start()
@@ -125,6 +134,47 @@ class DocumentConsumeDelayMixin:
super().tearDown()
self.consume_file_patcher.stop()
def get_last_consume_delay_call_args(
    self,
) -> Tuple[ConsumableDocument, DocumentMetadataOverrides]:
    """
    Fetches the positional arguments of the latest call to the mocked
    async task, as an (input document, overrides) pair.

    Fails the test if the task has not been called at all.
    """
    delay_mock = self.consume_file_mock

    # Nothing to return unless the task was queued at least once
    delay_mock.assert_called()

    # call_args is an (args, kwargs) pair; only the positionals matter
    positional, _kwargs = delay_mock.call_args
    document, metadata_overrides = positional

    return document, metadata_overrides
def get_all_consume_delay_call_args(
    self,
) -> Iterator[Tuple[ConsumableDocument, DocumentMetadataOverrides]]:
    """
    Lazily walks every recorded call to the mocked async task, yielding
    each call's (input document, overrides) positional arguments in the
    order the calls were made.
    """
    recorded_calls = self.consume_file_mock.call_args_list
    for positional, _kwargs in recorded_calls:
        document, metadata_overrides = positional
        yield document, metadata_overrides
def get_specific_consume_delay_call_args(
    self,
    index: int,
) -> Tuple[ConsumableDocument, DocumentMetadataOverrides]:
    """
    Returns the arguments of a specific call to the async task

    :param index: Position of the wanted call; used directly as an index
        into the mock's ``call_args_list``
    :return: The (input document, overrides) positional arguments of
        that call
    :raises AssertionError: If the task was never called
    :raises IndexError: If no call was recorded at ``index``
    """
    # Must be at least 1 call
    self.consume_file_mock.assert_called()

    # Each recorded call is an (args, kwargs) pair; only positionals are used
    args, _ = self.consume_file_mock.call_args_list[index]
    input_doc, overrides = args

    return (input_doc, overrides)
class TestMigrations(TransactionTestCase):
@property
@@ -140,7 +190,7 @@ class TestMigrations(TransactionTestCase):
assert (
self.migrate_from and self.migrate_to
), "TestCase '{}' must define migrate_from and migrate_to properties".format(
), "TestCase '{}' must define migrate_from and migrate_to properties".format(
type(self).__name__,
)
self.migrate_from = [(self.app, self.migrate_from)]