mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-30 18:27:45 -05:00
Feature: split documents on ASN barcode (#2554)
* also split documents when an ASN barcode is found
* linter
* fix test case parameters
* avoid pre-python-3.9 features
* simplify dict-creation in tests
* simplify dict-creation in tests for empty dicts
* Add test cases for the splitting by ASN barcode feature
* deleted supporting files for test case construction
This commit is contained in:
BIN
src/documents/tests/samples/barcodes/split-by-asn-1.pdf
Normal file
BIN
src/documents/tests/samples/barcodes/split-by-asn-1.pdf
Normal file
Binary file not shown.
BIN
src/documents/tests/samples/barcodes/split-by-asn-2.pdf
Normal file
BIN
src/documents/tests/samples/barcodes/split-by-asn-2.pdf
Normal file
Binary file not shown.
@@ -294,7 +294,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [0])
|
||||
self.assertDictEqual(separator_page_numbers, {0: False})
|
||||
|
||||
def test_scan_file_for_separating_barcodes_none_present(self):
|
||||
"""
|
||||
@@ -314,7 +314,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [])
|
||||
self.assertDictEqual(separator_page_numbers, {})
|
||||
|
||||
def test_scan_file_for_separating_barcodes_middle_page(self):
|
||||
"""
|
||||
@@ -337,7 +337,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [1])
|
||||
self.assertDictEqual(separator_page_numbers, {1: False})
|
||||
|
||||
def test_scan_file_for_separating_barcodes_multiple_pages(self):
|
||||
"""
|
||||
@@ -360,7 +360,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [2, 5])
|
||||
self.assertDictEqual(separator_page_numbers, {2: False, 5: False})
|
||||
|
||||
def test_scan_file_for_separating_barcodes_upside_down(self):
|
||||
"""
|
||||
@@ -384,7 +384,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [1])
|
||||
self.assertDictEqual(separator_page_numbers, {1: False})
|
||||
|
||||
def test_scan_file_for_separating_barcodes_fax_decode(self):
|
||||
"""
|
||||
@@ -407,7 +407,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [1])
|
||||
self.assertDictEqual(separator_page_numbers, {1: False})
|
||||
|
||||
def test_scan_file_for_separating_qr_barcodes(self):
|
||||
"""
|
||||
@@ -431,7 +431,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [0])
|
||||
self.assertDictEqual(separator_page_numbers, {0: False})
|
||||
|
||||
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
|
||||
def test_scan_file_for_separating_custom_barcodes(self):
|
||||
@@ -456,7 +456,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [0])
|
||||
self.assertDictEqual(separator_page_numbers, {0: False})
|
||||
|
||||
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
|
||||
def test_scan_file_for_separating_custom_qr_barcodes(self):
|
||||
@@ -482,7 +482,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [0])
|
||||
self.assertDictEqual(separator_page_numbers, {0: False})
|
||||
|
||||
@override_settings(CONSUMER_BARCODE_STRING="CUSTOM BARCODE")
|
||||
def test_scan_file_for_separating_custom_128_barcodes(self):
|
||||
@@ -508,7 +508,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [0])
|
||||
self.assertDictEqual(separator_page_numbers, {0: False})
|
||||
|
||||
def test_scan_file_for_separating_wrong_qr_barcodes(self):
|
||||
"""
|
||||
@@ -533,7 +533,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [])
|
||||
self.assertDictEqual(separator_page_numbers, {})
|
||||
|
||||
@override_settings(CONSUMER_BARCODE_STRING="ADAR-NEXTDOC")
|
||||
def test_scan_file_for_separating_qr_barcodes(self):
|
||||
@@ -558,7 +558,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
|
||||
self.assertGreater(len(doc_barcode_info.barcodes), 0)
|
||||
self.assertListEqual(separator_page_numbers, [1])
|
||||
self.assertDictEqual(separator_page_numbers, {1: False})
|
||||
|
||||
def test_separate_pages(self):
|
||||
"""
|
||||
@@ -573,7 +573,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-middle.pdf",
|
||||
)
|
||||
documents = barcodes.separate_pages(test_file, [1])
|
||||
documents = barcodes.separate_pages(test_file, {1: False})
|
||||
|
||||
self.assertEqual(len(documents), 2)
|
||||
|
||||
@@ -591,7 +591,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
self.BARCODE_SAMPLE_DIR,
|
||||
"patch-code-t-double.pdf",
|
||||
)
|
||||
pages = barcodes.separate_pages(test_file, [1, 2])
|
||||
pages = barcodes.separate_pages(test_file, {1: False, 2: False})
|
||||
|
||||
self.assertEqual(len(pages), 2)
|
||||
|
||||
@@ -610,7 +610,7 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
"patch-code-t-middle.pdf",
|
||||
)
|
||||
with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
|
||||
pages = barcodes.separate_pages(test_file, [])
|
||||
pages = barcodes.separate_pages(test_file, {})
|
||||
self.assertEqual(pages, [])
|
||||
self.assertEqual(
|
||||
cm.output,
|
||||
@@ -858,7 +858,88 @@ class TestBarcode(DirectoriesMixin, TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(doc_barcode_info.pdf_path, test_file)
|
||||
self.assertListEqual(separator_page_numbers, [])
|
||||
self.assertDictEqual(separator_page_numbers, {})
|
||||
|
||||
@override_settings(
    CONSUMER_ENABLE_BARCODES=True,
    CONSUMER_ENABLE_ASN_BARCODE=True,
)
def test_separate_pages_by_asn_barcodes_and_patcht(self):
    """
    GIVEN:
        - Input PDF with a patch code on page 3 and ASN barcodes
          on pages 1, 5, 6, 9 and 11
    WHEN:
        - The file is scanned and then split on the detected barcodes
    THEN:
        - The separator mapping lists the expected pages
        - The expected number of documents is produced
    """
    sample_dir = os.path.join(
        os.path.dirname(__file__),
        self.BARCODE_SAMPLE_DIR,
    )
    test_file = os.path.join(sample_dir, "split-by-asn-2.pdf")

    scan_result = barcodes.scan_file_for_barcodes(test_file)
    separators = barcodes.get_separating_barcodes(scan_result.barcodes)

    # Keys are zero-based page indexes. The patch code page (page 3) is
    # expected to map to False, the ASN pages (5, 6, 9, 11) to True. The
    # ASN on page 1 is expected to have no entry in the mapping.
    expected_separators = {
        2: False,
        4: True,
        5: True,
        8: True,
        10: True,
    }

    self.assertEqual(test_file, scan_result.pdf_path)
    self.assertDictEqual(separators, expected_separators)

    # Five separators are expected to yield six documents.
    split_documents = barcodes.separate_pages(test_file, separators)
    self.assertEqual(len(split_documents), 6)
|
||||
|
||||
@override_settings(
    CONSUMER_ENABLE_BARCODES=True,
    CONSUMER_ENABLE_ASN_BARCODE=True,
)
def test_separate_pages_by_asn_barcodes(self):
    """
    GIVEN:
        - Input PDF with ASN barcodes on pages 1, 3, 4, 7 and 9
    WHEN:
        - The file is scanned and then split on the detected barcodes
    THEN:
        - The separator mapping lists the expected pages
        - The expected number of documents is produced
    """
    sample_dir = os.path.join(
        os.path.dirname(__file__),
        self.BARCODE_SAMPLE_DIR,
    )
    test_file = os.path.join(sample_dir, "split-by-asn-1.pdf")

    scan_result = barcodes.scan_file_for_barcodes(test_file)
    separators = barcodes.get_separating_barcodes(scan_result.barcodes)

    # Keys are zero-based page indexes of the ASN pages 3, 4, 7 and 9,
    # each expected to map to True. The ASN on page 1 is expected to have
    # no entry in the mapping.
    expected_separators = dict.fromkeys((2, 3, 6, 8), True)

    self.assertEqual(test_file, scan_result.pdf_path)
    self.assertDictEqual(separators, expected_separators)

    # Four separators are expected to yield five documents.
    split_documents = barcodes.separate_pages(test_file, separators)
    self.assertEqual(len(split_documents), 5)
|
||||
|
||||
|
||||
class TestAsnBarcodes(DirectoriesMixin, TestCase):
|
||||
|
Reference in New Issue
Block a user