From 9b84dc06b63775fdb2daf3ccb8ca00ab68795de0 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Sun, 13 Oct 2024 20:51:39 -0700 Subject: [PATCH] Enhancement: support retain barcode split pages (#7912) --- docs/configuration.md | 6 +++++ src/documents/barcodes.py | 7 +++++- src/documents/tests/test_barcodes.py | 36 ++++++++++++++++++++++++++++ src/paperless/settings.py | 4 ++++ 4 files changed, 52 insertions(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index 5fa4ab0a7..8e261f0f5 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1287,6 +1287,12 @@ change this. Defaults to "PATCHT" +#### [`PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES=`](#PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES) {#PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES} + +: If set to true, all pages that are split by a barcode (such as PATCHT) will be kept. + + Defaults to false. + #### [`PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE=`](#PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE) {#PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE} : Enables the detection of barcodes in the scanned document and diff --git a/src/documents/barcodes.py b/src/documents/barcodes.py index 746d6014d..132e853b0 100644 --- a/src/documents/barcodes.py +++ b/src/documents/barcodes.py @@ -387,7 +387,12 @@ class BarcodePlugin(ConsumeTaskPlugin): """ # filter all barcodes for the separator string # get the page numbers of the separating barcodes - separator_pages = {bc.page: False for bc in self.barcodes if bc.is_separator} + retain = settings.CONSUMER_BARCODE_RETAIN_SPLIT_PAGES + separator_pages = { + bc.page: retain + for bc in self.barcodes + if bc.is_separator and (not retain or (retain and bc.page > 0)) + } # as below, don't include the first page if retain is enabled if not settings.CONSUMER_ENABLE_ASN_BARCODE: return separator_pages diff --git a/src/documents/tests/test_barcodes.py b/src/documents/tests/test_barcodes.py index b0c42963a..03b0903dd 100644 --- 
a/src/documents/tests/test_barcodes.py +++ b/src/documents/tests/test_barcodes.py @@ -511,6 +511,42 @@ class TestBarcode( document_list = reader.separate_pages(separator_page_numbers) self.assertEqual(len(document_list), 5) + @override_settings( + CONSUMER_ENABLE_BARCODES=True, + CONSUMER_ENABLE_ASN_BARCODE=True, + CONSUMER_BARCODE_RETAIN_SPLIT_PAGES=True, + ) + def test_separate_pages_by_asn_barcodes_and_patcht_retain_pages(self): + """ + GIVEN: + - Input PDF with a patch code on page 3 and ASN barcodes on pages 1,5,6,9,11 + - Retain split pages is enabled + WHEN: + - Input file is split on barcodes + THEN: + - Correct number of files produced, split correctly by correct pages, and the split pages are retained + """ + test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf" + + with self.get_reader(test_file) as reader: + reader.detect() + separator_page_numbers = reader.get_separation_pages() + + self.assertEqual( + reader.pdf_file, + test_file, + ) + self.assertDictEqual( + separator_page_numbers, + { + 2: True, + 4: True, + 5: True, + 8: True, + 10: True, + }, + ) + @override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR") class TestBarcodeNewConsume( diff --git a/src/paperless/settings.py b/src/paperless/settings.py index d30a9d57d..c1cb62c9e 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -932,6 +932,10 @@ CONSUMER_BARCODE_MAX_PAGES: Final[int] = __get_int( 0, ) +CONSUMER_BARCODE_RETAIN_SPLIT_PAGES = __get_boolean( + "PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES", +) + CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean( "PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE", )