From 5fae5a9ee067694427294dfe3b64498c79bcdaf3 Mon Sep 17 00:00:00 2001
From: Florian Brandes
Date: Fri, 8 Apr 2022 08:07:23 +0200
Subject: [PATCH] fix conditional in separate_pages

add additional test for separate_pages

Signed-off-by: Florian Brandes
---
Review notes (text between the three-dash line and the first "diff --git"
is commentary and is not part of the commit):

* This patch was recovered from a copy whose line breaks and leading
  whitespace had been collapsed. Tokens are unchanged; indentation and the
  position of blank context lines are reconstructed. The removed and added
  loop bodies are token-identical except for the re-wrapped logger.debug
  call, so the loop is assumed to move one indentation level deeper.
  TODO: confirm against src/documents/tasks.py before applying.
* The new test passes an empty pages_to_split_on list and expects an empty
  result plus a single "No pages to split on!" warning on the
  "paperless.tasks" logger.
* The expected-log literal in the new test no longer carries an "f" prefix:
  it has no placeholders, so the prefix was a no-op (flake8 F541). The
  post-image blob id on that file's "index" line predates this tweak.

 src/documents/tasks.py            | 37 +++++++++++++++++--------------
 src/documents/tests/test_tasks.py | 17 ++++++++++++++
 2 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/src/documents/tasks.py b/src/documents/tasks.py
index 42d50549d..e9a015d67 100644
--- a/src/documents/tasks.py
+++ b/src/documents/tasks.py
@@ -137,23 +137,26 @@ def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]:
             dst.save(out)
         document_paths = [savepath]
 
-    for count, page_number in enumerate(pages_to_split_on):
-        logger.debug(f"Count: {str(count)} page_number: {str(page_number)}")
-        dst = Pdf.new()
-        try:
-            next_page = pages_to_split_on[count + 1]
-        except IndexError:
-            next_page = len(pdf.pages)
-        # skip the first page_number. This contains the barcode page
-        for page in range(page_number + 1, next_page):
-            logger.debug(f"page_number: {str(page_number)} next_page: {str(next_page)}")
-            dst.pages.append(pdf.pages[page])
-        output_filename = "{}_document_{}.pdf".format(fname, str(count + 1))
-        logger.debug(f"pdf no:{str(count)} has {str(len(dst.pages))} pages")
-        savepath = os.path.join(tempdir, output_filename)
-        with open(savepath, "wb") as out:
-            dst.save(out)
-        document_paths.append(savepath)
+        # iterate through the rest of the document
+        for count, page_number in enumerate(pages_to_split_on):
+            logger.debug(f"Count: {str(count)} page_number: {str(page_number)}")
+            dst = Pdf.new()
+            try:
+                next_page = pages_to_split_on[count + 1]
+            except IndexError:
+                next_page = len(pdf.pages)
+            # skip the first page_number. This contains the barcode page
+            for page in range(page_number + 1, next_page):
+                logger.debug(
+                    f"page_number: {str(page_number)} next_page: {str(next_page)}",
+                )
+                dst.pages.append(pdf.pages[page])
+            output_filename = "{}_document_{}.pdf".format(fname, str(count + 1))
+            logger.debug(f"pdf no:{str(count)} has {str(len(dst.pages))} pages")
+            savepath = os.path.join(tempdir, output_filename)
+            with open(savepath, "wb") as out:
+                dst.save(out)
+            document_paths.append(savepath)
     logger.debug(f"Temp files are {str(document_paths)}")
     return document_paths
 
diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py
index 8160dd3f3..c78fa16c2 100644
--- a/src/documents/tests/test_tasks.py
+++ b/src/documents/tests/test_tasks.py
@@ -312,6 +312,23 @@ class TestTasks(DirectoriesMixin, TestCase):
         pages = tasks.separate_pages(test_file, [1])
         self.assertEqual(len(pages), 2)
 
+    def test_separate_pages_no_list(self):
+        test_file = os.path.join(
+            os.path.dirname(__file__),
+            "samples",
+            "barcodes",
+            "patch-code-t-middle.pdf",
+        )
+        with self.assertLogs("paperless.tasks", level="WARNING") as cm:
+            pages = tasks.separate_pages(test_file, [])
+            self.assertEqual(pages, [])
+            self.assertEqual(
+                cm.output,
+                [
+                    "WARNING:paperless.tasks:No pages to split on!",
+                ],
+            )
+
     def test_save_to_dir(self):
         test_file = os.path.join(
             os.path.dirname(__file__),