From 12fa844c7f3d3659dbb26439565853bd0fb205a7 Mon Sep 17 00:00:00 2001
From: jonaswinkler
Date: Tue, 1 Dec 2020 14:30:13 +0100
Subject: [PATCH] testing the new noarchive option.

---
 src/paperless_tesseract/tests/test_parser.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/paperless_tesseract/tests/test_parser.py b/src/paperless_tesseract/tests/test_parser.py
index df8a0670b..a5f4a7f77 100644
--- a/src/paperless_tesseract/tests/test_parser.py
+++ b/src/paperless_tesseract/tests/test_parser.py
@@ -233,3 +233,20 @@ class TestParser(DirectoriesMixin, TestCase):
         self.assertContainsStrings(parser.get_text().lower(), ["page 1"])
         self.assertFalse("page 2" in parser.get_text().lower())
         self.assertFalse("page 3" in parser.get_text().lower())
+
+    @override_settings(OCR_MODE="skip_noarchive")
+    def test_skip_noarchive_withtext(self):
+        # Expect no archive file for the "digital" sample, but text from all three pages.
+        parser = RasterisedDocumentParser(None)
+        parser.parse(os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf")
+        self.assertIsNone(parser.archive_path)
+        self.assertContainsStrings(parser.get_text().lower(), ["page 1", "page 2", "page 3"])
+
+    @override_settings(OCR_MODE="skip_noarchive")
+    def test_skip_noarchive_notext(self):
+        # Expect an archive file on disk for the "images" sample, plus text from all three pages.
+        parser = RasterisedDocumentParser(None)
+        parser.parse(os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf")
+        # isfile() verifies the archive exists; one-argument os.path.join() merely echoes the path.
+        self.assertTrue(os.path.isfile(parser.archive_path))
+        self.assertContainsStrings(parser.get_text().lower(), ["page 1", "page 2", "page 3"])