mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-19 10:19:27 -05:00
Adds test coverage for multi-page TIFFs: without alpha, with alpha, and with alpha/sRGB
This commit is contained in:
parent
59e0c1fe4e
commit
0fd51e35e1
Binary file not shown.
Binary file not shown.
BIN
src/paperless_tesseract/tests/samples/multi-page-images.tiff
Normal file
BIN
src/paperless_tesseract/tests/samples/multi-page-images.tiff
Normal file
Binary file not shown.
@ -542,6 +542,69 @@ class TestParser(DirectoriesMixin, TestCase):
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_multi_page_tiff(self):
    """
    GIVEN:
        - A TIFF sample file made up of several pages
    WHEN:
        - The file is handed to the parser as "image/tiff"
    THEN:
        - An archive file exists at the parser's archive path
        - The extracted text mentions pages 1, 2 and 3
    """
    expected_fragments = ["page 1", "page 2", "page 3"]

    parser = RasterisedDocumentParser(None)
    sample_path = os.path.join(self.SAMPLE_FILES, "multi-page-images.tiff")
    parser.parse(sample_path, "image/tiff")

    # The archive must be on disk before the text content is inspected.
    archive_written = os.path.isfile(parser.archive_path)
    self.assertTrue(archive_written)

    # Matching is done on lower-cased text, so OCR capitalisation is irrelevant.
    extracted_text = parser.get_text().lower()
    self.assertContainsStrings(extracted_text, expected_fragments)
|
||||||
|
|
||||||
|
def test_multi_page_tiff_alpha(self):
    """
    GIVEN:
        - A TIFF sample file made up of several pages
        - The image includes an alpha channel
    WHEN:
        - The file is handed to the parser as "image/tiff"
    THEN:
        - An archive file exists at the parser's archive path
        - The extracted text mentions pages 1, 2 and 3
    """
    expected_fragments = ["page 1", "page 2", "page 3"]

    parser = RasterisedDocumentParser(None)
    sample_path = os.path.join(
        self.SAMPLE_FILES,
        "multi-page-images-alpha.tiff",
    )
    parser.parse(sample_path, "image/tiff")

    # The archive must be on disk before the text content is inspected.
    archive_written = os.path.isfile(parser.archive_path)
    self.assertTrue(archive_written)

    # Matching is done on lower-cased text, so OCR capitalisation is irrelevant.
    extracted_text = parser.get_text().lower()
    self.assertContainsStrings(extracted_text, expected_fragments)
|
||||||
|
|
||||||
|
def test_multi_page_tiff_alpha_srgb(self):
    """
    GIVEN:
        - A TIFF sample file made up of several pages
        - The image includes an alpha channel
        - The image uses the sRGB colorspace
    WHEN:
        - The file is handed to the parser as "image/tiff"
    THEN:
        - An archive file exists at the parser's archive path
        - The extracted text mentions pages 1, 2 and 3
    """
    expected_fragments = ["page 1", "page 2", "page 3"]

    parser = RasterisedDocumentParser(None)
    sample_path = os.path.join(
        self.SAMPLE_FILES,
        "multi-page-images-alpha-rgb.tiff",
    )
    parser.parse(sample_path, "image/tiff")

    # The archive must be on disk before the text content is inspected.
    archive_written = os.path.isfile(parser.archive_path)
    self.assertTrue(archive_written)

    # Matching is done on lower-cased text, so OCR capitalisation is irrelevant.
    extracted_text = parser.get_text().lower()
    self.assertContainsStrings(extracted_text, expected_fragments)
|
||||||
|
|
||||||
def test_ocrmypdf_parameters(self):
|
def test_ocrmypdf_parameters(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
params = parser.construct_ocrmypdf_parameters(
|
params = parser.construct_ocrmypdf_parameters(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user