mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-09 09:58:20 -05:00
Adds test coverage for multi-page TIFFs: without alpha, with alpha, and with alpha in sRGB
This commit is contained in:
parent
59e0c1fe4e
commit
0fd51e35e1
Binary file not shown.
Binary file not shown.
BIN
src/paperless_tesseract/tests/samples/multi-page-images.tiff
Normal file
BIN
src/paperless_tesseract/tests/samples/multi-page-images.tiff
Normal file
Binary file not shown.
@ -542,6 +542,69 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
],
|
||||
)
|
||||
|
||||
def test_multi_page_tiff(self):
    """
    GIVEN:
        - A multi-page TIFF sample image
    WHEN:
        - The image is parsed with the "image/tiff" MIME type
    THEN:
        - The parser's archive path points at an existing file
        - The extracted text mentions every page ("page 1" .. "page 3")
    """
    mime_type = "image/tiff"
    expected_markers = ["page 1", "page 2", "page 3"]

    parser = RasterisedDocumentParser(None)
    sample_path = os.path.join(self.SAMPLE_FILES, "multi-page-images.tiff")
    parser.parse(sample_path, mime_type)

    # Parsing must leave a real file behind at the archive path.
    archive_exists = os.path.isfile(parser.archive_path)
    self.assertTrue(archive_exists)

    # Lower-case the text so it lines up with the lower-case page markers.
    extracted_text = parser.get_text().lower()
    self.assertContainsStrings(extracted_text, expected_markers)
|
||||
|
||||
def test_multi_page_tiff_alpha(self):
    """
    GIVEN:
        - A multi-page TIFF sample image
        - The image includes an alpha channel
    WHEN:
        - The image is parsed with the "image/tiff" MIME type
    THEN:
        - The parser's archive path points at an existing file
        - The extracted text mentions every page ("page 1" .. "page 3")
    """
    mime_type = "image/tiff"
    expected_markers = ["page 1", "page 2", "page 3"]

    parser = RasterisedDocumentParser(None)
    sample_path = os.path.join(
        self.SAMPLE_FILES,
        "multi-page-images-alpha.tiff",
    )
    parser.parse(sample_path, mime_type)

    # Parsing must leave a real file behind at the archive path.
    archive_exists = os.path.isfile(parser.archive_path)
    self.assertTrue(archive_exists)

    # Lower-case the text so it lines up with the lower-case page markers.
    extracted_text = parser.get_text().lower()
    self.assertContainsStrings(extracted_text, expected_markers)
|
||||
|
||||
def test_multi_page_tiff_alpha_srgb(self):
    """
    GIVEN:
        - A multi-page TIFF sample image
        - The image includes an alpha channel
        - The image uses the sRGB colorspace
    WHEN:
        - The image is parsed with the "image/tiff" MIME type
    THEN:
        - The parser's archive path points at an existing file
        - The extracted text mentions every page ("page 1" .. "page 3")
    """
    mime_type = "image/tiff"
    expected_markers = ["page 1", "page 2", "page 3"]

    parser = RasterisedDocumentParser(None)
    sample_path = os.path.join(
        self.SAMPLE_FILES,
        "multi-page-images-alpha-rgb.tiff",
    )
    parser.parse(sample_path, mime_type)

    # Parsing must leave a real file behind at the archive path.
    archive_exists = os.path.isfile(parser.archive_path)
    self.assertTrue(archive_exists)

    # Lower-case the text so it lines up with the lower-case page markers.
    extracted_text = parser.get_text().lower()
    self.assertContainsStrings(extracted_text, expected_markers)
|
||||
|
||||
def test_ocrmypdf_parameters(self):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
params = parser.construct_ocrmypdf_parameters(
|
||||
|
Loading…
x
Reference in New Issue
Block a user