diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index 64c3030c7..9e8dbf350 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -108,6 +108,7 @@ class RasterisedDocumentParser(DocumentParser): "image/bmp", "image/gif", "image/webp", + "image/heic", ] def has_alpha(self, image) -> bool: diff --git a/src/paperless_tesseract/signals.py b/src/paperless_tesseract/signals.py index 7d6f6902f..e4d8449ed 100644 --- a/src/paperless_tesseract/signals.py +++ b/src/paperless_tesseract/signals.py @@ -16,5 +16,6 @@ def tesseract_consumer_declaration(sender, **kwargs): "image/gif": ".gif", "image/bmp": ".bmp", "image/webp": ".webp", + "image/heic": ".heic", }, } diff --git a/src/paperless_tesseract/tests/samples/simple.heic b/src/paperless_tesseract/tests/samples/simple.heic new file mode 100644 index 000000000..6b9a95e67 Binary files /dev/null and b/src/paperless_tesseract/tests/samples/simple.heic differ diff --git a/src/paperless_tesseract/tests/test_parser.py b/src/paperless_tesseract/tests/test_parser.py index f7490fbbf..514b7163f 100644 --- a/src/paperless_tesseract/tests/test_parser.py +++ b/src/paperless_tesseract/tests/test_parser.py @@ -880,6 +880,12 @@ class TestParserFileTypes(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.assertIsFile(parser.archive_path) self.assertIn("this is a test document", parser.get_text().lower()) + def test_heic(self): + parser = RasterisedDocumentParser(None) + parser.parse(os.path.join(self.SAMPLE_FILES, "simple.heic"), "image/heic") + self.assertIsFile(parser.archive_path) + self.assertIn("pizza", parser.get_text().lower()) + @override_settings(OCR_IMAGE_DPI=200) def test_gif(self): parser = RasterisedDocumentParser(None)