mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
a couple fixes and more supported image files
This commit is contained in:
parent
5e1543bad5
commit
e3ce573fbb
@ -268,8 +268,9 @@ def update_filename_and_move_files(sender, instance, **kwargs):
|
||||
logging.getLogger(__name__).debug(
|
||||
f"Moved file {old_source_path} to {new_source_path}.")
|
||||
|
||||
logging.getLogger(__name__).debug(
|
||||
f"Moved file {old_archive_path} to {new_archive_path}.")
|
||||
if instance.archive_checksum:
|
||||
logging.getLogger(__name__).debug(
|
||||
f"Moved file {old_archive_path} to {new_archive_path}.")
|
||||
|
||||
except OSError as e:
|
||||
instance.filename = old_filename
|
||||
|
@ -65,7 +65,10 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
def is_image(self, mime_type):
|
||||
return mime_type in [
|
||||
"image/png",
|
||||
"image/jpeg"
|
||||
"image/jpeg",
|
||||
"image/tiff",
|
||||
"image/bmp",
|
||||
"image/gif",
|
||||
]
|
||||
|
||||
def get_dpi(self, image):
|
||||
|
@ -8,6 +8,9 @@ def tesseract_consumer_declaration(sender, **kwargs):
|
||||
"mime_types": {
|
||||
"application/pdf": ".pdf",
|
||||
"image/jpeg": ".jpg",
|
||||
"image/png": ".png"
|
||||
"image/png": ".png",
|
||||
"image/tiff": ".tif",
|
||||
"image/gif": ".gif",
|
||||
"image/bmp": ".bmp",
|
||||
}
|
||||
}
|
||||
|
BIN
src/paperless_tesseract/tests/samples/simple.bmp
Normal file
BIN
src/paperless_tesseract/tests/samples/simple.bmp
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.7 MiB |
BIN
src/paperless_tesseract/tests/samples/simple.gif
Normal file
BIN
src/paperless_tesseract/tests/samples/simple.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 18 KiB |
BIN
src/paperless_tesseract/tests/samples/simple.jpg
Normal file
BIN
src/paperless_tesseract/tests/samples/simple.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 19 KiB |
BIN
src/paperless_tesseract/tests/samples/simple.tif
Normal file
BIN
src/paperless_tesseract/tests/samples/simple.tif
Normal file
Binary file not shown.
@ -247,3 +247,33 @@ class TestParser(DirectoriesMixin, TestCase):
|
||||
parser.parse(os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf")
|
||||
self.assertTrue(os.path.join(parser.archive_path))
|
||||
self.assertContainsStrings(parser.get_text().lower(), ["page 1", "page 2", "page 3"])
|
||||
|
||||
|
||||
class TestParserFileTypes(DirectoriesMixin, TestCase):
|
||||
|
||||
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
|
||||
|
||||
def test_bmp(self):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
parser.parse(os.path.join(self.SAMPLE_FILES, "simple.bmp"), "image/bmp")
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertTrue("this is a test document" in parser.get_text().lower())
|
||||
|
||||
def test_jpg(self):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
parser.parse(os.path.join(self.SAMPLE_FILES, "simple.jpg"), "image/jpeg")
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertTrue("this is a test document" in parser.get_text().lower())
|
||||
|
||||
@override_settings(OCR_IMAGE_DPI=200)
|
||||
def test_gif(self):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
parser.parse(os.path.join(self.SAMPLE_FILES, "simple.gif"), "image/gif")
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertTrue("this is a test document" in parser.get_text().lower())
|
||||
|
||||
def test_tiff(self):
|
||||
parser = RasterisedDocumentParser(None)
|
||||
parser.parse(os.path.join(self.SAMPLE_FILES, "simple.tif"), "image/tiff")
|
||||
self.assertTrue(os.path.isfile(parser.archive_path))
|
||||
self.assertTrue("this is a test document" in parser.get_text().lower())
|
||||
|
Loading…
x
Reference in New Issue
Block a user