mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-30 18:27:45 -05:00
mime type handling
This commit is contained in:
@@ -1,5 +1,3 @@
|
||||
import re
|
||||
|
||||
from .parsers import RasterisedDocumentParser
|
||||
|
||||
|
||||
@@ -7,12 +5,9 @@ def tesseract_consumer_declaration(sender, **kwargs):
|
||||
return {
|
||||
"parser": RasterisedDocumentParser,
|
||||
"weight": 0,
|
||||
"test": tesseract_consumer_test
|
||||
"mime_types": [
|
||||
"application/pdf",
|
||||
"image/jpeg",
|
||||
"image/png"
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
# File names (already lower-cased by the caller below) that end in one of the
# extensions listed in the pattern.
MATCHING_FILES = re.compile(r"^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")


def tesseract_consumer_test(doc):
    """Return True if the file name *doc* has an extension in MATCHING_FILES.

    The name is lower-cased before matching, so the check is
    case-insensitive ("scan.PDF" and "scan.pdf" are treated alike).

    :param doc: file name or path, as a string.
    :return: ``True`` when the whole name matches ``MATCHING_FILES``,
        ``False`` otherwise (including the empty string and names without
        an extension).
    """
    # fullmatch() rather than match(): with match(), the trailing "$" also
    # succeeds just before a final newline, so "doc.pdf\n" would be accepted.
    # Comparing against None gives callers a real bool instead of a Match.
    return MATCHING_FILES.fullmatch(doc.lower()) is not None
|
||||
|
@@ -1,36 +0,0 @@
|
||||
from django.test import TestCase
|
||||
|
||||
from paperless_tesseract.signals import tesseract_consumer_test
|
||||
|
||||
|
||||
class SignalsTestCase(TestCase):
    """Checks which file names ``tesseract_consumer_test`` accepts."""

    def test_test_handles_various_file_names_true(self):
        """Every listed extension is accepted for every prefix and casing."""

        prefixes = (
            "doc", "My Document", "Μυ Γρεεκ Δοψθμεντ", "Doc -with - tags",
            "A document with a . in it", "Doc with -- in it"
        )
        suffixes = (
            "pdf", "jpg", "jpeg", "gif", "png", "tiff", "tif", "pnm", "bmp",
            "PDF", "JPG", "JPEG", "GIF", "PNG", "TIFF", "TIF", "PNM", "BMP",
            "pDf", "jPg", "jpEg", "gIf", "pNg", "tIff", "tIf", "pNm", "bMp",
        )

        for prefix in prefixes:
            for suffix in suffixes:
                name = "{}.{}".format(prefix, suffix)
                # subTest names the failing file and lets the remaining
                # combinations run instead of stopping at the first failure.
                with self.subTest(name=name):
                    self.assertTrue(tesseract_consumer_test(name))

    def test_test_handles_various_file_names_false(self):
        """Unlisted, empty, and missing extensions are rejected."""

        prefixes = ("doc",)
        suffixes = ("txt", "markdown", "",)

        for prefix in prefixes:
            for suffix in suffixes:
                name = "{}.{}".format(prefix, suffix)
                with self.subTest(name=name):
                    self.assertFalse(tesseract_consumer_test(name))

        # Names with no dot at all, including the empty string.
        for name in ("", "doc"):
            with self.subTest(name=name):
                self.assertFalse(tesseract_consumer_test(name))
|
Reference in New Issue
Block a user