FileType does not care about the extension anymore.

This commit is contained in:
Jonas Winkler
2020-11-20 16:18:59 +01:00
parent f976a0b4ba
commit fc0ba2098a
4 changed files with 118 additions and 164 deletions

View File

@@ -15,57 +15,42 @@ from ..parsers import DocumentParser, ParseError
class TestAttributes(TestCase):
TAGS = ("tag1", "tag2", "tag3")
EXTENSIONS = (
"pdf", "png", "jpg", "jpeg", "gif", "tiff", "tif",
"PDF", "PNG", "JPG", "JPEG", "GIF", "TIFF", "TIF",
"PdF", "PnG", "JpG", "JPeG", "GiF", "TiFf", "TiF",
)
def _test_guess_attributes_from_name(self, path, sender, title, tags):
def _test_guess_attributes_from_name(self, filename, sender, title, tags):
file_info = FileInfo.from_filename(filename)
for extension in self.EXTENSIONS:
if sender:
self.assertEqual(file_info.correspondent.name, sender, filename)
else:
self.assertIsNone(file_info.correspondent, filename)
f = path.format(extension)
file_info = FileInfo.from_path(f)
self.assertEqual(file_info.title, title, filename)
if sender:
self.assertEqual(file_info.correspondent.name, sender, f)
else:
self.assertIsNone(file_info.correspondent, f)
self.assertEqual(file_info.title, title, f)
self.assertEqual(tuple([t.slug for t in file_info.tags]), tags, f)
if extension.lower() == "jpeg":
self.assertEqual(file_info.extension, "jpg", f)
elif extension.lower() == "tif":
self.assertEqual(file_info.extension, "tiff", f)
else:
self.assertEqual(file_info.extension, extension.lower(), f)
self.assertEqual(tuple([t.slug for t in file_info.tags]), tags, filename)
def test_guess_attributes_from_name0(self):
self._test_guess_attributes_from_name(
"/path/to/Sender - Title.{}", "Sender", "Title", ())
"Sender - Title.pdf", "Sender", "Title", ())
def test_guess_attributes_from_name1(self):
self._test_guess_attributes_from_name(
"/path/to/Spaced Sender - Title.{}", "Spaced Sender", "Title", ())
"Spaced Sender - Title.pdf", "Spaced Sender", "Title", ())
def test_guess_attributes_from_name2(self):
self._test_guess_attributes_from_name(
"/path/to/Sender - Spaced Title.{}", "Sender", "Spaced Title", ())
"Sender - Spaced Title.pdf", "Sender", "Spaced Title", ())
def test_guess_attributes_from_name3(self):
self._test_guess_attributes_from_name(
"/path/to/Dashed-Sender - Title.{}", "Dashed-Sender", "Title", ())
"Dashed-Sender - Title.pdf", "Dashed-Sender", "Title", ())
def test_guess_attributes_from_name4(self):
self._test_guess_attributes_from_name(
"/path/to/Sender - Dashed-Title.{}", "Sender", "Dashed-Title", ())
"Sender - Dashed-Title.pdf", "Sender", "Dashed-Title", ())
def test_guess_attributes_from_name5(self):
self._test_guess_attributes_from_name(
"/path/to/Sender - Title - tag1,tag2,tag3.{}",
"Sender - Title - tag1,tag2,tag3.pdf",
"Sender",
"Title",
self.TAGS
@@ -73,7 +58,7 @@ class TestAttributes(TestCase):
def test_guess_attributes_from_name6(self):
self._test_guess_attributes_from_name(
"/path/to/Spaced Sender - Title - tag1,tag2,tag3.{}",
"Spaced Sender - Title - tag1,tag2,tag3.pdf",
"Spaced Sender",
"Title",
self.TAGS
@@ -81,7 +66,7 @@ class TestAttributes(TestCase):
def test_guess_attributes_from_name7(self):
self._test_guess_attributes_from_name(
"/path/to/Sender - Spaced Title - tag1,tag2,tag3.{}",
"Sender - Spaced Title - tag1,tag2,tag3.pdf",
"Sender",
"Spaced Title",
self.TAGS
@@ -89,7 +74,7 @@ class TestAttributes(TestCase):
def test_guess_attributes_from_name8(self):
self._test_guess_attributes_from_name(
"/path/to/Dashed-Sender - Title - tag1,tag2,tag3.{}",
"Dashed-Sender - Title - tag1,tag2,tag3.pdf",
"Dashed-Sender",
"Title",
self.TAGS
@@ -97,7 +82,7 @@ class TestAttributes(TestCase):
def test_guess_attributes_from_name9(self):
self._test_guess_attributes_from_name(
"/path/to/Sender - Dashed-Title - tag1,tag2,tag3.{}",
"Sender - Dashed-Title - tag1,tag2,tag3.pdf",
"Sender",
"Dashed-Title",
self.TAGS
@@ -105,7 +90,7 @@ class TestAttributes(TestCase):
def test_guess_attributes_from_name10(self):
self._test_guess_attributes_from_name(
"/path/to/Σενδερ - Τιτλε - tag1,tag2,tag3.{}",
"Σενδερ - Τιτλε - tag1,tag2,tag3.pdf",
"Σενδερ",
"Τιτλε",
self.TAGS
@@ -113,7 +98,7 @@ class TestAttributes(TestCase):
def test_guess_attributes_from_name_when_correspondent_empty(self):
self._test_guess_attributes_from_name(
'/path/to/ - weird empty correspondent but should not break.{}',
' - weird empty correspondent but should not break.pdf',
None,
'weird empty correspondent but should not break',
()
@@ -121,7 +106,7 @@ class TestAttributes(TestCase):
def test_guess_attributes_from_name_when_title_starts_with_dash(self):
self._test_guess_attributes_from_name(
'/path/to/- weird but should not break.{}',
'- weird but should not break.pdf',
None,
'- weird but should not break',
()
@@ -129,7 +114,7 @@ class TestAttributes(TestCase):
def test_guess_attributes_from_name_when_title_ends_with_dash(self):
self._test_guess_attributes_from_name(
'/path/to/weird but should not break -.{}',
'weird but should not break -.pdf',
None,
'weird but should not break -',
()
@@ -137,7 +122,7 @@ class TestAttributes(TestCase):
def test_guess_attributes_from_name_when_title_is_empty(self):
self._test_guess_attributes_from_name(
'/path/to/weird correspondent but should not break - .{}',
'weird correspondent but should not break - .pdf',
'weird correspondent but should not break',
'',
()
@@ -149,11 +134,11 @@ class TestAttributes(TestCase):
:return:
"""
path = "Title - Correspondent - tAg1,TAG2.pdf"
self.assertEqual(len(FileInfo.from_path(path).tags), 2)
filename = "Title - Correspondent - tAg1,TAG2.pdf"
self.assertEqual(len(FileInfo.from_filename(filename).tags), 2)
path = "Title - Correspondent - tag1,tag2.pdf"
self.assertEqual(len(FileInfo.from_path(path).tags), 2)
self.assertEqual(len(FileInfo.from_filename(filename).tags), 2)
self.assertEqual(Tag.objects.all().count(), 2)
@@ -173,13 +158,12 @@ class TestFieldPermutations(TestCase):
]
valid_titles = ["title", "Title w Spaces", "Title a-dash", "Τίτλος", ""]
valid_tags = ["tag", "tig,tag", "tag1,tag2,tag-3"]
valid_extensions = ["pdf", "png", "jpg", "jpeg", "gif"]
def _test_guessed_attributes(self, filename, created=None,
correspondent=None, title=None,
extension=None, tags=None):
tags=None):
info = FileInfo.from_path(filename)
info = FileInfo.from_filename(filename)
# Created
if created is None:
@@ -207,68 +191,56 @@ class TestFieldPermutations(TestCase):
filename
)
# Extension
if extension == 'jpeg':
extension = 'jpg'
self.assertEqual(info.extension, extension, filename)
def test_just_title(self):
template = '/path/to/{title}.{extension}'
template = '{title}.pdf'
for title in self.valid_titles:
for extension in self.valid_extensions:
spec = dict(title=title, extension=extension)
spec = dict(title=title)
filename = template.format(**spec)
self._test_guessed_attributes(filename, **spec)
def test_title_and_correspondent(self):
template = '{correspondent} - {title}.pdf'
for correspondent in self.valid_correspondents:
for title in self.valid_titles:
spec = dict(correspondent=correspondent, title=title)
filename = template.format(**spec)
self._test_guessed_attributes(filename, **spec)
def test_title_and_correspondent(self):
template = '/path/to/{correspondent} - {title}.{extension}'
for correspondent in self.valid_correspondents:
for title in self.valid_titles:
for extension in self.valid_extensions:
spec = dict(correspondent=correspondent, title=title,
extension=extension)
filename = template.format(**spec)
self._test_guessed_attributes(filename, **spec)
def test_title_and_correspondent_and_tags(self):
template = '/path/to/{correspondent} - {title} - {tags}.{extension}'
template = '{correspondent} - {title} - {tags}.pdf'
for correspondent in self.valid_correspondents:
for title in self.valid_titles:
for tags in self.valid_tags:
for extension in self.valid_extensions:
spec = dict(correspondent=correspondent, title=title,
tags=tags, extension=extension)
filename = template.format(**spec)
self._test_guessed_attributes(filename, **spec)
spec = dict(correspondent=correspondent, title=title,
tags=tags)
filename = template.format(**spec)
self._test_guessed_attributes(filename, **spec)
def test_created_and_correspondent_and_title_and_tags(self):
template = (
"/path/to/{created} - "
"{created} - "
"{correspondent} - "
"{title} - "
"{tags}"
".{extension}"
"{tags}.pdf"
)
for created in self.valid_dates:
for correspondent in self.valid_correspondents:
for title in self.valid_titles:
for tags in self.valid_tags:
for extension in self.valid_extensions:
spec = {
"created": created,
"correspondent": correspondent,
"title": title,
"tags": tags,
"extension": extension
}
self._test_guessed_attributes(
template.format(**spec), **spec)
spec = {
"created": created,
"correspondent": correspondent,
"title": title,
"tags": tags,
}
self._test_guessed_attributes(
template.format(**spec), **spec)
def test_created_and_correspondent_and_title(self):
template = "/path/to/{created} - {correspondent} - {title}.{extension}"
template = "{created} - {correspondent} - {title}.pdf"
for created in self.valid_dates:
for correspondent in self.valid_correspondents:
@@ -279,56 +251,50 @@ class TestFieldPermutations(TestCase):
if title.lower() == title:
continue
for extension in self.valid_extensions:
spec = {
"created": created,
"correspondent": correspondent,
"title": title,
"extension": extension
}
self._test_guessed_attributes(
template.format(**spec), **spec)
def test_created_and_title(self):
template = "/path/to/{created} - {title}.{extension}"
for created in self.valid_dates:
for title in self.valid_titles:
for extension in self.valid_extensions:
spec = {
"created": created,
"title": title,
"extension": extension
"correspondent": correspondent,
"title": title
}
self._test_guessed_attributes(
template.format(**spec), **spec)
def test_created_and_title(self):
template = "{created} - {title}.pdf"
for created in self.valid_dates:
for title in self.valid_titles:
spec = {
"created": created,
"title": title
}
self._test_guessed_attributes(
template.format(**spec), **spec)
def test_created_and_title_and_tags(self):
template = "/path/to/{created} - {title} - {tags}.{extension}"
template = "{created} - {title} - {tags}.pdf"
for created in self.valid_dates:
for title in self.valid_titles:
for tags in self.valid_tags:
for extension in self.valid_extensions:
spec = {
"created": created,
"title": title,
"tags": tags,
"extension": extension
}
self._test_guessed_attributes(
template.format(**spec), **spec)
spec = {
"created": created,
"title": title,
"tags": tags
}
self._test_guessed_attributes(
template.format(**spec), **spec)
def test_invalid_date_format(self):
info = FileInfo.from_path("/path/to/06112017Z - title.pdf")
info = FileInfo.from_filename("06112017Z - title.pdf")
self.assertEqual(info.title, "title")
self.assertIsNone(info.created)
def test_filename_parse_transforms(self):
path = "/some/path/to/tag1,tag2_20190908_180610_0001.pdf"
filename = "tag1,tag2_20190908_180610_0001.pdf"
all_patt = re.compile("^.*$")
none_patt = re.compile("$a")
exact_patt = re.compile("^([a-z0-9,]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.")
@@ -336,50 +302,44 @@ class TestFieldPermutations(TestCase):
repl2 = "\\2Z - " + repl1 # creation date + repl1
# No transformations configured (= default)
info = FileInfo.from_path(path)
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "tag1,tag2_20190908_180610_0001")
self.assertEqual(info.extension, "pdf")
self.assertEqual(info.tags, ())
self.assertIsNone(info.created)
# Pattern doesn't match (filename unaltered)
with self.settings(
FILENAME_PARSE_TRANSFORMS=[(none_patt, "none.gif")]):
info = FileInfo.from_path(path)
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "tag1,tag2_20190908_180610_0001")
self.assertEqual(info.extension, "pdf")
# Simple transformation (match all)
with self.settings(
FILENAME_PARSE_TRANSFORMS=[(all_patt, "all.gif")]):
info = FileInfo.from_path(path)
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "all")
self.assertEqual(info.extension, "gif")
# Multiple transformations configured (first pattern matches)
with self.settings(
FILENAME_PARSE_TRANSFORMS=[
(all_patt, "all.gif"),
(all_patt, "anotherall.gif")]):
info = FileInfo.from_path(path)
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "all")
self.assertEqual(info.extension, "gif")
# Multiple transformations configured (second pattern matches)
with self.settings(
FILENAME_PARSE_TRANSFORMS=[
(none_patt, "none.gif"),
(all_patt, "anotherall.gif")]):
info = FileInfo.from_path(path)
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "anotherall")
self.assertEqual(info.extension, "gif")
# Complex transformation without date in replacement string
with self.settings(
FILENAME_PARSE_TRANSFORMS=[(exact_patt, repl1)]):
info = FileInfo.from_path(path)
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "0001")
self.assertEqual(info.extension, "pdf")
self.assertEqual(len(info.tags), 2)
self.assertEqual(info.tags[0].slug, "tag1")
self.assertEqual(info.tags[1].slug, "tag2")
@@ -392,9 +352,8 @@ class TestFieldPermutations(TestCase):
(exact_patt, repl2), # <-- matches
(exact_patt, repl1),
(all_patt, "all.gif")]):
info = FileInfo.from_path(path)
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "0001")
self.assertEqual(info.extension, "pdf")
self.assertEqual(len(info.tags), 2)
self.assertEqual(info.tags[0].slug, "tag1")
self.assertEqual(info.tags[1].slug, "tag2")