mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
FileType does not care about the extension anymore.
This commit is contained in:
parent
41650f20f4
commit
3d5b66c2b7
@ -197,7 +197,7 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
# If someone gave us the original filename, use it instead of doc.
|
# If someone gave us the original filename, use it instead of doc.
|
||||||
|
|
||||||
file_info = FileInfo.from_path(self.filename)
|
file_info = FileInfo.from_filename(self.filename)
|
||||||
|
|
||||||
stats = os.stat(self.path)
|
stats = os.stat(self.path)
|
||||||
|
|
||||||
|
@ -34,8 +34,7 @@ class UploadForm(forms.Form):
|
|||||||
|
|
||||||
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
||||||
|
|
||||||
# TODO: don't just append pdf. This is here for that weird regex check at the start of the consumer.
|
with tempfile.NamedTemporaryFile(prefix="paperless-upload-", dir=settings.SCRATCH_DIR, delete=False) as f:
|
||||||
with tempfile.NamedTemporaryFile(prefix="paperless-upload-", suffix=".pdf", dir=settings.SCRATCH_DIR, delete=False) as f:
|
|
||||||
|
|
||||||
f.write(document)
|
f.write(document)
|
||||||
os.utime(f.name, times=(t, t))
|
os.utime(f.name, times=(t, t))
|
||||||
|
@ -269,7 +269,7 @@ class Log(models.Model):
|
|||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.message
|
return self.message
|
||||||
|
|
||||||
|
# TODO: why is this in the models file?
|
||||||
class FileInfo:
|
class FileInfo:
|
||||||
|
|
||||||
# This epic regex *almost* worked for our needs, so I'm keeping it here for
|
# This epic regex *almost* worked for our needs, so I'm keeping it here for
|
||||||
@ -284,53 +284,44 @@ class FileInfo:
|
|||||||
non_separated_word=r"([\w,. ]|([^\s]-))"
|
non_separated_word=r"([\w,. ]|([^\s]-))"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
# TODO: what is this used for
|
|
||||||
formats = "pdf|jpe?g|png|gif|tiff?|te?xt|md|csv"
|
|
||||||
REGEXES = OrderedDict([
|
REGEXES = OrderedDict([
|
||||||
("created-correspondent-title-tags", re.compile(
|
("created-correspondent-title-tags", re.compile(
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||||
r"(?P<correspondent>.*) - "
|
r"(?P<correspondent>.*) - "
|
||||||
r"(?P<title>.*) - "
|
r"(?P<title>.*) - "
|
||||||
r"(?P<tags>[a-z0-9\-,]*)"
|
r"(?P<tags>[a-z0-9\-,]*)$",
|
||||||
r"\.(?P<extension>{})$".format(formats),
|
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("created-title-tags", re.compile(
|
("created-title-tags", re.compile(
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||||
r"(?P<title>.*) - "
|
r"(?P<title>.*) - "
|
||||||
r"(?P<tags>[a-z0-9\-,]*)"
|
r"(?P<tags>[a-z0-9\-,]*)$",
|
||||||
r"\.(?P<extension>{})$".format(formats),
|
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("created-correspondent-title", re.compile(
|
("created-correspondent-title", re.compile(
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||||
r"(?P<correspondent>.*) - "
|
r"(?P<correspondent>.*) - "
|
||||||
r"(?P<title>.*)"
|
r"(?P<title>.*)$",
|
||||||
r"\.(?P<extension>{})$".format(formats),
|
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("created-title", re.compile(
|
("created-title", re.compile(
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||||
r"(?P<title>.*)"
|
r"(?P<title>.*)$",
|
||||||
r"\.(?P<extension>{})$".format(formats),
|
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("correspondent-title-tags", re.compile(
|
("correspondent-title-tags", re.compile(
|
||||||
r"(?P<correspondent>.*) - "
|
r"(?P<correspondent>.*) - "
|
||||||
r"(?P<title>.*) - "
|
r"(?P<title>.*) - "
|
||||||
r"(?P<tags>[a-z0-9\-,]*)"
|
r"(?P<tags>[a-z0-9\-,]*)$",
|
||||||
r"\.(?P<extension>{})$".format(formats),
|
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("correspondent-title", re.compile(
|
("correspondent-title", re.compile(
|
||||||
r"(?P<correspondent>.*) - "
|
r"(?P<correspondent>.*) - "
|
||||||
r"(?P<title>.*)?"
|
r"(?P<title>.*)?$",
|
||||||
r"\.(?P<extension>{})$".format(formats),
|
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
)),
|
)),
|
||||||
("title", re.compile(
|
("title", re.compile(
|
||||||
r"(?P<title>.*)"
|
r"(?P<title>.*)$",
|
||||||
r"\.(?P<extension>{})$".format(formats),
|
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE
|
||||||
))
|
))
|
||||||
])
|
])
|
||||||
@ -373,15 +364,6 @@ class FileInfo:
|
|||||||
)[0])
|
)[0])
|
||||||
return tuple(r)
|
return tuple(r)
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _get_extension(cls, extension):
|
|
||||||
r = extension.lower()
|
|
||||||
if r == "jpeg":
|
|
||||||
return "jpg"
|
|
||||||
if r == "tif":
|
|
||||||
return "tiff"
|
|
||||||
return r
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _mangle_property(cls, properties, name):
|
def _mangle_property(cls, properties, name):
|
||||||
if name in properties:
|
if name in properties:
|
||||||
@ -390,18 +372,16 @@ class FileInfo:
|
|||||||
)
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_path(cls, path):
|
def from_filename(cls, filename):
|
||||||
"""
|
"""
|
||||||
We use a crude naming convention to make handling the correspondent,
|
We use a crude naming convention to make handling the correspondent,
|
||||||
title, and tags easier:
|
title, and tags easier:
|
||||||
"<date> - <correspondent> - <title> - <tags>.<suffix>"
|
"<date> - <correspondent> - <title> - <tags>"
|
||||||
"<correspondent> - <title> - <tags>.<suffix>"
|
"<correspondent> - <title> - <tags>"
|
||||||
"<correspondent> - <title>.<suffix>"
|
"<correspondent> - <title>"
|
||||||
"<title>.<suffix>"
|
"<title>"
|
||||||
"""
|
"""
|
||||||
|
|
||||||
filename = os.path.basename(path)
|
|
||||||
|
|
||||||
# Mutate filename in-place before parsing its components
|
# Mutate filename in-place before parsing its components
|
||||||
# by applying at most one of the configured transformations.
|
# by applying at most one of the configured transformations.
|
||||||
for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS:
|
for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS:
|
||||||
@ -409,6 +389,23 @@ class FileInfo:
|
|||||||
if count:
|
if count:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# do this after the transforms so that the transforms can do whatever
|
||||||
|
# with the file extension.
|
||||||
|
filename_no_ext = os.path.splitext(filename)[0]
|
||||||
|
|
||||||
|
if filename_no_ext == filename and filename.startswith("."):
|
||||||
|
# This is a very special case where there is no text before the
|
||||||
|
# file type.
|
||||||
|
# TODO: this should be handled better. The ext is not removed
|
||||||
|
# because usually, files like '.pdf' are just hidden files
|
||||||
|
# with the name pdf, but in our case, it's more likely that
|
||||||
|
# there's just no name to begin with.
|
||||||
|
filename = ""
|
||||||
|
# This isn't too bad either, since we'll just not match anything
|
||||||
|
# and return an empty title. TODO: actually, this is kinda bad.
|
||||||
|
else:
|
||||||
|
filename = filename_no_ext
|
||||||
|
|
||||||
# Parse filename components.
|
# Parse filename components.
|
||||||
for regex in cls.REGEXES.values():
|
for regex in cls.REGEXES.values():
|
||||||
m = regex.match(filename)
|
m = regex.match(filename)
|
||||||
@ -418,5 +415,4 @@ class FileInfo:
|
|||||||
cls._mangle_property(properties, "correspondent")
|
cls._mangle_property(properties, "correspondent")
|
||||||
cls._mangle_property(properties, "title")
|
cls._mangle_property(properties, "title")
|
||||||
cls._mangle_property(properties, "tags")
|
cls._mangle_property(properties, "tags")
|
||||||
cls._mangle_property(properties, "extension")
|
|
||||||
return cls(**properties)
|
return cls(**properties)
|
||||||
|
@ -15,57 +15,42 @@ from ..parsers import DocumentParser, ParseError
|
|||||||
class TestAttributes(TestCase):
|
class TestAttributes(TestCase):
|
||||||
|
|
||||||
TAGS = ("tag1", "tag2", "tag3")
|
TAGS = ("tag1", "tag2", "tag3")
|
||||||
EXTENSIONS = (
|
|
||||||
"pdf", "png", "jpg", "jpeg", "gif", "tiff", "tif",
|
|
||||||
"PDF", "PNG", "JPG", "JPEG", "GIF", "TIFF", "TIF",
|
|
||||||
"PdF", "PnG", "JpG", "JPeG", "GiF", "TiFf", "TiF",
|
|
||||||
)
|
|
||||||
|
|
||||||
def _test_guess_attributes_from_name(self, path, sender, title, tags):
|
def _test_guess_attributes_from_name(self, filename, sender, title, tags):
|
||||||
|
file_info = FileInfo.from_filename(filename)
|
||||||
|
|
||||||
for extension in self.EXTENSIONS:
|
if sender:
|
||||||
|
self.assertEqual(file_info.correspondent.name, sender, filename)
|
||||||
|
else:
|
||||||
|
self.assertIsNone(file_info.correspondent, filename)
|
||||||
|
|
||||||
f = path.format(extension)
|
self.assertEqual(file_info.title, title, filename)
|
||||||
file_info = FileInfo.from_path(f)
|
|
||||||
|
|
||||||
if sender:
|
self.assertEqual(tuple([t.slug for t in file_info.tags]), tags, filename)
|
||||||
self.assertEqual(file_info.correspondent.name, sender, f)
|
|
||||||
else:
|
|
||||||
self.assertIsNone(file_info.correspondent, f)
|
|
||||||
|
|
||||||
self.assertEqual(file_info.title, title, f)
|
|
||||||
|
|
||||||
self.assertEqual(tuple([t.slug for t in file_info.tags]), tags, f)
|
|
||||||
if extension.lower() == "jpeg":
|
|
||||||
self.assertEqual(file_info.extension, "jpg", f)
|
|
||||||
elif extension.lower() == "tif":
|
|
||||||
self.assertEqual(file_info.extension, "tiff", f)
|
|
||||||
else:
|
|
||||||
self.assertEqual(file_info.extension, extension.lower(), f)
|
|
||||||
|
|
||||||
def test_guess_attributes_from_name0(self):
|
def test_guess_attributes_from_name0(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
"/path/to/Sender - Title.{}", "Sender", "Title", ())
|
"Sender - Title.pdf", "Sender", "Title", ())
|
||||||
|
|
||||||
def test_guess_attributes_from_name1(self):
|
def test_guess_attributes_from_name1(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
"/path/to/Spaced Sender - Title.{}", "Spaced Sender", "Title", ())
|
"Spaced Sender - Title.pdf", "Spaced Sender", "Title", ())
|
||||||
|
|
||||||
def test_guess_attributes_from_name2(self):
|
def test_guess_attributes_from_name2(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
"/path/to/Sender - Spaced Title.{}", "Sender", "Spaced Title", ())
|
"Sender - Spaced Title.pdf", "Sender", "Spaced Title", ())
|
||||||
|
|
||||||
def test_guess_attributes_from_name3(self):
|
def test_guess_attributes_from_name3(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
"/path/to/Dashed-Sender - Title.{}", "Dashed-Sender", "Title", ())
|
"Dashed-Sender - Title.pdf", "Dashed-Sender", "Title", ())
|
||||||
|
|
||||||
def test_guess_attributes_from_name4(self):
|
def test_guess_attributes_from_name4(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
"/path/to/Sender - Dashed-Title.{}", "Sender", "Dashed-Title", ())
|
"Sender - Dashed-Title.pdf", "Sender", "Dashed-Title", ())
|
||||||
|
|
||||||
def test_guess_attributes_from_name5(self):
|
def test_guess_attributes_from_name5(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
"/path/to/Sender - Title - tag1,tag2,tag3.{}",
|
"Sender - Title - tag1,tag2,tag3.pdf",
|
||||||
"Sender",
|
"Sender",
|
||||||
"Title",
|
"Title",
|
||||||
self.TAGS
|
self.TAGS
|
||||||
@ -73,7 +58,7 @@ class TestAttributes(TestCase):
|
|||||||
|
|
||||||
def test_guess_attributes_from_name6(self):
|
def test_guess_attributes_from_name6(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
"/path/to/Spaced Sender - Title - tag1,tag2,tag3.{}",
|
"Spaced Sender - Title - tag1,tag2,tag3.pdf",
|
||||||
"Spaced Sender",
|
"Spaced Sender",
|
||||||
"Title",
|
"Title",
|
||||||
self.TAGS
|
self.TAGS
|
||||||
@ -81,7 +66,7 @@ class TestAttributes(TestCase):
|
|||||||
|
|
||||||
def test_guess_attributes_from_name7(self):
|
def test_guess_attributes_from_name7(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
"/path/to/Sender - Spaced Title - tag1,tag2,tag3.{}",
|
"Sender - Spaced Title - tag1,tag2,tag3.pdf",
|
||||||
"Sender",
|
"Sender",
|
||||||
"Spaced Title",
|
"Spaced Title",
|
||||||
self.TAGS
|
self.TAGS
|
||||||
@ -89,7 +74,7 @@ class TestAttributes(TestCase):
|
|||||||
|
|
||||||
def test_guess_attributes_from_name8(self):
|
def test_guess_attributes_from_name8(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
"/path/to/Dashed-Sender - Title - tag1,tag2,tag3.{}",
|
"Dashed-Sender - Title - tag1,tag2,tag3.pdf",
|
||||||
"Dashed-Sender",
|
"Dashed-Sender",
|
||||||
"Title",
|
"Title",
|
||||||
self.TAGS
|
self.TAGS
|
||||||
@ -97,7 +82,7 @@ class TestAttributes(TestCase):
|
|||||||
|
|
||||||
def test_guess_attributes_from_name9(self):
|
def test_guess_attributes_from_name9(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
"/path/to/Sender - Dashed-Title - tag1,tag2,tag3.{}",
|
"Sender - Dashed-Title - tag1,tag2,tag3.pdf",
|
||||||
"Sender",
|
"Sender",
|
||||||
"Dashed-Title",
|
"Dashed-Title",
|
||||||
self.TAGS
|
self.TAGS
|
||||||
@ -105,7 +90,7 @@ class TestAttributes(TestCase):
|
|||||||
|
|
||||||
def test_guess_attributes_from_name10(self):
|
def test_guess_attributes_from_name10(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
"/path/to/Σενδερ - Τιτλε - tag1,tag2,tag3.{}",
|
"Σενδερ - Τιτλε - tag1,tag2,tag3.pdf",
|
||||||
"Σενδερ",
|
"Σενδερ",
|
||||||
"Τιτλε",
|
"Τιτλε",
|
||||||
self.TAGS
|
self.TAGS
|
||||||
@ -113,7 +98,7 @@ class TestAttributes(TestCase):
|
|||||||
|
|
||||||
def test_guess_attributes_from_name_when_correspondent_empty(self):
|
def test_guess_attributes_from_name_when_correspondent_empty(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
'/path/to/ - weird empty correspondent but should not break.{}',
|
' - weird empty correspondent but should not break.pdf',
|
||||||
None,
|
None,
|
||||||
'weird empty correspondent but should not break',
|
'weird empty correspondent but should not break',
|
||||||
()
|
()
|
||||||
@ -121,7 +106,7 @@ class TestAttributes(TestCase):
|
|||||||
|
|
||||||
def test_guess_attributes_from_name_when_title_starts_with_dash(self):
|
def test_guess_attributes_from_name_when_title_starts_with_dash(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
'/path/to/- weird but should not break.{}',
|
'- weird but should not break.pdf',
|
||||||
None,
|
None,
|
||||||
'- weird but should not break',
|
'- weird but should not break',
|
||||||
()
|
()
|
||||||
@ -129,7 +114,7 @@ class TestAttributes(TestCase):
|
|||||||
|
|
||||||
def test_guess_attributes_from_name_when_title_ends_with_dash(self):
|
def test_guess_attributes_from_name_when_title_ends_with_dash(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
'/path/to/weird but should not break -.{}',
|
'weird but should not break -.pdf',
|
||||||
None,
|
None,
|
||||||
'weird but should not break -',
|
'weird but should not break -',
|
||||||
()
|
()
|
||||||
@ -137,7 +122,7 @@ class TestAttributes(TestCase):
|
|||||||
|
|
||||||
def test_guess_attributes_from_name_when_title_is_empty(self):
|
def test_guess_attributes_from_name_when_title_is_empty(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
'/path/to/weird correspondent but should not break - .{}',
|
'weird correspondent but should not break - .pdf',
|
||||||
'weird correspondent but should not break',
|
'weird correspondent but should not break',
|
||||||
'',
|
'',
|
||||||
()
|
()
|
||||||
@ -149,11 +134,11 @@ class TestAttributes(TestCase):
|
|||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
path = "Title - Correspondent - tAg1,TAG2.pdf"
|
filename = "Title - Correspondent - tAg1,TAG2.pdf"
|
||||||
self.assertEqual(len(FileInfo.from_path(path).tags), 2)
|
self.assertEqual(len(FileInfo.from_filename(filename).tags), 2)
|
||||||
|
|
||||||
path = "Title - Correspondent - tag1,tag2.pdf"
|
path = "Title - Correspondent - tag1,tag2.pdf"
|
||||||
self.assertEqual(len(FileInfo.from_path(path).tags), 2)
|
self.assertEqual(len(FileInfo.from_filename(filename).tags), 2)
|
||||||
|
|
||||||
self.assertEqual(Tag.objects.all().count(), 2)
|
self.assertEqual(Tag.objects.all().count(), 2)
|
||||||
|
|
||||||
@ -173,13 +158,12 @@ class TestFieldPermutations(TestCase):
|
|||||||
]
|
]
|
||||||
valid_titles = ["title", "Title w Spaces", "Title a-dash", "Τίτλος", ""]
|
valid_titles = ["title", "Title w Spaces", "Title a-dash", "Τίτλος", ""]
|
||||||
valid_tags = ["tag", "tig,tag", "tag1,tag2,tag-3"]
|
valid_tags = ["tag", "tig,tag", "tag1,tag2,tag-3"]
|
||||||
valid_extensions = ["pdf", "png", "jpg", "jpeg", "gif"]
|
|
||||||
|
|
||||||
def _test_guessed_attributes(self, filename, created=None,
|
def _test_guessed_attributes(self, filename, created=None,
|
||||||
correspondent=None, title=None,
|
correspondent=None, title=None,
|
||||||
extension=None, tags=None):
|
tags=None):
|
||||||
|
|
||||||
info = FileInfo.from_path(filename)
|
info = FileInfo.from_filename(filename)
|
||||||
|
|
||||||
# Created
|
# Created
|
||||||
if created is None:
|
if created is None:
|
||||||
@ -207,68 +191,56 @@ class TestFieldPermutations(TestCase):
|
|||||||
filename
|
filename
|
||||||
)
|
)
|
||||||
|
|
||||||
# Extension
|
|
||||||
if extension == 'jpeg':
|
|
||||||
extension = 'jpg'
|
|
||||||
self.assertEqual(info.extension, extension, filename)
|
|
||||||
|
|
||||||
def test_just_title(self):
|
def test_just_title(self):
|
||||||
template = '/path/to/{title}.{extension}'
|
template = '{title}.pdf'
|
||||||
for title in self.valid_titles:
|
for title in self.valid_titles:
|
||||||
for extension in self.valid_extensions:
|
spec = dict(title=title)
|
||||||
spec = dict(title=title, extension=extension)
|
filename = template.format(**spec)
|
||||||
|
self._test_guessed_attributes(filename, **spec)
|
||||||
|
|
||||||
|
def test_title_and_correspondent(self):
|
||||||
|
template = '{correspondent} - {title}.pdf'
|
||||||
|
for correspondent in self.valid_correspondents:
|
||||||
|
for title in self.valid_titles:
|
||||||
|
spec = dict(correspondent=correspondent, title=title)
|
||||||
filename = template.format(**spec)
|
filename = template.format(**spec)
|
||||||
self._test_guessed_attributes(filename, **spec)
|
self._test_guessed_attributes(filename, **spec)
|
||||||
|
|
||||||
def test_title_and_correspondent(self):
|
|
||||||
template = '/path/to/{correspondent} - {title}.{extension}'
|
|
||||||
for correspondent in self.valid_correspondents:
|
|
||||||
for title in self.valid_titles:
|
|
||||||
for extension in self.valid_extensions:
|
|
||||||
spec = dict(correspondent=correspondent, title=title,
|
|
||||||
extension=extension)
|
|
||||||
filename = template.format(**spec)
|
|
||||||
self._test_guessed_attributes(filename, **spec)
|
|
||||||
|
|
||||||
def test_title_and_correspondent_and_tags(self):
|
def test_title_and_correspondent_and_tags(self):
|
||||||
template = '/path/to/{correspondent} - {title} - {tags}.{extension}'
|
template = '{correspondent} - {title} - {tags}.pdf'
|
||||||
for correspondent in self.valid_correspondents:
|
for correspondent in self.valid_correspondents:
|
||||||
for title in self.valid_titles:
|
for title in self.valid_titles:
|
||||||
for tags in self.valid_tags:
|
for tags in self.valid_tags:
|
||||||
for extension in self.valid_extensions:
|
spec = dict(correspondent=correspondent, title=title,
|
||||||
spec = dict(correspondent=correspondent, title=title,
|
tags=tags)
|
||||||
tags=tags, extension=extension)
|
filename = template.format(**spec)
|
||||||
filename = template.format(**spec)
|
self._test_guessed_attributes(filename, **spec)
|
||||||
self._test_guessed_attributes(filename, **spec)
|
|
||||||
|
|
||||||
def test_created_and_correspondent_and_title_and_tags(self):
|
def test_created_and_correspondent_and_title_and_tags(self):
|
||||||
|
|
||||||
template = (
|
template = (
|
||||||
"/path/to/{created} - "
|
"{created} - "
|
||||||
"{correspondent} - "
|
"{correspondent} - "
|
||||||
"{title} - "
|
"{title} - "
|
||||||
"{tags}"
|
"{tags}.pdf"
|
||||||
".{extension}"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
for created in self.valid_dates:
|
for created in self.valid_dates:
|
||||||
for correspondent in self.valid_correspondents:
|
for correspondent in self.valid_correspondents:
|
||||||
for title in self.valid_titles:
|
for title in self.valid_titles:
|
||||||
for tags in self.valid_tags:
|
for tags in self.valid_tags:
|
||||||
for extension in self.valid_extensions:
|
spec = {
|
||||||
spec = {
|
"created": created,
|
||||||
"created": created,
|
"correspondent": correspondent,
|
||||||
"correspondent": correspondent,
|
"title": title,
|
||||||
"title": title,
|
"tags": tags,
|
||||||
"tags": tags,
|
}
|
||||||
"extension": extension
|
self._test_guessed_attributes(
|
||||||
}
|
template.format(**spec), **spec)
|
||||||
self._test_guessed_attributes(
|
|
||||||
template.format(**spec), **spec)
|
|
||||||
|
|
||||||
def test_created_and_correspondent_and_title(self):
|
def test_created_and_correspondent_and_title(self):
|
||||||
|
|
||||||
template = "/path/to/{created} - {correspondent} - {title}.{extension}"
|
template = "{created} - {correspondent} - {title}.pdf"
|
||||||
|
|
||||||
for created in self.valid_dates:
|
for created in self.valid_dates:
|
||||||
for correspondent in self.valid_correspondents:
|
for correspondent in self.valid_correspondents:
|
||||||
@ -279,56 +251,50 @@ class TestFieldPermutations(TestCase):
|
|||||||
if title.lower() == title:
|
if title.lower() == title:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for extension in self.valid_extensions:
|
|
||||||
spec = {
|
|
||||||
"created": created,
|
|
||||||
"correspondent": correspondent,
|
|
||||||
"title": title,
|
|
||||||
"extension": extension
|
|
||||||
}
|
|
||||||
self._test_guessed_attributes(
|
|
||||||
template.format(**spec), **spec)
|
|
||||||
|
|
||||||
def test_created_and_title(self):
|
|
||||||
|
|
||||||
template = "/path/to/{created} - {title}.{extension}"
|
|
||||||
|
|
||||||
for created in self.valid_dates:
|
|
||||||
for title in self.valid_titles:
|
|
||||||
for extension in self.valid_extensions:
|
|
||||||
spec = {
|
spec = {
|
||||||
"created": created,
|
"created": created,
|
||||||
"title": title,
|
"correspondent": correspondent,
|
||||||
"extension": extension
|
"title": title
|
||||||
}
|
}
|
||||||
self._test_guessed_attributes(
|
self._test_guessed_attributes(
|
||||||
template.format(**spec), **spec)
|
template.format(**spec), **spec)
|
||||||
|
|
||||||
|
def test_created_and_title(self):
|
||||||
|
|
||||||
|
template = "{created} - {title}.pdf"
|
||||||
|
|
||||||
|
for created in self.valid_dates:
|
||||||
|
for title in self.valid_titles:
|
||||||
|
spec = {
|
||||||
|
"created": created,
|
||||||
|
"title": title
|
||||||
|
}
|
||||||
|
self._test_guessed_attributes(
|
||||||
|
template.format(**spec), **spec)
|
||||||
|
|
||||||
def test_created_and_title_and_tags(self):
|
def test_created_and_title_and_tags(self):
|
||||||
|
|
||||||
template = "/path/to/{created} - {title} - {tags}.{extension}"
|
template = "{created} - {title} - {tags}.pdf"
|
||||||
|
|
||||||
for created in self.valid_dates:
|
for created in self.valid_dates:
|
||||||
for title in self.valid_titles:
|
for title in self.valid_titles:
|
||||||
for tags in self.valid_tags:
|
for tags in self.valid_tags:
|
||||||
for extension in self.valid_extensions:
|
spec = {
|
||||||
spec = {
|
"created": created,
|
||||||
"created": created,
|
"title": title,
|
||||||
"title": title,
|
"tags": tags
|
||||||
"tags": tags,
|
}
|
||||||
"extension": extension
|
self._test_guessed_attributes(
|
||||||
}
|
template.format(**spec), **spec)
|
||||||
self._test_guessed_attributes(
|
|
||||||
template.format(**spec), **spec)
|
|
||||||
|
|
||||||
def test_invalid_date_format(self):
|
def test_invalid_date_format(self):
|
||||||
info = FileInfo.from_path("/path/to/06112017Z - title.pdf")
|
info = FileInfo.from_filename("06112017Z - title.pdf")
|
||||||
self.assertEqual(info.title, "title")
|
self.assertEqual(info.title, "title")
|
||||||
self.assertIsNone(info.created)
|
self.assertIsNone(info.created)
|
||||||
|
|
||||||
def test_filename_parse_transforms(self):
|
def test_filename_parse_transforms(self):
|
||||||
|
|
||||||
path = "/some/path/to/tag1,tag2_20190908_180610_0001.pdf"
|
filename = "tag1,tag2_20190908_180610_0001.pdf"
|
||||||
all_patt = re.compile("^.*$")
|
all_patt = re.compile("^.*$")
|
||||||
none_patt = re.compile("$a")
|
none_patt = re.compile("$a")
|
||||||
exact_patt = re.compile("^([a-z0-9,]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.")
|
exact_patt = re.compile("^([a-z0-9,]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.")
|
||||||
@ -336,50 +302,44 @@ class TestFieldPermutations(TestCase):
|
|||||||
repl2 = "\\2Z - " + repl1 # creation date + repl1
|
repl2 = "\\2Z - " + repl1 # creation date + repl1
|
||||||
|
|
||||||
# No transformations configured (= default)
|
# No transformations configured (= default)
|
||||||
info = FileInfo.from_path(path)
|
info = FileInfo.from_filename(filename)
|
||||||
self.assertEqual(info.title, "tag1,tag2_20190908_180610_0001")
|
self.assertEqual(info.title, "tag1,tag2_20190908_180610_0001")
|
||||||
self.assertEqual(info.extension, "pdf")
|
|
||||||
self.assertEqual(info.tags, ())
|
self.assertEqual(info.tags, ())
|
||||||
self.assertIsNone(info.created)
|
self.assertIsNone(info.created)
|
||||||
|
|
||||||
# Pattern doesn't match (filename unaltered)
|
# Pattern doesn't match (filename unaltered)
|
||||||
with self.settings(
|
with self.settings(
|
||||||
FILENAME_PARSE_TRANSFORMS=[(none_patt, "none.gif")]):
|
FILENAME_PARSE_TRANSFORMS=[(none_patt, "none.gif")]):
|
||||||
info = FileInfo.from_path(path)
|
info = FileInfo.from_filename(filename)
|
||||||
self.assertEqual(info.title, "tag1,tag2_20190908_180610_0001")
|
self.assertEqual(info.title, "tag1,tag2_20190908_180610_0001")
|
||||||
self.assertEqual(info.extension, "pdf")
|
|
||||||
|
|
||||||
# Simple transformation (match all)
|
# Simple transformation (match all)
|
||||||
with self.settings(
|
with self.settings(
|
||||||
FILENAME_PARSE_TRANSFORMS=[(all_patt, "all.gif")]):
|
FILENAME_PARSE_TRANSFORMS=[(all_patt, "all.gif")]):
|
||||||
info = FileInfo.from_path(path)
|
info = FileInfo.from_filename(filename)
|
||||||
self.assertEqual(info.title, "all")
|
self.assertEqual(info.title, "all")
|
||||||
self.assertEqual(info.extension, "gif")
|
|
||||||
|
|
||||||
# Multiple transformations configured (first pattern matches)
|
# Multiple transformations configured (first pattern matches)
|
||||||
with self.settings(
|
with self.settings(
|
||||||
FILENAME_PARSE_TRANSFORMS=[
|
FILENAME_PARSE_TRANSFORMS=[
|
||||||
(all_patt, "all.gif"),
|
(all_patt, "all.gif"),
|
||||||
(all_patt, "anotherall.gif")]):
|
(all_patt, "anotherall.gif")]):
|
||||||
info = FileInfo.from_path(path)
|
info = FileInfo.from_filename(filename)
|
||||||
self.assertEqual(info.title, "all")
|
self.assertEqual(info.title, "all")
|
||||||
self.assertEqual(info.extension, "gif")
|
|
||||||
|
|
||||||
# Multiple transformations configured (second pattern matches)
|
# Multiple transformations configured (second pattern matches)
|
||||||
with self.settings(
|
with self.settings(
|
||||||
FILENAME_PARSE_TRANSFORMS=[
|
FILENAME_PARSE_TRANSFORMS=[
|
||||||
(none_patt, "none.gif"),
|
(none_patt, "none.gif"),
|
||||||
(all_patt, "anotherall.gif")]):
|
(all_patt, "anotherall.gif")]):
|
||||||
info = FileInfo.from_path(path)
|
info = FileInfo.from_filename(filename)
|
||||||
self.assertEqual(info.title, "anotherall")
|
self.assertEqual(info.title, "anotherall")
|
||||||
self.assertEqual(info.extension, "gif")
|
|
||||||
|
|
||||||
# Complex transformation without date in replacement string
|
# Complex transformation without date in replacement string
|
||||||
with self.settings(
|
with self.settings(
|
||||||
FILENAME_PARSE_TRANSFORMS=[(exact_patt, repl1)]):
|
FILENAME_PARSE_TRANSFORMS=[(exact_patt, repl1)]):
|
||||||
info = FileInfo.from_path(path)
|
info = FileInfo.from_filename(filename)
|
||||||
self.assertEqual(info.title, "0001")
|
self.assertEqual(info.title, "0001")
|
||||||
self.assertEqual(info.extension, "pdf")
|
|
||||||
self.assertEqual(len(info.tags), 2)
|
self.assertEqual(len(info.tags), 2)
|
||||||
self.assertEqual(info.tags[0].slug, "tag1")
|
self.assertEqual(info.tags[0].slug, "tag1")
|
||||||
self.assertEqual(info.tags[1].slug, "tag2")
|
self.assertEqual(info.tags[1].slug, "tag2")
|
||||||
@ -392,9 +352,8 @@ class TestFieldPermutations(TestCase):
|
|||||||
(exact_patt, repl2), # <-- matches
|
(exact_patt, repl2), # <-- matches
|
||||||
(exact_patt, repl1),
|
(exact_patt, repl1),
|
||||||
(all_patt, "all.gif")]):
|
(all_patt, "all.gif")]):
|
||||||
info = FileInfo.from_path(path)
|
info = FileInfo.from_filename(filename)
|
||||||
self.assertEqual(info.title, "0001")
|
self.assertEqual(info.title, "0001")
|
||||||
self.assertEqual(info.extension, "pdf")
|
|
||||||
self.assertEqual(len(info.tags), 2)
|
self.assertEqual(len(info.tags), 2)
|
||||||
self.assertEqual(info.tags[0].slug, "tag1")
|
self.assertEqual(info.tags[0].slug, "tag1")
|
||||||
self.assertEqual(info.tags[1].slug, "tag2")
|
self.assertEqual(info.tags[1].slug, "tag2")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user