mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Merge pull request #542 from grembo/master
Allow configuring transformations to be applied to the filename before
This commit is contained in:
@@ -483,8 +483,18 @@ class FileInfo:
|
||||
"<title>.<suffix>"
|
||||
"""
|
||||
|
||||
filename = os.path.basename(path)
|
||||
|
||||
# Mutate filename in-place before parsing its components
|
||||
# by applying at most one of the configured transformations.
|
||||
for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS:
|
||||
(filename, count) = pattern.subn(repl, filename)
|
||||
if count:
|
||||
break
|
||||
|
||||
# Parse filename components.
|
||||
for regex in cls.REGEXES.values():
|
||||
m = regex.match(os.path.basename(path))
|
||||
m = regex.match(filename)
|
||||
if m:
|
||||
properties = m.groupdict()
|
||||
cls._mangle_property(properties, "created")
|
||||
|
@@ -1,3 +1,5 @@
|
||||
import re
|
||||
|
||||
from django.test import TestCase
|
||||
from unittest import mock
|
||||
from tempfile import TemporaryDirectory
|
||||
@@ -372,3 +374,79 @@ class TestFieldPermutations(TestCase):
|
||||
info = FileInfo.from_path("/path/to/06112017Z - title.pdf")
|
||||
self.assertEqual(info.title, "title")
|
||||
self.assertIsNone(info.created)
|
||||
|
||||
def test_filename_parse_transforms(self):
|
||||
|
||||
path = "/some/path/to/tag1,tag2_20190908_180610_0001.pdf"
|
||||
all_patt = re.compile("^.*$")
|
||||
none_patt = re.compile("$a")
|
||||
exact_patt = re.compile("^([a-z0-9,]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.")
|
||||
repl1 = " - \\4 - \\1." # (empty) corrspondent, title and tags
|
||||
repl2 = "\\2Z - " + repl1 # creation date + repl1
|
||||
|
||||
# No transformations configured (= default)
|
||||
info = FileInfo.from_path(path)
|
||||
self.assertEqual(info.title, "tag1,tag2_20190908_180610_0001")
|
||||
self.assertEqual(info.extension, "pdf")
|
||||
self.assertEqual(info.tags, ())
|
||||
self.assertIsNone(info.created)
|
||||
|
||||
# Pattern doesn't match (filename unaltered)
|
||||
with self.settings(
|
||||
FILENAME_PARSE_TRANSFORMS=[(none_patt, "none.gif")]):
|
||||
info = FileInfo.from_path(path)
|
||||
self.assertEqual(info.title, "tag1,tag2_20190908_180610_0001")
|
||||
self.assertEqual(info.extension, "pdf")
|
||||
|
||||
# Simple transformation (match all)
|
||||
with self.settings(
|
||||
FILENAME_PARSE_TRANSFORMS=[(all_patt, "all.gif")]):
|
||||
info = FileInfo.from_path(path)
|
||||
self.assertEqual(info.title, "all")
|
||||
self.assertEqual(info.extension, "gif")
|
||||
|
||||
# Multiple transformations configured (first pattern matches)
|
||||
with self.settings(
|
||||
FILENAME_PARSE_TRANSFORMS=[
|
||||
(all_patt, "all.gif"),
|
||||
(all_patt, "anotherall.gif")]):
|
||||
info = FileInfo.from_path(path)
|
||||
self.assertEqual(info.title, "all")
|
||||
self.assertEqual(info.extension, "gif")
|
||||
|
||||
# Multiple transformations configured (second pattern matches)
|
||||
with self.settings(
|
||||
FILENAME_PARSE_TRANSFORMS=[
|
||||
(none_patt, "none.gif"),
|
||||
(all_patt, "anotherall.gif")]):
|
||||
info = FileInfo.from_path(path)
|
||||
self.assertEqual(info.title, "anotherall")
|
||||
self.assertEqual(info.extension, "gif")
|
||||
|
||||
# Complex transformation without date in replacement string
|
||||
with self.settings(
|
||||
FILENAME_PARSE_TRANSFORMS=[(exact_patt, repl1)]):
|
||||
info = FileInfo.from_path(path)
|
||||
self.assertEqual(info.title, "0001")
|
||||
self.assertEqual(info.extension, "pdf")
|
||||
self.assertEqual(len(info.tags), 2)
|
||||
self.assertEqual(info.tags[0].slug, "tag1")
|
||||
self.assertEqual(info.tags[1].slug, "tag2")
|
||||
self.assertIsNone(info.created)
|
||||
|
||||
# Complex transformation with date in replacement string
|
||||
with self.settings(
|
||||
FILENAME_PARSE_TRANSFORMS=[
|
||||
(none_patt, "none.gif"),
|
||||
(exact_patt, repl2), # <-- matches
|
||||
(exact_patt, repl1),
|
||||
(all_patt, "all.gif")]):
|
||||
info = FileInfo.from_path(path)
|
||||
self.assertEqual(info.title, "0001")
|
||||
self.assertEqual(info.extension, "pdf")
|
||||
self.assertEqual(len(info.tags), 2)
|
||||
self.assertEqual(info.tags[0].slug, "tag1")
|
||||
self.assertEqual(info.tags[1].slug, "tag2")
|
||||
self.assertEqual(info.created.year, 2019)
|
||||
self.assertEqual(info.created.month, 9)
|
||||
self.assertEqual(info.created.day, 8)
|
||||
|
@@ -10,7 +10,9 @@ For the full list of settings and their values, see
|
||||
https://docs.djangoproject.com/en/1.10/ref/settings/
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
@@ -322,6 +324,11 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_YEAR_END")
|
||||
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
|
||||
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
|
||||
|
||||
# Transformations applied before filename parsing
|
||||
FILENAME_PARSE_TRANSFORMS = []
|
||||
for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
|
||||
FILENAME_PARSE_TRANSFORMS.append((re.compile(t["pattern"]), t["repl"]))
|
||||
|
||||
# Specify for how many years a correspondent is considered recent. Recent
|
||||
# correspondents will be shown in a separate "Recent correspondents" filter as
|
||||
# well. Set to 0 to disable this filter.
|
||||
|
Reference in New Issue
Block a user