mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Allow configuring transformations to be applied to the filename before
parsing. The motivation was that files produced by a Brother scanner wouldn't match paperless' expectations. At most one transformation is applied (first matching). It won't affect the filename on disk. This is generic enough so that it is useful for various purposes. In my case it allows me to use the different hardware buttons on the scanner to use different profiles, feeding one instance of paperless with documents of multiple entities and tagging them accordingly. Example: PAPERLESS_FILENAME_PARSE_TRANSFORMS=[{"pattern":"^([a-z]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.", "repl":"\\2\\3Z - \\4 - \\1."},{"pattern":"^([a-z]+)_([0-9]+)\\.", "repl":" - \\2 - \\1."}]
This commit is contained in:
@@ -483,8 +483,14 @@ class FileInfo:
|
||||
"<title>.<suffix>"
|
||||
"""
|
||||
|
||||
filename = os.path.basename(path)
|
||||
for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS:
|
||||
(filename, count) = pattern.subn(repl, filename)
|
||||
if count:
|
||||
break
|
||||
|
||||
for regex in cls.REGEXES.values():
|
||||
m = regex.match(os.path.basename(path))
|
||||
m = regex.match(filename)
|
||||
if m:
|
||||
properties = m.groupdict()
|
||||
cls._mangle_property(properties, "created")
|
||||
|
@@ -10,7 +10,9 @@ For the full list of settings and their values, see
|
||||
https://docs.djangoproject.com/en/1.10/ref/settings/
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
@@ -317,6 +319,15 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_YEAR_END")
|
||||
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
|
||||
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
|
||||
|
||||
# Transformations applied before filename parsing
|
||||
FILENAME_PARSE_TRANSFORMS = []
|
||||
_filename_parse_transforms = os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS")
|
||||
if _filename_parse_transforms:
|
||||
FILENAME_PARSE_TRANSFORMS = [(
|
||||
re.compile(t["pattern"]), t["repl"])
|
||||
for t in json.loads(_filename_parse_transforms)
|
||||
]
|
||||
|
||||
# Specify for how many years a correspondent is considered recent. Recent
|
||||
# correspondents will be shown in a separate "Recent correspondents" filter as
|
||||
# well. Set to 0 to disable this filter.
|
||||
|
Reference in New Issue
Block a user