mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Merge pull request #721 from paperless-ngx/bug-fix-date-ignore
Fix Ignore Date Parsing
This commit is contained in:
@@ -1,9 +1,11 @@
|
||||
import datetime
|
||||
import json
|
||||
import math
|
||||
import multiprocessing
|
||||
import os
|
||||
import re
|
||||
from typing import Final
|
||||
from typing import Set
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from concurrent_log_handler.queue import setup_logging_queues
|
||||
@@ -603,16 +605,42 @@ PAPERLESS_TIKA_GOTENBERG_ENDPOINT = os.getenv(
|
||||
if PAPERLESS_TIKA_ENABLED:
|
||||
INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
|
||||
|
||||
# List dates that should be ignored when trying to parse date from document text
|
||||
IGNORE_DATES = set()
|
||||
|
||||
if os.getenv("PAPERLESS_IGNORE_DATES", ""):
|
||||
def _parse_ignore_dates(
    env_ignore: str,
    date_order: str = DATE_ORDER,
) -> Set[datetime.date]:
    """
    Parse the user provided PAPERLESS_IGNORE_DATES value into a set of dates.

    Args:
        env_ignore (str): The value of the environment variable, a comma
            separated list of date strings
        date_order (str, optional): The order of day, month and year in the
            date strings (e.g. "YMD" or "DMY"), handed to dateparser as its
            DATE_ORDER setting. Defaults to DATE_ORDER.

    Returns:
        Set[datetime.date]: The set of parsed dates (plain dates, the time
            component of the parsed value is dropped). Entries that
            dateparser cannot parse are silently skipped.
    """
    # Kept as a function-local import, as in the original code
    import dateparser

    ignored_dates = set()
    for s in env_ignore.split(","):
        d = dateparser.parse(
            s,
            settings={
                "DATE_ORDER": date_order,
            },
        )
        # A falsy result means the entry could not be parsed; an empty
        # env_ignore therefore produces an empty set
        if d:
            ignored_dates.add(d.date())
    return ignored_dates
|
||||
|
||||
|
||||
# Dates that must be skipped when a date is extracted from document text.
# Parsed from PAPERLESS_IGNORE_DATES when that variable is present in the
# environment (even if empty); otherwise no dates are ignored.
IGNORE_DATES: Set[datetime.date] = (
    _parse_ignore_dates(os.environ["PAPERLESS_IGNORE_DATES"])
    if "PAPERLESS_IGNORE_DATES" in os.environ
    else set()
)
|
||||
|
||||
ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default")
|
||||
if ENABLE_UPDATE_CHECK != "default":
|
||||
|
58
src/paperless/tests/test_settings.py
Normal file
58
src/paperless/tests/test_settings.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import datetime
|
||||
from unittest import TestCase
|
||||
|
||||
from paperless.settings import _parse_ignore_dates
|
||||
|
||||
|
||||
class TestIgnoreDateParsing(TestCase):
    """
    Tests the parsing of the PAPERLESS_IGNORE_DATES setting value
    """

    def _parse_checker(self, test_cases):
        """
        Helper function to check ignore date parsing

        Args:
            test_cases (list[tuple[str, str, set[datetime.date]]]): One
                tuple per case, holding the raw setting value, the date
                order to parse it with and the expected set of dates
        """
        for env_str, date_format, expected_date_set in test_cases:
            # subTest reports every failing case individually instead of
            # stopping at the first one, and names the offending input
            with self.subTest(env_str=env_str, date_format=date_format):
                self.assertSetEqual(
                    _parse_ignore_dates(env_str, date_format),
                    expected_date_set,
                )

    def test_no_ignore_dates_set(self):
        """
        GIVEN:
            - No ignore dates are set
        THEN:
            - No ignore dates are parsed
        """
        self.assertSetEqual(_parse_ignore_dates(""), set())

    def test_single_ignore_dates_set(self):
        """
        GIVEN:
            - Ignore dates are set per certain inputs
        THEN:
            - All ignore dates are parsed
        """
        test_cases = [
            ("1985-05-01", "YMD", {datetime.date(1985, 5, 1)}),
            (
                "1985-05-01,1991-12-05",
                "YMD",
                {datetime.date(1985, 5, 1), datetime.date(1991, 12, 5)},
            ),
            ("2010-12-13", "YMD", {datetime.date(2010, 12, 13)}),
            ("11.01.10", "DMY", {datetime.date(2010, 1, 11)}),
            (
                "11.01.2001,15-06-1996",
                "DMY",
                {datetime.date(2001, 1, 11), datetime.date(1996, 6, 15)},
            ),
        ]

        self._parse_checker(test_cases)
|
Reference in New Issue
Block a user