Adds additional testing for both date parsing and consumed document created date

This commit is contained in:
Trenton Holmes
2022-04-12 19:52:56 -07:00
parent cb62485445
commit a944ef1ca6
9 changed files with 345 additions and 42 deletions

View File

@@ -1,9 +1,11 @@
import datetime
import json
import math
import multiprocessing
import os
import re
from typing import Final
from typing import Set
from urllib.parse import urlparse
from concurrent_log_handler.queue import setup_logging_queues
@@ -604,15 +606,22 @@ if PAPERLESS_TIKA_ENABLED:
INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
# List dates that should be ignored when trying to parse date from document text
IGNORE_DATES: Set[datetime.date] = set()


def _parse_ignore_dates(env_ignore: str) -> Set[datetime.date]:
    """
    Parse the value of PAPERLESS_IGNORE_DATES into a set of dates.

    :param env_ignore: Comma-separated date strings, e.g.
        "1985-05-01,1991-12-05".  An empty string yields an empty set.
    :return: The calendar dates (time-of-day discarded) of every entry
        dateparser returned a result for.  Entries with a falsy parse
        result are skipped rather than raising.
    """
    # Kept as a function-local import, as in the original inline code.
    import dateparser

    parsed = (dateparser.parse(s) for s in env_ignore.split(","))
    # dateparser yields datetime objects; only the date part is compared
    # against dates found in document text, so drop the time component.
    return {d.date() for d in parsed if d}


# Read the variable once so the value checked and the value parsed
# are guaranteed to be the same string.
_ignore_dates_env = os.getenv("PAPERLESS_IGNORE_DATES")
if _ignore_dates_env is not None:
    IGNORE_DATES = _parse_ignore_dates(_ignore_dates_env)
ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default")
if ENABLE_UPDATE_CHECK != "default":

View File

@@ -0,0 +1,45 @@
import datetime
from unittest import TestCase
from paperless.settings import _parse_ignore_dates
class TestIgnoreDateParsing(TestCase):
    """
    Tests the parsing of the PAPERLESS_IGNORE_DATES setting value
    """

    def test_no_ignore_dates_set(self):
        """
        GIVEN:
            - No ignore dates are set
        THEN:
            - No ignore dates are parsed
        """
        self.assertSetEqual(_parse_ignore_dates(""), set())

    def test_single_ignore_dates_set(self):
        """
        GIVEN:
            - Ignore dates are set per certain inputs
        THEN:
            - All ignore dates are parsed
        """
        # Each case pairs the raw setting string with the exact set of
        # dates the parser is expected to return for it.
        test_cases = [
            ("1985-05-01", {datetime.date(1985, 5, 1)}),
            (
                "1985-05-01,1991-12-05",
                {datetime.date(1985, 5, 1), datetime.date(1991, 12, 5)},
            ),
            ("2010-12-13", {datetime.date(2010, 12, 13)}),
        ]

        for env_str, expected_date_set in test_cases:
            # subTest names the failing input and lets the remaining
            # cases still run after a failure.
            with self.subTest(env_str=env_str):
                self.assertSetEqual(
                    _parse_ignore_dates(env_str),
                    expected_date_set,
                )