mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-11-23 23:49:08 -06:00
Enhancement: add more relative dates, support modified (#11411)
This commit is contained in:
@@ -13,6 +13,7 @@ from shutil import rmtree
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Literal
|
||||
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from django.conf import settings
|
||||
from django.utils import timezone as django_timezone
|
||||
from django.utils.timezone import get_current_timezone
|
||||
@@ -533,32 +534,84 @@ def get_permissions_criterias(user: User | None = None) -> list:
|
||||
def rewrite_natural_date_keywords(query_string: str) -> str:
|
||||
"""
|
||||
Rewrites natural date keywords (e.g. added:today or added:"yesterday") to UTC range syntax for Whoosh.
|
||||
This resolves timezone issues with date parsing in Whoosh as well as adding support for more
|
||||
natural date keywords.
|
||||
"""
|
||||
|
||||
tz = get_current_timezone()
|
||||
local_now = now().astimezone(tz)
|
||||
|
||||
today = local_now.date()
|
||||
yesterday = today - timedelta(days=1)
|
||||
|
||||
ranges = {
|
||||
"today": (
|
||||
datetime.combine(today, time.min, tzinfo=tz),
|
||||
datetime.combine(today, time.max, tzinfo=tz),
|
||||
),
|
||||
"yesterday": (
|
||||
datetime.combine(yesterday, time.min, tzinfo=tz),
|
||||
datetime.combine(yesterday, time.max, tzinfo=tz),
|
||||
),
|
||||
}
|
||||
|
||||
pattern = r"(\b(?:added|created))\s*:\s*[\"']?(today|yesterday)[\"']?"
|
||||
# all supported Keywords
|
||||
pattern = r"(\b(?:added|created|modified))\s*:\s*[\"']?(today|yesterday|this month|previous month|previous week|previous quarter|this year|previous year)[\"']?"
|
||||
|
||||
def repl(m):
|
||||
field, keyword = m.group(1), m.group(2)
|
||||
start, end = ranges[keyword]
|
||||
field = m.group(1)
|
||||
keyword = m.group(2).lower()
|
||||
|
||||
match keyword:
|
||||
case "today":
|
||||
start = datetime.combine(today, time.min, tzinfo=tz)
|
||||
end = datetime.combine(today, time.max, tzinfo=tz)
|
||||
|
||||
case "yesterday":
|
||||
yesterday = today - timedelta(days=1)
|
||||
start = datetime.combine(yesterday, time.min, tzinfo=tz)
|
||||
end = datetime.combine(yesterday, time.max, tzinfo=tz)
|
||||
|
||||
case "this month":
|
||||
start = datetime(local_now.year, local_now.month, 1, 0, 0, 0, tzinfo=tz)
|
||||
end = start + relativedelta(months=1) - timedelta(seconds=1)
|
||||
|
||||
case "previous month":
|
||||
this_month_start = datetime(
|
||||
local_now.year,
|
||||
local_now.month,
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
tzinfo=tz,
|
||||
)
|
||||
start = this_month_start - relativedelta(months=1)
|
||||
end = this_month_start - timedelta(seconds=1)
|
||||
|
||||
case "this year":
|
||||
start = datetime(local_now.year, 1, 1, 0, 0, 0, tzinfo=tz)
|
||||
end = datetime.combine(today, time.max, tzinfo=tz)
|
||||
|
||||
case "previous week":
|
||||
days_since_monday = local_now.weekday()
|
||||
this_week_start = datetime.combine(
|
||||
today - timedelta(days=days_since_monday),
|
||||
time.min,
|
||||
tzinfo=tz,
|
||||
)
|
||||
start = this_week_start - timedelta(days=7)
|
||||
end = this_week_start - timedelta(seconds=1)
|
||||
|
||||
case "previous quarter":
|
||||
current_quarter = (local_now.month - 1) // 3 + 1
|
||||
this_quarter_start_month = (current_quarter - 1) * 3 + 1
|
||||
this_quarter_start = datetime(
|
||||
local_now.year,
|
||||
this_quarter_start_month,
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
tzinfo=tz,
|
||||
)
|
||||
start = this_quarter_start - relativedelta(months=3)
|
||||
end = this_quarter_start - timedelta(seconds=1)
|
||||
|
||||
case "previous year":
|
||||
start = datetime(local_now.year - 1, 1, 1, 0, 0, 0, tzinfo=tz)
|
||||
end = datetime(local_now.year - 1, 12, 31, 23, 59, 59, tzinfo=tz)
|
||||
|
||||
# Convert to UTC and format
|
||||
start_str = start.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
|
||||
end_str = end.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
|
||||
return f"{field}:[{start_str} TO {end_str}]"
|
||||
|
||||
return re.sub(pattern, repl, query_string)
|
||||
return re.sub(pattern, repl, query_string, flags=re.IGNORECASE)
|
||||
|
||||
@@ -2,6 +2,7 @@ from datetime import datetime
|
||||
from unittest import mock
|
||||
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import SimpleTestCase
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
from django.utils.timezone import get_current_timezone
|
||||
@@ -127,3 +128,126 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
|
||||
response = self.client.get("/api/documents/?query=added:yesterday")
|
||||
results = response.json()["results"]
|
||||
self.assertEqual(len(results), 0)
|
||||
|
||||
|
||||
@override_settings(TIME_ZONE="UTC")
class TestRewriteNaturalDateKeywords(SimpleTestCase):
    """
    Unit tests for rewrite_natural_date_keywords function.

    Every test pins the current time by patching ``documents.index.now`` and
    then checks fragments of the rewritten query string.
    """

    def _rewrite_with_now(self, query: str, now_dt: datetime) -> str:
        # Run the rewrite with documents.index.now() returning now_dt.
        frozen_clock = mock.patch("documents.index.now", return_value=now_dt)
        with frozen_clock:
            return index.rewrite_natural_date_keywords(query)

    def _assert_rewrite_contains(
        self,
        query: str,
        now_dt: datetime,
        *expected_fragments: str,
    ) -> str:
        # Rewrite the query at the given time, assert that each expected
        # fragment occurs in the output, and hand the output back.
        rewritten = self._rewrite_with_now(query, now_dt)
        for expected in expected_fragments:
            self.assertIn(expected, rewritten)
        return rewritten

    def test_range_keywords(self):
        """
        Test various different range keywords
        """
        sunday_afternoon = datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc)
        mid_july_noon = datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc)
        # July 20, 2025 is a Sunday (weekday 6).
        sunday_noon = datetime(2025, 7, 20, 12, 0, 0, tzinfo=timezone.utc)

        # (query, frozen "now", fragments expected in the rewritten query)
        scenarios = [
            ("added:today", sunday_afternoon, ("added:[20250720", "TO 20250720")),
            ("added:yesterday", sunday_afternoon, ("added:[20250719", "TO 20250719")),
            ("added:this month", mid_july_noon, ("added:[20250701", "TO 20250731")),
            ("added:previous month", mid_july_noon, ("added:[20250601", "TO 20250630")),
            ("added:this year", mid_july_noon, ("added:[20250101", "TO 20250715")),
            ("added:previous year", mid_july_noon, ("added:[20240101", "TO 20241231")),
            # Previous quarter from July 15, 2025 is April-June.
            ("added:previous quarter", mid_july_noon, ("added:[20250401", "TO 20250630")),
            # Seen from Sunday July 20, the previous week is July 7-13.
            ("added:previous week", sunday_noon, ("added:[20250707", "TO 20250713")),
        ]

        for query_text, frozen_now, wanted in scenarios:
            with self.subTest(query=query_text):
                self._assert_rewrite_contains(query_text, frozen_now, *wanted)

    def test_additional_fields(self):
        fixed_now = datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc)
        # created first, then modified
        field_checks = (
            ("created:today", "created:[20250720"),
            ("modified:today", "modified:[20250720"),
        )
        for query_text, wanted in field_checks:
            self._assert_rewrite_contains(query_text, fixed_now, wanted)

    def test_basic_syntax_variants(self):
        """
        Test that quoting, casing, and multi-clause queries are parsed.
        """
        fixed_now = datetime(2025, 7, 20, 15, 30, 45, tzinfo=timezone.utc)

        # quoted keywords: rewrite both variants first, then check both
        quoted_results = [
            self._rewrite_with_now(quoted_query, fixed_now)
            for quoted_query in ('added:"today"', "added:'today'")
        ]
        for rewritten in quoted_results:
            self.assertIn("added:[20250720", rewritten)

        # case insensitivity
        for variant in ("added:TODAY", "added:Today", "added:ToDaY"):
            with self.subTest(case_variant=variant):
                self._assert_rewrite_contains(variant, fixed_now, "added:[20250720")

        # multiple clauses
        combined = self._rewrite_with_now("added:today created:yesterday", fixed_now)
        for wanted in ("added:[20250720", "created:[20250719"):
            self.assertIn(wanted, combined)

    def test_no_match(self):
        """
        Test that queries without keywords are unchanged.
        """
        untouched = "title:test content:example"
        self.assertEqual(untouched, index.rewrite_natural_date_keywords(untouched))

    @override_settings(TIME_ZONE="Pacific/Auckland")
    def test_timezone_awareness(self):
        """
        Test timezone conversion.
        """
        # July 20, 2025 1:00 AM NZST = July 19, 2025 13:00 UTC
        auckland_tz = get_current_timezone()
        fixed_now = datetime(2025, 7, 20, 1, 0, 0, tzinfo=auckland_tz)
        rewritten = self._rewrite_with_now("added:today", fixed_now)
        # Should convert to UTC properly
        self.assertIn("added:[20250719", rewritten)
|
||||
|
||||
Reference in New Issue
Block a user