mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-22 17:54:40 -05:00
Fix: utc-normalize natural dates for whoosh
This commit is contained in:
parent
4b8f6ed643
commit
9f55626ba6
@ -2,10 +2,12 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import math
|
||||
import re
|
||||
from collections import Counter
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime
|
||||
from datetime import time
|
||||
from datetime import timedelta
|
||||
from datetime import timezone
|
||||
from shutil import rmtree
|
||||
from typing import TYPE_CHECKING
|
||||
@ -13,6 +15,8 @@ from typing import Literal
|
||||
|
||||
from django.conf import settings
|
||||
from django.utils import timezone as django_timezone
|
||||
from django.utils.timezone import get_current_timezone
|
||||
from django.utils.timezone import now
|
||||
from guardian.shortcuts import get_users_with_perms
|
||||
from whoosh import classify
|
||||
from whoosh import highlight
|
||||
@ -344,6 +348,7 @@ class LocalDateParser(English):
|
||||
class DelayedFullTextQuery(DelayedQuery):
|
||||
def _get_query(self) -> tuple:
|
||||
q_str = self.query_params["query"]
|
||||
q_str = rewrite_natural_date_keywords(q_str)
|
||||
qp = MultifieldParser(
|
||||
[
|
||||
"content",
|
||||
@ -450,3 +455,47 @@ def get_permissions_criterias(user: User | None = None) -> list:
|
||||
query.Term("viewer_id", str(user.id)),
|
||||
)
|
||||
return user_criterias
|
||||
|
||||
|
||||
def rewrite_natural_date_keywords(query_string: str) -> str:
    """
    Rewrite `added:today`, `added:yesterday`, `created:today` and
    `created:yesterday` into explicit whoosh datetime ranges.

    "today" and "yesterday" are whole calendar days in the currently active
    Django timezone. The local day boundaries are converted to UTC and
    formatted as `YYYYmmddHHMMSS`, so a keyword becomes e.g.
    `added:[20250719120000 TO 20250720115959]` (illustrative values for a
    UTC+12 zone on 2025-07-20). This prevents UTC confusion when searching
    with natural language date keywords.

    Matching is case-sensitive and anchored on word boundaries; every other
    part of the query string is returned unchanged.

    :param query_string: the raw user query
    :return: the query with the supported keywords replaced by ranges
    """
    tz = get_current_timezone()
    today = now().astimezone(tz).date()
    yesterday = today - timedelta(days=1)

    # Local (start, end) of each supported day. time.max carries microseconds,
    # which the second-resolution format below truncates to 23:59:59.
    day_bounds = {
        "today": (
            datetime.combine(today, time.min, tzinfo=tz),
            datetime.combine(today, time.max, tzinfo=tz),
        ),
        "yesterday": (
            datetime.combine(yesterday, time.min, tzinfo=tz),
            datetime.combine(yesterday, time.max, tzinfo=tz),
        ),
    }

    def _to_range(match: re.Match) -> str:
        # Build the UTC range expression for one matched `field:keyword`.
        field, keyword = match.group(1), match.group(2)
        start, end = day_bounds[keyword]
        start_str = start.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
        end_str = end.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
        range_expr = f"{field}:[{start_str} TO {end_str}]"
        # Debug level only: this runs on every search using these keywords.
        logger.debug("Rewrote %s to %s", match.group(0), range_expr)
        return range_expr

    # A single pass is enough: a produced range can never itself match.
    return re.sub(
        r"\b(added|created):(today|yesterday)\b",
        _to_range,
        query_string,
    )
|
||||
|
@ -1,6 +1,11 @@
|
||||
from datetime import datetime
|
||||
from unittest import mock
|
||||
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import TestCase
|
||||
from django.test import override_settings
|
||||
from django.utils.timezone import get_current_timezone
|
||||
from django.utils.timezone import timezone
|
||||
|
||||
from documents import index
|
||||
from documents.models import Document
|
||||
@ -90,3 +95,35 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
|
||||
_, kwargs = mocked_update_doc.call_args
|
||||
|
||||
self.assertIsNone(kwargs["asn"])
|
||||
|
||||
@override_settings(TIME_ZONE="Pacific/Auckland")
def test_added_today_respects_local_timezone_boundary(self):
    """
    A document added early in the local day must be found by `added:today`
    and not by `added:yesterday`, even though its UTC timestamp falls on the
    previous UTC day.
    """
    local_tz = get_current_timezone()

    # The moment the search is performed: mid-afternoon, local time.
    frozen_now = datetime(2025, 7, 20, 15, 0, 0, tzinfo=local_tz)

    # Document timestamp just after local midnight
    # (1 AM NZT = 13:00 UTC on the previous UTC day).
    added_local = datetime(2025, 7, 20, 1, 0, 0, tzinfo=local_tz)
    added_utc = added_local.astimezone(timezone.utc)

    document = Document.objects.create(
        title="Time zone",
        content="Testing added:today",
        checksum="edgecase123",
        added=added_utc,
    )

    with index.open_index_writer() as writer:
        index.update_document(writer, document)

    admin = User.objects.create_superuser(username="testuser")
    self.client.force_login(admin)

    # Both searches run against the frozen clock so the day boundaries
    # computed by the index module are deterministic.
    with mock.patch("documents.index.now", return_value=frozen_now):
        today_response = self.client.get("/api/documents/?query=added:today")
        today_results = today_response.json()["results"]
        self.assertEqual(len(today_results), 1)
        self.assertEqual(today_results[0]["id"], document.id)

        yesterday_response = self.client.get(
            "/api/documents/?query=added:yesterday",
        )
        yesterday_results = yesterday_response.json()["results"]
        self.assertEqual(len(yesterday_results), 0)
|
||||
|
Loading…
x
Reference in New Issue
Block a user