mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-22 17:54:40 -05:00
Fix: utc-normalize natural dates for whoosh
This commit is contained in:
parent
4b8f6ed643
commit
9f55626ba6
@ -2,10 +2,12 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
import math
|
import math
|
||||||
|
import re
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from datetime import time
|
from datetime import time
|
||||||
|
from datetime import timedelta
|
||||||
from datetime import timezone
|
from datetime import timezone
|
||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
@ -13,6 +15,8 @@ from typing import Literal
|
|||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.utils import timezone as django_timezone
|
from django.utils import timezone as django_timezone
|
||||||
|
from django.utils.timezone import get_current_timezone
|
||||||
|
from django.utils.timezone import now
|
||||||
from guardian.shortcuts import get_users_with_perms
|
from guardian.shortcuts import get_users_with_perms
|
||||||
from whoosh import classify
|
from whoosh import classify
|
||||||
from whoosh import highlight
|
from whoosh import highlight
|
||||||
@ -344,6 +348,7 @@ class LocalDateParser(English):
|
|||||||
class DelayedFullTextQuery(DelayedQuery):
|
class DelayedFullTextQuery(DelayedQuery):
|
||||||
def _get_query(self) -> tuple:
|
def _get_query(self) -> tuple:
|
||||||
q_str = self.query_params["query"]
|
q_str = self.query_params["query"]
|
||||||
|
q_str = rewrite_natural_date_keywords(q_str)
|
||||||
qp = MultifieldParser(
|
qp = MultifieldParser(
|
||||||
[
|
[
|
||||||
"content",
|
"content",
|
||||||
@ -450,3 +455,47 @@ def get_permissions_criterias(user: User | None = None) -> list:
|
|||||||
query.Term("viewer_id", str(user.id)),
|
query.Term("viewer_id", str(user.id)),
|
||||||
)
|
)
|
||||||
return user_criterias
|
return user_criterias
|
||||||
|
|
||||||
|
|
||||||
|
def rewrite_natural_date_keywords(query_string: str) -> str:
    """
    Rewrite natural-language date keywords into explicit whoosh date ranges.

    The four recognised terms are `added:today`, `added:yesterday`,
    `created:today` and `created:yesterday`. Each one is replaced by
    `<field>:[<start> TO <end>]`, where start and end are the first and last
    moment of that day in the current Django timezone, converted to UTC and
    formatted as `YYYYmmddHHMMSS`.

    Matching is case-sensitive and anchored on word boundaries, so terms
    such as `added:todayish` or `dateadded:today` are left alone.

    Args:
        query_string: The raw query string as entered by the user.

    Returns:
        The query string with every recognised keyword replaced. The input
        is returned unchanged when it contains none of them.
    """
    keyword_re = re.compile(r"\b(added|created):(today|yesterday)\b")

    # Most queries carry no natural date keyword; skip all timezone work then.
    if keyword_re.search(query_string) is None:
        return query_string

    tz = get_current_timezone()
    today = now().astimezone(tz).date()
    days_back = {"today": 0, "yesterday": 1}

    def _utc_stamp(day, clock) -> str:
        # Build the local wall-clock moment first, then shift it to UTC.
        # strftime drops the microseconds of time.max, giving ...235959.
        local_dt = datetime.combine(day, clock).replace(tzinfo=tz)
        return local_dt.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")

    def _to_range(match: re.Match) -> str:
        field, keyword = match.groups()
        day = today - timedelta(days=days_back[keyword])
        start_str = _utc_stamp(day, time.min)
        end_str = _utc_stamp(day, time.max)
        return f"{field}:[{start_str} TO {end_str}]"

    return keyword_re.sub(_to_range, query_string)
|
||||||
|
@ -1,6 +1,11 @@
|
|||||||
|
from datetime import datetime
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
|
from django.contrib.auth.models import User
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
|
from django.test import override_settings
|
||||||
|
from django.utils.timezone import get_current_timezone
|
||||||
|
from django.utils.timezone import timezone
|
||||||
|
|
||||||
from documents import index
|
from documents import index
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
@ -90,3 +95,35 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
|
|||||||
_, kwargs = mocked_update_doc.call_args
|
_, kwargs = mocked_update_doc.call_args
|
||||||
|
|
||||||
self.assertIsNone(kwargs["asn"])
|
self.assertIsNone(kwargs["asn"])
|
||||||
|
|
||||||
|
@override_settings(TIME_ZONE="Pacific/Auckland")
def test_added_today_respects_local_timezone_boundary(self):
    """
    GIVEN:
        - A document added at 01:00 local time on 2025-07-20
        - The search index's notion of "now" pinned to 15:00 local time on
          the same local day
    WHEN:
        - The API is queried with added:today and with added:yesterday
    THEN:
        - added:today returns exactly that document
        - added:yesterday returns nothing
    """
    local_tz = get_current_timezone()
    frozen_now = datetime(2025, 7, 20, 15, 0, 0, tzinfo=local_tz)

    # 1 AM local time sits at 13:00 UTC on the previous UTC day, so a
    # UTC-based "today" would miss this document.
    added_local = datetime(2025, 7, 20, 1, 0, 0).replace(tzinfo=local_tz)
    added_utc = added_local.astimezone(timezone.utc)

    document = Document.objects.create(
        title="Time zone",
        content="Testing added:today",
        checksum="edgecase123",
        added=added_utc,
    )

    with index.open_index_writer() as writer:
        index.update_document(writer, document)

    admin = User.objects.create_superuser(username="testuser")
    self.client.force_login(admin)

    with mock.patch("documents.index.now", return_value=frozen_now):
        today_hits = self.client.get(
            "/api/documents/?query=added:today",
        ).json()["results"]
        self.assertEqual(len(today_hits), 1)
        self.assertEqual(today_hits[0]["id"], document.id)

        yesterday_hits = self.client.get(
            "/api/documents/?query=added:yesterday",
        ).json()["results"]
        self.assertEqual(len(yesterday_hits), 0)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user