Fix: Make some natural keyword date searches timezone-aware (#10416)

This commit is contained in:
shamoon 2025-07-23 22:05:55 -07:00 committed by GitHub
parent 5410074062
commit 1fe8599266
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 76 additions and 0 deletions

View File

@ -2,10 +2,12 @@ from __future__ import annotations
import logging
import math
import re
from collections import Counter
from contextlib import contextmanager
from datetime import datetime
from datetime import time
from datetime import timedelta
from datetime import timezone
from shutil import rmtree
from typing import TYPE_CHECKING
@ -13,6 +15,8 @@ from typing import Literal
from django.conf import settings
from django.utils import timezone as django_timezone
from django.utils.timezone import get_current_timezone
from django.utils.timezone import now
from guardian.shortcuts import get_users_with_perms
from whoosh import classify
from whoosh import highlight
@ -344,6 +348,7 @@ class LocalDateParser(English):
class DelayedFullTextQuery(DelayedQuery):
def _get_query(self) -> tuple:
q_str = self.query_params["query"]
q_str = rewrite_natural_date_keywords(q_str)
qp = MultifieldParser(
[
"content",
@ -450,3 +455,37 @@ def get_permissions_criterias(user: User | None = None) -> list:
query.Term("viewer_id", str(user.id)),
)
return user_criterias
def rewrite_natural_date_keywords(query_string: str) -> str:
"""
Rewrites natural date keywords (e.g. added:today or added:"yesterday") to UTC range syntax for Whoosh.
"""
tz = get_current_timezone()
local_now = now().astimezone(tz)
today = local_now.date()
yesterday = today - timedelta(days=1)
ranges = {
"today": (
datetime.combine(today, time.min, tzinfo=tz),
datetime.combine(today, time.max, tzinfo=tz),
),
"yesterday": (
datetime.combine(yesterday, time.min, tzinfo=tz),
datetime.combine(yesterday, time.max, tzinfo=tz),
),
}
pattern = r"(\b(?:added|created))\s*:\s*[\"']?(today|yesterday)[\"']?"
def repl(m):
field, keyword = m.group(1), m.group(2)
start, end = ranges[keyword]
start_str = start.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
end_str = end.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
return f"{field}:[{start_str} TO {end_str}]"
return re.sub(pattern, repl, query_string)

View File

@ -1,6 +1,11 @@
from datetime import datetime
from unittest import mock
from django.contrib.auth.models import User
from django.test import TestCase
from django.test import override_settings
from django.utils.timezone import get_current_timezone
from django.utils.timezone import timezone
from documents import index
from documents.models import Document
@ -90,3 +95,35 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
_, kwargs = mocked_update_doc.call_args
self.assertIsNone(kwargs["asn"])
@override_settings(TIME_ZONE="Pacific/Auckland")
def test_added_today_respects_local_timezone_boundary(self):
tz = get_current_timezone()
fixed_now = datetime(2025, 7, 20, 15, 0, 0, tzinfo=tz)
# Fake a time near the local boundary (1 AM NZT = 13:00 UTC on previous UTC day)
local_dt = datetime(2025, 7, 20, 1, 0, 0).replace(tzinfo=tz)
utc_dt = local_dt.astimezone(timezone.utc)
doc = Document.objects.create(
title="Time zone",
content="Testing added:today",
checksum="edgecase123",
added=utc_dt,
)
with index.open_index_writer() as writer:
index.update_document(writer, doc)
superuser = User.objects.create_superuser(username="testuser")
self.client.force_login(superuser)
with mock.patch("documents.index.now", return_value=fixed_now):
response = self.client.get("/api/documents/?query=added:today")
results = response.json()["results"]
self.assertEqual(len(results), 1)
self.assertEqual(results[0]["id"], doc.id)
response = self.client.get("/api/documents/?query=added:yesterday")
results = response.json()["results"]
self.assertEqual(len(results), 0)