From 1fe85992660a3d6d6c935bdbe4927ca9f4c7fa0e Mon Sep 17 00:00:00 2001
From: shamoon <4887959+shamoon@users.noreply.github.com>
Date: Wed, 23 Jul 2025 22:05:55 -0700
Subject: [PATCH] Fix: Make some natural keyword date searches timezone-aware
 (#10416)

---
 src/documents/index.py            | 51 +++++++++++++++++++++++++++++++
 src/documents/tests/test_index.py | 37 ++++++++++++++++++++++
 2 files changed, 88 insertions(+)

diff --git a/src/documents/index.py b/src/documents/index.py
index 10de04245..3d1030dca 100644
--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -2,10 +2,12 @@ from __future__ import annotations
 
 import logging
 import math
+import re
 from collections import Counter
 from contextlib import contextmanager
 from datetime import datetime
 from datetime import time
+from datetime import timedelta
 from datetime import timezone
 from shutil import rmtree
 from typing import TYPE_CHECKING
@@ -13,6 +15,8 @@ from typing import Literal
 
 from django.conf import settings
 from django.utils import timezone as django_timezone
+from django.utils.timezone import get_current_timezone
+from django.utils.timezone import now
 from guardian.shortcuts import get_users_with_perms
 from whoosh import classify
 from whoosh import highlight
@@ -344,6 +348,7 @@ class LocalDateParser(English):
 class DelayedFullTextQuery(DelayedQuery):
     def _get_query(self) -> tuple:
         q_str = self.query_params["query"]
+        q_str = rewrite_natural_date_keywords(q_str)
         qp = MultifieldParser(
             [
                 "content",
@@ -450,3 +455,49 @@ def get_permissions_criterias(user: User | None = None) -> list:
             query.Term("viewer_id", str(user.id)),
         )
     return user_criterias
+
+
+def rewrite_natural_date_keywords(query_string: str) -> str:
+    """
+    Rewrites natural date keywords (e.g. added:today or added:"yesterday") to UTC range syntax for Whoosh.
+
+    Each keyword is resolved to a whole calendar day in the currently active
+    Django time zone and emitted as ``field:[start TO end]``, with both bounds
+    converted to UTC and formatted as ``YYYYmmddHHMMSS``. Only the ``added``
+    and ``created`` fields combined with ``today`` / ``yesterday`` are
+    rewritten; every other part of the query string is returned unchanged.
+    """
+
+    tz = get_current_timezone()
+    local_now = now().astimezone(tz)
+
+    today = local_now.date()
+    yesterday = today - timedelta(days=1)
+
+    # Local-day boundaries; they are converted to UTC when substituted below.
+    ranges = {
+        "today": (
+            datetime.combine(today, time.min, tzinfo=tz),
+            datetime.combine(today, time.max, tzinfo=tz),
+        ),
+        "yesterday": (
+            datetime.combine(yesterday, time.min, tzinfo=tz),
+            datetime.combine(yesterday, time.max, tzinfo=tz),
+        ),
+    }
+
+    # An opening quote must be closed by the same character (\2), and the
+    # keyword may not run into further word characters. Inputs such as
+    # added:todayish or added:"today at noon" are therefore left untouched
+    # instead of being half-rewritten, and a quote that closes an enclosing
+    # phrase is no longer swallowed.
+    pattern = r"(\b(?:added|created))\s*:\s*([\"']?)(today|yesterday)\2(?!\w)"
+
+    def repl(m: re.Match[str]) -> str:
+        field, keyword = m.group(1), m.group(3)
+        start, end = ranges[keyword]
+        start_str = start.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
+        end_str = end.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S")
+        return f"{field}:[{start_str} TO {end_str}]"
+
+    return re.sub(pattern, repl, query_string)
diff --git a/src/documents/tests/test_index.py b/src/documents/tests/test_index.py
index 24bc26d4c..2a41542e9 100644
--- a/src/documents/tests/test_index.py
+++ b/src/documents/tests/test_index.py
@@ -1,6 +1,11 @@
+from datetime import datetime
 from unittest import mock
 
+from django.contrib.auth.models import User
 from django.test import TestCase
+from django.test import override_settings
+from django.utils.timezone import get_current_timezone
+from django.utils.timezone import timezone
 
 from documents import index
 from documents.models import Document
@@ -90,3 +95,35 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
 
             _, kwargs = mocked_update_doc.call_args
             self.assertIsNone(kwargs["asn"])
+
+    @override_settings(TIME_ZONE="Pacific/Auckland")
+    def test_added_today_respects_local_timezone_boundary(self):
+        tz = get_current_timezone()
+        fixed_now = datetime(2025, 7, 20, 15, 0, 0, tzinfo=tz)
+
+        # Fake a time near the local boundary (1 AM NZT = 13:00 UTC on previous UTC day)
+        local_dt = datetime(2025, 7, 20, 1, 0, 0).replace(tzinfo=tz)
+        utc_dt = local_dt.astimezone(timezone.utc)
+
+        doc = Document.objects.create(
+            title="Time zone",
+            content="Testing added:today",
+            checksum="edgecase123",
+            added=utc_dt,
+        )
+
+        with index.open_index_writer() as writer:
+            index.update_document(writer, doc)
+
+        superuser = User.objects.create_superuser(username="testuser")
+        self.client.force_login(superuser)
+
+        with mock.patch("documents.index.now", return_value=fixed_now):
+            response = self.client.get("/api/documents/?query=added:today")
+            results = response.json()["results"]
+            self.assertEqual(len(results), 1)
+            self.assertEqual(results[0]["id"], doc.id)
+
+            response = self.client.get("/api/documents/?query=added:yesterday")
+            results = response.json()["results"]
+            self.assertEqual(len(results), 0)