diff --git a/src/documents/index.py b/src/documents/index.py index 10de04245..18e654a05 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -2,10 +2,12 @@ from __future__ import annotations import logging import math +import re from collections import Counter from contextlib import contextmanager from datetime import datetime from datetime import time +from datetime import timedelta from datetime import timezone from shutil import rmtree from typing import TYPE_CHECKING @@ -13,6 +15,8 @@ from typing import Literal from django.conf import settings from django.utils import timezone as django_timezone +from django.utils.timezone import get_current_timezone +from django.utils.timezone import now from guardian.shortcuts import get_users_with_perms from whoosh import classify from whoosh import highlight @@ -344,6 +348,7 @@ class LocalDateParser(English): class DelayedFullTextQuery(DelayedQuery): def _get_query(self) -> tuple: q_str = self.query_params["query"] + q_str = rewrite_natural_date_keywords(q_str) qp = MultifieldParser( [ "content", @@ -450,3 +455,47 @@ def get_permissions_criterias(user: User | None = None) -> list: query.Term("viewer_id", str(user.id)), ) return user_criterias + + +def rewrite_natural_date_keywords(query_string: str) -> str: + """ + Rewrites `added:today`, `created:yesterday` into whoosh datetime ranges. + This prevents UTC confusion when searching with natural language date keywords. + """ + + replacements = {} + patterns = [ + ("added:today", "added"), + ("added:yesterday", "added"), + ("created:today", "created"), + ("created:yesterday", "created"), + ] + + tz = get_current_timezone() + local_now = now().astimezone(tz) + + today_start_local = datetime.combine(local_now.date(), time.min).replace(tzinfo=tz) + today_end_local = datetime.combine(local_now.date(), time.max).replace(tzinfo=tz) + yesterday_start_local = today_start_local - timedelta(days=1) + yesterday_end_local = today_end_local - timedelta(days=1) + + for pattern, field in patterns: + if pattern in query_string: + if pattern.endswith("today"): + start = today_start_local + end = today_end_local + else: + start = yesterday_start_local + end = yesterday_end_local + + start_str = start.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S") + end_str = end.astimezone(timezone.utc).strftime("%Y%m%d%H%M%S") + + range_expr = f"{field}:[{start_str} TO {end_str}]" + logger.warning(f"RANGE: {range_expr}") + replacements[pattern] = range_expr + + for match, replacement in replacements.items(): + query_string = re.sub(rf"\b{re.escape(match)}\b", replacement, query_string) + + return query_string diff --git a/src/documents/tests/test_index.py b/src/documents/tests/test_index.py index 24bc26d4c..2a41542e9 100644 --- a/src/documents/tests/test_index.py +++ b/src/documents/tests/test_index.py @@ -1,6 +1,11 @@ +from datetime import datetime from unittest import mock +from django.contrib.auth.models import User from django.test import TestCase +from django.test import override_settings +from django.utils.timezone import get_current_timezone +from django.utils.timezone import timezone from documents import index from documents.models import Document @@ -90,3 +95,35 @@ class TestAutoComplete(DirectoriesMixin, TestCase): _, kwargs = mocked_update_doc.call_args self.assertIsNone(kwargs["asn"]) + + @override_settings(TIME_ZONE="Pacific/Auckland") + def test_added_today_respects_local_timezone_boundary(self): + tz = get_current_timezone() + fixed_now = datetime(2025, 7, 20, 15, 0, 0, tzinfo=tz) + + # Fake a time near the local boundary (1 AM NZT = 13:00 UTC on previous UTC day) + local_dt = datetime(2025, 7, 20, 1, 0, 0).replace(tzinfo=tz) + utc_dt = local_dt.astimezone(timezone.utc) + + doc = Document.objects.create( + title="Time zone", + content="Testing added:today", + checksum="edgecase123", + added=utc_dt, + ) + + with index.open_index_writer() as writer: + index.update_document(writer, doc) + + superuser = User.objects.create_superuser(username="testuser") + self.client.force_login(superuser) + + with mock.patch("documents.index.now", return_value=fixed_now): + response = self.client.get("/api/documents/?query=added:today") + results = response.json()["results"] + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["id"], doc.id) + + response = self.client.get("/api/documents/?query=added:yesterday") + results = response.json()["results"] + self.assertEqual(len(results), 0)