From af0817ab7405d199dcfd73937e1fef58beac9ca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Bogda=C5=82?= Date: Mon, 11 Dec 2023 18:32:43 +0100 Subject: [PATCH] Fix: Convert search dates to UTC in advanced search (#4891) * Index documents using local timezone * Add local date parser --- src/documents/index.py | 25 ++++++++++++++- src/documents/tests/test_api.py | 56 +++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/src/documents/index.py b/src/documents/index.py index 2e2585071..da5168b9a 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -25,9 +25,11 @@ from whoosh.index import open_dir from whoosh.qparser import MultifieldParser from whoosh.qparser import QueryParser from whoosh.qparser.dateparse import DateParserPlugin +from whoosh.qparser.dateparse import English from whoosh.scoring import TF_IDF from whoosh.searching import ResultsPage from whoosh.searching import Searcher +from whoosh.util.times import timespan from whoosh.writing import AsyncWriter # from documents.models import CustomMetadata @@ -356,6 +358,22 @@ class DelayedQuery: return page +class LocalDateParser(English): + def reverse_timezone_offset(self, d): + return (d.replace(tzinfo=timezone.get_current_timezone())).astimezone( + timezone.utc, + ) + + def date_from(self, *args, **kwargs): + d = super().date_from(*args, **kwargs) + if isinstance(d, timespan): + d.start = self.reverse_timezone_offset(d.start) + d.end = self.reverse_timezone_offset(d.end) + else: + d = self.reverse_timezone_offset(d) + return d + + class DelayedFullTextQuery(DelayedQuery): def _get_query(self): q_str = self.query_params["query"] @@ -371,7 +389,12 @@ class DelayedFullTextQuery(DelayedQuery): ], self.searcher.ixreader.schema, ) - qp.add_plugin(DateParserPlugin(basedate=timezone.now())) + qp.add_plugin( + DateParserPlugin( + basedate=timezone.now(), + dateparser=LocalDateParser(), + ), + ) q = qp.parse(q_str) corrected = self.searcher.correct_query(q, 
q_str) diff --git a/src/documents/tests/test_api.py b/src/documents/tests/test_api.py index e671ce2ce..c2ade1d45 100644 --- a/src/documents/tests/test_api.py +++ b/src/documents/tests/test_api.py @@ -964,6 +964,62 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): # Assert subset in results self.assertDictEqual(result, {**result, **subset}) + @override_settings( + TIME_ZONE="Europe/Sofia", + ) + def test_search_added_specific_date_with_timezone_ahead(self): + """ + GIVEN: + - Two documents added right now + - One document added on a specific date + - The timezone is ahead of UTC time (+2) + WHEN: + - Query for documents added on a specific date + THEN: + - The one document is returned + """ + d1 = Document.objects.create( + title="invoice", + content="the thing i bought at a shop and paid with bank account", + checksum="A", + pk=1, + ) + d2 = Document.objects.create( + title="bank statement 1", + content="things i paid for in august", + pk=2, + checksum="B", + ) + d3 = Document.objects.create( + title="bank statement 3", + content="things i paid for in september", + pk=3, + checksum="C", + # specific time zone aware date + added=timezone.make_aware(datetime.datetime(2023, 12, 1)), + ) + # refresh doc instance to ensure we operate on date objects that Django uses + # Django converts dates to UTC + d3.refresh_from_db() + + with index.open_index_writer() as writer: + index.update_document(writer, d1) + index.update_document(writer, d2) + index.update_document(writer, d3) + + response = self.client.get("/api/documents/?query=added:20231201") + results = response.data["results"] + + # Expect 1 document returned + self.assertEqual(len(results), 1) + + for idx, subset in enumerate( + [{"id": 3, "title": "bank statement 3"}], + ): + result = results[idx] + # Assert subset in results + self.assertDictEqual(result, {**result, **subset}) + def test_search_added_in_last_month(self): """ GIVEN: