Fix: Convert search dates to UTC in advanced search (#4891)

* Index documents using local timezone

* Add local date parser
This commit is contained in:
Adam Bogdał 2023-12-11 18:32:43 +01:00 committed by GitHub
parent fbf1a051a2
commit af0817ab74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 80 additions and 1 deletions

View File

@ -25,9 +25,11 @@ from whoosh.index import open_dir
from whoosh.qparser import MultifieldParser from whoosh.qparser import MultifieldParser
from whoosh.qparser import QueryParser from whoosh.qparser import QueryParser
from whoosh.qparser.dateparse import DateParserPlugin from whoosh.qparser.dateparse import DateParserPlugin
from whoosh.qparser.dateparse import English
from whoosh.scoring import TF_IDF from whoosh.scoring import TF_IDF
from whoosh.searching import ResultsPage from whoosh.searching import ResultsPage
from whoosh.searching import Searcher from whoosh.searching import Searcher
from whoosh.util.times import timespan
from whoosh.writing import AsyncWriter from whoosh.writing import AsyncWriter
# from documents.models import CustomMetadata # from documents.models import CustomMetadata
@ -356,6 +358,22 @@ class DelayedQuery:
return page return page
class LocalDateParser(English):
    """
    English date parser that treats parsed query dates as local time.

    Dates produced by the stock ``English`` parser are re-labelled with
    Django's current timezone and then converted to UTC before being
    handed back to the query parser.
    """

    def reverse_timezone_offset(self, d):
        """
        Stamp ``d`` with the current Django timezone and return it in UTC.

        Any ``tzinfo`` already present on ``d`` is overwritten rather than
        converted: the wall-clock fields are kept as-is and reinterpreted
        as local time.
        """
        return (d.replace(tzinfo=timezone.get_current_timezone())).astimezone(
            timezone.utc,
        )

    def date_from(self, *args, **kwargs):
        """
        Parse a date like ``English.date_from`` and shift the result to UTC.

        Returns whatever the parent returns, with a ``timespan`` having its
        ``start`` and ``end`` converted in place and any other non-``None``
        value converted through ``reverse_timezone_offset``.
        """
        d = super().date_from(*args, **kwargs)
        if d is None:
            # The parent parser signals "this text is not a date" with None;
            # pass that through untouched instead of failing on None.replace().
            # NOTE(review): confirm against the whoosh version in use.
            return None
        if isinstance(d, timespan):
            d.start = self.reverse_timezone_offset(d.start)
            d.end = self.reverse_timezone_offset(d.end)
        else:
            d = self.reverse_timezone_offset(d)
        return d
class DelayedFullTextQuery(DelayedQuery): class DelayedFullTextQuery(DelayedQuery):
def _get_query(self): def _get_query(self):
q_str = self.query_params["query"] q_str = self.query_params["query"]
@ -371,7 +389,12 @@ class DelayedFullTextQuery(DelayedQuery):
], ],
self.searcher.ixreader.schema, self.searcher.ixreader.schema,
) )
qp.add_plugin(DateParserPlugin(basedate=timezone.now())) qp.add_plugin(
DateParserPlugin(
basedate=timezone.now(),
dateparser=LocalDateParser(),
),
)
q = qp.parse(q_str) q = qp.parse(q_str)
corrected = self.searcher.correct_query(q, q_str) corrected = self.searcher.correct_query(q, q_str)

View File

@ -964,6 +964,62 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
# Assert subset in results # Assert subset in results
self.assertDictEqual(result, {**result, **subset}) self.assertDictEqual(result, {**result, **subset})
@override_settings(
    TIME_ZONE="Europe/Sofia",
)
def test_search_added_specific_date_with_timezone_ahead(self):
    """
    GIVEN:
        - Two documents added right now
        - One document added on a specific date
        - The timezone is ahead of UTC time (+2)
    WHEN:
        - Query for documents added on a specific date
    THEN:
        - The one document is returned
    """
    recent_invoice = Document.objects.create(
        pk=1,
        checksum="A",
        title="invoice",
        content="the thing i bought at a shop and paid with bank account",
    )
    recent_statement = Document.objects.create(
        pk=2,
        checksum="B",
        title="bank statement 1",
        content="things i paid for in august",
    )
    # Midnight on the target day, made aware in the overridden time zone
    added_on = timezone.make_aware(datetime.datetime(2023, 12, 1))
    dated_statement = Document.objects.create(
        pk=3,
        checksum="C",
        title="bank statement 3",
        content="things i paid for in september",
        added=added_on,
    )
    # Reload so the instance carries the date exactly as Django stores it
    # (Django converts dates to UTC)
    dated_statement.refresh_from_db()

    with index.open_index_writer() as writer:
        for doc in (recent_invoice, recent_statement, dated_statement):
            index.update_document(writer, doc)

    response = self.client.get("/api/documents/?query=added:20231201")
    results = response.data["results"]
    expected_subsets = [{"id": 3, "title": "bank statement 3"}]

    # Only the document added on the queried day may come back
    self.assertEqual(len(results), 1)

    for result, subset in zip(results, expected_subsets):
        # Every expected key/value must already be present in the result
        self.assertDictEqual(result, {**result, **subset})
def test_search_added_in_last_month(self): def test_search_added_in_last_month(self):
""" """
GIVEN: GIVEN: