Fix: Convert search dates to UTC in advanced search (#4891)

* Index documents using local timezone

* Add local date parser
This commit is contained in:
Adam Bogdał 2023-12-11 18:32:43 +01:00 committed by GitHub
parent fbf1a051a2
commit af0817ab74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 80 additions and 1 deletions

View File

@ -25,9 +25,11 @@ from whoosh.index import open_dir
from whoosh.qparser import MultifieldParser
from whoosh.qparser import QueryParser
from whoosh.qparser.dateparse import DateParserPlugin
from whoosh.qparser.dateparse import English
from whoosh.scoring import TF_IDF
from whoosh.searching import ResultsPage
from whoosh.searching import Searcher
from whoosh.util.times import timespan
from whoosh.writing import AsyncWriter
# from documents.models import CustomMetadata
@ -356,6 +358,22 @@ class DelayedQuery:
return page
class LocalDateParser(English):
def reverse_timezone_offset(self, d):
return (d.replace(tzinfo=timezone.get_current_timezone())).astimezone(
timezone.utc,
)
def date_from(self, *args, **kwargs):
d = super().date_from(*args, **kwargs)
if isinstance(d, timespan):
d.start = self.reverse_timezone_offset(d.start)
d.end = self.reverse_timezone_offset(d.end)
else:
d = self.reverse_timezone_offset(d)
return d
class DelayedFullTextQuery(DelayedQuery):
def _get_query(self):
q_str = self.query_params["query"]
@ -371,7 +389,12 @@ class DelayedFullTextQuery(DelayedQuery):
],
self.searcher.ixreader.schema,
)
qp.add_plugin(DateParserPlugin(basedate=timezone.now()))
qp.add_plugin(
DateParserPlugin(
basedate=timezone.now(),
dateparser=LocalDateParser(),
),
)
q = qp.parse(q_str)
corrected = self.searcher.correct_query(q, q_str)

View File

@ -964,6 +964,62 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
# Assert subset in results
self.assertDictEqual(result, {**result, **subset})
@override_settings(
TIME_ZONE="Europe/Sofia",
)
def test_search_added_specific_date_with_timezone_ahead(self):
"""
GIVEN:
- Two documents added right now
- One document added on a specific date
- The timezone is behind UTC time (+2)
WHEN:
- Query for documents added on a specific date
THEN:
- The one document is returned
"""
d1 = Document.objects.create(
title="invoice",
content="the thing i bought at a shop and paid with bank account",
checksum="A",
pk=1,
)
d2 = Document.objects.create(
title="bank statement 1",
content="things i paid for in august",
pk=2,
checksum="B",
)
d3 = Document.objects.create(
title="bank statement 3",
content="things i paid for in september",
pk=3,
checksum="C",
# specific time zone aware date
added=timezone.make_aware(datetime.datetime(2023, 12, 1)),
)
# refresh doc instance to ensure we operate on date objects that Django uses
# Django converts dates to UTC
d3.refresh_from_db()
with index.open_index_writer() as writer:
index.update_document(writer, d1)
index.update_document(writer, d2)
index.update_document(writer, d3)
response = self.client.get("/api/documents/?query=added:20231201")
results = response.data["results"]
# Expect 1 document returned
self.assertEqual(len(results), 1)
for idx, subset in enumerate(
[{"id": 3, "title": "bank statement 3"}],
):
result = results[idx]
# Assert subset in results
self.assertDictEqual(result, {**result, **subset})
def test_search_added_in_last_month(self):
"""
GIVEN: