mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Fix: Convert search dates to UTC in advanced search (#4891)
* Index documents using local timezone * Add local date parser
This commit is contained in:
parent
fbf1a051a2
commit
af0817ab74
@ -25,9 +25,11 @@ from whoosh.index import open_dir
|
||||
from whoosh.qparser import MultifieldParser
|
||||
from whoosh.qparser import QueryParser
|
||||
from whoosh.qparser.dateparse import DateParserPlugin
|
||||
from whoosh.qparser.dateparse import English
|
||||
from whoosh.scoring import TF_IDF
|
||||
from whoosh.searching import ResultsPage
|
||||
from whoosh.searching import Searcher
|
||||
from whoosh.util.times import timespan
|
||||
from whoosh.writing import AsyncWriter
|
||||
|
||||
# from documents.models import CustomMetadata
|
||||
@ -356,6 +358,22 @@ class DelayedQuery:
|
||||
return page
|
||||
|
||||
|
||||
class LocalDateParser(English):
    """
    English-language date parser that treats the dates it parses as local
    time (the current Django timezone) and converts them to UTC.
    """

    def reverse_timezone_offset(self, d):
        """
        Return ``d`` converted to UTC, reading its wall-clock value as local time.

        The tzinfo of ``d`` is *replaced* (not converted) with the current
        Django timezone, so the date/time fields are kept as-is and only then
        shifted to UTC.
        """
        return (d.replace(tzinfo=timezone.get_current_timezone())).astimezone(
            timezone.utc,
        )

    def date_from(self, *args, **kwargs):
        """
        Parse a date exactly like the parent parser, then convert it to UTC.

        All arguments are forwarded unchanged to the parent ``date_from``.

        Returns:
            - ``None`` unchanged when the parent parser produced no date
            - a ``timespan`` whose ``start`` and ``end`` were converted in place
            - otherwise the single parsed value converted to UTC
        """
        d = super().date_from(*args, **kwargs)
        if d is None:
            # The parent parser yields None for text it cannot interpret as a
            # date; pass that through instead of failing on None.replace(...)
            return d
        if isinstance(d, timespan):
            d.start = self.reverse_timezone_offset(d.start)
            d.end = self.reverse_timezone_offset(d.end)
        else:
            d = self.reverse_timezone_offset(d)
        return d
|
||||
|
||||
|
||||
class DelayedFullTextQuery(DelayedQuery):
|
||||
def _get_query(self):
|
||||
q_str = self.query_params["query"]
|
||||
@ -371,7 +389,12 @@ class DelayedFullTextQuery(DelayedQuery):
|
||||
],
|
||||
self.searcher.ixreader.schema,
|
||||
)
|
||||
qp.add_plugin(DateParserPlugin(basedate=timezone.now()))
|
||||
qp.add_plugin(
|
||||
DateParserPlugin(
|
||||
basedate=timezone.now(),
|
||||
dateparser=LocalDateParser(),
|
||||
),
|
||||
)
|
||||
q = qp.parse(q_str)
|
||||
|
||||
corrected = self.searcher.correct_query(q, q_str)
|
||||
|
@ -964,6 +964,62 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
|
||||
# Assert subset in results
|
||||
self.assertDictEqual(result, {**result, **subset})
|
||||
|
||||
@override_settings(
    TIME_ZONE="Europe/Sofia",
)
def test_search_added_specific_date_with_timezone_ahead(self):
    """
    GIVEN:
        - Two documents added right now
        - One document added on a specific date
        - The timezone is ahead of UTC time (+2)
    WHEN:
        - Query for documents added on a specific date
    THEN:
        - The one document is returned
    """
    recent_invoice = Document.objects.create(
        title="invoice",
        content="the thing i bought at a shop and paid with bank account",
        checksum="A",
        pk=1,
    )
    recent_statement = Document.objects.create(
        title="bank statement 1",
        content="things i paid for in august",
        pk=2,
        checksum="B",
    )
    # specific time zone aware date
    specific_added = timezone.make_aware(datetime.datetime(2023, 12, 1))
    dated_statement = Document.objects.create(
        title="bank statement 3",
        content="things i paid for in september",
        pk=3,
        checksum="C",
        added=specific_added,
    )
    # reload so the instance carries the date exactly as Django stores it
    # (Django converts dates to UTC)
    dated_statement.refresh_from_db()

    with index.open_index_writer() as writer:
        for doc in (recent_invoice, recent_statement, dated_statement):
            index.update_document(writer, doc)

    response = self.client.get("/api/documents/?query=added:20231201")
    results = response.data["results"]

    # Expect 1 document returned
    self.assertEqual(len(results), 1)

    expected_subsets = [{"id": 3, "title": "bank statement 3"}]
    for result, subset in zip(results, expected_subsets):
        # Assert subset in results
        self.assertDictEqual(result, {**result, **subset})
|
||||
|
||||
def test_search_added_in_last_month(self):
|
||||
"""
|
||||
GIVEN:
|
||||
|
Loading…
x
Reference in New Issue
Block a user