Merge branch 'dev' into feature-ai
@@ -287,15 +287,75 @@ class DelayedQuery:
         self.first_score = None
         self.filter_queryset = filter_queryset
         self.suggested_correction = None
+        self._manual_hits_cache: list | None = None
 
     def __len__(self) -> int:
+        if self._manual_sort_requested():
+            manual_hits = self._manual_hits()
+            return len(manual_hits)
+
         page = self[0:1]
         return len(page)
 
+    def _manual_sort_requested(self):
+        ordering = self.query_params.get("ordering", "")
+        return ordering.lstrip("-").startswith("custom_field_")
+
+    def _manual_hits(self):
+        if self._manual_hits_cache is None:
+            q, mask, suggested_correction = self._get_query()
+            self.suggested_correction = suggested_correction
+
+            results = self.searcher.search(
+                q,
+                mask=mask,
+                filter=MappedDocIdSet(self.filter_queryset, self.searcher.ixreader),
+                limit=None,
+            )
+            results.fragmenter = highlight.ContextFragmenter(surround=50)
+            results.formatter = HtmlFormatter(tagname="span", between=" ... ")
+
+            if not self.first_score and len(results) > 0:
+                self.first_score = results[0].score
+
+            if self.first_score:
+                results.top_n = [
+                    (
+                        (hit[0] / self.first_score) if self.first_score else None,
+                        hit[1],
+                    )
+                    for hit in results.top_n
+                ]
+
+            hits_by_id = {hit["id"]: hit for hit in results}
+            matching_ids = list(hits_by_id.keys())
+
+            ordered_ids = list(
+                self.filter_queryset.filter(id__in=matching_ids).values_list(
+                    "id",
+                    flat=True,
+                ),
+            )
+            ordered_ids = list(dict.fromkeys(ordered_ids))
+
+            self._manual_hits_cache = [
+                hits_by_id[_id] for _id in ordered_ids if _id in hits_by_id
+            ]
+        return self._manual_hits_cache
+
     def __getitem__(self, item):
         if item.start in self.saved_results:
             return self.saved_results[item.start]
 
+        if self._manual_sort_requested():
+            manual_hits = self._manual_hits()
+            start = 0 if item.start is None else item.start
+            stop = item.stop
+            hits = manual_hits[start:stop] if stop is not None else manual_hits[start:]
+            page = ManualResultsPage(hits)
+            self.saved_results[start] = page
+            return page
+
         q, mask, suggested_correction = self._get_query()
         self.suggested_correction = suggested_correction
         sortedby, reverse = self._get_query_sortedby()
@@ -315,21 +375,33 @@ class DelayedQuery:
         if not self.first_score and len(page.results) > 0 and sortedby is None:
             self.first_score = page.results[0].score
 
-        page.results.top_n = list(
-            map(
-                lambda hit: (
-                    (hit[0] / self.first_score) if self.first_score else None,
-                    hit[1],
-                ),
-                page.results.top_n,
-            ),
-        )
+        page.results.top_n = [
+            (
+                (hit[0] / self.first_score) if self.first_score else None,
+                hit[1],
+            )
+            for hit in page.results.top_n
+        ]
 
         self.saved_results[item.start] = page
 
         return page
 
 
+class ManualResultsPage(list):
+    def __init__(self, hits):
+        super().__init__(hits)
+        self.results = ManualResults(hits)
+
+
+class ManualResults:
+    def __init__(self, hits):
+        self._docnums = [hit.docnum for hit in hits]
+
+    def docs(self):
+        return self._docnums
+
+
 class LocalDateParser(English):
     def reverse_timezone_offset(self, d):
         return (d.replace(tzinfo=django_timezone.get_current_timezone())).astimezone(
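
Note (illustration only, not part of the patch): when ordering=custom_field_<id> is
requested, _manual_hits() runs the full-text query unpaginated and reorders the hits
to follow the already-sorted Django queryset, presumably because the Whoosh index has
no sortable column for dynamic custom fields. The core reordering idiom, as a
standalone Python sketch with made-up ids:

    # hits_by_id: document id -> search hit, initially in relevance order.
    hits_by_id = {12: "hit-12", 7: "hit-7", 31: "hit-31"}

    # Id order as returned by the database (custom-field order); the join on
    # custom field instances can yield duplicates, which dict.fromkeys() drops
    # while preserving first-seen order.
    ordered_ids = [7, 31, 7, 12]
    ordered_ids = list(dict.fromkeys(ordered_ids))  # [7, 31, 12]

    # Keep only ids that actually matched the query, now in database order.
    ordered_hits = [hits_by_id[i] for i in ordered_ids if i in hits_by_id]
    assert ordered_hits == ["hit-7", "hit-31", "hit-12"]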
||||
@@ -48,12 +48,13 @@ if settings.AUDIT_LOG_ENABLED:
 
 
 @contextmanager
-def disable_signal(sig, receiver, sender) -> Generator:
+def disable_signal(sig, receiver, sender, *, weak: bool | None = None) -> Generator:
     try:
         sig.disconnect(receiver=receiver, sender=sender)
         yield
     finally:
-        sig.connect(receiver=receiver, sender=sender)
+        kwargs = {"weak": weak} if weak is not None else {}
+        sig.connect(receiver=receiver, sender=sender, **kwargs)
 
 
 class Command(CryptMixin, BaseCommand):
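
Note (hypothetical usage, not from the patch): the new weak keyword lets a caller
restore a signal connection with the same weak= flag it was originally registered
with, instead of falling back to Django's default of weak=True. A minimal sketch
with stand-in names:

    from django.dispatch import Signal

    demo_signal = Signal()                    # stand-in signal

    def demo_receiver(sender, **kwargs):      # stand-in receiver
        print("fired")

    demo_signal.connect(demo_receiver, weak=False)

    with disable_signal(demo_signal, receiver=demo_receiver, sender=None, weak=False):
        demo_signal.send(sender=None)         # receiver does not fire here
    demo_signal.send(sender=None)             # prints "fired"; reconnected weak=False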
@@ -258,16 +259,19 @@ class Command(CryptMixin, BaseCommand):
                 post_save,
                 receiver=update_filename_and_move_files,
                 sender=Document,
+                weak=False,
             ),
             disable_signal(
                 m2m_changed,
                 receiver=update_filename_and_move_files,
                 sender=Document.tags.through,
+                weak=False,
             ),
             disable_signal(
                 post_save,
                 receiver=update_filename_and_move_files,
                 sender=CustomFieldInstance,
+                weak=False,
             ),
             disable_signal(
                 post_save,
@@ -396,9 +396,9 @@ class CannotMoveFilesException(Exception):
 
 
 # should be disabled in /src/documents/management/commands/document_importer.py handle
-@receiver(models.signals.post_save, sender=CustomFieldInstance)
-@receiver(models.signals.m2m_changed, sender=Document.tags.through)
-@receiver(models.signals.post_save, sender=Document)
+@receiver(models.signals.post_save, sender=CustomFieldInstance, weak=False)
+@receiver(models.signals.m2m_changed, sender=Document.tags.through, weak=False)
+@receiver(models.signals.post_save, sender=Document, weak=False)
 def update_filename_and_move_files(
     sender,
     instance: Document | CustomFieldInstance,
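
Note (generic illustration, not from the patch): weak=False makes Django keep a
strong reference to the receiver; with the default weak=True, a receiver with no
other references can be garbage-collected and silently dropped. Module-level handlers
like update_filename_and_move_files are not at collection risk, so the point here is
presumably to keep the registration identical across the importer's
disconnect/reconnect cycle:

    import gc
    from django.dispatch import Signal

    demo = Signal()

    def make_handler():
        def handler(sender, **kwargs):
            print("handled")
        return handler

    demo.connect(make_handler())               # weak=True default; no strong reference
    gc.collect()
    demo.send(sender=None)                     # prints nothing: receiver was collected

    demo.connect(make_handler(), weak=False)   # signal itself keeps the handler alive
    demo.send(sender=None)                     # prints "handled"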
@@ -89,6 +89,65 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
         self.assertEqual(len(results), 0)
         self.assertCountEqual(response.data["all"], [])
 
+    def test_search_custom_field_ordering(self):
+        custom_field = CustomField.objects.create(
+            name="Sortable field",
+            data_type=CustomField.FieldDataType.INT,
+        )
+        d1 = Document.objects.create(
+            title="first",
+            content="match",
+            checksum="A1",
+        )
+        d2 = Document.objects.create(
+            title="second",
+            content="match",
+            checksum="B2",
+        )
+        d3 = Document.objects.create(
+            title="third",
+            content="match",
+            checksum="C3",
+        )
+        CustomFieldInstance.objects.create(
+            document=d1,
+            field=custom_field,
+            value_int=30,
+        )
+        CustomFieldInstance.objects.create(
+            document=d2,
+            field=custom_field,
+            value_int=10,
+        )
+        CustomFieldInstance.objects.create(
+            document=d3,
+            field=custom_field,
+            value_int=20,
+        )
+
+        with AsyncWriter(index.open_index()) as writer:
+            index.update_document(writer, d1)
+            index.update_document(writer, d2)
+            index.update_document(writer, d3)
+
+        response = self.client.get(
+            f"/api/documents/?query=match&ordering=custom_field_{custom_field.pk}",
+        )
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        self.assertEqual(
+            [doc["id"] for doc in response.data["results"]],
+            [d2.id, d3.id, d1.id],
+        )
+
+        response = self.client.get(
+            f"/api/documents/?query=match&ordering=-custom_field_{custom_field.pk}",
+        )
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        self.assertEqual(
+            [doc["id"] for doc in response.data["results"]],
+            [d1.id, d3.id, d2.id],
+        )
+
     def test_search_multi_page(self):
         with AsyncWriter(index.open_index()) as writer:
             for i in range(55):
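
Note (sanity check, not part of the patch): the assertions above follow directly
from the custom-field values the test assigns (d1=30, d2=10, d3=20):

    values = {"d1": 30, "d2": 10, "d3": 20}
    ascending = sorted(values, key=values.get)
    assert ascending == ["d2", "d3", "d1"]        # ordering=custom_field_<id>
    assert ascending[::-1] == ["d1", "d3", "d2"]  # ordering=-custom_field_<id>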
@@ -54,8 +54,8 @@ class TestCustomAccountAdapter(TestCase):
         # False because request host is not in allowed hosts
         self.assertFalse(adapter.is_safe_url(url))
 
-    @mock.patch("allauth.core.ratelimit._consume_rate", return_value=True)
-    def test_pre_authenticate(self, mock_consume_rate):
+    @mock.patch("allauth.core.internal.ratelimit.consume", return_value=True)
+    def test_pre_authenticate(self, mock_consume):
         adapter = get_adapter()
         request = HttpRequest()
         request.get_host = mock.Mock(return_value="example.com")
@@ -1,6 +1,6 @@
 from typing import Final
 
-__version__: Final[tuple[int, int, int]] = (2, 19, 5)
+__version__: Final[tuple[int, int, int]] = (2, 19, 6)
 # Version string like X.Y.Z
 __full_version_str__: Final[str] = ".".join(map(str, __version__))
 # Version string like X.Y
@@ -55,7 +55,7 @@ Content-Transfer-Encoding: 7bit
 <p>Some Text</p>
 <p>
   <img src="cid:part1.pNdUSz0s.D3NqVtPg@example.de" alt="Has to be rewritten to work..">
-  <img src="https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png" alt="This image should not be shown.">
+  <img src="https://docs.paperless-ngx.com/assets/logo_full_white.svg" alt="This image should not be shown.">
 </p>
 
 <p>and an embedded image.<br>
@@ -6,7 +6,7 @@
 <p>Some Text</p>
 <p>
   <img src="cid:part1.pNdUSz0s.D3NqVtPg@example.de" alt="Has to be rewritten to work..">
-  <img src="https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png" alt="This image should not be shown.">
+  <img src="https://docs.paperless-ngx.com/assets/logo_full_white.svg" alt="This image should not be shown.">
 </p>
 
 <p>and an embedded image.<br>
@@ -2,7 +2,6 @@ import os
 import shutil
 import subprocess
 import tempfile
-import time
 from pathlib import Path
 
 import httpx
@@ -54,34 +53,6 @@ class TestUrlCanary:
     Verify certain URLs are still available so testing is valid still
     """
 
-    @classmethod
-    def _fetch_wikimedia(cls, url: str) -> httpx.Response:
-        """
-        Wikimedia occasionally throttles automated requests (HTTP 429). Retry a few
-        times with a short backoff so the tests stay stable, and skip if throttling
-        persists.
-        """
-        last_resp: httpx.Response | None = None
-        # Wikimedia rejects requests without a browser-like User-Agent header and returns 403.
-        headers = {
-            "User-Agent": (
-                "Mozilla/5.0 (X11; Linux x86_64) "
-                "AppleWebKit/537.36 (KHTML, like Gecko) "
-                "Chrome/123.0.0.0 Safari/537.36"
-            ),
-        }
-        for delay in (0, 1, 2):
-            resp = httpx.get(url, headers=headers, timeout=30.0)
-            if resp.status_code != httpx.codes.TOO_MANY_REQUESTS:
-                return resp
-            last_resp = resp
-            time.sleep(delay)
-
-        pytest.skip(
-            "Wikimedia throttled the canary request with HTTP 429; try rerunning later.",
-        )
-        return last_resp  # pragma: no cover
-
     def test_online_image_exception_on_not_available(self):
         """
         GIVEN:
@@ -96,8 +67,8 @@ class TestUrlCanary:
         whether this image stays online forever, so here we check if we can detect if is not
         available anymore.
         """
-        resp = self._fetch_wikimedia(
-            "https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png",
+        resp = httpx.get(
+            "https://docs.paperless-ngx.com/assets/non-existent.png",
         )
         with pytest.raises(httpx.HTTPStatusError) as exec_info:
             resp.raise_for_status()
@@ -119,8 +90,8 @@ class TestUrlCanary:
         """
 
         # Now check the URL used in samples/sample.html
-        resp = self._fetch_wikimedia(
-            "https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png",
+        resp = httpx.get(
+            "https://docs.paperless-ngx.com/assets/logo_full_white.svg",
         )
         resp.raise_for_status()
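
Note (reading of the change, not stated in the patch): the canary tests now point at
assets the project controls on docs.paperless-ngx.com, which presumably are not
rate-limited the way Wikimedia was, so the retry/skip helper and its time import can
go. The status-check pattern the tests rely on, as a small httpx sketch:

    import httpx

    resp = httpx.get("https://docs.paperless-ngx.com/assets/non-existent.png")
    try:
        resp.raise_for_status()                # any 4xx/5xx raises
    except httpx.HTTPStatusError as err:
        print(err.response.status_code)        # expected: 404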