mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
some search index optimizations
This commit is contained in:
parent
56bd966c02
commit
8bf4241b16
@ -1,7 +1,5 @@
|
|||||||
from django.contrib import admin
|
from django.contrib import admin
|
||||||
from whoosh.writing import AsyncWriter
|
|
||||||
|
|
||||||
from . import index
|
|
||||||
from .models import Correspondent, Document, DocumentType, Tag, \
|
from .models import Correspondent, Document, DocumentType, Tag, \
|
||||||
SavedView, SavedViewFilterRule
|
SavedView, SavedViewFilterRule
|
||||||
|
|
||||||
@ -84,17 +82,21 @@ class DocumentAdmin(admin.ModelAdmin):
|
|||||||
created_.short_description = "Created"
|
created_.short_description = "Created"
|
||||||
|
|
||||||
def delete_queryset(self, request, queryset):
|
def delete_queryset(self, request, queryset):
|
||||||
ix = index.open_index()
|
from documents import index
|
||||||
with AsyncWriter(ix) as writer:
|
|
||||||
|
with index.open_index_writer() as writer:
|
||||||
for o in queryset:
|
for o in queryset:
|
||||||
index.remove_document(writer, o)
|
index.remove_document(writer, o)
|
||||||
|
|
||||||
super(DocumentAdmin, self).delete_queryset(request, queryset)
|
super(DocumentAdmin, self).delete_queryset(request, queryset)
|
||||||
|
|
||||||
def delete_model(self, request, obj):
|
def delete_model(self, request, obj):
|
||||||
|
from documents import index
|
||||||
index.remove_document_from_index(obj)
|
index.remove_document_from_index(obj)
|
||||||
super(DocumentAdmin, self).delete_model(request, obj)
|
super(DocumentAdmin, self).delete_model(request, obj)
|
||||||
|
|
||||||
def save_model(self, request, obj, form, change):
|
def save_model(self, request, obj, form, change):
|
||||||
|
from documents import index
|
||||||
index.add_or_update_document(obj)
|
index.add_or_update_document(obj)
|
||||||
super(DocumentAdmin, self).save_model(request, obj, form, change)
|
super(DocumentAdmin, self).save_model(request, obj, form, change)
|
||||||
|
|
||||||
|
@ -2,9 +2,7 @@ import itertools
|
|||||||
|
|
||||||
from django.db.models import Q
|
from django.db.models import Q
|
||||||
from django_q.tasks import async_task
|
from django_q.tasks import async_task
|
||||||
from whoosh.writing import AsyncWriter
|
|
||||||
|
|
||||||
from documents import index
|
|
||||||
from documents.models import Document, Correspondent, DocumentType
|
from documents.models import Document, Correspondent, DocumentType
|
||||||
|
|
||||||
|
|
||||||
@ -99,8 +97,9 @@ def modify_tags(doc_ids, add_tags, remove_tags):
|
|||||||
def delete(doc_ids):
|
def delete(doc_ids):
|
||||||
Document.objects.filter(id__in=doc_ids).delete()
|
Document.objects.filter(id__in=doc_ids).delete()
|
||||||
|
|
||||||
ix = index.open_index()
|
from documents import index
|
||||||
with AsyncWriter(ix) as writer:
|
|
||||||
|
with index.open_index_writer() as writer:
|
||||||
for id in doc_ids:
|
for id in doc_ids:
|
||||||
index.remove_document_by_id(writer, id)
|
index.remove_document_by_id(writer, id)
|
||||||
|
|
||||||
|
@ -86,6 +86,22 @@ def open_index(recreate=False):
|
|||||||
return create_in(settings.INDEX_DIR, get_schema())
|
return create_in(settings.INDEX_DIR, get_schema())
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def open_index_writer(ix=None, optimize=False):
    """Yield an ``AsyncWriter`` for the search index and finish it on exit.

    :param ix: index to write to. When ``None`` (the default) the index is
        obtained from ``open_index()``.
    :param optimize: forwarded to ``writer.commit()`` when the ``with`` body
        completes without raising.

    If the ``with`` body raises an ``Exception``, it is logged, the writer is
    cancelled and nothing is committed; the exception is not propagated to
    the caller (same best-effort contract as before). Any other
    ``BaseException`` (e.g. ``KeyboardInterrupt``) also cancels the writer
    and is re-raised.
    """
    # Explicit None check so only a missing argument triggers the default.
    if ix is None:
        ix = open_index()
    writer = AsyncWriter(ix)

    try:
        yield writer
    except Exception as e:
        logger.exception(str(e))
        # Discard pending changes. A cancelled writer must not be committed
        # afterwards, so the commit lives in the ``else`` branch rather than
        # in ``finally``.
        writer.cancel()
    except BaseException:
        # Interrupts / interpreter shutdown: release the writer, then let
        # the exception continue unwinding.
        writer.cancel()
        raise
    else:
        writer.commit(optimize=optimize)
|
||||||
|
|
||||||
|
|
||||||
def update_document(writer, doc):
|
def update_document(writer, doc):
|
||||||
tags = ",".join([t.name for t in doc.tags.all()])
|
tags = ",".join([t.name for t in doc.tags.all()])
|
||||||
writer.update_document(
|
writer.update_document(
|
||||||
@ -110,14 +126,12 @@ def remove_document_by_id(writer, doc_id):
|
|||||||
|
|
||||||
|
|
||||||
def add_or_update_document(document):
|
def add_or_update_document(document):
|
||||||
ix = open_index()
|
with open_index_writer() as writer:
|
||||||
with AsyncWriter(ix) as writer:
|
|
||||||
update_document(writer, document)
|
update_document(writer, document)
|
||||||
|
|
||||||
|
|
||||||
def remove_document_from_index(document):
|
def remove_document_from_index(document):
|
||||||
ix = open_index()
|
with open_index_writer() as writer:
|
||||||
with AsyncWriter(ix) as writer:
|
|
||||||
remove_document(writer, document)
|
remove_document(writer, document)
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ from django.dispatch import receiver
|
|||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
from filelock import FileLock
|
from filelock import FileLock
|
||||||
|
|
||||||
from .. import index, matching
|
from .. import matching
|
||||||
from ..file_handling import delete_empty_directories, \
|
from ..file_handling import delete_empty_directories, \
|
||||||
create_source_path_directory, \
|
create_source_path_directory, \
|
||||||
generate_unique_filename
|
generate_unique_filename
|
||||||
@ -305,4 +305,6 @@ def set_log_entry(sender, document=None, logging_group=None, **kwargs):
|
|||||||
|
|
||||||
|
|
||||||
def add_to_index(sender, document, **kwargs):
|
def add_to_index(sender, document, **kwargs):
|
||||||
|
from documents import index
|
||||||
|
|
||||||
index.add_or_update_document(document)
|
index.add_or_update_document(document)
|
||||||
|
@ -4,6 +4,7 @@ from django.contrib.admin.sites import AdminSite
|
|||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from documents import index
|
||||||
from documents.admin import DocumentAdmin
|
from documents.admin import DocumentAdmin
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.tests.utils import DirectoriesMixin
|
from documents.tests.utils import DirectoriesMixin
|
||||||
@ -11,37 +12,52 @@ from documents.tests.utils import DirectoriesMixin
|
|||||||
|
|
||||||
class TestDocumentAdmin(DirectoriesMixin, TestCase):
|
class TestDocumentAdmin(DirectoriesMixin, TestCase):
|
||||||
|
|
||||||
|
def get_document_from_index(self, doc):
    """Look up ``doc`` in the search index by its ``id``.

    Returns whatever ``searcher.document(id=...)`` yields for the document;
    the tests in this class expect ``None`` once the document has been
    removed from the index.
    """
    search_index = index.open_index()
    with search_index.searcher() as index_searcher:
        stored_fields = index_searcher.document(id=doc.id)
    return stored_fields
|
||||||
|
|
||||||
def setUp(self) -> None:
|
def setUp(self) -> None:
|
||||||
super(TestDocumentAdmin, self).setUp()
|
super(TestDocumentAdmin, self).setUp()
|
||||||
self.doc_admin = DocumentAdmin(model=Document, admin_site=AdminSite())
|
self.doc_admin = DocumentAdmin(model=Document, admin_site=AdminSite())
|
||||||
|
|
||||||
@mock.patch("documents.admin.index.add_or_update_document")
|
def test_save_model(self):
|
||||||
def test_save_model(self, m):
|
|
||||||
doc = Document.objects.create(title="test")
|
doc = Document.objects.create(title="test")
|
||||||
|
|
||||||
doc.title = "new title"
|
doc.title = "new title"
|
||||||
self.doc_admin.save_model(None, doc, None, None)
|
self.doc_admin.save_model(None, doc, None, None)
|
||||||
self.assertEqual(Document.objects.get(id=doc.id).title, "new title")
|
self.assertEqual(Document.objects.get(id=doc.id).title, "new title")
|
||||||
m.assert_called_once()
|
self.assertEqual(self.get_document_from_index(doc)['title'], "new title")
|
||||||
|
|
||||||
@mock.patch("documents.admin.index.remove_document")
|
def test_delete_model(self):
|
||||||
def test_delete_model(self, m):
|
|
||||||
doc = Document.objects.create(title="test")
|
doc = Document.objects.create(title="test")
|
||||||
self.doc_admin.delete_model(None, doc)
|
index.add_or_update_document(doc)
|
||||||
self.assertRaises(Document.DoesNotExist, Document.objects.get, id=doc.id)
|
self.assertIsNotNone(self.get_document_from_index(doc))
|
||||||
m.assert_called_once()
|
|
||||||
|
|
||||||
@mock.patch("documents.admin.index.remove_document")
|
self.doc_admin.delete_model(None, doc)
|
||||||
def test_delete_queryset(self, m):
|
|
||||||
|
self.assertRaises(Document.DoesNotExist, Document.objects.get, id=doc.id)
|
||||||
|
self.assertIsNone(self.get_document_from_index(doc))
|
||||||
|
|
||||||
|
def test_delete_queryset(self):
|
||||||
|
docs = []
|
||||||
for i in range(42):
|
for i in range(42):
|
||||||
Document.objects.create(title="Many documents with the same title", checksum=f"{i:02}")
|
doc = Document.objects.create(title="Many documents with the same title", checksum=f"{i:02}")
|
||||||
|
docs.append(doc)
|
||||||
|
index.add_or_update_document(doc)
|
||||||
|
|
||||||
self.assertEqual(Document.objects.count(), 42)
|
self.assertEqual(Document.objects.count(), 42)
|
||||||
|
|
||||||
|
for doc in docs:
|
||||||
|
self.assertIsNotNone(self.get_document_from_index(doc))
|
||||||
|
|
||||||
self.doc_admin.delete_queryset(None, Document.objects.all())
|
self.doc_admin.delete_queryset(None, Document.objects.all())
|
||||||
|
|
||||||
self.assertEqual(m.call_count, 42)
|
|
||||||
self.assertEqual(Document.objects.count(), 0)
|
self.assertEqual(Document.objects.count(), 0)
|
||||||
|
|
||||||
|
for doc in docs:
|
||||||
|
self.assertIsNone(self.get_document_from_index(doc))
|
||||||
|
|
||||||
def test_created(self):
|
def test_created(self):
|
||||||
doc = Document.objects.create(title="test", created=timezone.datetime(2020, 4, 12))
|
doc = Document.objects.create(title="test", created=timezone.datetime(2020, 4, 12))
|
||||||
self.assertEqual(self.doc_admin.created_(doc), "2020-04-12")
|
self.assertEqual(self.doc_admin.created_(doc), "2020-04-12")
|
||||||
|
@ -32,7 +32,6 @@ from rest_framework.viewsets import (
|
|||||||
ViewSet
|
ViewSet
|
||||||
)
|
)
|
||||||
|
|
||||||
import documents.index as index
|
|
||||||
from paperless.db import GnuPG
|
from paperless.db import GnuPG
|
||||||
from paperless.views import StandardPagination
|
from paperless.views import StandardPagination
|
||||||
from .classifier import load_classifier
|
from .classifier import load_classifier
|
||||||
@ -176,10 +175,12 @@ class DocumentViewSet(RetrieveModelMixin,
|
|||||||
def update(self, request, *args, **kwargs):
|
def update(self, request, *args, **kwargs):
|
||||||
response = super(DocumentViewSet, self).update(
|
response = super(DocumentViewSet, self).update(
|
||||||
request, *args, **kwargs)
|
request, *args, **kwargs)
|
||||||
|
from documents import index
|
||||||
index.add_or_update_document(self.get_object())
|
index.add_or_update_document(self.get_object())
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def destroy(self, request, *args, **kwargs):
|
def destroy(self, request, *args, **kwargs):
|
||||||
|
from documents import index
|
||||||
index.remove_document_from_index(self.get_object())
|
index.remove_document_from_index(self.get_object())
|
||||||
return super(DocumentViewSet, self).destroy(request, *args, **kwargs)
|
return super(DocumentViewSet, self).destroy(request, *args, **kwargs)
|
||||||
|
|
||||||
@ -501,10 +502,6 @@ class SearchView(APIView):
|
|||||||
|
|
||||||
permission_classes = (IsAuthenticated,)
|
permission_classes = (IsAuthenticated,)
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super(SearchView, self).__init__(*args, **kwargs)
|
|
||||||
self.ix = index.open_index()
|
|
||||||
|
|
||||||
def add_infos_to_hit(self, r):
|
def add_infos_to_hit(self, r):
|
||||||
try:
|
try:
|
||||||
doc = Document.objects.get(id=r['id'])
|
doc = Document.objects.get(id=r['id'])
|
||||||
@ -525,6 +522,7 @@ class SearchView(APIView):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def get(self, request, format=None):
|
def get(self, request, format=None):
|
||||||
|
from documents import index
|
||||||
|
|
||||||
if 'query' in request.query_params:
|
if 'query' in request.query_params:
|
||||||
query = request.query_params['query']
|
query = request.query_params['query']
|
||||||
@ -554,8 +552,10 @@ class SearchView(APIView):
|
|||||||
if page < 1:
|
if page < 1:
|
||||||
page = 1
|
page = 1
|
||||||
|
|
||||||
|
ix = index.open_index()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with index.query_page(self.ix, page, query, more_like_id, more_like_content) as (result_page, corrected_query): # NOQA: E501
|
with index.query_page(ix, page, query, more_like_id, more_like_content) as (result_page, corrected_query): # NOQA: E501
|
||||||
return Response(
|
return Response(
|
||||||
{'count': len(result_page),
|
{'count': len(result_page),
|
||||||
'page': result_page.pagenum,
|
'page': result_page.pagenum,
|
||||||
@ -570,10 +570,6 @@ class SearchAutoCompleteView(APIView):
|
|||||||
|
|
||||||
permission_classes = (IsAuthenticated,)
|
permission_classes = (IsAuthenticated,)
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super(SearchAutoCompleteView, self).__init__(*args, **kwargs)
|
|
||||||
self.ix = index.open_index()
|
|
||||||
|
|
||||||
def get(self, request, format=None):
|
def get(self, request, format=None):
|
||||||
if 'term' in request.query_params:
|
if 'term' in request.query_params:
|
||||||
term = request.query_params['term']
|
term = request.query_params['term']
|
||||||
@ -587,7 +583,11 @@ class SearchAutoCompleteView(APIView):
|
|||||||
else:
|
else:
|
||||||
limit = 10
|
limit = 10
|
||||||
|
|
||||||
return Response(index.autocomplete(self.ix, term, limit))
|
from documents import index
|
||||||
|
|
||||||
|
ix = index.open_index()
|
||||||
|
|
||||||
|
return Response(index.autocomplete(ix, term, limit))
|
||||||
|
|
||||||
|
|
||||||
class StatisticsView(APIView):
|
class StatisticsView(APIView):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user