update index after bulk edit operations #195

This commit is contained in:
jonaswinkler 2020-12-27 17:05:35 +01:00
parent fb83069975
commit 6a70369a77
4 changed files with 59 additions and 26 deletions

View File

@ -1,6 +1,8 @@
from django.db.models import Q
from django_q.tasks import async_task
from whoosh.writing import AsyncWriter
from documents import index
from documents.models import Document, Correspondent, DocumentType
@ -13,6 +15,11 @@ def set_correspondent(doc_ids, correspondent):
affected_docs = [doc.id for doc in qs]
qs.update(correspondent=correspondent)
async_task(
"documents.tasks.bulk_index_documents",
document_ids=affected_docs
)
async_task("documents.tasks.bulk_rename_files", document_ids=affected_docs)
return "OK"
@ -27,6 +34,11 @@ def set_document_type(doc_ids, document_type):
affected_docs = [doc.id for doc in qs]
qs.update(document_type=document_type)
async_task(
"documents.tasks.bulk_index_documents",
document_ids=affected_docs
)
async_task("documents.tasks.bulk_rename_files", document_ids=affected_docs)
return "OK"
@ -44,6 +56,11 @@ def add_tag(doc_ids, tag):
document_id=doc, tag_id=tag) for doc in affected_docs
])
async_task(
"documents.tasks.bulk_index_documents",
document_ids=affected_docs
)
async_task("documents.tasks.bulk_rename_files", document_ids=affected_docs)
return "OK"
@ -61,6 +78,11 @@ def remove_tag(doc_ids, tag):
Q(tag_id=tag)
).delete()
async_task(
"documents.tasks.bulk_index_documents",
document_ids=affected_docs
)
async_task("documents.tasks.bulk_rename_files", document_ids=affected_docs)
return "OK"
@ -69,4 +91,9 @@ def remove_tag(doc_ids, tag):
def delete(doc_ids):
    """Delete the given documents and remove them from the search index.

    The matching rows are deleted from the database first; afterwards every
    id in ``doc_ids`` is removed from the index through one ``AsyncWriter``.

    :param doc_ids: collection of document primary keys. It is used twice
        (once for the database filter, once for the index loop), so it must
        be re-iterable.
    :return: the string ``"OK"``.
    """
    Document.objects.filter(id__in=doc_ids).delete()

    ix = index.open_index()
    with AsyncWriter(ix) as writer:
        # Named ``doc_id`` (not ``id``) so the builtin ``id`` is not shadowed.
        for doc_id in doc_ids:
            index.remove_document_by_id(writer, doc_id)

    return "OK"

View File

@ -87,11 +87,6 @@ def open_index(recreate=False):
def update_document(writer, doc):
# TODO: this line caused many issues all around, since:
# We need to make sure that this method does not get called with
# deserialized documents (i.e., document objects that don't come from
# Django's ORM interfaces directly).
logger.debug("Indexing {}...".format(doc))
tags = ",".join([t.name for t in doc.tags.all()])
writer.update_document(
id=doc.pk,
@ -107,9 +102,11 @@ def update_document(writer, doc):
def remove_document(writer, doc):
    """Remove ``doc`` from the search index using ``writer``.

    :param writer: index writer; passed on to ``remove_document_by_id``.
    :param doc: document object. Only ``doc.pk`` is used for the removal;
        the object itself is formatted into the debug log message.
    """
    # TODO: make sure this is not called with deserialized documents, i.e.
    # document objects that don't come from Django's ORM interfaces directly.
    logger.debug("Removing {} from index...".format(doc))
    # remove_document_by_id performs the delete_by_term call, so the term is
    # deleted exactly once instead of once here and once in the helper.
    remove_document_by_id(writer, doc.pk)
def remove_document_by_id(writer, doc_id):
    """Delete the index entries whose ``id`` term equals ``doc_id``.

    :param writer: object providing ``delete_by_term(fieldname, value)``.
    :param doc_id: value matched against the ``id`` field.
    """
    id_field = 'id'
    writer.delete_by_term(id_field, doc_id)
def add_or_update_document(document):

View File

@ -94,3 +94,12 @@ def bulk_rename_files(document_ids):
qs = Document.objects.filter(id__in=document_ids)
for doc in qs:
post_save.send(Document, instance=doc, created=False)
def bulk_index_documents(document_ids):
    """Write the documents with the given ids to the search index.

    Every document matching ``document_ids`` is passed to
    ``index.update_document`` using one shared ``AsyncWriter``.

    :param document_ids: primary keys of the documents to index.
    """
    matching_docs = Document.objects.filter(id__in=document_ids)

    with AsyncWriter(index.open_index()) as index_writer:
        for document in matching_docs:
            index.update_document(index_writer, document)

View File

@ -699,49 +699,49 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 1)
bulk_edit.set_correspondent([self.doc1.id, self.doc2.id, self.doc3.id], self.c2.id)
self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 3)
self.async_task.assert_called_once()
args, kwargs = self.async_task.call_args
self.assertCountEqual(kwargs['document_ids'], [self.doc1.id, self.doc2.id])
self.assertEqual(self.async_task.call_count, 2)
self.assertCountEqual(self.async_task.call_args_list[0][1]['document_ids'], [self.doc1.id, self.doc2.id])
self.assertCountEqual(self.async_task.call_args_list[0][1]['document_ids'], [self.doc1.id, self.doc2.id])
def test_unset_correspondent(self):
    """Clearing the correspondent updates the documents and queues two tasks."""
    self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 1)
    bulk_edit.set_correspondent([self.doc1.id, self.doc2.id, self.doc3.id], None)
    self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 0)

    # Two tasks are expected per bulk edit, so a "called once" check would
    # contradict the call count asserted here.
    self.assertEqual(self.async_task.call_count, 2)
    # Inspect every recorded call, not only the first one.
    for call in self.async_task.call_args_list:
        self.assertCountEqual(call[1]['document_ids'], [self.doc2.id, self.doc3.id])
def test_set_document_type(self):
    """Setting a document type updates the documents and queues two tasks."""
    self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 1)
    bulk_edit.set_document_type([self.doc1.id, self.doc2.id, self.doc3.id], self.dt2.id)
    self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 3)

    # Two tasks are expected per bulk edit, so a "called once" check would
    # contradict the call count asserted here.
    self.assertEqual(self.async_task.call_count, 2)
    # Inspect every recorded call, not only the first one.
    for call in self.async_task.call_args_list:
        self.assertCountEqual(call[1]['document_ids'], [self.doc1.id, self.doc2.id])
def test_unset_document_type(self):
    """Clearing the document type updates the documents and queues two tasks."""
    self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 1)
    bulk_edit.set_document_type([self.doc1.id, self.doc2.id, self.doc3.id], None)
    self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 0)

    # Two tasks are expected per bulk edit, so a "called once" check would
    # contradict the call count asserted here.
    self.assertEqual(self.async_task.call_count, 2)
    # Inspect every recorded call, not only the first one.
    for call in self.async_task.call_args_list:
        self.assertCountEqual(call[1]['document_ids'], [self.doc2.id, self.doc3.id])
def test_add_tag(self):
    """Adding a tag tags the remaining documents and queues two tasks."""
    self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 2)
    bulk_edit.add_tag([self.doc1.id, self.doc2.id, self.doc3.id, self.doc4.id], self.t1.id)
    self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 4)

    # Two tasks are expected per bulk edit, so a "called once" check would
    # contradict the call count asserted here.
    self.assertEqual(self.async_task.call_count, 2)
    # Inspect every recorded call, not only the first one.
    for call in self.async_task.call_args_list:
        self.assertCountEqual(call[1]['document_ids'], [self.doc1.id, self.doc3.id])
def test_remove_tag(self):
    """Removing a tag untags the affected document and queues two tasks."""
    self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 2)
    bulk_edit.remove_tag([self.doc1.id, self.doc3.id, self.doc4.id], self.t1.id)
    self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 1)

    # Two tasks are expected per bulk edit, so a "called once" check would
    # contradict the call count asserted here.
    self.assertEqual(self.async_task.call_count, 2)
    # Inspect every recorded call, not only the first one.
    for call in self.async_task.call_args_list:
        self.assertCountEqual(call[1]['document_ids'], [self.doc4.id])
def test_delete(self):
self.assertEqual(Document.objects.count(), 5)