From f88cf691731fed3c0aef5da931df139a52d26a62 Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Sat, 5 Dec 2020 00:37:05 +0100 Subject: [PATCH] bugfix --- src/documents/index.py | 5 +++++ .../management/commands/document_archiver.py | 17 +++++++++++------ src/documents/tests/test_management_archiver.py | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/documents/index.py b/src/documents/index.py index b4d6e1c51..53bf34542 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -82,6 +82,10 @@ def open_index(recreate=False): def update_document(writer, doc): + # TODO: this line caused many issues all around, since: + # We need to make sure that this method does not get called with + # deserialized documents (i.e., document objects that don't come from + # Django's ORM interfaces directly). logger.debug("Indexing {}...".format(doc)) tags = ",".join([t.name for t in doc.tags.all()]) writer.update_document( @@ -98,6 +102,7 @@ def update_document(writer, doc): def remove_document(writer, doc): + # TODO: see the TODO in update_document above. 
logger.debug("Removing {} from index...".format(doc)) writer.delete_by_term('id', doc.pk) diff --git a/src/documents/management/commands/document_archiver.py b/src/documents/management/commands/document_archiver.py index aba2ea693..2e7e7b34d 100644 --- a/src/documents/management/commands/document_archiver.py +++ b/src/documents/management/commands/document_archiver.py @@ -23,7 +23,9 @@ from ...parsers import get_parser_class_for_mime_type logger = logging.getLogger(__name__) -def handle_document(document): +def handle_document(document_id): + document = Document.objects.get(id=document_id) + mime_type = document.mime_type parser_class = get_parser_class_for_mime_type(mime_type) @@ -98,9 +100,12 @@ class Command(Renderable, BaseCommand): else: documents = Document.objects.all() - documents_to_process = list(filter( - lambda d: overwrite or not d.archive_checksum, - documents + document_ids = list(map( + lambda doc: doc.id, + filter( + lambda d: overwrite or not d.archive_checksum, + documents + ) )) logging.getLogger().handlers[0].level = logging.ERROR @@ -108,7 +113,7 @@ class Command(Renderable, BaseCommand): list(tqdm.tqdm( pool.imap_unordered( handle_document, - documents_to_process + document_ids ), - total=len(documents_to_process) + total=len(document_ids) )) diff --git a/src/documents/tests/test_management_archiver.py b/src/documents/tests/test_management_archiver.py index ec4fc5ac4..fdb588acf 100644 --- a/src/documents/tests/test_management_archiver.py +++ b/src/documents/tests/test_management_archiver.py @@ -32,7 +32,7 @@ class TestArchiver(DirectoriesMixin, TestCase): shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, "0000001.pdf")) self.make_models() - handle_document(self.d1) + handle_document(self.d1.pk) doc = Document.objects.get(id=self.d1.id)