This commit is contained in:
jonaswinkler 2020-12-05 00:37:05 +01:00
parent e9758d5224
commit f88cf69173
3 changed files with 17 additions and 7 deletions

View File

@ -82,6 +82,10 @@ def open_index(recreate=False):
def update_document(writer, doc): def update_document(writer, doc):
# TODO: this line caused many issues all around:
# We need to make sure that this method does not get called with
# deserialized documents (i.e., document objects that don't come from
# Django's ORM interfaces directly).
logger.debug("Indexing {}...".format(doc)) logger.debug("Indexing {}...".format(doc))
tags = ",".join([t.name for t in doc.tags.all()]) tags = ",".join([t.name for t in doc.tags.all()])
writer.update_document( writer.update_document(
@ -98,6 +102,7 @@ def update_document(writer, doc):
def remove_document(writer, doc): def remove_document(writer, doc):
# TODO: see above.
logger.debug("Removing {} from index...".format(doc)) logger.debug("Removing {} from index...".format(doc))
writer.delete_by_term('id', doc.pk) writer.delete_by_term('id', doc.pk)

View File

@ -23,7 +23,9 @@ from ...parsers import get_parser_class_for_mime_type
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def handle_document(document): def handle_document(document_id):
document = Document.objects.get(id=document_id)
mime_type = document.mime_type mime_type = document.mime_type
parser_class = get_parser_class_for_mime_type(mime_type) parser_class = get_parser_class_for_mime_type(mime_type)
@ -98,9 +100,12 @@ class Command(Renderable, BaseCommand):
else: else:
documents = Document.objects.all() documents = Document.objects.all()
documents_to_process = list(filter( document_ids = list(map(
lambda d: overwrite or not d.archive_checksum, lambda doc: doc.id,
documents filter(
lambda d: overwrite or not d.archive_checksum,
documents
)
)) ))
logging.getLogger().handlers[0].level = logging.ERROR logging.getLogger().handlers[0].level = logging.ERROR
@ -108,7 +113,7 @@ class Command(Renderable, BaseCommand):
list(tqdm.tqdm( list(tqdm.tqdm(
pool.imap_unordered( pool.imap_unordered(
handle_document, handle_document,
documents_to_process document_ids
), ),
total=len(documents_to_process) total=len(document_ids)
)) ))

View File

@ -32,7 +32,7 @@ class TestArchiver(DirectoriesMixin, TestCase):
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, "0000001.pdf")) shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, "0000001.pdf"))
self.make_models() self.make_models()
handle_document(self.d1) handle_document(self.d1.pk)
doc = Document.objects.get(id=self.d1.id) doc = Document.objects.get(id=self.d1.id)