diff --git a/src/documents/management/commands/document_thumbnails.py b/src/documents/management/commands/document_thumbnails.py new file mode 100644 index 000000000..41ddec86e --- /dev/null +++ b/src/documents/management/commands/document_thumbnails.py @@ -0,0 +1,65 @@ +import logging +import multiprocessing +import shutil + +import tqdm +from django import db +from django.core.management.base import BaseCommand + +from documents.models import Document +from ...mixins import Renderable +from ...parsers import get_parser_class_for_mime_type + + +def _process_document(doc_in): + document = Document.objects.get(id=doc_in) + parser = get_parser_class_for_mime_type(document.mime_type)( + logging_group=None) + try: + thumb = parser.get_optimised_thumbnail( + document.source_path, document.mime_type) + + shutil.move(thumb, document.thumbnail_path) + finally: + parser.cleanup() + +class Command(Renderable, BaseCommand): + + help = """ + This will regenerate the thumbnails for all documents. + """.replace(" ", "") + + def __init__(self, *args, **kwargs): + self.verbosity = 0 + BaseCommand.__init__(self, *args, **kwargs) + + def add_arguments(self, parser): + parser.add_argument( + "-d", "--document", + default=None, + type=int, + required=False, + help="Specify the ID of a document, and this command will only " + "run on this specific document." + ) + + def handle(self, *args, **options): + + self.verbosity = options["verbosity"] + + logging.getLogger().handlers[0].level = logging.ERROR + + if options['document']: + documents = Document.objects.filter(pk=options['document']) + else: + documents = Document.objects.all() + + ids = [doc.id for doc in documents] + + # Note to future self: this prevents django from reusing database + # conncetions between processes, which is bad and does not work + # with postgres. + db.connections.close_all() + + with multiprocessing.Pool() as pool: + list(tqdm.tqdm(pool.imap_unordered(_process_document, ids), total=len(ids)))