paperless-ngx/src/documents/management/commands/document_thumbnails.py

85 lines
2.3 KiB
Python

import logging
import multiprocessing
import shutil
import tqdm
from django import db
from django.core.management.base import BaseCommand
from documents.models import Document
from documents.parsers import get_parser_class_for_mime_type
def _process_document(doc_in):
document: Document = Document.objects.get(id=doc_in)
parser_class = get_parser_class_for_mime_type(document.mime_type)
if parser_class:
parser = parser_class(logging_group=None)
else:
print(f"{document} No parser for mime type {document.mime_type}")
return
try:
thumb = parser.get_thumbnail(
document.source_path,
document.mime_type,
document.get_public_filename(),
)
shutil.move(thumb, document.thumbnail_path)
finally:
parser.cleanup()
class Command(BaseCommand):
help = """
This will regenerate the thumbnails for all documents.
""".replace(
" ",
"",
)
def add_arguments(self, parser):
parser.add_argument(
"-d",
"--document",
default=None,
type=int,
required=False,
help="Specify the ID of a document, and this command will only "
"run on this specific document.",
)
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown",
)
def handle(self, *args, **options):
logging.getLogger().handlers[0].level = logging.ERROR
if options["document"]:
documents = Document.objects.filter(pk=options["document"])
else:
documents = Document.objects.all()
ids = [doc.id for doc in documents]
# Note to future self: this prevents django from reusing database
# connections between processes, which is bad and does not work
# with postgres.
db.connections.close_all()
with multiprocessing.Pool() as pool:
list(
tqdm.tqdm(
pool.imap_unordered(_process_document, ids),
total=len(ids),
disable=options["no_progress_bar"],
),
)