Updates following testing of command

This commit is contained in:
Trenton Holmes 2022-06-10 11:23:24 -07:00 committed by Michael Shamoon
parent b7250477b5
commit a090cf7a10

View File

@@ -1,4 +1,3 @@
import logging
import shutil
from pathlib import Path
from typing import Type
@@ -38,13 +37,14 @@ class Command(BaseCommand):
def handle(self, *args, **options):
logging.getLogger().handlers[0].level = logging.ERROR
all_docs = Document.objects.all()
for doc_pk in tqdm.tqdm(args.documents, disable=options["no_progress_bar"]):
for doc_pk in tqdm.tqdm(
options["documents"],
disable=options["no_progress_bar"],
):
try:
self.stdout.write(self.style.INFO(f"Parsing document {doc_pk}"))
self.stdout.write(f"Parsing document {doc_pk}")
doc: Document = all_docs.get(pk=doc_pk)
except ObjectDoesNotExist:
self.stdout.write(self.style.ERROR(f"Document {doc_pk} does not exist"))
@@ -64,11 +64,15 @@ class Command(BaseCommand):
shutil.copy(doc.source_path, temp_file)
try:
self.stdout.write(
f"Using {type(document_parser).__name__} for document",
)
# Try to re-parse the document into text
document_parser.parse(str(temp_file), doc.mime_type)
doc.content = document_parser.get_text()
doc.save()
self.stdout.write("Document OCR updated")
except ParseError as e:
self.stdout.write(self.style.ERROR(f"Error parsing document: {e}"))