From a090cf7a101d27516e7b36f8ab27b5440550f330 Mon Sep 17 00:00:00 2001 From: Trenton Holmes Date: Fri, 10 Jun 2022 11:23:24 -0700 Subject: [PATCH] Updates following testing of command --- .../management/commands/document_redo_ocr.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/documents/management/commands/document_redo_ocr.py b/src/documents/management/commands/document_redo_ocr.py index c76218d99..3ead5a485 100644 --- a/src/documents/management/commands/document_redo_ocr.py +++ b/src/documents/management/commands/document_redo_ocr.py @@ -1,4 +1,3 @@ -import logging import shutil from pathlib import Path from typing import Type @@ -38,13 +37,14 @@ class Command(BaseCommand): def handle(self, *args, **options): - logging.getLogger().handlers[0].level = logging.ERROR - all_docs = Document.objects.all() - for doc_pk in tqdm.tqdm(args.documents, disable=options["no_progress_bar"]): + for doc_pk in tqdm.tqdm( + options["documents"], + disable=options["no_progress_bar"], + ): try: - self.stdout.write(self.style.INFO(f"Parsing document {doc_pk}")) + self.stdout.write(f"Parsing document {doc_pk}") doc: Document = all_docs.get(pk=doc_pk) except ObjectDoesNotExist: self.stdout.write(self.style.ERROR(f"Document {doc_pk} does not exist")) @@ -64,11 +64,15 @@ class Command(BaseCommand): shutil.copy(doc.source_path, temp_file) try: + self.stdout.write( + f"Using {type(document_parser).__name__} for document", + ) # Try to re-parse the document into text document_parser.parse(str(temp_file), doc.mime_type) doc.content = document_parser.get_text() doc.save() + self.stdout.write("Document OCR updated") except ParseError as e: self.stdout.write(self.style.ERROR(f"Error parsing document: {e}"))