mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Updates following testing of command
This commit is contained in:
parent
b7250477b5
commit
a090cf7a10
@@ -1,4 +1,3 @@
|
|||||||
import logging
|
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Type
|
from typing import Type
|
||||||
@@ -38,13 +37,14 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
|
||||||
logging.getLogger().handlers[0].level = logging.ERROR
|
|
||||||
|
|
||||||
all_docs = Document.objects.all()
|
all_docs = Document.objects.all()
|
||||||
|
|
||||||
for doc_pk in tqdm.tqdm(args.documents, disable=options["no_progress_bar"]):
|
for doc_pk in tqdm.tqdm(
|
||||||
|
options["documents"],
|
||||||
|
disable=options["no_progress_bar"],
|
||||||
|
):
|
||||||
try:
|
try:
|
||||||
self.stdout.write(self.style.INFO(f"Parsing document {doc_pk}"))
|
self.stdout.write(f"Parsing document {doc_pk}")
|
||||||
doc: Document = all_docs.get(pk=doc_pk)
|
doc: Document = all_docs.get(pk=doc_pk)
|
||||||
except ObjectDoesNotExist:
|
except ObjectDoesNotExist:
|
||||||
self.stdout.write(self.style.ERROR(f"Document {doc_pk} does not exist"))
|
self.stdout.write(self.style.ERROR(f"Document {doc_pk} does not exist"))
|
||||||
@@ -64,11 +64,15 @@ class Command(BaseCommand):
|
|||||||
shutil.copy(doc.source_path, temp_file)
|
shutil.copy(doc.source_path, temp_file)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
self.stdout.write(
|
||||||
|
f"Using {type(document_parser).__name__} for document",
|
||||||
|
)
|
||||||
# Try to re-parse the document into text
|
# Try to re-parse the document into text
|
||||||
document_parser.parse(str(temp_file), doc.mime_type)
|
document_parser.parse(str(temp_file), doc.mime_type)
|
||||||
|
|
||||||
doc.content = document_parser.get_text()
|
doc.content = document_parser.get_text()
|
||||||
doc.save()
|
doc.save()
|
||||||
|
self.stdout.write("Document OCR updated")
|
||||||
|
|
||||||
except ParseError as e:
|
except ParseError as e:
|
||||||
self.stdout.write(self.style.ERROR(f"Error parsing document: {e}"))
|
self.stdout.write(self.style.ERROR(f"Error parsing document: {e}"))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user