mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
87 lines
2.4 KiB
Python
87 lines
2.4 KiB
Python
import logging
|
|
|
|
from django.conf import settings
|
|
from whoosh.writing import AsyncWriter
|
|
|
|
from documents import index, sanity_checker
|
|
from documents.classifier import DocumentClassifier, \
|
|
IncompatibleClassifierVersionError
|
|
from documents.consumer import Consumer, ConsumerError
|
|
from documents.models import Document
|
|
from documents.sanity_checker import SanityFailedError
|
|
|
|
|
|
def index_optimize():
    """Open the document search index and invoke ``optimize()`` on it."""
    search_index = index.open_index()
    search_index.optimize()
|
|
|
|
|
|
def index_reindex():
    """Recreate the search index and add every ``Document`` to it.

    The index is opened with ``recreate=True`` and each document returned by
    ``Document.objects.all()`` is passed to ``index.update_document`` through
    a single ``AsyncWriter``.
    """
    all_documents = Document.objects.all()

    fresh_index = index.open_index(recreate=True)

    # One writer is shared by all updates and used as a context manager.
    with AsyncWriter(fresh_index) as index_writer:
        for doc in all_documents:
            index.update_document(index_writer, doc)
|
|
|
|
|
|
def train_classifier():
    """Retrain the document classifier and save it when training changed it.

    A previously saved model is loaded first, since training may turn out to
    be unnecessary; a missing or incompatible model file is tolerated at that
    stage. If ``train()`` returns a truthy value the classifier is saved,
    otherwise a debug message notes that the training data is unchanged.

    Any exception raised while training or saving is logged at error level,
    together with its traceback, and is not propagated to the caller.
    """
    logger = logging.getLogger(__name__)
    classifier = DocumentClassifier()

    try:
        # load the classifier, since we might not have to train it again.
        classifier.reload()
    except (FileNotFoundError, IncompatibleClassifierVersionError):
        # Expected when no usable model is available yet. The training step
        # below is what is meant to remedy that, so nothing is done here.
        pass

    try:
        if classifier.train():
            logger.info(
                "Saving updated classifier model to %s...",
                settings.MODEL_FILE,
            )
            classifier.save_classifier()
        else:
            logger.debug("Training data unchanged.")
    except Exception as e:
        # Same message text as before, but logger.exception also records the
        # traceback so the failure can be diagnosed from the log alone.
        logger.exception("Classifier error: %s", e)
|
|
|
|
|
|
def consume_file(path,
                 override_filename=None,
                 override_title=None,
                 override_correspondent_id=None,
                 override_document_type_id=None,
                 override_tag_ids=None):
    """Consume the file at ``path`` and report the id of the new document.

    All ``override_*`` arguments are forwarded unchanged, by keyword, to
    ``Consumer.try_consume_file``.

    Returns:
        A success message containing the primary key of the created document.

    Raises:
        ConsumerError: if ``try_consume_file`` returns a falsy value instead
            of a document.
    """
    overrides = dict(
        override_filename=override_filename,
        override_title=override_title,
        override_correspondent_id=override_correspondent_id,
        override_document_type_id=override_document_type_id,
        override_tag_ids=override_tag_ids,
    )
    consumed = Consumer().try_consume_file(path, **overrides)

    # Guard clause: no document came back and no error was raised either.
    if not consumed:
        raise ConsumerError("Unknown error: Returned document was null, but "
                            "no error message was given.")

    return "Success. New document id {} created".format(consumed.pk)
|
|
|
|
|
|
def sanity_check():
    """Run the sanity checker and fail if it reports any messages.

    Returns:
        The string ``"No issues detected."`` when ``check_sanity()`` returns
        an empty collection.

    Raises:
        SanityFailedError: constructed with the returned messages when there
            is at least one.
    """
    found = sanity_checker.check_sanity()

    if len(found) == 0:
        return "No issues detected."

    raise SanityFailedError(found)
|