mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Unified document matching: legacy matching and automatic matching now work alongside each other
This commit is contained in:
@@ -9,7 +9,7 @@ from django.contrib.contenttypes.models import ContentType
|
||||
from django.utils import timezone
|
||||
|
||||
from documents.classifier import DocumentClassifier
|
||||
from .. import index
|
||||
from .. import index, matching
|
||||
from ..models import Document, Tag
|
||||
|
||||
|
||||
@@ -17,35 +17,107 @@ def logger(message, group):
|
||||
logging.getLogger(__name__).debug(message, extra={"group": group})
|
||||
|
||||
|
||||
# TODO: Should this really be a module-level global?
# A single classifier instance, created at import time.
classifier = DocumentClassifier()
|
||||
|
||||
|
||||
def index_document(sender, document=None, logging_group=None, **kwargs):
    """Add *document* to the search index.

    ``logging_group`` and any additional keyword arguments are accepted
    but not used by this function.
    """
    index.add_document_to_index(sender, instance=document)
|
||||
|
||||
|
||||
def classify_document(sender, document=None, logging_group=None, **kwargs):
    """Run the module-level classifier against *document*.

    The classifier is reloaded first and then asked to classify the
    correspondent, the tags and the document type of the document. If
    either step raises ``FileNotFoundError``, a fatal-level message is
    logged and the error is swallowed.
    """
    # The module-level ``classifier`` is only read here, never rebound, so
    # no ``global`` declaration is needed.
    what_to_classify = {
        "classify_correspondent": True,
        "classify_tags": True,
        "classify_document_type": True,
    }
    try:
        classifier.reload()
        classifier.classify_document(document, **what_to_classify)
    except FileNotFoundError:
        module_log = logging.getLogger(__name__)
        module_log.fatal(
            "Cannot classify document, classifier model file was not found."
        )
|
||||
|
||||
|
||||
def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
    """Attach every tag flagged with ``is_inbox_tag`` to *document*."""
    document.tags.add(*Tag.objects.filter(is_inbox_tag=True))
|
||||
|
||||
|
||||
def set_correspondent(
    sender,
    document=None,
    logging_group=None,
    classifier=None,
    replace=False,
    use_first=True,
    **kwargs
):
    """Assign a matching correspondent to *document*.

    Candidates come from ``matching.match_correspondents``, called with the
    document's content and the given ``classifier``.

    :param document: the document to update.
    :param logging_group: passed through to ``logger`` for every message.
    :param classifier: forwarded unchanged to the matching function.
    :param replace: when false, a document that already has a correspondent
        is left untouched. When true, the correspondent is overwritten,
        and cleared if nothing matches.
    :param use_first: when several candidates match, pick the first one
        (true) or assign nothing at all (false).
    """
    if document.correspondent and not replace:
        return

    potential_correspondents = matching.match_correspondents(
        document.content, classifier
    )
    potential_count = len(potential_correspondents)
    selected = potential_correspondents[0] if potential_correspondents else None

    if potential_count > 1:
        if not use_first:
            message = "Detected {} potential correspondents, not assigning any correspondent"
            logger(message.format(potential_count), logging_group)
            return

        message = "Detected {} potential correspondents, so we've opted for {}"
        logger(message.format(potential_count, selected), logging_group)

    if selected is None and not replace:
        # Nothing matched and the document has no correspondent (otherwise
        # we would have returned above), so there is nothing to assign.
        # Skipping avoids a misleading 'Assigning correspondent "None"' log
        # entry and a redundant database write.
        return

    logger(
        'Assigning correspondent "{}" to "{}" '.format(selected, document),
        logging_group
    )

    document.correspondent = selected
    document.save(update_fields=("correspondent",))
|
||||
|
||||
|
||||
def set_document_type(
    sender,
    document=None,
    logging_group=None,
    classifier=None,
    replace=False,
    use_first=True,
    **kwargs
):
    """Assign a matching document type to *document*.

    Candidates come from ``matching.match_document_types``, called with the
    document's content and the given ``classifier``.

    :param document: the document to update.
    :param logging_group: passed through to ``logger`` for every message.
    :param classifier: forwarded unchanged to the matching function.
    :param replace: when false, a document that already has a document type
        is left untouched. When true, the document type is overwritten,
        and cleared if nothing matches.
    :param use_first: when several candidates match, pick the first one
        (true) or assign nothing at all (false).
    """
    if document.document_type and not replace:
        return

    potential_document_type = matching.match_document_types(
        document.content, classifier
    )
    potential_count = len(potential_document_type)
    selected = potential_document_type[0] if potential_document_type else None

    if potential_count > 1:
        if not use_first:
            message = "Detected {} potential document types, not assigning any document type"
            logger(message.format(potential_count), logging_group)
            return

        message = "Detected {} potential document types, so we've opted for {}"
        logger(message.format(potential_count, selected), logging_group)

    if selected is None and not replace:
        # Nothing matched and the document has no document type (otherwise
        # we would have returned above), so there is nothing to assign.
        # Skipping avoids a misleading 'Assigning document type "None"' log
        # entry and a redundant database write.
        return

    logger(
        'Assigning document type "{}" to "{}" '.format(selected, document),
        logging_group
    )

    document.document_type = selected
    document.save(update_fields=("document_type",))
|
||||
|
||||
|
||||
def set_tags(
    sender,
    document=None,
    logging_group=None,
    classifier=None,
    replace=False,
    **kwargs
):
    """Add every matching tag that *document* does not carry yet.

    Candidates come from ``matching.match_tags``, called with the
    document's content and the given ``classifier``. With ``replace`` set,
    the document's existing tags are removed first, so all matching tags
    count as new. Nothing is logged or written when no new tag is found.
    """
    if replace:
        document.tags.clear()
        already_assigned = set()
    else:
        already_assigned = set(document.tags.all())

    matched = set(matching.match_tags(document.content, classifier))
    tags_to_add = matched - already_assigned
    if not tags_to_add:
        return

    slugs = ", ".join(tag.slug for tag in tags_to_add)
    logger('Tagging "{}" with "{}"'.format(document, slugs), logging_group)

    document.tags.add(*tags_to_add)
|
||||
|
||||
|
||||
def run_pre_consume_script(sender, filename, **kwargs):
|
||||
|
||||
if not settings.PRE_CONSUME_SCRIPT:
|
||||
|
Reference in New Issue
Block a user