mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-09-14 21:45:37 -05:00
Enhancement: Limit excessively long content length when computing suggestions (#10656)
This helps prevent excessive processing times on very large documents by limiting the text analyzed during date parsing, tag prediction, and correspondent matching. If a document's content exceeds 1.2M characters, it is cropped to 1M characters.
This commit is contained in:
@@ -41,7 +41,11 @@ def log_reason(
|
||||
|
||||
|
||||
def match_correspondents(document: Document, classifier: DocumentClassifier, user=None):
|
||||
pred_id = classifier.predict_correspondent(document.content) if classifier else None
|
||||
pred_id = (
|
||||
classifier.predict_correspondent(document.suggestion_content)
|
||||
if classifier
|
||||
else None
|
||||
)
|
||||
|
||||
if user is None and document.owner is not None:
|
||||
user = document.owner
|
||||
@@ -65,8 +69,11 @@ def match_correspondents(document: Document, classifier: DocumentClassifier, use
|
||||
|
||||
|
||||
def match_document_types(document: Document, classifier: DocumentClassifier, user=None):
|
||||
pred_id = classifier.predict_document_type(document.content) if classifier else None
|
||||
|
||||
pred_id = (
|
||||
classifier.predict_document_type(document.suggestion_content)
|
||||
if classifier
|
||||
else None
|
||||
)
|
||||
if user is None and document.owner is not None:
|
||||
user = document.owner
|
||||
|
||||
@@ -89,7 +96,9 @@ def match_document_types(document: Document, classifier: DocumentClassifier, use
|
||||
|
||||
|
||||
def match_tags(document: Document, classifier: DocumentClassifier, user=None):
|
||||
predicted_tag_ids = classifier.predict_tags(document.content) if classifier else []
|
||||
predicted_tag_ids = (
|
||||
classifier.predict_tags(document.suggestion_content) if classifier else []
|
||||
)
|
||||
|
||||
if user is None and document.owner is not None:
|
||||
user = document.owner
|
||||
@@ -112,7 +121,11 @@ def match_tags(document: Document, classifier: DocumentClassifier, user=None):
|
||||
|
||||
|
||||
def match_storage_paths(document: Document, classifier: DocumentClassifier, user=None):
|
||||
pred_id = classifier.predict_storage_path(document.content) if classifier else None
|
||||
pred_id = (
|
||||
classifier.predict_storage_path(document.suggestion_content)
|
||||
if classifier
|
||||
else None
|
||||
)
|
||||
|
||||
if user is None and document.owner is not None:
|
||||
user = document.owner
|
||||
|
Reference in New Issue
Block a user