Mirror of https://github.com/paperless-ngx/paperless-ngx.git, synced 2025-08-26 01:16:16 +00:00
Format Python code with black
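The hunks below are the mechanical result of running black with its defaults (88-character line length) over this module. As a rough, standalone illustration of the kind of rewrite it performs (not part of this commit; it assumes the black package is installed and uses an invented snippet), black's Python API can be called directly:

    # Minimal sketch: feed black a snippet laid out like the old code and let it
    # collapse the call onto one line, as in the hunks below.
    import black

    messy = 'result = list(filter(\n    lambda o: o > 0,\n    values))\n'
    print(black.format_str(messy, mode=black.FileMode()))
    # Expected output (it fits within the default 88-column limit):
    # result = list(filter(lambda o: o > 0, values))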
@@ -12,7 +12,8 @@ def log_reason(matching_model, document, reason):
     class_name = type(matching_model).__name__
     logger.debug(
         f"{class_name} {matching_model.name} matched on document "
-        f"{document} because {reason}")
+        f"{document} because {reason}"
+    )
 
 
 def match_correspondents(document, classifier):
@@ -23,9 +24,9 @@ def match_correspondents(document, classifier):
 
     correspondents = Correspondent.objects.all()
 
-    return list(filter(
-        lambda o: matches(o, document) or o.pk == pred_id,
-        correspondents))
+    return list(
+        filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents)
+    )
 
 
 def match_document_types(document, classifier):
@@ -36,9 +37,9 @@ def match_document_types(document, classifier):
 
     document_types = DocumentType.objects.all()
 
-    return list(filter(
-        lambda o: matches(o, document) or o.pk == pred_id,
-        document_types))
+    return list(
+        filter(lambda o: matches(o, document) or o.pk == pred_id, document_types)
+    )
 
 
 def match_tags(document, classifier):
@@ -49,9 +50,9 @@ def match_tags(document, classifier):
 
     tags = Tag.objects.all()
 
-    return list(filter(
-        lambda o: matches(o, document) or o.pk in predicted_tag_ids,
-        tags))
+    return list(
+        filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags)
+    )
 
 
 def matches(matching_model, document):
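The three match_* hunks above are pure layout changes; the filter() call behaves the same whether it is split the old way or black's way. A tiny self-contained check (candidates and pred are made-up stand-ins for the queryset and the classifier's prediction, not paperless code):

    candidates = ["tag_a", "tag_b", "tag_c"]
    pred = "tag_b"

    # Old layout and black's layout are literally the same expression.
    old_style = list(filter(
        lambda o: o == pred,
        candidates))
    new_style = list(
        filter(lambda o: o == pred, candidates)
    )
    assert old_style == new_style == ["tag_b"]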
@@ -68,73 +69,73 @@ def matches(matching_model, document):
 
     if matching_model.matching_algorithm == MatchingModel.MATCH_ALL:
         for word in _split_match(matching_model):
-            search_result = re.search(
-                rf"\b{word}\b", document_content, **search_kwargs)
+            search_result = re.search(rf"\b{word}\b", document_content, **search_kwargs)
             if not search_result:
                 return False
         log_reason(
-            matching_model, document,
-            f"it contains all of these words: {matching_model.match}"
+            matching_model,
+            document,
+            f"it contains all of these words: {matching_model.match}",
         )
         return True
 
     elif matching_model.matching_algorithm == MatchingModel.MATCH_ANY:
         for word in _split_match(matching_model):
             if re.search(rf"\b{word}\b", document_content, **search_kwargs):
-                log_reason(
-                    matching_model, document,
-                    f"it contains this word: {word}"
-                )
+                log_reason(matching_model, document, f"it contains this word: {word}")
                 return True
         return False
 
     elif matching_model.matching_algorithm == MatchingModel.MATCH_LITERAL:
-        result = bool(re.search(
-            rf"\b{re.escape(matching_model.match)}\b",
-            document_content,
-            **search_kwargs
-        ))
+        result = bool(
+            re.search(
+                rf"\b{re.escape(matching_model.match)}\b",
+                document_content,
+                **search_kwargs,
+            )
+        )
         if result:
             log_reason(
-                matching_model, document,
-                f"it contains this string: \"{matching_model.match}\""
+                matching_model,
+                document,
+                f'it contains this string: "{matching_model.match}"',
             )
         return result
 
     elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
         try:
             match = re.search(
-                re.compile(matching_model.match, **search_kwargs),
-                document_content
+                re.compile(matching_model.match, **search_kwargs), document_content
             )
         except re.error:
             logger.error(
-                f"Error while processing regular expression "
-                f"{matching_model.match}"
+                f"Error while processing regular expression " f"{matching_model.match}"
             )
             return False
         if match:
             log_reason(
-                matching_model, document,
+                matching_model,
+                document,
                 f"the string {match.group()} matches the regular expression "
-                f"{matching_model.match}"
+                f"{matching_model.match}",
             )
         return bool(match)
 
     elif matching_model.matching_algorithm == MatchingModel.MATCH_FUZZY:
         from fuzzywuzzy import fuzz
 
-        match = re.sub(r'[^\w\s]', '', matching_model.match)
-        text = re.sub(r'[^\w\s]', '', document_content)
+        match = re.sub(r"[^\w\s]", "", matching_model.match)
+        text = re.sub(r"[^\w\s]", "", document_content)
         if matching_model.is_insensitive:
             match = match.lower()
             text = text.lower()
         if fuzz.partial_ratio(match, text) >= 90:
             # TODO: make this better
             log_reason(
-                matching_model, document,
+                matching_model,
+                document,
                 f"parts of the document content somehow match the string "
-                f"{matching_model.match}"
+                f"{matching_model.match}",
             )
             return True
         else:
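For context on what the reformatted branches above do at runtime, here is a standalone sketch of the two matching styles touched in this hunk: word-boundary regex search and fuzzy matching. The document text and match strings are invented examples, and only re plus the fuzzywuzzy package (which must be installed) are assumed:

    import re
    from fuzzywuzzy import fuzz

    content = "Invoice 2021-03 from ACME GmbH"

    # MATCH_ANY-style check: any configured word found as a whole word,
    # case-insensitively.
    words = ["invoice", "receipt"]
    print(any(re.search(rf"\b{w}\b", content, re.IGNORECASE) for w in words))  # True

    # MATCH_FUZZY-style check: strip punctuation, lowercase, then score.
    match = re.sub(r"[^\w\s]", "", "ACME GmbH.").lower()
    text = re.sub(r"[^\w\s]", "", content).lower()
    print(fuzz.partial_ratio(match, text) >= 90)  # True: an exact substring scores 100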
@@ -162,8 +163,6 @@ def _split_match(matching_model):
     normspace = re.compile(r"\s+").sub
     return [
         # normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+")
-        re.escape(
-            normspace(" ", (t[0] or t[1]).strip())
-        ).replace(r"\ ", r"\s+")
+        re.escape(normspace(" ", (t[0] or t[1]).strip())).replace(r"\ ", r"\s+")
         for t in findterms(matching_model.match)
     ]
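The final hunk only joins the re.escape(...) chain onto one line; the expression itself still turns each whitespace-normalized search term into a pattern that tolerates any run of whitespace. A quick standalone illustration (the term is an invented example; this relies on re.escape escaping a space as "\ ", which CPython does):

    import re

    term = "tax   return"
    normalized = re.compile(r"\s+").sub(" ", term.strip())
    pattern = re.escape(normalized).replace(r"\ ", r"\s+")
    print(pattern)                                  # tax\s+return
    print(bool(re.search(pattern, "tax\nreturn")))  # True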