code cleanup

2026-01-12 21:44:21 -06:00 · 2020-11-21 15:34:00 +01:00
parent b44f8383e4
commit 450fb877f6
6 changed files with 71 additions and 49 deletions
--- a/src/documents/matching.py
+++ b/src/documents/matching.py
@@ -12,7 +12,10 @@ def match_correspondents(document_content, classifier):
        pred_id = None

    correspondents = Correspondent.objects.all()
-    return [o for o in correspondents if matches(o, document_content) or o.pk == pred_id]
+
+    return list(filter(
+        lambda o: matches(o, document_content) or o.pk == pred_id,
+        correspondents))


 def match_document_types(document_content, classifier):
@@ -22,15 +25,23 @@ def match_document_types(document_content, classifier):
        pred_id = None

    document_types = DocumentType.objects.all()
-    return [o for o in document_types if matches(o, document_content) or o.pk == pred_id]
+
+    return list(filter(
+        lambda o: matches(o, document_content) or o.pk == pred_id,
+        document_types))


 def match_tags(document_content, classifier):
-    objects = Tag.objects.all()
-    predicted_tag_ids = classifier.predict_tags(document_content) if classifier else []
+    if classifier:
+        predicted_tag_ids = classifier.predict_tags(document_content)
+    else:
+        predicted_tag_ids = []

-    matched_tags = [o for o in objects if matches(o, document_content) or o.pk in predicted_tag_ids]
-    return matched_tags
+    tags = Tag.objects.all()
+
+    return list(filter(
+        lambda o: matches(o, document_content) or o.pk in predicted_tag_ids,
+        tags))


 def matches(matching_model, document_content):
@@ -48,39 +59,45 @@ def matches(matching_model, document_content):
    if matching_model.matching_algorithm == MatchingModel.MATCH_ALL:
        for word in _split_match(matching_model):
            search_result = re.search(
-                r"\b{}\b".format(word), document_content, **search_kwargs)
+                rf"\b{word}\b", document_content, **search_kwargs)
            if not search_result:
                return False
        return True

-    if matching_model.matching_algorithm == MatchingModel.MATCH_ANY:
+    elif matching_model.matching_algorithm == MatchingModel.MATCH_ANY:
        for word in _split_match(matching_model):
-            if re.search(r"\b{}\b".format(word), document_content, **search_kwargs):
+            if re.search(rf"\b{word}\b", document_content, **search_kwargs):
                return True
        return False

-    if matching_model.matching_algorithm == MatchingModel.MATCH_LITERAL:
+    elif matching_model.matching_algorithm == MatchingModel.MATCH_LITERAL:
        return bool(re.search(
-            r"\b{}\b".format(matching_model.match), document_content, **search_kwargs))
+            rf"\b{matching_model.match}\b",
+            document_content,
+            **search_kwargs
+        ))

-    if matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
+    elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
        return bool(re.search(
-            re.compile(matching_model.match, **search_kwargs), document_content))
+            re.compile(matching_model.match, **search_kwargs),
+            document_content
+        ))

-    if matching_model.matching_algorithm == MatchingModel.MATCH_FUZZY:
+    elif matching_model.matching_algorithm == MatchingModel.MATCH_FUZZY:
        match = re.sub(r'[^\w\s]', '', matching_model.match)
        text = re.sub(r'[^\w\s]', '', document_content)
        if matching_model.is_insensitive:
            match = match.lower()
            text = text.lower()

-        return True if fuzz.partial_ratio(match, text) >= 90 else False
+        return fuzz.partial_ratio(match, text) >= 90

-    if matching_model.matching_algorithm == MatchingModel.MATCH_AUTO:
+    elif matching_model.matching_algorithm == MatchingModel.MATCH_AUTO:
        # this is done elsewhere.
        return False

-    raise NotImplementedError("Unsupported matching algorithm")
+    else:
+        raise NotImplementedError("Unsupported matching algorithm")


 def _split_match(matching_model):