Format Python code with black

2025-11-25 23:59:09 -06:00 · 2022-02-27 15:26:41 +01:00
parent 13885968e3
commit fc695896dd
136 changed files with 6142 additions and 3811 deletions
--- a/src/documents/matching.py
+++ b/src/documents/matching.py
@@ -12,7 +12,8 @@ def log_reason(matching_model, document, reason):
    class_name = type(matching_model).__name__
    logger.debug(
        f"{class_name} {matching_model.name} matched on document "
-        f"{document} because {reason}")
+        f"{document} because {reason}"
+    )


 def match_correspondents(document, classifier):
@@ -23,9 +24,9 @@ def match_correspondents(document, classifier):

    correspondents = Correspondent.objects.all()

-    return list(filter(
-        lambda o: matches(o, document) or o.pk == pred_id,
-        correspondents))
+    return list(
+        filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents)
+    )


 def match_document_types(document, classifier):
@@ -36,9 +37,9 @@ def match_document_types(document, classifier):

    document_types = DocumentType.objects.all()

-    return list(filter(
-        lambda o: matches(o, document) or o.pk == pred_id,
-        document_types))
+    return list(
+        filter(lambda o: matches(o, document) or o.pk == pred_id, document_types)
+    )


 def match_tags(document, classifier):
@@ -49,9 +50,9 @@ def match_tags(document, classifier):

    tags = Tag.objects.all()

-    return list(filter(
-        lambda o: matches(o, document) or o.pk in predicted_tag_ids,
-        tags))
+    return list(
+        filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags)
+    )


 def matches(matching_model, document):
@@ -68,73 +69,73 @@ def matches(matching_model, document):

    if matching_model.matching_algorithm == MatchingModel.MATCH_ALL:
        for word in _split_match(matching_model):
-            search_result = re.search(
-                rf"\b{word}\b", document_content, **search_kwargs)
+            search_result = re.search(rf"\b{word}\b", document_content, **search_kwargs)
            if not search_result:
                return False
        log_reason(
-            matching_model, document,
-            f"it contains all of these words: {matching_model.match}"
+            matching_model,
+            document,
+            f"it contains all of these words: {matching_model.match}",
        )
        return True

    elif matching_model.matching_algorithm == MatchingModel.MATCH_ANY:
        for word in _split_match(matching_model):
            if re.search(rf"\b{word}\b", document_content, **search_kwargs):
-                log_reason(
-                    matching_model, document,
-                    f"it contains this word: {word}"
-                )
+                log_reason(matching_model, document, f"it contains this word: {word}")
                return True
        return False

    elif matching_model.matching_algorithm == MatchingModel.MATCH_LITERAL:
-        result = bool(re.search(
-            rf"\b{re.escape(matching_model.match)}\b",
-            document_content,
-            **search_kwargs
-        ))
+        result = bool(
+            re.search(
+                rf"\b{re.escape(matching_model.match)}\b",
+                document_content,
+                **search_kwargs,
+            )
+        )
        if result:
            log_reason(
-                matching_model, document,
-                f"it contains this string: \"{matching_model.match}\""
+                matching_model,
+                document,
+                f'it contains this string: "{matching_model.match}"',
            )
        return result

    elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
        try:
            match = re.search(
-                re.compile(matching_model.match, **search_kwargs),
-                document_content
+                re.compile(matching_model.match, **search_kwargs), document_content
            )
        except re.error:
            logger.error(
-                f"Error while processing regular expression "
-                f"{matching_model.match}"
+                f"Error while processing regular expression " f"{matching_model.match}"
            )
            return False
        if match:
            log_reason(
-                matching_model, document,
+                matching_model,
+                document,
                f"the string {match.group()} matches the regular expression "
-                f"{matching_model.match}"
+                f"{matching_model.match}",
            )
        return bool(match)

    elif matching_model.matching_algorithm == MatchingModel.MATCH_FUZZY:
        from fuzzywuzzy import fuzz

-        match = re.sub(r'[^\w\s]', '', matching_model.match)
-        text = re.sub(r'[^\w\s]', '', document_content)
+        match = re.sub(r"[^\w\s]", "", matching_model.match)
+        text = re.sub(r"[^\w\s]", "", document_content)
        if matching_model.is_insensitive:
            match = match.lower()
            text = text.lower()
        if fuzz.partial_ratio(match, text) >= 90:
            # TODO: make this better
            log_reason(
-                matching_model, document,
+                matching_model,
+                document,
                f"parts of the document content somehow match the string "
-                f"{matching_model.match}"
+                f"{matching_model.match}",
            )
            return True
        else:
@@ -162,8 +163,6 @@ def _split_match(matching_model):
    normspace = re.compile(r"\s+").sub
    return [
        # normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+")
-        re.escape(
-            normspace(" ", (t[0] or t[1]).strip())
-        ).replace(r"\ ", r"\s+")
+        re.escape(normspace(" ", (t[0] or t[1]).strip())).replace(r"\ ", r"\s+")
        for t in findterms(matching_model.match)
    ]