From b39c3f7866e16997199f67b4a5f9268174f0f844 Mon Sep 17 00:00:00 2001 From: jonaswinkler <17569239+jonaswinkler@users.noreply.github.com> Date: Wed, 17 Mar 2021 22:44:18 +0100 Subject: [PATCH] fixes #668 (see https://github.com/the-paperless-project/paperless/pull/571) --- src/documents/matching.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/documents/matching.py b/src/documents/matching.py index 3b70930b6..a1f3896e5 100644 --- a/src/documents/matching.py +++ b/src/documents/matching.py @@ -90,7 +90,7 @@ def matches(matching_model, document): elif matching_model.matching_algorithm == MatchingModel.MATCH_LITERAL: result = bool(re.search( - rf"\b{matching_model.match}\b", + rf"\b{re.escape(matching_model.match)}\b", document_content, **search_kwargs )) @@ -161,6 +161,9 @@ def _split_match(matching_model): findterms = re.compile(r'"([^"]+)"|(\S+)').findall normspace = re.compile(r"\s+").sub return [ - normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+") + # normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+") + re.escape( + normspace(" ", (t[0] or t[1]).strip()) + ).replace(r"\ ", r"\s+") for t in findterms(matching_model.match) ]