From 6a36a4ec972bf7750f1fb0f62eeffbc33068cf48 Mon Sep 17 00:00:00 2001 From: ishirav Date: Sat, 23 Dec 2017 06:05:48 +0200 Subject: [PATCH] Support search terms that contain multiple words in ANY/ALL matching modes, by surrounding the terms with double quotes. --- src/documents/models.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index 2fa2dca0b..a9f17c2a2 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -89,7 +89,7 @@ class MatchingModel(models.Model): search_kwargs = {"flags": re.IGNORECASE} if self.matching_algorithm == self.MATCH_ALL: - for word in self.match.split(" "): + for word in self._split_match(): search_result = re.search( r"\b{}\b".format(word), text, **search_kwargs) if not search_result: @@ -97,7 +97,7 @@ class MatchingModel(models.Model): return True if self.matching_algorithm == self.MATCH_ANY: - for word in self.match.split(" "): + for word in self._split_match(): if re.search(r"\b{}\b".format(word), text, **search_kwargs): return True return False @@ -121,6 +121,19 @@ class MatchingModel(models.Model): raise NotImplementedError("Unsupported matching algorithm") + def _split_match(self): + ''' + Splits the match into individual keywords, getting rid of unnecessary spaces + and grouping quoted words together. + Example: + ' some random words "with quotes " and spaces' + ==> + ['some', 'random', 'words', 'with quotes', 'and', 'spaces'] + ''' + findterms = re.compile(r'"([^"]+)"|(\S+)').findall + normspace = re.compile(r'\s{2,}').sub + return [normspace(' ', (t[0] or t[1]).strip()) for t in findterms(self.match)] + def save(self, *args, **kwargs): self.match = self.match.lower()