From 789abb3bbb3c14b85952e98485cf98c289deb9c0 Mon Sep 17 00:00:00 2001 From: jonaswinkler Date: Fri, 18 Dec 2020 16:42:33 +0100 Subject: [PATCH] changed up the highlight fragment formatter --- docs/api.rst | 15 ++++------ .../result-highlight.component.html | 2 +- src/documents/index.py | 29 +++++++++++-------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index d352758fa..cff72a970 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -221,21 +221,16 @@ Each fragment contains a list of strings, and some of them are marked as a highl [ [ - {"text": "This is a sample text with a "}, - {"text": "highlighted", "term": 0}, - {"text": " word."} + {"text": "This is a sample text with a ", "highlight": false}, + {"text": "highlighted", "highlight": true}, + {"text": " word.", "highlight": false} ], [ - {"text": "Another", "term": 1}, - {"text": " fragment with a highlight."} + {"text": "Another", "highlight": true}, + {"text": " fragment with a highlight.", "highlight": false} ] ] - - -When ``term`` is present within a string, the word within ``text`` should be highlighted. -The term index groups multiple matches together and words with the same index -should get identical highlighting. A client may use this example to produce the following output: ... This is a sample text with a **highlighted** word. ... **Another** fragment with a highlight. ... diff --git a/src-ui/src/app/components/search/result-highlight/result-highlight.component.html b/src-ui/src/app/components/search/result-highlight/result-highlight.component.html index 1842f5cea..5dc5baa94 100644 --- a/src-ui/src/app/components/search/result-highlight/result-highlight.component.html +++ b/src-ui/src/app/components/search/result-highlight/result-highlight.component.html @@ -1,3 +1,3 @@ ... - {{token.text}} ... + {{token.text}} ... 
\ No newline at end of file
diff --git a/src/documents/index.py b/src/documents/index.py
index fdf7d7041..308ee932e 100644
--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -20,32 +20,37 @@ class JsonFormatter(Formatter):
         self.seen = {}
 
     def format_token(self, text, token, replace=False):
-        seen = self.seen
         ttext = self._text(get_text(text, token, replace))
-        if ttext in seen:
-            termnum = seen[ttext]
-        else:
-            termnum = len(seen)
-            seen[ttext] = termnum
-
-        return {'text': ttext, 'term': termnum}
+        return {'text': ttext, 'highlight': True}
 
     def format_fragment(self, fragment, replace=False):
         output = []
         index = fragment.startchar
         text = fragment.text
-
+        amend_token = None
         for t in fragment.matches:
             if t.startchar is None:
                 continue
             if t.startchar < index:
                 continue
             if t.startchar > index:
-                output.append({'text': text[index:t.startchar]})
-            output.append(self.format_token(text, t, replace))
+                text_inbetween = text[index:t.startchar]
+                if amend_token and t.startchar - index < 10:
+                    amend_token['text'] += text_inbetween
+                else:
+                    output.append({'text': text_inbetween,
+                                   'highlight': False})
+                    amend_token = None
+            token = self.format_token(text, t, replace)
+            if amend_token:
+                amend_token['text'] += token['text']
+            else:
+                output.append(token)
+                amend_token = token
             index = t.endchar
         if index < fragment.endchar:
-            output.append({'text': text[index:fragment.endchar]})
+            output.append({'text': text[index:fragment.endchar],
+                           'highlight': False})
         return output
 
     def format(self, fragments, replace=False):