From 789abb3bbb3c14b85952e98485cf98c289deb9c0 Mon Sep 17 00:00:00 2001
From: jonaswinkler <jonas.winkler@jpwinkler.de>
Date: Fri, 18 Dec 2020 16:42:33 +0100
Subject: [PATCH] Replace per-term highlight index with a boolean flag and merge nearby matches

---
 docs/api.rst                                  | 15 ++++------
 .../result-highlight.component.html           |  2 +-
 src/documents/index.py                        | 29 +++++++++++--------
 3 files changed, 23 insertions(+), 23 deletions(-)
diff --git a/docs/api.rst b/docs/api.rst
index d352758fa..cff72a970 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -221,21 +221,16 @@ Each fragment contains a list of strings, and some of them are marked as a highl
 
     [
         [
-            {"text": "This is a sample text with a "},
-            {"text": "highlighted", "term": 0},
-            {"text": " word."}
+            {"text": "This is a sample text with a ", "highlight": false},
+            {"text": "highlighted", "highlight": true},
+            {"text": " word.", "highlight": false}
         ],
         [
-            {"text": "Another", "term": 1},
-            {"text": " fragment with a highlight."}
+            {"text": "Another", "highlight": true},
+            {"text": " fragment with a highlight.", "highlight": false}
         ]
     ]
 
-
-
-When ``term`` is present within a string, the word within ``text`` should be highlighted.
-The term index groups multiple matches together and words with the same index
-should get identical highlighting.
 A client may use this example to produce the following output:
 
 ... This is a sample text with a **highlighted** word. ... **Another** fragment with a highlight. ...
diff --git a/src-ui/src/app/components/search/result-highlight/result-highlight.component.html b/src-ui/src/app/components/search/result-highlight/result-highlight.component.html
index 1842f5cea..5dc5baa94 100644
--- a/src-ui/src/app/components/search/result-highlight/result-highlight.component.html
+++ b/src-ui/src/app/components/search/result-highlight/result-highlight.component.html
@@ -1,3 +1,3 @@
 ... <span *ngFor="let fragment of highlights">
-    <span *ngFor="let token of fragment" [ngClass]="token.term != null ? 'match term'+ token.term : ''">{{token.text}}</span> ... 
+    <span *ngFor="let token of fragment" [class.match]="token.highlight">{{token.text}}</span> ... 
 </span>
\ No newline at end of file
diff --git a/src/documents/index.py b/src/documents/index.py
index fdf7d7041..308ee932e 100644
--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -20,32 +20,37 @@ class JsonFormatter(Formatter):
         self.seen = {}
 
     def format_token(self, text, token, replace=False):
-        seen = self.seen
         ttext = self._text(get_text(text, token, replace))
-        if ttext in seen:
-            termnum = seen[ttext]
-        else:
-            termnum = len(seen)
-            seen[ttext] = termnum
-
-        return {'text': ttext, 'term': termnum}
+        return {'text': ttext, 'highlight': True}  # bool, as in docs/api.rst
 
     def format_fragment(self, fragment, replace=False):
         output = []
         index = fragment.startchar
         text = fragment.text
-
+        amend_token = None  # last highlighted dict, still open to merging
         for t in fragment.matches:
             if t.startchar is None:
                 continue
             if t.startchar < index:
                 continue
             if t.startchar > index:
-                output.append({'text': text[index:t.startchar]})
-            output.append(self.format_token(text, t, replace))
+                text_inbetween = text[index:t.startchar]
+                if amend_token and t.startchar - index < 10:
+                    amend_token['text'] += text_inbetween  # absorb short gap
+                else:
+                    output.append({'text': text_inbetween,
+                                   'highlight': False})
+                    amend_token = None  # long gap ends the merged run
+            token = self.format_token(text, t, replace)
+            if amend_token:
+                amend_token['text'] += token['text']  # extend current run
+            else:
+                output.append(token)
+                amend_token = token  # start a new run
             index = t.endchar
         if index < fragment.endchar:
-            output.append({'text': text[index:fragment.endchar]})
+            output.append({'text': text[index:fragment.endchar],
+                           'highlight': False})
         return output
 
     def format(self, fragments, replace=False):