Add the new paperless_tika parser

This parser relies on external Tika and Gotenberg servers to parse
"Office" documents (.doc, .xls, .odt, etc.).

Signed-off-by: Jo Vandeginste <Jo.Vandeginste@kuleuven.be>
This commit is contained in:
Jo Vandeginste
2020-12-29 01:23:40 +01:00
parent d690b34ee0
commit b8e8bf3dd4
9 changed files with 276 additions and 0 deletions

View File

@@ -42,6 +42,7 @@ whoosh="~=2.7.4"
inotifyrecursive = "~=0.3.4"
ocrmypdf = "*"
tqdm = "*"
tika = "*"
[dev-packages]
coveralls = "*"