Reworked the PDF parser to use OCRmyPDF and produce archive files.

This commit is contained in:
Jonas Winkler
2020-11-25 14:50:43 +01:00
parent 95ec520f13
commit 2d559d330d
7 changed files with 374 additions and 186 deletions

View File

@@ -23,7 +23,6 @@ langdetect = "*"
pdftotext = "*"
pathvalidate = "*"
pillow = "*"
pyocr = "~=0.7.2"
python-gnupg = "*"
python-dotenv = "*"
python-dateutil = "*"
@@ -35,6 +34,7 @@ scikit-learn="~=0.23.2"
whitenoise = "~=5.2.0"
watchdog = "*"
whoosh="~=2.7.4"
ocrmypdf = "*"
[dev-packages]
coveralls = "*"