Reworked the PDF parser to use OCRmyPDF and produce archive files.

2026-01-24 22:39:02 -06:00 · 2020-11-25 14:50:43 +01:00
parent 95ec520f13
commit 2d559d330d
7 changed files with 374 additions and 186 deletions
--- a/2
+++ b/2
@@ -23,7 +23,6 @@ langdetect = "*"
 pdftotext = "*"
 pathvalidate = "*"
 pillow = "*"
-pyocr = "~=0.7.2"
 python-gnupg = "*"
 python-dotenv = "*"
 python-dateutil = "*"
@@ -35,6 +34,7 @@ scikit-learn="~=0.23.2"
 whitenoise = "~=5.2.0"
 watchdog = "*"
 whoosh="~=2.7.4"
+ocrmypdf = "*"

 [dev-packages]
 coveralls = "*"