Reworked the PDF parser to use OCRmyPDF and produce archive files.

2025-12-20 01:45:58 -06:00 · 2020-11-25 14:50:43 +01:00
parent ac6c72a6c9
commit 15935ab61f
7 changed files with 374 additions and 186 deletions
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -107,23 +107,6 @@ def run_convert(input_file,
        raise ParseError("Convert failed at {}".format(args))


-def run_unpaper(pnm, logging_group=None):
-    pnm_out = pnm.replace(".pnm", ".unpaper.pnm")
-
-    command_args = (settings.UNPAPER_BINARY, "--overwrite", "--quiet", pnm,
-                    pnm_out)
-
-    logger.debug(f"Execute: {' '.join(command_args)}",
-                 extra={'group': logging_group})
-
-    if not subprocess.Popen(command_args,
-                            stdout=subprocess.DEVNULL,
-                            stderr=subprocess.DEVNULL).wait() == 0:
-        raise ParseError(f"Unpaper failed at {command_args}")
-
-    return pnm_out
-
-
 class ParseError(Exception):
    pass