Reworked the PDF parser to use OCRmyPDF and produce archive files.

This commit is contained in:
Jonas Winkler
2020-11-25 14:50:43 +01:00
parent ac6c72a6c9
commit 15935ab61f
7 changed files with 374 additions and 186 deletions

View File

@@ -107,23 +107,6 @@ def run_convert(input_file,
raise ParseError("Convert failed at {}".format(args))
def run_unpaper(pnm, logging_group=None):
    """Run the configured unpaper binary on a PNM file.

    Args:
        pnm: Path (as a string) of the input image handed to unpaper.
        logging_group: Value attached to the debug log record as the
            ``group`` extra.

    Returns:
        The path of the output file passed to unpaper: ``pnm`` with its last
        ``.pnm`` replaced by ``.unpaper.pnm``.

    Raises:
        ParseError: If unpaper exits with a non-zero status.
    """
    # Rewrite only the LAST ".pnm" in the path. A plain str.replace() would
    # also rewrite earlier occurrences (e.g. a directory called "scan.pnm.d")
    # and point the output at a location that does not exist. If there is no
    # ".pnm" at all, the path is left unchanged, as before.
    head, sep, tail = pnm.rpartition(".pnm")
    pnm_out = f"{head}.unpaper.pnm{tail}" if sep else pnm

    command_args = (settings.UNPAPER_BINARY, "--overwrite", "--quiet", pnm,
                    pnm_out)

    logger.debug(f"Execute: {' '.join(command_args)}",
                 extra={'group': logging_group})

    # unpaper's own output is discarded; only the exit status matters.
    completed = subprocess.run(command_args,
                               stdout=subprocess.DEVNULL,
                               stderr=subprocess.DEVNULL)
    if completed.returncode != 0:
        raise ParseError(f"Unpaper failed at {command_args}")

    return pnm_out
class ParseError(Exception):
    """Signal that a parsing step failed.

    Raised by the helpers in this module (``run_convert``, ``run_unpaper``)
    when the external command they invoke does not succeed.
    """