mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Reworked the PDF parser so that it uses OCRmyPDF and produces archive files.
This commit is contained in:
@@ -107,23 +107,6 @@ def run_convert(input_file,
|
||||
raise ParseError("Convert failed at {}".format(args))
|
||||
|
||||
|
||||
def run_unpaper(pnm, logging_group=None):
    """
    Run the configured unpaper binary on an image file.

    The binary is taken from ``settings.UNPAPER_BINARY`` and is invoked with
    ``--overwrite --quiet``; its stdout and stderr are discarded.

    :param pnm: path (str) of the input image, normally ending in ".pnm".
    :param logging_group: value attached as the ``group`` extra on the debug
        log record that shows the executed command.
    :return: path of the output file handed to unpaper; for "name.pnm" this
        is "name.unpaper.pnm".
    :raises ParseError: if unpaper exits with a non-zero status.
    """
    suffix = ".pnm"
    # Only swap the trailing extension. A plain str.replace() would also
    # rewrite ".pnm" occurring in a directory name, and would leave the path
    # unchanged (so unpaper would overwrite its own input) when the
    # extension is missing.
    if pnm.endswith(suffix):
        pnm_out = pnm[:-len(suffix)] + ".unpaper.pnm"
    else:
        pnm_out = pnm + ".unpaper.pnm"

    command_args = (settings.UNPAPER_BINARY, "--overwrite", "--quiet", pnm,
                    pnm_out)

    logger.debug(f"Execute: {' '.join(command_args)}",
                 extra={'group': logging_group})

    # subprocess.run waits for the process to finish and exposes the exit
    # status.
    completed = subprocess.run(command_args,
                               stdout=subprocess.DEVNULL,
                               stderr=subprocess.DEVNULL)
    if completed.returncode != 0:
        raise ParseError(f"Unpaper failed at {command_args}")

    return pnm_out
|
||||
|
||||
|
||||
class ParseError(Exception):
    """Raised when an external conversion step exits unsuccessfully."""
|
||||
|
||||
|
Reference in New Issue
Block a user