mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge pull request #494 from JensPfeifle/fix_447
fix parse error of some documents by using gs
This commit is contained in:
commit
305d50d7ed
@ -247,6 +247,9 @@ PAPERLESS_EMAIL_SECRET=""
|
||||
# Convert (part of the ImageMagick suite)
|
||||
#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
|
||||
|
||||
# Ghostscript
|
||||
#PAPERLESS_GS_BINARY = /usr/bin/gs
|
||||
|
||||
# Unpaper
|
||||
#PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper
|
||||
|
||||
|
@ -263,6 +263,9 @@ CONVERT_TMPDIR = os.getenv("PAPERLESS_CONVERT_TMPDIR")
|
||||
CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
|
||||
CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY")
|
||||
|
||||
# Ghostscript
|
||||
GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
|
||||
|
||||
# OptiPNG
|
||||
OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng")
|
||||
|
||||
|
@ -29,6 +29,7 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
"""
|
||||
|
||||
CONVERT = settings.CONVERT_BINARY
|
||||
GHOSTSCRIPT = settings.GS_BINARY
|
||||
DENSITY = settings.CONVERT_DENSITY if settings.CONVERT_DENSITY else 300
|
||||
THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
|
||||
UNPAPER = settings.UNPAPER_BINARY
|
||||
@ -47,13 +48,38 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
out_path = os.path.join(self.tempdir, "convert.png")
|
||||
|
||||
# Run convert to get a decent thumbnail
|
||||
run_convert(
|
||||
self.CONVERT,
|
||||
"-scale", "500x5000",
|
||||
"-alpha", "remove",
|
||||
"{}[0]".format(self.document_path),
|
||||
out_path
|
||||
)
|
||||
try:
|
||||
run_convert(
|
||||
self.CONVERT,
|
||||
"-scale", "500x5000",
|
||||
"-alpha", "remove",
|
||||
"{}[0]".format(self.document_path),
|
||||
out_path
|
||||
)
|
||||
except ParseError:
|
||||
# if convert fails, fall back to extracting
|
||||
# the first PDF page as a PNG using Ghostscript
|
||||
self.log(
|
||||
"warning",
|
||||
"Thumbnail generation with ImageMagick failed, "
|
||||
"falling back to Ghostscript."
|
||||
)
|
||||
gs_out_path = os.path.join(self.tempdir, "gs_out.png")
|
||||
cmd = [self.GHOSTSCRIPT,
|
||||
"-q",
|
||||
"-sDEVICE=pngalpha",
|
||||
"-o", gs_out_path,
|
||||
self.document_path]
|
||||
if not subprocess.Popen(cmd).wait() == 0:
|
||||
raise ParseError("Thumbnail (gs) failed at {}".format(cmd))
|
||||
# then run convert on the output from gs
|
||||
run_convert(
|
||||
self.CONVERT,
|
||||
"-scale", "500x5000",
|
||||
"-alpha", "remove",
|
||||
gs_out_path,
|
||||
out_path
|
||||
)
|
||||
|
||||
return out_path
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user