mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge pull request #494 from JensPfeifle/fix_447
fix parse error of some documents by using gs
This commit is contained in:
commit
305d50d7ed
@ -247,6 +247,9 @@ PAPERLESS_EMAIL_SECRET=""
|
|||||||
# Convert (part of the ImageMagick suite)
|
# Convert (part of the ImageMagick suite)
|
||||||
#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
|
#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
|
||||||
|
|
||||||
|
# Ghostscript
|
||||||
|
#PAPERLESS_GS_BINARY=/usr/bin/gs
|
||||||
|
|
||||||
# Unpaper
|
# Unpaper
|
||||||
#PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper
|
#PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper
|
||||||
|
|
||||||
|
@ -263,6 +263,9 @@ CONVERT_TMPDIR = os.getenv("PAPERLESS_CONVERT_TMPDIR")
|
|||||||
CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
|
CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
|
||||||
CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY")
|
CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY")
|
||||||
|
|
||||||
|
# Ghostscript
|
||||||
|
GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
|
||||||
|
|
||||||
# OptiPNG
|
# OptiPNG
|
||||||
OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng")
|
OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng")
|
||||||
|
|
||||||
|
@ -29,6 +29,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
CONVERT = settings.CONVERT_BINARY
|
CONVERT = settings.CONVERT_BINARY
|
||||||
|
GHOSTSCRIPT = settings.GS_BINARY
|
||||||
DENSITY = settings.CONVERT_DENSITY if settings.CONVERT_DENSITY else 300
|
DENSITY = settings.CONVERT_DENSITY if settings.CONVERT_DENSITY else 300
|
||||||
THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
|
THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
|
||||||
UNPAPER = settings.UNPAPER_BINARY
|
UNPAPER = settings.UNPAPER_BINARY
|
||||||
@ -47,6 +48,7 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
out_path = os.path.join(self.tempdir, "convert.png")
|
out_path = os.path.join(self.tempdir, "convert.png")
|
||||||
|
|
||||||
# Run convert to get a decent thumbnail
|
# Run convert to get a decent thumbnail
|
||||||
|
try:
|
||||||
run_convert(
|
run_convert(
|
||||||
self.CONVERT,
|
self.CONVERT,
|
||||||
"-scale", "500x5000",
|
"-scale", "500x5000",
|
||||||
@ -54,6 +56,30 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
"{}[0]".format(self.document_path),
|
"{}[0]".format(self.document_path),
|
||||||
out_path
|
out_path
|
||||||
)
|
)
|
||||||
|
except ParseError:
|
||||||
|
# if convert fails, fall back to extracting
|
||||||
|
# the first PDF page as a PNG using Ghostscript
|
||||||
|
self.log(
|
||||||
|
"warning",
|
||||||
|
"Thumbnail generation with ImageMagick failed, "
|
||||||
|
"falling back to Ghostscript."
|
||||||
|
)
|
||||||
|
gs_out_path = os.path.join(self.tempdir, "gs_out.png")
|
||||||
|
cmd = [self.GHOSTSCRIPT,
|
||||||
|
"-q",
|
||||||
|
"-sDEVICE=pngalpha",
|
||||||
|
"-o", gs_out_path,
|
||||||
|
self.document_path]
|
||||||
|
if not subprocess.Popen(cmd).wait() == 0:
|
||||||
|
raise ParseError("Thumbnail (gs) failed at {}".format(cmd))
|
||||||
|
# then run convert on the output from gs
|
||||||
|
run_convert(
|
||||||
|
self.CONVERT,
|
||||||
|
"-scale", "500x5000",
|
||||||
|
"-alpha", "remove",
|
||||||
|
gs_out_path,
|
||||||
|
out_path
|
||||||
|
)
|
||||||
|
|
||||||
return out_path
|
return out_path
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user