diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index 6086a5920..f312eee2a 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -46,25 +46,39 @@ class RasterisedDocumentParser(DocumentParser): """ out_path = os.path.join(self.tempdir, "convert.png") - gs_out_path = os.path.join(self.tempdir, "gs_out.png") - # Extract the first PDF page as a PNG using Ghostscript - # https://github.com/danielquinn/paperless/issues/447 - # call gs first - cmd = [self.GHOSTSCRIPT, - "-q", - "-sDEVICE=pngalpha", - "-o", gs_out_path, - self.document_path] - if not subprocess.Popen(cmd).wait() == 0: - raise ParseError("Thumbnail (gs) failed at {}".format(cmd)) - # then run convert on the output from gs - run_convert( - self.CONVERT, - "-scale", "500x5000", - "-alpha", "remove", - gs_out_path, - out_path + # Run convert to get a decent thumbnail + try: + run_convert( + self.CONVERT, + "-scale", "500x5000", + "-alpha", "remove", + "{}[0]".format(self.document_path), + out_path + ) + except ParseError: + # if convert fails, fall back to extracting + # the first PDF page as a PNG using Ghostscript + self.log( + "warning", + "Thumbnail generation with ImageMagick failed, " + "falling back to Ghostscript." + ) + gs_out_path = os.path.join(self.tempdir, "gs_out.png") + cmd = [self.GHOSTSCRIPT, + "-q", + "-sDEVICE=pngalpha", + "-o", gs_out_path, + self.document_path] + if not subprocess.Popen(cmd).wait() == 0: + raise ParseError("Thumbnail (gs) failed at {}".format(cmd)) + # then run convert on the output from gs + run_convert( + self.CONVERT, + "-scale", "500x5000", + "-alpha", "remove", + gs_out_path, + out_path ) return out_path