From 59e0c1fe4eadc9026ab40526000fe89f28c9e551 Mon Sep 17 00:00:00 2001
From: Trenton H <797416+stumpylog@users.noreply.github.com>
Date: Mon, 2 Jan 2023 14:20:46 -0800
Subject: [PATCH] Let convert handle the removal of the alpha channel

Log a warning when convert exits with a non-zero status instead of
silently ignoring the failure.
---
 src/paperless_tesseract/parsers.py | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py
index 4107cace8..44671fa11 100644
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -1,6 +1,7 @@
 import json
 import os
 import re
+import subprocess
 from pathlib import Path
 from typing import Optional
 
@@ -79,6 +80,29 @@ class RasterisedDocumentParser(DocumentParser):
         with Image.open(image) as im:
             return im.mode in ("RGBA", "LA")
 
+    def remove_alpha(self, image_path: str):
+        """
+        Strip the alpha channel from the image at image_path, in place,
+        by running the configured convert binary with "-alpha off".
+        A non-zero exit status is logged as a warning instead of being
+        silently ignored.
+        """
+        result = subprocess.run(
+            [
+                settings.CONVERT_BINARY,
+                "-alpha",
+                "off",
+                image_path,
+                image_path,
+            ],
+        )
+        if result.returncode != 0:
+            self.log(
+                "warning",
+                f"Removing alpha layer from {image_path} failed "
+                f"with exit code {result.returncode}",
+            )
+
     def get_dpi(self, image):
         try:
             with Image.open(image) as im:
@@ -230,11 +254,7 @@ class RasterisedDocumentParser(DocumentParser):
                     f"Removing alpha layer from {input_file} "
                     "for compatibility with img2pdf",
                 )
-                with Image.open(input_file) as im:
-                    background = Image.new("RGBA", im.size, (255, 255, 255))
-                    background.alpha_composite(im)
-                    background = background.convert("RGB")
-                    background.save(input_file, format=im.format)
+                self.remove_alpha(input_file)
 
             if dpi:
                 self.log("debug", f"Detected DPI for image {input_file}: {dpi}")