mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Use optipng to optimise document thumbnails
This commit is contained in:
parent
2a3f766b93
commit
750ab5bf85
@ -14,7 +14,7 @@ ENV PAPERLESS_EXPORT_DIR=/export \
|
||||
|
||||
|
||||
RUN apk update --no-cache && apk add python3 gnupg libmagic bash shadow curl \
|
||||
sudo poppler tesseract-ocr imagemagick ghostscript unpaper && \
|
||||
sudo poppler tesseract-ocr imagemagick ghostscript unpaper optipng && \
|
||||
apk add --virtual .build-dependencies \
|
||||
python3-dev poppler-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
|
||||
# Install python dependencies
|
||||
|
@ -1,9 +1,14 @@
|
||||
Changelog
|
||||
#########
|
||||
|
||||
2.4.1
|
||||
2.5.0
|
||||
=====
|
||||
|
||||
* **New dependency**: Paperless now optimises thumbnail generation with
|
||||
`optipng`_, so you'll need to install that somewhere in your PATH or declare
|
||||
its location in ``PAPERLESS_OPTIPNG_BINARY``. The Docker image has already
|
||||
been updated on the Docker Hub, so you just need to pull the latest one from
|
||||
there if you're a Docker user.
|
||||
* An annoying bug in the date capture code was causing some bogus dates to be
|
||||
attached to documents, which in turn busted the UI. Thanks to `Andrew Peng`_
|
||||
for reporting this. `#414`_.
|
||||
@ -632,3 +637,4 @@ bulk of the work on this big change.
|
||||
|
||||
.. _pipenv: https://docs.pipenv.org/
|
||||
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
|
||||
.. _optipng: http://optipng.sourceforge.net/
|
||||
|
@ -213,3 +213,23 @@ PAPERLESS_DEBUG="false"
|
||||
# The number of years for which a correspondent will be included in the recent
|
||||
# correspondents filter.
|
||||
#PAPERLESS_RECENT_CORRESPONDENT_YEARS=1
|
||||
|
||||
###############################################################################
|
||||
#### Third-Party Binaries ####
|
||||
###############################################################################
|
||||
|
||||
# There are a few external software packages that Paperless expects to find on
|
||||
# your system when it starts up. Unless you've done something creative with
|
||||
# their installation, you probably won't need to edit any of these. However,
|
||||
# if you've installed these programs somewhere where simply typing the name of
|
||||
# the program doesn't automatically execute it (i.e. the program isn't in your
|
||||
# $PATH), then you'll need to specify the literal path for that program here.
|
||||
|
||||
# Convert (part of the ImageMagick suite)
|
||||
#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
|
||||
|
||||
# Unpaper
|
||||
#PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper
|
||||
|
||||
# Optipng (for optimising thumbnail sizes)
|
||||
#PAPERLESS_OPTIPNG_BINARY=/usr/bin/optipng
|
||||
|
@ -149,7 +149,7 @@ class Consumer:
|
||||
parsed_document = parser_class(doc)
|
||||
|
||||
try:
|
||||
thumbnail = parsed_document.get_thumbnail()
|
||||
thumbnail = parsed_document.get_optimised_thumbnail()
|
||||
date = parsed_document.get_date()
|
||||
document = self._store(
|
||||
parsed_document.get_text(),
|
||||
|
@ -2,6 +2,7 @@ import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
import dateparser
|
||||
@ -36,6 +37,7 @@ class DocumentParser:
|
||||
|
||||
SCRATCH = settings.SCRATCH_DIR
|
||||
DATE_ORDER = settings.DATE_ORDER
|
||||
OPTIPNG = settings.OPTIPNG_BINARY
|
||||
|
||||
def __init__(self, path):
|
||||
self.document_path = path
|
||||
@ -49,6 +51,19 @@ class DocumentParser:
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def optimise_thumbnail(self, in_path):
    """
    Run the configured optipng binary over the image at ``in_path`` and
    return the path of the optimised copy, which is written as
    ``optipng.png`` inside this parser's temporary directory.

    Raises ParseError if optipng exits with a non-zero status.
    """

    optimised_path = os.path.join(self.tempdir, "optipng.png")
    command = (self.OPTIPNG, "-o5", in_path, "-out", optimised_path)

    # Block until optipng has finished so the output file is complete
    # before its path is handed back to the caller.
    exit_status = subprocess.Popen(command).wait()
    if exit_status != 0:
        raise ParseError("Optipng failed at {}".format(command))

    return optimised_path
|
||||
|
||||
def get_optimised_thumbnail(self):
    """
    Generate this document's thumbnail with ``get_thumbnail()`` and return
    the path produced by passing it through ``optimise_thumbnail()``.
    """
    raw_thumbnail = self.get_thumbnail()
    return self.optimise_thumbnail(raw_thumbnail)
|
||||
|
||||
def get_text(self):
|
||||
"""
|
||||
Returns the text from the document and only the text.
|
||||
|
@ -76,7 +76,12 @@ def binaries_check(app_configs, **kwargs):
|
||||
error = "Paperless can't find {}. Without it, consumption is impossible."
|
||||
hint = "Either it's not in your ${PATH} or it's not installed."
|
||||
|
||||
binaries = (settings.CONVERT_BINARY, settings.UNPAPER_BINARY, "tesseract")
|
||||
binaries = (
|
||||
settings.CONVERT_BINARY,
|
||||
settings.OPTIPNG_BINARY,
|
||||
settings.UNPAPER_BINARY,
|
||||
"tesseract"
|
||||
)
|
||||
|
||||
check_messages = []
|
||||
for binary in binaries:
|
||||
|
@ -247,6 +247,9 @@ CONVERT_TMPDIR = os.getenv("PAPERLESS_CONVERT_TMPDIR")
|
||||
CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
|
||||
CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY")
|
||||
|
||||
# OptiPNG
|
||||
OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng")
|
||||
|
||||
# Unpaper
|
||||
UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", "unpaper")
|
||||
|
||||
|
@ -44,15 +44,18 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
The thumbnail of a PDF is just a 500px wide image of the first page.
|
||||
"""
|
||||
|
||||
out_path = os.path.join(self.tempdir, "convert.png")
|
||||
|
||||
# Run convert to get a decent thumbnail
|
||||
run_convert(
|
||||
self.CONVERT,
|
||||
"-scale", "500x5000",
|
||||
"-alpha", "remove",
|
||||
"{}[0]".format(self.document_path),
|
||||
os.path.join(self.tempdir, "convert.png")
|
||||
out_path
|
||||
)
|
||||
|
||||
return os.path.join(self.tempdir, "convert.png")
|
||||
return out_path
|
||||
|
||||
def _is_ocred(self):
|
||||
|
||||
|
@ -32,7 +32,7 @@ class TextDocumentParser(DocumentParser):
|
||||
text_color = "black" # text color
|
||||
psize = [500, 647] # icon size
|
||||
n_lines = 50 # number of lines to show
|
||||
output_file = os.path.join(self.tempdir, "convert-txt.png")
|
||||
out_path = os.path.join(self.tempdir, "convert.png")
|
||||
|
||||
temp_bg = os.path.join(self.tempdir, "bg.png")
|
||||
temp_txlayer = os.path.join(self.tempdir, "tx.png")
|
||||
@ -43,9 +43,13 @@ class TextDocumentParser(DocumentParser):
|
||||
work_size = ",".join([str(n - 1) for n in psize])
|
||||
r = str(round(psize[0] / 10))
|
||||
rounded = ",".join([r, r])
|
||||
run_command(self.CONVERT, "-size ", picsize, ' xc:none -draw ',
|
||||
'"fill ', bg_color, ' roundrectangle 0,0,',
|
||||
work_size, ",", rounded, '" ', temp_bg)
|
||||
run_command(
|
||||
self.CONVERT,
|
||||
"-size ", picsize,
|
||||
' xc:none -draw ',
|
||||
'"fill ', bg_color, ' roundrectangle 0,0,', work_size, ",", rounded, '" ', # NOQA: E501
|
||||
temp_bg
|
||||
)
|
||||
|
||||
def read_text():
|
||||
with open(self.document_path, 'r') as src:
|
||||
@ -54,22 +58,29 @@ class TextDocumentParser(DocumentParser):
|
||||
return text.replace('"', "'")
|
||||
|
||||
def create_txlayer():
|
||||
run_command(self.CONVERT,
|
||||
"-background none",
|
||||
"-fill",
|
||||
text_color,
|
||||
"-pointsize", "12",
|
||||
"-border 4 -bordercolor none",
|
||||
"-size ", txsize,
|
||||
' caption:"', read_text(), '" ',
|
||||
temp_txlayer)
|
||||
run_command(
|
||||
self.CONVERT,
|
||||
"-background none",
|
||||
"-fill",
|
||||
text_color,
|
||||
"-pointsize", "12",
|
||||
"-border 4 -bordercolor none",
|
||||
"-size ", txsize,
|
||||
' caption:"', read_text(), '" ',
|
||||
temp_txlayer
|
||||
)
|
||||
|
||||
create_txlayer()
|
||||
create_bg()
|
||||
run_command(self.CONVERT, temp_bg, temp_txlayer,
|
||||
"-background None -layers merge ", output_file)
|
||||
run_command(
|
||||
self.CONVERT,
|
||||
temp_bg,
|
||||
temp_txlayer,
|
||||
"-background None -layers merge ",
|
||||
out_path
|
||||
)
|
||||
|
||||
return output_file
|
||||
return out_path
|
||||
|
||||
def get_text(self):
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user