mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-09 09:58:20 -05:00
Use optipng to optimise document thumbnails
This commit is contained in:
parent
2a3f766b93
commit
750ab5bf85
@ -14,7 +14,7 @@ ENV PAPERLESS_EXPORT_DIR=/export \
|
|||||||
|
|
||||||
|
|
||||||
RUN apk update --no-cache && apk add python3 gnupg libmagic bash shadow curl \
|
RUN apk update --no-cache && apk add python3 gnupg libmagic bash shadow curl \
|
||||||
sudo poppler tesseract-ocr imagemagick ghostscript unpaper && \
|
sudo poppler tesseract-ocr imagemagick ghostscript unpaper optipng && \
|
||||||
apk add --virtual .build-dependencies \
|
apk add --virtual .build-dependencies \
|
||||||
python3-dev poppler-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
|
python3-dev poppler-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
|
||||||
# Install python dependencies
|
# Install python dependencies
|
||||||
|
@ -1,9 +1,14 @@
|
|||||||
Changelog
|
Changelog
|
||||||
#########
|
#########
|
||||||
|
|
||||||
2.4.1
|
2.5.0
|
||||||
=====
|
=====
|
||||||
|
|
||||||
|
* **New dependency**: Paperless now optimises thumbnail generation with
|
||||||
|
`optipng`_, so you'll need to install that somewhere in your PATH or declare
|
||||||
|
its location in ``PAPERLESS_OPTIPNG_BINARY``. The Docker image has already
|
||||||
|
been updated on the Docker Hub, so you just need to pull the latest one from
|
||||||
|
there if you're a Docker user.
|
||||||
* An annoying bug in the date capture code was causing some bogus dates to be
|
* An annoying bug in the date capture code was causing some bogus dates to be
|
||||||
attached to documents, which in turn busted the UI. Thanks to `Andrew Peng`_
|
attached to documents, which in turn busted the UI. Thanks to `Andrew Peng`_
|
||||||
for reporting this. `#414`_.
|
for reporting this. `#414`_.
|
||||||
@ -632,3 +637,4 @@ bulk of the work on this big change.
|
|||||||
|
|
||||||
.. _pipenv: https://docs.pipenv.org/
|
.. _pipenv: https://docs.pipenv.org/
|
||||||
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
|
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
|
||||||
|
.. _optipng: http://optipng.sourceforge.net/
|
||||||
|
@ -213,3 +213,23 @@ PAPERLESS_DEBUG="false"
|
|||||||
# The number of years for which a correspondent will be included in the recent
|
# The number of years for which a correspondent will be included in the recent
|
||||||
# correspondents filter.
|
# correspondents filter.
|
||||||
#PAPERLESS_RECENT_CORRESPONDENT_YEARS=1
|
#PAPERLESS_RECENT_CORRESPONDENT_YEARS=1
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
#### Third-Party Binaries ####
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# There are a few external software packages that Paperless expects to find on
|
||||||
|
# your system when it starts up. Unless you've done something creative with
|
||||||
|
# their installation, you probably won't need to edit any of these. However,
|
||||||
|
# if you've installed these programs somewhere where simply typing the name of
|
||||||
|
# the program doesn't automatically execute it (i.e. the program isn't in your
|
||||||
|
# $PATH), then you'll need to specify the literal path for that program here.
|
||||||
|
|
||||||
|
# Convert (part of the ImageMagick suite)
|
||||||
|
#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
|
||||||
|
|
||||||
|
# Unpaper
|
||||||
|
#PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper
|
||||||
|
|
||||||
|
# Optipng (for optimising thumbnail sizes)
|
||||||
|
#PAPERLESS_OPTIPNG_BINARY=/usr/bin/optipng
|
||||||
|
@ -149,7 +149,7 @@ class Consumer:
|
|||||||
parsed_document = parser_class(doc)
|
parsed_document = parser_class(doc)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
thumbnail = parsed_document.get_thumbnail()
|
thumbnail = parsed_document.get_optimised_thumbnail()
|
||||||
date = parsed_document.get_date()
|
date = parsed_document.get_date()
|
||||||
document = self._store(
|
document = self._store(
|
||||||
parsed_document.get_text(),
|
parsed_document.get_text(),
|
||||||
|
@ -2,6 +2,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
import dateparser
|
import dateparser
|
||||||
@ -36,6 +37,7 @@ class DocumentParser:
|
|||||||
|
|
||||||
SCRATCH = settings.SCRATCH_DIR
|
SCRATCH = settings.SCRATCH_DIR
|
||||||
DATE_ORDER = settings.DATE_ORDER
|
DATE_ORDER = settings.DATE_ORDER
|
||||||
|
OPTIPNG = settings.OPTIPNG_BINARY
|
||||||
|
|
||||||
def __init__(self, path):
|
def __init__(self, path):
|
||||||
self.document_path = path
|
self.document_path = path
|
||||||
@ -49,6 +51,19 @@ class DocumentParser:
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
def optimise_thumbnail(self, in_path):
    """
    Runs optipng over the thumbnail at ``in_path`` and returns the path of
    the optimised copy.

    The optimised image is written to ``optipng.png`` inside
    ``self.tempdir``; ``in_path`` is only handed to optipng as its input.

    :param in_path: path of the PNG thumbnail to optimise
    :returns: path of the optimised PNG inside ``self.tempdir``
    :raises ParseError: if optipng exits with a non-zero status
    """

    out_path = os.path.join(self.tempdir, "optipng.png")

    args = (self.OPTIPNG, "-o5", in_path, "-out", out_path)

    # subprocess.call() blocks until the process exits, exactly like
    # Popen(args).wait(), but also reaps the child if the wait is interrupted.
    if subprocess.call(args) != 0:
        raise ParseError("Optipng failed at {}".format(args))

    return out_path
|
||||||
|
|
||||||
|
def get_optimised_thumbnail(self):
    """
    Returns the path of the document's thumbnail after it has been passed
    through ``optimise_thumbnail()``.
    """
    return self.optimise_thumbnail(self.get_thumbnail())
|
||||||
|
|
||||||
def get_text(self):
|
def get_text(self):
|
||||||
"""
|
"""
|
||||||
Returns the text from the document and only the text.
|
Returns the text from the document and only the text.
|
||||||
|
@ -76,7 +76,12 @@ def binaries_check(app_configs, **kwargs):
|
|||||||
error = "Paperless can't find {}. Without it, consumption is impossible."
|
error = "Paperless can't find {}. Without it, consumption is impossible."
|
||||||
hint = "Either it's not in your ${PATH} or it's not installed."
|
hint = "Either it's not in your ${PATH} or it's not installed."
|
||||||
|
|
||||||
binaries = (settings.CONVERT_BINARY, settings.UNPAPER_BINARY, "tesseract")
|
binaries = (
|
||||||
|
settings.CONVERT_BINARY,
|
||||||
|
settings.OPTIPNG_BINARY,
|
||||||
|
settings.UNPAPER_BINARY,
|
||||||
|
"tesseract"
|
||||||
|
)
|
||||||
|
|
||||||
check_messages = []
|
check_messages = []
|
||||||
for binary in binaries:
|
for binary in binaries:
|
||||||
|
@ -247,6 +247,9 @@ CONVERT_TMPDIR = os.getenv("PAPERLESS_CONVERT_TMPDIR")
|
|||||||
CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
|
CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
|
||||||
CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY")
|
CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY")
|
||||||
|
|
||||||
|
# OptiPNG
|
||||||
|
OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng")
|
||||||
|
|
||||||
# Unpaper
|
# Unpaper
|
||||||
UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", "unpaper")
|
UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", "unpaper")
|
||||||
|
|
||||||
|
@ -44,15 +44,18 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
The thumbnail of a PDF is just a 500px wide image of the first page.
|
The thumbnail of a PDF is just a 500px wide image of the first page.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
out_path = os.path.join(self.tempdir, "convert.png")
|
||||||
|
|
||||||
|
# Run convert to get a decent thumbnail
|
||||||
run_convert(
|
run_convert(
|
||||||
self.CONVERT,
|
self.CONVERT,
|
||||||
"-scale", "500x5000",
|
"-scale", "500x5000",
|
||||||
"-alpha", "remove",
|
"-alpha", "remove",
|
||||||
"{}[0]".format(self.document_path),
|
"{}[0]".format(self.document_path),
|
||||||
os.path.join(self.tempdir, "convert.png")
|
out_path
|
||||||
)
|
)
|
||||||
|
|
||||||
return os.path.join(self.tempdir, "convert.png")
|
return out_path
|
||||||
|
|
||||||
def _is_ocred(self):
|
def _is_ocred(self):
|
||||||
|
|
||||||
|
@ -32,7 +32,7 @@ class TextDocumentParser(DocumentParser):
|
|||||||
text_color = "black" # text color
|
text_color = "black" # text color
|
||||||
psize = [500, 647] # icon size
|
psize = [500, 647] # icon size
|
||||||
n_lines = 50 # number of lines to show
|
n_lines = 50 # number of lines to show
|
||||||
output_file = os.path.join(self.tempdir, "convert-txt.png")
|
out_path = os.path.join(self.tempdir, "convert.png")
|
||||||
|
|
||||||
temp_bg = os.path.join(self.tempdir, "bg.png")
|
temp_bg = os.path.join(self.tempdir, "bg.png")
|
||||||
temp_txlayer = os.path.join(self.tempdir, "tx.png")
|
temp_txlayer = os.path.join(self.tempdir, "tx.png")
|
||||||
@ -43,9 +43,13 @@ class TextDocumentParser(DocumentParser):
|
|||||||
work_size = ",".join([str(n - 1) for n in psize])
|
work_size = ",".join([str(n - 1) for n in psize])
|
||||||
r = str(round(psize[0] / 10))
|
r = str(round(psize[0] / 10))
|
||||||
rounded = ",".join([r, r])
|
rounded = ",".join([r, r])
|
||||||
run_command(self.CONVERT, "-size ", picsize, ' xc:none -draw ',
|
run_command(
|
||||||
'"fill ', bg_color, ' roundrectangle 0,0,',
|
self.CONVERT,
|
||||||
work_size, ",", rounded, '" ', temp_bg)
|
"-size ", picsize,
|
||||||
|
' xc:none -draw ',
|
||||||
|
'"fill ', bg_color, ' roundrectangle 0,0,', work_size, ",", rounded, '" ', # NOQA: E501
|
||||||
|
temp_bg
|
||||||
|
)
|
||||||
|
|
||||||
def read_text():
|
def read_text():
|
||||||
with open(self.document_path, 'r') as src:
|
with open(self.document_path, 'r') as src:
|
||||||
@ -54,22 +58,29 @@ class TextDocumentParser(DocumentParser):
|
|||||||
return text.replace('"', "'")
|
return text.replace('"', "'")
|
||||||
|
|
||||||
def create_txlayer():
|
def create_txlayer():
|
||||||
run_command(self.CONVERT,
|
run_command(
|
||||||
"-background none",
|
self.CONVERT,
|
||||||
"-fill",
|
"-background none",
|
||||||
text_color,
|
"-fill",
|
||||||
"-pointsize", "12",
|
text_color,
|
||||||
"-border 4 -bordercolor none",
|
"-pointsize", "12",
|
||||||
"-size ", txsize,
|
"-border 4 -bordercolor none",
|
||||||
' caption:"', read_text(), '" ',
|
"-size ", txsize,
|
||||||
temp_txlayer)
|
' caption:"', read_text(), '" ',
|
||||||
|
temp_txlayer
|
||||||
|
)
|
||||||
|
|
||||||
create_txlayer()
|
create_txlayer()
|
||||||
create_bg()
|
create_bg()
|
||||||
run_command(self.CONVERT, temp_bg, temp_txlayer,
|
run_command(
|
||||||
"-background None -layers merge ", output_file)
|
self.CONVERT,
|
||||||
|
temp_bg,
|
||||||
|
temp_txlayer,
|
||||||
|
"-background None -layers merge ",
|
||||||
|
out_path
|
||||||
|
)
|
||||||
|
|
||||||
return output_file
|
return out_path
|
||||||
|
|
||||||
def get_text(self):
|
def get_text(self):
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user