mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-09 09:58:20 -05:00
Added thumbnail generation to the conumer
This commit is contained in:
parent
8a9ea4664c
commit
495ed1c36c
@ -119,10 +119,11 @@ class Consumer(object):
|
|||||||
|
|
||||||
tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH)
|
tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH)
|
||||||
pngs = self._get_greyscale(tempdir, doc)
|
pngs = self._get_greyscale(tempdir, doc)
|
||||||
|
thumbnail = self._get_thumbnail(tempdir, doc)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
text = self._get_ocr(pngs)
|
text = self._get_ocr(pngs)
|
||||||
self._store(text, doc)
|
self._store(text, doc, thumbnail)
|
||||||
except OCRError as e:
|
except OCRError as e:
|
||||||
self._ignore.append(doc)
|
self._ignore.append(doc)
|
||||||
self.log("error", "OCR FAILURE for {}: {}".format(doc, e))
|
self.log("error", "OCR FAILURE for {}: {}".format(doc, e))
|
||||||
@ -133,6 +134,9 @@ class Consumer(object):
|
|||||||
self._cleanup_doc(doc)
|
self._cleanup_doc(doc)
|
||||||
|
|
||||||
def _get_greyscale(self, tempdir, doc):
|
def _get_greyscale(self, tempdir, doc):
|
||||||
|
"""
|
||||||
|
Greyscale images are easier for Tesseract to OCR
|
||||||
|
"""
|
||||||
|
|
||||||
self.log("info", "Generating greyscale image from {}".format(doc))
|
self.log("info", "Generating greyscale image from {}".format(doc))
|
||||||
|
|
||||||
@ -150,6 +154,23 @@ class Consumer(object):
|
|||||||
|
|
||||||
return sorted(filter(lambda __: os.path.isfile(__), pngs))
|
return sorted(filter(lambda __: os.path.isfile(__), pngs))
|
||||||
|
|
||||||
|
def _get_thumbnail(self, tempdir, doc):
|
||||||
|
"""
|
||||||
|
The thumbnail of a PDF is just a 500px wide image of the first page.
|
||||||
|
"""
|
||||||
|
|
||||||
|
self.log("info", "Generating the thumbnail")
|
||||||
|
|
||||||
|
subprocess.Popen((
|
||||||
|
self.CONVERT,
|
||||||
|
"-scale", "500x5000",
|
||||||
|
"-alpha", "remove",
|
||||||
|
doc,
|
||||||
|
os.path.join(tempdir, "convert-%04d.png")
|
||||||
|
)).wait()
|
||||||
|
|
||||||
|
return os.path.join(tempdir, "convert-0000.png")
|
||||||
|
|
||||||
def _guess_language(self, text):
|
def _guess_language(self, text):
|
||||||
try:
|
try:
|
||||||
guess = langdetect.detect(text)
|
guess = langdetect.detect(text)
|
||||||
@ -288,7 +309,7 @@ class Consumer(object):
|
|||||||
m = re.match(self.REGEX_TITLE, parseable)
|
m = re.match(self.REGEX_TITLE, parseable)
|
||||||
return None, m.group(1), (), get_suffix(m.group(2))
|
return None, m.group(1), (), get_suffix(m.group(2))
|
||||||
|
|
||||||
def _store(self, text, doc):
|
def _store(self, text, doc, thumbnail):
|
||||||
|
|
||||||
sender, title, tags, file_type = self._guess_attributes_from_name(doc)
|
sender, title, tags, file_type = self._guess_attributes_from_name(doc)
|
||||||
relevant_tags = set(list(Tag.match_all(text)) + list(tags))
|
relevant_tags = set(list(Tag.match_all(text)) + list(tags))
|
||||||
@ -313,9 +334,16 @@ class Consumer(object):
|
|||||||
self.log("debug", "Tagging with {}".format(tag_names))
|
self.log("debug", "Tagging with {}".format(tag_names))
|
||||||
document.tags.add(*relevant_tags)
|
document.tags.add(*relevant_tags)
|
||||||
|
|
||||||
|
# Encrypt and store the actual document
|
||||||
with open(doc, "rb") as unencrypted:
|
with open(doc, "rb") as unencrypted:
|
||||||
with open(document.source_path, "wb") as encrypted:
|
with open(document.source_path, "wb") as encrypted:
|
||||||
self.log("debug", "Encrypting")
|
self.log("debug", "Encrypting the document")
|
||||||
|
encrypted.write(GnuPG.encrypted(unencrypted))
|
||||||
|
|
||||||
|
# Encrypt and store the thumbnail
|
||||||
|
with open(thumbnail, "rb") as unencrypted:
|
||||||
|
with open(document.thumbnail_path, "wb") as encrypted:
|
||||||
|
self.log("debug", "Encrypting the thumbnail")
|
||||||
encrypted.write(GnuPG.encrypted(unencrypted))
|
encrypted.write(GnuPG.encrypted(unencrypted))
|
||||||
|
|
||||||
self.log("info", "Completed")
|
self.log("info", "Completed")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user