mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-17 10:13:56 -05:00
Added a consume-start and consume-finish signal
This commit is contained in:
parent
8f9e34078b
commit
1170139127
@ -26,6 +26,8 @@ from paperless.db import GnuPG
|
|||||||
|
|
||||||
from .models import Correspondent, Tag, Document, Log
|
from .models import Correspondent, Tag, Document, Log
|
||||||
from .languages import ISO639
|
from .languages import ISO639
|
||||||
|
from .signals import (
|
||||||
|
document_consumption_started, document_consumption_finished)
|
||||||
|
|
||||||
|
|
||||||
class OCRError(Exception):
|
class OCRError(Exception):
|
||||||
@ -118,22 +120,33 @@ class Consumer(object):
|
|||||||
|
|
||||||
self.log("info", "Consuming {}".format(doc))
|
self.log("info", "Consuming {}".format(doc))
|
||||||
|
|
||||||
|
document_consumption_started.send(
|
||||||
|
sender=self.__class__, filename=doc)
|
||||||
|
|
||||||
tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH)
|
tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH)
|
||||||
imgs = self._get_greyscale(tempdir, doc)
|
imgs = self._get_greyscale(tempdir, doc)
|
||||||
thumbnail = self._get_thumbnail(tempdir, doc)
|
thumbnail = self._get_thumbnail(tempdir, doc)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
text = self._get_ocr(imgs)
|
|
||||||
self._store(text, doc, thumbnail)
|
document = self._store(self._get_ocr(imgs), doc, thumbnail)
|
||||||
|
|
||||||
except OCRError as e:
|
except OCRError as e:
|
||||||
|
|
||||||
self._ignore.append(doc)
|
self._ignore.append(doc)
|
||||||
self.log("error", "OCR FAILURE for {}: {}".format(doc, e))
|
self.log("error", "OCR FAILURE for {}: {}".format(doc, e))
|
||||||
self._cleanup_tempdir(tempdir)
|
self._cleanup_tempdir(tempdir)
|
||||||
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
self._cleanup_tempdir(tempdir)
|
self._cleanup_tempdir(tempdir)
|
||||||
self._cleanup_doc(doc)
|
self._cleanup_doc(doc)
|
||||||
|
|
||||||
|
# Announce the stored document to listeners. The signal declares its payload
# as ``document``, and the value is the object returned by ``_store``, so it
# is sent under that keyword rather than ``filename``.
document_consumption_finished.send(
    sender=self.__class__, document=document)
|
||||||
|
|
||||||
def _get_greyscale(self, tempdir, doc):
|
def _get_greyscale(self, tempdir, doc):
|
||||||
"""
|
"""
|
||||||
Greyscale images are easier for Tesseract to OCR
|
Greyscale images are easier for Tesseract to OCR
|
||||||
@ -360,6 +373,8 @@ class Consumer(object):
|
|||||||
|
|
||||||
self.log("info", "Completed")
|
self.log("info", "Completed")
|
||||||
|
|
||||||
|
return document
|
||||||
|
|
||||||
def _cleanup_tempdir(self, d):
|
def _cleanup_tempdir(self, d):
|
||||||
self.log("debug", "Deleting directory {}".format(d))
|
self.log("debug", "Deleting directory {}".format(d))
|
||||||
shutil.rmtree(d)
|
shutil.rmtree(d)
|
||||||
|
4
src/documents/signals.py
Normal file
4
src/documents/signals.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
from django.dispatch import Signal
|
||||||
|
|
||||||
|
# Sent by the consumer after it logs "Consuming ..." and before it begins
# working on the file. Keyword arguments sent: ``filename``.
#
# ``providing_args`` was purely documentational, is deprecated since
# Django 3.1 and rejected by Django 4.0+, so the payload is documented in
# these comments instead of being passed to ``Signal``.
document_consumption_started = Signal()

# Sent by the consumer after a file has been stored and cleaned up.
# Declared keyword arguments: ``document`` (the value returned by the
# consumer's ``_store``).
# NOTE(review): confirm the send site passes it under this keyword.
document_consumption_finished = Signal()
|
Loading…
x
Reference in New Issue
Block a user