mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Added a consume-start and consume-finish signal
This commit is contained in:
parent
8f9e34078b
commit
1170139127
@ -26,6 +26,8 @@ from paperless.db import GnuPG
|
||||
|
||||
from .models import Correspondent, Tag, Document, Log
|
||||
from .languages import ISO639
|
||||
from .signals import (
|
||||
document_consumption_started, document_consumption_finished)
|
||||
|
||||
|
||||
class OCRError(Exception):
|
||||
@ -118,22 +120,33 @@ class Consumer(object):
|
||||
|
||||
self.log("info", "Consuming {}".format(doc))
|
||||
|
||||
document_consumption_started.send(
|
||||
sender=self.__class__, filename=doc)
|
||||
|
||||
tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH)
|
||||
imgs = self._get_greyscale(tempdir, doc)
|
||||
thumbnail = self._get_thumbnail(tempdir, doc)
|
||||
|
||||
try:
|
||||
text = self._get_ocr(imgs)
|
||||
self._store(text, doc, thumbnail)
|
||||
|
||||
document = self._store(self._get_ocr(imgs), doc, thumbnail)
|
||||
|
||||
except OCRError as e:
|
||||
|
||||
self._ignore.append(doc)
|
||||
self.log("error", "OCR FAILURE for {}: {}".format(doc, e))
|
||||
self._cleanup_tempdir(tempdir)
|
||||
|
||||
continue
|
||||
|
||||
else:
|
||||
|
||||
self._cleanup_tempdir(tempdir)
|
||||
self._cleanup_doc(doc)
|
||||
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__, filename=document)
|
||||
|
||||
def _get_greyscale(self, tempdir, doc):
|
||||
"""
|
||||
Greyscale images are easier for Tesseract to OCR
|
||||
@ -360,6 +373,8 @@ class Consumer(object):
|
||||
|
||||
self.log("info", "Completed")
|
||||
|
||||
return document
|
||||
|
||||
def _cleanup_tempdir(self, d):
|
||||
self.log("debug", "Deleting directory {}".format(d))
|
||||
shutil.rmtree(d)
|
||||
|
4
src/documents/signals.py
Normal file
4
src/documents/signals.py
Normal file
@ -0,0 +1,4 @@
|
||||
from django.dispatch import Signal
|
||||
|
||||
# Sent by the consumer right after it logs "Consuming ..." for an item and
# before any temporary directory is created for it. The visible send() call
# passes the Consumer class as `sender` and the item being consumed as the
# `filename` keyword argument.
document_consumption_started = Signal(providing_args=["filename"])
|
||||
# Sent by the consumer once an item has been stored and the calls to
# `_cleanup_tempdir` and `_cleanup_doc` have been made; it is not reached for
# items that raised OCRError (those are logged and skipped via `continue`).
# NOTE(review): the argument declared here is `document`, but the visible
# send() call in Consumer passes the value returned by `_store` as
# `filename=document`. Receivers written against this declaration would not
# find a `document` keyword — confirm the intended name and align the two.
document_consumption_finished = Signal(providing_args=["document"])
|
Loading…
x
Reference in New Issue
Block a user