mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 03:16:10 -06:00 
			
		
		
		
	Added a consume-start and consume-finish signal
This commit is contained in:
		@@ -26,6 +26,8 @@ from paperless.db import GnuPG
 | 
			
		||||
 | 
			
		||||
from .models import Correspondent, Tag, Document, Log
 | 
			
		||||
from .languages import ISO639
 | 
			
		||||
from .signals import (
 | 
			
		||||
    document_consumption_started, document_consumption_finished)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class OCRError(Exception):
 | 
			
		||||
@@ -118,22 +120,33 @@ class Consumer(object):
 | 
			
		||||
 | 
			
		||||
            self.log("info", "Consuming {}".format(doc))
 | 
			
		||||
 | 
			
		||||
            document_consumption_started.send(
 | 
			
		||||
                sender=self.__class__, filename=doc)
 | 
			
		||||
 | 
			
		||||
            tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH)
 | 
			
		||||
            imgs = self._get_greyscale(tempdir, doc)
 | 
			
		||||
            thumbnail = self._get_thumbnail(tempdir, doc)
 | 
			
		||||
 | 
			
		||||
            try:
 | 
			
		||||
                text = self._get_ocr(imgs)
 | 
			
		||||
                self._store(text, doc, thumbnail)
 | 
			
		||||
 | 
			
		||||
                document = self._store(self._get_ocr(imgs), doc, thumbnail)
 | 
			
		||||
 | 
			
		||||
            except OCRError as e:
 | 
			
		||||
 | 
			
		||||
                self._ignore.append(doc)
 | 
			
		||||
                self.log("error", "OCR FAILURE for {}: {}".format(doc, e))
 | 
			
		||||
                self._cleanup_tempdir(tempdir)
 | 
			
		||||
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            else:
 | 
			
		||||
 | 
			
		||||
                self._cleanup_tempdir(tempdir)
 | 
			
		||||
                self._cleanup_doc(doc)
 | 
			
		||||
 | 
			
		||||
                document_consumption_finished.send(
 | 
			
		||||
                    sender=self.__class__, filename=document)
 | 
			
		||||
 | 
			
		||||
    def _get_greyscale(self, tempdir, doc):
 | 
			
		||||
        """
 | 
			
		||||
        Greyscale images are easier for Tesseract to OCR
 | 
			
		||||
@@ -360,6 +373,8 @@ class Consumer(object):
 | 
			
		||||
 | 
			
		||||
        self.log("info", "Completed")
 | 
			
		||||
 | 
			
		||||
        return document
 | 
			
		||||
 | 
			
		||||
    def _cleanup_tempdir(self, d):
 | 
			
		||||
        self.log("debug", "Deleting directory {}".format(d))
 | 
			
		||||
        shutil.rmtree(d)
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										4
									
								
								src/documents/signals.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								src/documents/signals.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,4 @@
 | 
			
		||||
from django.dispatch import Signal
 | 
			
		||||
 | 
			
		||||
document_consumption_started = Signal(providing_args=["filename"])
 | 
			
		||||
document_consumption_finished = Signal(providing_args=["document"])
 | 
			
		||||
		Reference in New Issue
	
	Block a user