mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-09-16 21:55:37 -05:00
feat: refactor for pluggable consumers
I've broken out the OCR-specific code from the consumers and dumped it all into its own app, `paperless_tesseract`. This new app should serve as a sample of how to create one's own consumer for different file types. Documentation for how to do this isn't ready yet, but for the impatient: * Create a new app * containing a `parsers.py` for your parser modelled after `paperless_tesseract.parsers.RasterisedDocumentParser` * containing a `signals.py` with a handler moddelled after `paperless_tesseract.signals.ConsumerDeclaration` * connect the signal handler to `documents.signals.document_consumer_declaration` in `your_app.apps` * Install the app into Paperless by declaring `PAPERLESS_INSTALLED_APPS=your_app`. Additional apps should be separated with commas. * Restart the consumer
This commit is contained in:
23
src/paperless_tesseract/signals.py
Normal file
23
src/paperless_tesseract/signals.py
Normal file
@@ -0,0 +1,23 @@
|
||||
import re
|
||||
|
||||
from .parsers import RasterisedDocumentParser
|
||||
|
||||
|
||||
class ConsumerDeclaration(object):
|
||||
|
||||
MATCHING_FILES = re.compile("^.*\.(pdf|jpg|gif|png|tiff|pnm|bmp)$")
|
||||
|
||||
@classmethod
|
||||
def handle(cls, sender, **kwargs):
|
||||
return cls.test
|
||||
|
||||
@classmethod
|
||||
def test(cls, doc):
|
||||
|
||||
if cls.MATCHING_FILES.match(doc):
|
||||
return {
|
||||
"parser": RasterisedDocumentParser,
|
||||
"weight": 0
|
||||
}
|
||||
|
||||
return None
|
Reference in New Issue
Block a user