mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Removed log components and introduced signals for tags & correspondents
This commit is contained in:
parent
49b56425e8
commit
b92e007e15
@ -31,6 +31,13 @@ class MonthListFilter(admin.SimpleListFilter):
|
||||
return queryset.filter(created__year=year, created__month=month)
|
||||
|
||||
|
||||
class CorrespondentAdmin(admin.ModelAdmin):
    """Admin changelist options for correspondents.

    Shows each correspondent's name next to its matching settings, lets the
    matching settings be edited directly from the list, and offers a filter
    on the matching algorithm.
    """

    # The two matching columns are editable in place on the changelist.
    list_editable = ("match", "matching_algorithm")
    list_filter = ("matching_algorithm",)
    # Resulting columns: name, match, matching_algorithm.
    list_display = ("name",) + list_editable
|
||||
|
||||
|
||||
class TagAdmin(admin.ModelAdmin):
|
||||
|
||||
list_display = ("name", "colour", "match", "matching_algorithm")
|
||||
@ -103,11 +110,11 @@ class DocumentAdmin(admin.ModelAdmin):
|
||||
|
||||
class LogAdmin(admin.ModelAdmin):
|
||||
|
||||
list_display = ("message", "level", "component")
|
||||
list_filter = ("level", "component",)
|
||||
list_display = ("message", "level",)
|
||||
list_filter = ("level",)
|
||||
|
||||
|
||||
admin.site.register(Correspondent)
|
||||
admin.site.register(Correspondent, CorrespondentAdmin)
|
||||
admin.site.register(Tag, TagAdmin)
|
||||
admin.site.register(Document, DocumentAdmin)
|
||||
admin.site.register(Log, LogAdmin)
|
||||
|
@ -2,4 +2,15 @@ from django.apps import AppConfig
|
||||
|
||||
|
||||
class DocumentsConfig(AppConfig):
|
||||
name = 'documents'
|
||||
|
||||
name = "documents"
|
||||
|
||||
def ready(self):
|
||||
|
||||
from .signals import document_consumption_finished
|
||||
from .signals.handlers import set_correspondent, set_tags
|
||||
|
||||
document_consumption_finished.connect(set_tags)
|
||||
document_consumption_finished.connect(set_correspondent)
|
||||
|
||||
AppConfig.ready(self)
|
||||
|
@ -80,8 +80,7 @@ class Consumer(object):
|
||||
|
||||
def log(self, level, message):
|
||||
getattr(self.logger, level)(message, extra={
|
||||
"group": self.logging_group,
|
||||
"component": Log.COMPONENT_CONSUMER
|
||||
"group": self.logging_group
|
||||
})
|
||||
|
||||
def consume(self):
|
||||
@ -107,7 +106,10 @@ class Consumer(object):
|
||||
self.log("info", "Consuming {}".format(doc))
|
||||
|
||||
document_consumption_started.send(
|
||||
sender=self.__class__, filename=doc)
|
||||
sender=self.__class__,
|
||||
filename=doc,
|
||||
logging_group=self.logging_group
|
||||
)
|
||||
|
||||
tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH)
|
||||
imgs = self._get_greyscale(tempdir, doc)
|
||||
@ -131,7 +133,10 @@ class Consumer(object):
|
||||
self._cleanup_doc(doc)
|
||||
|
||||
document_consumption_finished.send(
|
||||
sender=self.__class__, filename=document)
|
||||
sender=self.__class__,
|
||||
document=document,
|
||||
logging_group=self.logging_group
|
||||
)
|
||||
|
||||
def _get_greyscale(self, tempdir, doc):
|
||||
"""
|
||||
@ -271,7 +276,6 @@ class Consumer(object):
|
||||
def _store(self, text, doc, thumbnail):
|
||||
|
||||
file_info = FileInfo.from_path(doc)
|
||||
relevant_tags = set(list(Tag.match_all(text)) + list(file_info.tags))
|
||||
|
||||
stats = os.stat(doc)
|
||||
|
||||
@ -288,6 +292,7 @@ class Consumer(object):
|
||||
datetime.datetime.fromtimestamp(stats.st_mtime))
|
||||
)
|
||||
|
||||
relevant_tags = set(list(Tag.match_all(text)) + list(file_info.tags))
|
||||
if relevant_tags:
|
||||
tag_names = ", ".join([t.slug for t in relevant_tags])
|
||||
self.log("debug", "Tagging with {}".format(tag_names))
|
||||
|
@ -11,18 +11,11 @@ class PaperlessLogger(logging.StreamHandler):
|
||||
|
||||
logging.StreamHandler.emit(self, record)
|
||||
|
||||
if not hasattr(record, "component"):
|
||||
return
|
||||
|
||||
# We have to do the import here or Django will barf when it tries to
|
||||
# load this because the apps aren't loaded at that point
|
||||
from .models import Log
|
||||
|
||||
kwargs = {
|
||||
"message": record.msg,
|
||||
"component": record.component,
|
||||
"level": record.levelno,
|
||||
}
|
||||
kwargs = {"message": record.msg, "level": record.levelno}
|
||||
|
||||
if hasattr(record, "group"):
|
||||
kwargs["group"] = record.group
|
||||
|
@ -33,8 +33,7 @@ class Loggable(object):
|
||||
|
||||
def log(self, level, message):
|
||||
getattr(self.logger, level)(message, extra={
|
||||
"group": self.logging_group,
|
||||
"component": Log.COMPONENT_MAIL
|
||||
"group": self.logging_group
|
||||
})
|
||||
|
||||
|
||||
|
@ -47,10 +47,7 @@ class Command(BaseCommand):
|
||||
pass
|
||||
|
||||
logging.getLogger(__name__).info(
|
||||
"Starting document consumer at {}".format(
|
||||
settings.CONSUMPTION_DIR
|
||||
),
|
||||
extra={"component": Log.COMPONENT_CONSUMER}
|
||||
"Starting document consumer at {}".format(settings.CONSUMPTION_DIR)
|
||||
)
|
||||
|
||||
try:
|
||||
|
35
src/documents/migrations/0013_auto_20160325_2111.py
Normal file
35
src/documents/migrations/0013_auto_20160325_2111.py
Normal file
@ -0,0 +1,35 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Generated by Django 1.9.4 on 2016-03-25 21:11
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.utils.timezone
|
||||
|
||||
|
||||
# Choices offered by the new ``correspondent.matching_algorithm`` column.
_MATCHING_ALGORITHM_CHOICES = [
    (1, 'Any'),
    (2, 'All'),
    (3, 'Literal'),
    (4, 'Regular Expression'),
]

# Help text for ``correspondent.matching_algorithm``; kept verbatim.
_MATCHING_ALGORITHM_HELP = 'Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.'


class Migration(migrations.Migration):
    """Schema changes for the ``documents`` app following migration 0012.

    * adds ``match`` and ``matching_algorithm`` to ``correspondent``
    * changes ``document.created`` to default to ``django.utils.timezone.now``
    * removes ``log.component``
    """

    dependencies = [('documents', '0012_auto_20160305_0040')]

    operations = [
        # Text to match against, for correspondents.
        migrations.AddField(
            model_name='correspondent',
            name='match',
            field=models.CharField(blank=True, max_length=256),
        ),
        # How the ``match`` text is applied.
        migrations.AddField(
            model_name='correspondent',
            name='matching_algorithm',
            field=models.PositiveIntegerField(
                choices=_MATCHING_ALGORITHM_CHOICES,
                default=1,
                help_text=_MATCHING_ALGORITHM_HELP,
            ),
        ),
        # ``created`` now defaults to the current time.
        migrations.AlterField(
            model_name='document',
            name='created',
            field=models.DateTimeField(default=django.utils.timezone.now),
        ),
        # Log entries no longer record a component.
        migrations.RemoveField(
            model_name='log',
            name='component',
        ),
    ]
|
@ -15,50 +15,7 @@ from django.utils import timezone
|
||||
from .managers import LogManager
|
||||
|
||||
|
||||
class SluggedModel(models.Model):
|
||||
|
||||
name = models.CharField(max_length=128, unique=True)
|
||||
slug = models.SlugField(blank=True)
|
||||
|
||||
class Meta(object):
|
||||
abstract = True
|
||||
|
||||
def save(self, *args, **kwargs):
|
||||
if not self.slug:
|
||||
self.slug = slugify(self.name)
|
||||
models.Model.save(self, *args, **kwargs)
|
||||
|
||||
def __str__(self):
|
||||
return self.name
|
||||
|
||||
|
||||
class Correspondent(SluggedModel):
|
||||
|
||||
# This regex is probably more restrictive than it needs to be, but it's
|
||||
# better safe than sorry.
|
||||
SAFE_REGEX = re.compile(r"^[\w\- ,.']+$")
|
||||
|
||||
class Meta(object):
|
||||
ordering = ("name",)
|
||||
|
||||
|
||||
class Tag(SluggedModel):
|
||||
|
||||
COLOURS = (
|
||||
(1, "#a6cee3"),
|
||||
(2, "#1f78b4"),
|
||||
(3, "#b2df8a"),
|
||||
(4, "#33a02c"),
|
||||
(5, "#fb9a99"),
|
||||
(6, "#e31a1c"),
|
||||
(7, "#fdbf6f"),
|
||||
(8, "#ff7f00"),
|
||||
(9, "#cab2d6"),
|
||||
(10, "#6a3d9a"),
|
||||
(11, "#b15928"),
|
||||
(12, "#000000"),
|
||||
(13, "#cccccc")
|
||||
)
|
||||
class MatchingModel(models.Model):
|
||||
|
||||
MATCH_ANY = 1
|
||||
MATCH_ALL = 2
|
||||
@ -71,7 +28,9 @@ class Tag(SluggedModel):
|
||||
(MATCH_REGEX, "Regular Expression"),
|
||||
)
|
||||
|
||||
colour = models.PositiveIntegerField(choices=COLOURS, default=1)
|
||||
name = models.CharField(max_length=128, unique=True)
|
||||
slug = models.SlugField(blank=True)
|
||||
|
||||
match = models.CharField(max_length=256, blank=True)
|
||||
matching_algorithm = models.PositiveIntegerField(
|
||||
choices=MATCHING_ALGORITHMS,
|
||||
@ -88,6 +47,12 @@ class Tag(SluggedModel):
|
||||
)
|
||||
)
|
||||
|
||||
class Meta(object):
|
||||
abstract = True
|
||||
|
||||
def __str__(self):
|
||||
return self.name
|
||||
|
||||
@property
|
||||
def conditions(self):
|
||||
return "{}: \"{}\" ({})".format(
|
||||
@ -131,8 +96,44 @@ class Tag(SluggedModel):
|
||||
raise NotImplementedError("Unsupported matching algorithm")
|
||||
|
||||
def save(self, *args, **kwargs):
|
||||
|
||||
self.match = self.match.lower()
|
||||
SluggedModel.save(self, *args, **kwargs)
|
||||
|
||||
if not self.slug:
|
||||
self.slug = slugify(self.name)
|
||||
|
||||
models.Model.save(self, *args, **kwargs)
|
||||
|
||||
|
||||
class Correspondent(MatchingModel):
|
||||
|
||||
# This regex is probably more restrictive than it needs to be, but it's
|
||||
# better safe than sorry.
|
||||
SAFE_REGEX = re.compile(r"^[\w\- ,.']+$")
|
||||
|
||||
class Meta(object):
|
||||
ordering = ("name",)
|
||||
|
||||
|
||||
class Tag(MatchingModel):
|
||||
|
||||
COLOURS = (
|
||||
(1, "#a6cee3"),
|
||||
(2, "#1f78b4"),
|
||||
(3, "#b2df8a"),
|
||||
(4, "#33a02c"),
|
||||
(5, "#fb9a99"),
|
||||
(6, "#e31a1c"),
|
||||
(7, "#fdbf6f"),
|
||||
(8, "#ff7f00"),
|
||||
(9, "#cab2d6"),
|
||||
(10, "#6a3d9a"),
|
||||
(11, "#b15928"),
|
||||
(12, "#000000"),
|
||||
(13, "#cccccc")
|
||||
)
|
||||
|
||||
colour = models.PositiveIntegerField(choices=COLOURS, default=1)
|
||||
|
||||
|
||||
class Document(models.Model):
|
||||
@ -219,17 +220,9 @@ class Log(models.Model):
|
||||
(logging.CRITICAL, "Critical"),
|
||||
)
|
||||
|
||||
COMPONENT_CONSUMER = 1
|
||||
COMPONENT_MAIL = 2
|
||||
COMPONENTS = (
|
||||
(COMPONENT_CONSUMER, "Consumer"),
|
||||
(COMPONENT_MAIL, "Mail Fetcher")
|
||||
)
|
||||
|
||||
group = models.UUIDField(blank=True)
|
||||
message = models.TextField()
|
||||
level = models.PositiveIntegerField(choices=LEVELS, default=logging.INFO)
|
||||
component = models.PositiveIntegerField(choices=COMPONENTS)
|
||||
created = models.DateTimeField(auto_now_add=True)
|
||||
modified = models.DateTimeField(auto_now=True)
|
||||
|
||||
|
53
src/documents/signals/handlers.py
Normal file
53
src/documents/signals/handlers.py
Normal file
@ -0,0 +1,53 @@
|
||||
import logging
|
||||
|
||||
from ..models import Correspondent, Tag
|
||||
|
||||
|
||||
def logger(message, group):
    """Log ``message`` at DEBUG level on this module's logger.

    ``group`` is attached to the log record through ``extra`` under the
    ``"group"`` key.
    """
    module_log = logging.getLogger(__name__)
    record_extras = {"group": group}
    module_log.debug(message, extra=record_extras)
|
||||
|
||||
|
||||
def set_correspondent(sender, document=None, logging_group=None, **kwargs):
    """Assign a matching correspondent to ``document`` if it has none.

    Intended as a receiver for the ``document_consumption_finished`` signal.

    :param sender: the signal sender (unused).
    :param document: the document to update; its ``content`` is matched
        against the known correspondents.
    :param logging_group: group identifier attached to the debug log
        records emitted here.
    :param kwargs: any further signal arguments (ignored).
    :returns: ``None``.  The document is left untouched when it already has
        a correspondent or when nothing matches; otherwise the first match
        is assigned and only the ``correspondent`` field is saved.
    """

    # No sense in assigning a correspondent when one is already set.
    if document.correspondent:
        return

    # Materialise the matches up front: the emptiness test, len() and the
    # indexing below all need a real sequence, and match_all() (not defined
    # in this module) may well produce its results lazily.
    potential_correspondents = list(Correspondent.match_all(document.content))

    # No matching correspondents, so no need to continue
    if not potential_correspondents:
        return

    potential_count = len(potential_correspondents)
    selected = potential_correspondents[0]
    if potential_count > 1:
        message = "Detected {} potential correspondents, so we've opted for {}"
        logger(
            message.format(potential_count, selected),
            logging_group
        )

    logger(
        'Assigning correspondent "{}" to "{}" '.format(selected, document),
        logging_group
    )

    document.correspondent = selected
    # update_fields takes an iterable of field names.  A bare string would be
    # iterated character by character and rejected, so pass a tuple.
    document.save(update_fields=("correspondent",))
|
||||
|
||||
|
||||
def set_tags(sender, document=None, logging_group=None, **kwargs):
    """Add every matching tag that ``document`` does not already carry.

    Intended as a receiver for the ``document_consumption_finished`` signal.

    :param sender: the signal sender (unused).
    :param document: the document to tag; its ``content`` is matched
        against the known tags.
    :param logging_group: group identifier attached to the debug log record.
    :param kwargs: any further signal arguments (ignored).
    :returns: ``None``.  Nothing is logged or added when no new tag matches.
    """
    already_applied = set(document.tags.all())
    new_tags = set(Tag.match_all(document.content)).difference(already_applied)

    if new_tags:
        slugs = ", ".join([tag.slug for tag in new_tags])
        logger('Tagging "{}" with "{}"'.format(document, slugs), logging_group)

        document.tags.add(*new_tags)
|
@ -15,21 +15,9 @@ class TestPaperlessLog(TestCase):
|
||||
self.logger = logging.getLogger(
|
||||
"documents.management.commands.document_consumer")
|
||||
|
||||
def test_ignored(self):
|
||||
with mock.patch("logging.StreamHandler.emit") as __:
|
||||
self.assertEqual(Log.objects.all().count(), 0)
|
||||
self.logger.info("This is an informational message")
|
||||
self.logger.warning("This is an informational message")
|
||||
self.logger.error("This is an informational message")
|
||||
self.logger.critical("This is an informational message")
|
||||
self.assertEqual(Log.objects.all().count(), 0)
|
||||
|
||||
def test_that_it_saves_at_all(self):
|
||||
|
||||
kw = {
|
||||
"group": uuid.uuid4(),
|
||||
"component": Log.COMPONENT_MAIL
|
||||
}
|
||||
kw = {"group": uuid.uuid4()}
|
||||
|
||||
self.assertEqual(Log.objects.all().count(), 0)
|
||||
|
||||
@ -53,14 +41,8 @@ class TestPaperlessLog(TestCase):
|
||||
|
||||
def test_groups(self):
|
||||
|
||||
kw1 = {
|
||||
"group": uuid.uuid4(),
|
||||
"component": Log.COMPONENT_MAIL
|
||||
}
|
||||
kw2 = {
|
||||
"group": uuid.uuid4(),
|
||||
"component": Log.COMPONENT_MAIL
|
||||
}
|
||||
kw1 = {"group": uuid.uuid4()}
|
||||
kw2 = {"group": uuid.uuid4()}
|
||||
|
||||
self.assertEqual(Log.objects.all().count(), 0)
|
||||
|
||||
@ -86,49 +68,9 @@ class TestPaperlessLog(TestCase):
|
||||
self.assertEqual(Log.objects.all().count(), 4)
|
||||
self.assertEqual(Log.objects.filter(group=kw1["group"]).count(), 2)
|
||||
|
||||
def test_components(self):
|
||||
|
||||
c1 = Log.COMPONENT_CONSUMER
|
||||
c2 = Log.COMPONENT_MAIL
|
||||
kw1 = {
|
||||
"group": uuid.uuid4(),
|
||||
"component": c1
|
||||
}
|
||||
kw2 = {
|
||||
"group": kw1["group"],
|
||||
"component": c2
|
||||
}
|
||||
|
||||
self.assertEqual(Log.objects.all().count(), 0)
|
||||
|
||||
with mock.patch("logging.StreamHandler.emit") as __:
|
||||
|
||||
# Debug messages are ignored by default
|
||||
self.logger.debug("This is a debugging message", extra=kw1)
|
||||
self.assertEqual(Log.objects.all().count(), 0)
|
||||
|
||||
self.logger.info("This is an informational message", extra=kw2)
|
||||
self.assertEqual(Log.objects.all().count(), 1)
|
||||
self.assertEqual(Log.objects.filter(component=c2).count(), 1)
|
||||
|
||||
self.logger.warning("This is an warning message", extra=kw1)
|
||||
self.assertEqual(Log.objects.all().count(), 2)
|
||||
self.assertEqual(Log.objects.filter(component=c1).count(), 1)
|
||||
|
||||
self.logger.error("This is an error message", extra=kw2)
|
||||
self.assertEqual(Log.objects.all().count(), 3)
|
||||
self.assertEqual(Log.objects.filter(component=c2).count(), 2)
|
||||
|
||||
self.logger.critical("This is a critical message", extra=kw1)
|
||||
self.assertEqual(Log.objects.all().count(), 4)
|
||||
self.assertEqual(Log.objects.filter(component=c1).count(), 2)
|
||||
|
||||
def test_groupped_query(self):
|
||||
|
||||
kw = {
|
||||
"group": uuid.uuid4(),
|
||||
"component": Log.COMPONENT_MAIL
|
||||
}
|
||||
kw = {"group": uuid.uuid4()}
|
||||
with mock.patch("logging.StreamHandler.emit") as __:
|
||||
self.logger.info("Message 0", extra=kw)
|
||||
self.logger.info("Message 1", extra=kw)
|
||||
|
@ -43,7 +43,7 @@ INSTALLED_APPS = [
|
||||
|
||||
"django_extensions",
|
||||
|
||||
"documents",
|
||||
"documents.apps.DocumentsConfig",
|
||||
|
||||
"rest_framework",
|
||||
"crispy_forms",
|
||||
|
Loading…
x
Reference in New Issue
Block a user