Removed log components and introduced signals for tags & correspondents

This commit is contained in:
Daniel Quinn 2016-03-28 11:11:15 +01:00
parent 49b56425e8
commit b92e007e15
12 changed files with 175 additions and 140 deletions

@ -31,6 +31,13 @@ class MonthListFilter(admin.SimpleListFilter):
return queryset.filter(created__year=year, created__month=month) return queryset.filter(created__year=year, created__month=month)
class CorrespondentAdmin(admin.ModelAdmin):
list_display = ("name", "match", "matching_algorithm")
list_filter = ("matching_algorithm",)
list_editable = ("match", "matching_algorithm")
class TagAdmin(admin.ModelAdmin): class TagAdmin(admin.ModelAdmin):
list_display = ("name", "colour", "match", "matching_algorithm") list_display = ("name", "colour", "match", "matching_algorithm")
@ -103,11 +110,11 @@ class DocumentAdmin(admin.ModelAdmin):
class LogAdmin(admin.ModelAdmin): class LogAdmin(admin.ModelAdmin):
list_display = ("message", "level", "component") list_display = ("message", "level",)
list_filter = ("level", "component",) list_filter = ("level",)
admin.site.register(Correspondent) admin.site.register(Correspondent, CorrespondentAdmin)
admin.site.register(Tag, TagAdmin) admin.site.register(Tag, TagAdmin)
admin.site.register(Document, DocumentAdmin) admin.site.register(Document, DocumentAdmin)
admin.site.register(Log, LogAdmin) admin.site.register(Log, LogAdmin)

@ -2,4 +2,15 @@ from django.apps import AppConfig
class DocumentsConfig(AppConfig): class DocumentsConfig(AppConfig):
name = 'documents'
name = "documents"
def ready(self):
from .signals import document_consumption_finished
from .signals.handlers import set_correspondent, set_tags
document_consumption_finished.connect(set_tags)
document_consumption_finished.connect(set_correspondent)
AppConfig.ready(self)

@ -80,8 +80,7 @@ class Consumer(object):
def log(self, level, message): def log(self, level, message):
getattr(self.logger, level)(message, extra={ getattr(self.logger, level)(message, extra={
"group": self.logging_group, "group": self.logging_group
"component": Log.COMPONENT_CONSUMER
}) })
def consume(self): def consume(self):
@ -107,7 +106,10 @@ class Consumer(object):
self.log("info", "Consuming {}".format(doc)) self.log("info", "Consuming {}".format(doc))
document_consumption_started.send( document_consumption_started.send(
sender=self.__class__, filename=doc) sender=self.__class__,
filename=doc,
logging_group=self.logging_group
)
tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH) tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH)
imgs = self._get_greyscale(tempdir, doc) imgs = self._get_greyscale(tempdir, doc)
@ -131,7 +133,10 @@ class Consumer(object):
self._cleanup_doc(doc) self._cleanup_doc(doc)
document_consumption_finished.send( document_consumption_finished.send(
sender=self.__class__, filename=document) sender=self.__class__,
document=document,
logging_group=self.logging_group
)
def _get_greyscale(self, tempdir, doc): def _get_greyscale(self, tempdir, doc):
""" """
@ -271,7 +276,6 @@ class Consumer(object):
def _store(self, text, doc, thumbnail): def _store(self, text, doc, thumbnail):
file_info = FileInfo.from_path(doc) file_info = FileInfo.from_path(doc)
relevant_tags = set(list(Tag.match_all(text)) + list(file_info.tags))
stats = os.stat(doc) stats = os.stat(doc)
@ -288,6 +292,7 @@ class Consumer(object):
datetime.datetime.fromtimestamp(stats.st_mtime)) datetime.datetime.fromtimestamp(stats.st_mtime))
) )
relevant_tags = set(list(Tag.match_all(text)) + list(file_info.tags))
if relevant_tags: if relevant_tags:
tag_names = ", ".join([t.slug for t in relevant_tags]) tag_names = ", ".join([t.slug for t in relevant_tags])
self.log("debug", "Tagging with {}".format(tag_names)) self.log("debug", "Tagging with {}".format(tag_names))

@ -11,18 +11,11 @@ class PaperlessLogger(logging.StreamHandler):
logging.StreamHandler.emit(self, record) logging.StreamHandler.emit(self, record)
if not hasattr(record, "component"):
return
# We have to do the import here or Django will barf when it tries to # We have to do the import here or Django will barf when it tries to
# load this because the apps aren't loaded at that point # load this because the apps aren't loaded at that point
from .models import Log from .models import Log
kwargs = { kwargs = {"message": record.msg, "level": record.levelno}
"message": record.msg,
"component": record.component,
"level": record.levelno,
}
if hasattr(record, "group"): if hasattr(record, "group"):
kwargs["group"] = record.group kwargs["group"] = record.group

@ -33,8 +33,7 @@ class Loggable(object):
def log(self, level, message): def log(self, level, message):
getattr(self.logger, level)(message, extra={ getattr(self.logger, level)(message, extra={
"group": self.logging_group, "group": self.logging_group
"component": Log.COMPONENT_MAIL
}) })

@ -47,10 +47,7 @@ class Command(BaseCommand):
pass pass
logging.getLogger(__name__).info( logging.getLogger(__name__).info(
"Starting document consumer at {}".format( "Starting document consumer at {}".format(settings.CONSUMPTION_DIR)
settings.CONSUMPTION_DIR
),
extra={"component": Log.COMPONENT_CONSUMER}
) )
try: try:

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.4 on 2016-03-25 21:11
from __future__ import unicode_literals
from django.db import migrations, models
import django.utils.timezone
class Migration(migrations.Migration):
    """
    Schema changes for the ``documents`` app.

    * adds ``match`` and ``matching_algorithm`` to ``correspondent``;
    * alters ``document.created`` to default to ``django.utils.timezone.now``;
    * removes the ``component`` field from ``log``.
    """

    # Must be applied after the previous migration of the same app.
    dependencies = [
        ('documents', '0012_auto_20160305_0040'),
    ]

    operations = [
        # Free-text pattern matched against a document's content.
        migrations.AddField(
            model_name='correspondent',
            name='match',
            field=models.CharField(blank=True, max_length=256),
        ),
        # How ``match`` is interpreted; defaults to 1 ("Any").
        migrations.AddField(
            model_name='correspondent',
            name='matching_algorithm',
            field=models.PositiveIntegerField(
                choices=[
                    (1, 'Any'),
                    (2, 'All'),
                    (3, 'Literal'),
                    (4, 'Regular Expression'),
                ],
                default=1,
                help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.',
            ),
        ),
        migrations.AlterField(
            model_name='document',
            name='created',
            field=models.DateTimeField(default=django.utils.timezone.now),
        ),
        # Log entries no longer record which component produced them.
        migrations.RemoveField(
            model_name='log',
            name='component',
        ),
    ]

@ -15,50 +15,7 @@ from django.utils import timezone
from .managers import LogManager from .managers import LogManager
class SluggedModel(models.Model): class MatchingModel(models.Model):
name = models.CharField(max_length=128, unique=True)
slug = models.SlugField(blank=True)
class Meta(object):
abstract = True
def save(self, *args, **kwargs):
if not self.slug:
self.slug = slugify(self.name)
models.Model.save(self, *args, **kwargs)
def __str__(self):
return self.name
class Correspondent(SluggedModel):
# This regex is probably more restrictive than it needs to be, but it's
# better safe than sorry.
SAFE_REGEX = re.compile(r"^[\w\- ,.']+$")
class Meta(object):
ordering = ("name",)
class Tag(SluggedModel):
COLOURS = (
(1, "#a6cee3"),
(2, "#1f78b4"),
(3, "#b2df8a"),
(4, "#33a02c"),
(5, "#fb9a99"),
(6, "#e31a1c"),
(7, "#fdbf6f"),
(8, "#ff7f00"),
(9, "#cab2d6"),
(10, "#6a3d9a"),
(11, "#b15928"),
(12, "#000000"),
(13, "#cccccc")
)
MATCH_ANY = 1 MATCH_ANY = 1
MATCH_ALL = 2 MATCH_ALL = 2
@ -71,7 +28,9 @@ class Tag(SluggedModel):
(MATCH_REGEX, "Regular Expression"), (MATCH_REGEX, "Regular Expression"),
) )
colour = models.PositiveIntegerField(choices=COLOURS, default=1) name = models.CharField(max_length=128, unique=True)
slug = models.SlugField(blank=True)
match = models.CharField(max_length=256, blank=True) match = models.CharField(max_length=256, blank=True)
matching_algorithm = models.PositiveIntegerField( matching_algorithm = models.PositiveIntegerField(
choices=MATCHING_ALGORITHMS, choices=MATCHING_ALGORITHMS,
@ -88,6 +47,12 @@ class Tag(SluggedModel):
) )
) )
class Meta(object):
abstract = True
def __str__(self):
return self.name
@property @property
def conditions(self): def conditions(self):
return "{}: \"{}\" ({})".format( return "{}: \"{}\" ({})".format(
@ -131,8 +96,44 @@ class Tag(SluggedModel):
raise NotImplementedError("Unsupported matching algorithm") raise NotImplementedError("Unsupported matching algorithm")
def save(self, *args, **kwargs): def save(self, *args, **kwargs):
self.match = self.match.lower() self.match = self.match.lower()
SluggedModel.save(self, *args, **kwargs)
if not self.slug:
self.slug = slugify(self.name)
models.Model.save(self, *args, **kwargs)
class Correspondent(MatchingModel):
# This regex is probably more restrictive than it needs to be, but it's
# better safe than sorry.
SAFE_REGEX = re.compile(r"^[\w\- ,.']+$")
class Meta(object):
ordering = ("name",)
class Tag(MatchingModel):
COLOURS = (
(1, "#a6cee3"),
(2, "#1f78b4"),
(3, "#b2df8a"),
(4, "#33a02c"),
(5, "#fb9a99"),
(6, "#e31a1c"),
(7, "#fdbf6f"),
(8, "#ff7f00"),
(9, "#cab2d6"),
(10, "#6a3d9a"),
(11, "#b15928"),
(12, "#000000"),
(13, "#cccccc")
)
colour = models.PositiveIntegerField(choices=COLOURS, default=1)
class Document(models.Model): class Document(models.Model):
@ -219,17 +220,9 @@ class Log(models.Model):
(logging.CRITICAL, "Critical"), (logging.CRITICAL, "Critical"),
) )
COMPONENT_CONSUMER = 1
COMPONENT_MAIL = 2
COMPONENTS = (
(COMPONENT_CONSUMER, "Consumer"),
(COMPONENT_MAIL, "Mail Fetcher")
)
group = models.UUIDField(blank=True) group = models.UUIDField(blank=True)
message = models.TextField() message = models.TextField()
level = models.PositiveIntegerField(choices=LEVELS, default=logging.INFO) level = models.PositiveIntegerField(choices=LEVELS, default=logging.INFO)
component = models.PositiveIntegerField(choices=COMPONENTS)
created = models.DateTimeField(auto_now_add=True) created = models.DateTimeField(auto_now_add=True)
modified = models.DateTimeField(auto_now=True) modified = models.DateTimeField(auto_now=True)

@ -0,0 +1,53 @@
import logging
from ..models import Correspondent, Tag
def logger(message, group):
    """
    Emit ``message`` at DEBUG level on this module's logger.

    ``group`` is attached to the log record through ``extra`` so that
    handlers can read it back as ``record.group``.
    """
    module_log = logging.getLogger(__name__)
    module_log.debug(message, extra={"group": group})
def set_correspondent(sender, document=None, logging_group=None, **kwargs):
    """
    Signal handler: give ``document`` a correspondent if it has none.

    Every correspondent yielded by ``Correspondent.match_all`` for the
    document's content is a candidate; the first one is assigned and only
    the ``correspondent`` field is saved.

    :param sender: The signal sender (unused).
    :param document: The document to update.
    :param logging_group: Value passed to ``logger`` as the log group.
    :param kwargs: Any further signal arguments (ignored).
    """

    # No sense in assigning a correspondent when one is already set.
    if document.correspondent:
        return

    # Materialise the matches before testing/counting/indexing them: a lazy
    # iterable such as a generator is always truthy and supports neither
    # len() nor [0].
    # NOTE(review): match_all() is not visible from here -- confirm what it
    # returns; list() is harmless if it is already a list.
    potential_correspondents = list(
        Correspondent.match_all(document.content))

    # No matching correspondents, so no need to continue
    if not potential_correspondents:
        return

    potential_count = len(potential_correspondents)
    selected = potential_correspondents[0]
    if potential_count > 1:
        message = "Detected {} potential correspondents, so we've opted for {}"
        logger(
            message.format(potential_count, selected),
            logging_group
        )

    logger(
        'Assigning correspondent "{}" to "{}" '.format(selected, document),
        logging_group
    )

    document.correspondent = selected
    # update_fields has to be an iterable of field names.  A bare string is
    # iterated character by character and rejected by Django.
    document.save(update_fields=("correspondent",))
def set_tags(sender, document=None, logging_group=None, **kwargs):
    """
    Signal handler: attach every matching tag the document lacks.

    Tags returned by ``Tag.match_all`` for the document's content, minus
    those already on the document, are logged and then added.  Nothing is
    logged or added when that difference is empty.

    :param sender: The signal sender (unused).
    :param document: The document to tag.
    :param logging_group: Value passed to ``logger`` as the log group.
    :param kwargs: Any further signal arguments (ignored).
    """
    already_attached = set(document.tags.all())
    matched = set(Tag.match_all(document.content))
    new_tags = matched.difference(already_attached)

    if new_tags:
        slugs = ", ".join(tag.slug for tag in new_tags)
        logger(
            'Tagging "{}" with "{}"'.format(document, slugs),
            logging_group
        )
        document.tags.add(*new_tags)

@ -15,21 +15,9 @@ class TestPaperlessLog(TestCase):
self.logger = logging.getLogger( self.logger = logging.getLogger(
"documents.management.commands.document_consumer") "documents.management.commands.document_consumer")
def test_ignored(self):
with mock.patch("logging.StreamHandler.emit") as __:
self.assertEqual(Log.objects.all().count(), 0)
self.logger.info("This is an informational message")
self.logger.warning("This is an informational message")
self.logger.error("This is an informational message")
self.logger.critical("This is an informational message")
self.assertEqual(Log.objects.all().count(), 0)
def test_that_it_saves_at_all(self): def test_that_it_saves_at_all(self):
kw = { kw = {"group": uuid.uuid4()}
"group": uuid.uuid4(),
"component": Log.COMPONENT_MAIL
}
self.assertEqual(Log.objects.all().count(), 0) self.assertEqual(Log.objects.all().count(), 0)
@ -53,14 +41,8 @@ class TestPaperlessLog(TestCase):
def test_groups(self): def test_groups(self):
kw1 = { kw1 = {"group": uuid.uuid4()}
"group": uuid.uuid4(), kw2 = {"group": uuid.uuid4()}
"component": Log.COMPONENT_MAIL
}
kw2 = {
"group": uuid.uuid4(),
"component": Log.COMPONENT_MAIL
}
self.assertEqual(Log.objects.all().count(), 0) self.assertEqual(Log.objects.all().count(), 0)
@ -86,49 +68,9 @@ class TestPaperlessLog(TestCase):
self.assertEqual(Log.objects.all().count(), 4) self.assertEqual(Log.objects.all().count(), 4)
self.assertEqual(Log.objects.filter(group=kw1["group"]).count(), 2) self.assertEqual(Log.objects.filter(group=kw1["group"]).count(), 2)
def test_components(self):
c1 = Log.COMPONENT_CONSUMER
c2 = Log.COMPONENT_MAIL
kw1 = {
"group": uuid.uuid4(),
"component": c1
}
kw2 = {
"group": kw1["group"],
"component": c2
}
self.assertEqual(Log.objects.all().count(), 0)
with mock.patch("logging.StreamHandler.emit") as __:
# Debug messages are ignored by default
self.logger.debug("This is a debugging message", extra=kw1)
self.assertEqual(Log.objects.all().count(), 0)
self.logger.info("This is an informational message", extra=kw2)
self.assertEqual(Log.objects.all().count(), 1)
self.assertEqual(Log.objects.filter(component=c2).count(), 1)
self.logger.warning("This is an warning message", extra=kw1)
self.assertEqual(Log.objects.all().count(), 2)
self.assertEqual(Log.objects.filter(component=c1).count(), 1)
self.logger.error("This is an error message", extra=kw2)
self.assertEqual(Log.objects.all().count(), 3)
self.assertEqual(Log.objects.filter(component=c2).count(), 2)
self.logger.critical("This is a critical message", extra=kw1)
self.assertEqual(Log.objects.all().count(), 4)
self.assertEqual(Log.objects.filter(component=c1).count(), 2)
def test_groupped_query(self): def test_groupped_query(self):
kw = { kw = {"group": uuid.uuid4()}
"group": uuid.uuid4(),
"component": Log.COMPONENT_MAIL
}
with mock.patch("logging.StreamHandler.emit") as __: with mock.patch("logging.StreamHandler.emit") as __:
self.logger.info("Message 0", extra=kw) self.logger.info("Message 0", extra=kw)
self.logger.info("Message 1", extra=kw) self.logger.info("Message 1", extra=kw)

@ -43,7 +43,7 @@ INSTALLED_APPS = [
"django_extensions", "django_extensions",
"documents", "documents.apps.DocumentsConfig",
"rest_framework", "rest_framework",
"crispy_forms", "crispy_forms",