mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
226 lines
6.5 KiB
Python
226 lines
6.5 KiB
Python
import logging
|
|
import os
|
|
import re
|
|
|
|
from django.conf import settings
|
|
from django.core.urlresolvers import reverse
|
|
from django.db import models
|
|
from django.template.defaultfilters import slugify
|
|
from django.utils import timezone
|
|
|
|
from .managers import LogManager
|
|
|
|
|
|
class SluggedModel(models.Model):
    """Abstract base for models identified by a unique name and a slug.

    The slug is derived from ``name`` at save time whenever it has not been
    set explicitly.
    """

    name = models.CharField(max_length=128, unique=True)
    slug = models.SlugField(blank=True)

    class Meta(object):
        abstract = True

    def save(self, *args, **kwargs):
        """Fill in a missing slug from ``name``, then persist the instance.

        All positional and keyword arguments are forwarded unchanged to the
        parent ``save()``.
        """
        if not self.slug:
            # ``name`` may hold up to 128 characters, but the slug column is
            # only as wide as the SlugField's max_length (Django's default
            # is 50).  Clip the generated slug so databases that enforce
            # column widths do not reject the row.
            limit = self._meta.get_field("slug").max_length
            self.slug = slugify(self.name)[:limit]

        # Use super() rather than calling models.Model.save directly so that
        # any save() sitting between this class and models.Model in a
        # subclass's MRO still runs.
        super(SluggedModel, self).save(*args, **kwargs)

    def __str__(self):
        return self.name
|
|
|
|
|
|
class Sender(SluggedModel):
    """A named sender; inherits the ``name``/``slug`` pair from SluggedModel.

    Instances are ordered alphabetically by ``name`` by default.
    """

    # Whitelist pattern: one or more word characters, hyphens, spaces,
    # commas, periods or apostrophes, and nothing else.  It is probably more
    # restrictive than it needs to be, but it's better safe than sorry.
    # NOTE(review): the code that applies this pattern is not in this class;
    # presumably it validates sender names coming from outside -- confirm.
    SAFE_REGEX = re.compile(r"^[\w\- ,.']+$")

    class Meta(object):
        ordering = ("name",)
|
|
|
|
|
|
class Tag(SluggedModel):
    """A coloured label that can be matched against document text.

    Each tag carries a ``match`` string and a ``matching_algorithm`` that
    together decide whether a given text qualifies for the tag (see
    ``matches``).  Matching is case-insensitive.
    """

    # Palette offered for displaying a tag: (stored value, hex colour).
    COLOURS = (
        (1, "#a6cee3"),
        (2, "#1f78b4"),
        (3, "#b2df8a"),
        (4, "#33a02c"),
        (5, "#fb9a99"),
        (6, "#e31a1c"),
        (7, "#fdbf6f"),
        (8, "#ff7f00"),
        (9, "#cab2d6"),
        (10, "#6a3d9a"),
        (11, "#b15928"),
        (12, "#000000"),
        (13, "#cccccc"),
    )

    MATCH_ANY = 1
    MATCH_ALL = 2
    MATCH_LITERAL = 3
    MATCH_REGEX = 4
    MATCHING_ALGORITHMS = (
        (MATCH_ANY, "Any"),
        (MATCH_ALL, "All"),
        (MATCH_LITERAL, "Literal"),
        (MATCH_REGEX, "Regular Expression"),
    )

    colour = models.PositiveIntegerField(choices=COLOURS, default=1)
    match = models.CharField(max_length=256, blank=True)
    matching_algorithm = models.PositiveIntegerField(
        choices=MATCHING_ALGORITHMS,
        default=MATCH_ANY,
        help_text=(
            "Which algorithm you want to use when matching text to the OCR'd "
            "PDF. Here, \"any\" looks for any occurrence of any word "
            "provided in the PDF, while \"all\" requires that every word "
            "provided appear in the PDF, albeit not in the order provided. A "
            "\"literal\" match means that the text you enter must appear in "
            "the PDF exactly as you've entered it, and \"regular expression\" "
            "uses a regex to match the PDF. If you don't know what a regex "
            "is, you probably don't want this option."
        )
    )

    @property
    def conditions(self):
        """Human-readable summary: name, match text and algorithm label."""
        return "{}: \"{}\" ({})".format(
            self.name, self.match, self.get_matching_algorithm_display())

    @classmethod
    def match_all(cls, text, tags=None):
        """Yield every tag whose rule matches ``text``.

        Args:
            text: The text to test; it is lower-cased before matching.
            tags: Optional iterable of tags to consider.  When ``None``,
                all tags from ``cls.objects.all()`` are tested.
        """
        if tags is None:
            tags = cls.objects.all()

        text = text.lower()
        for tag in tags:
            if tag.matches(text):
                yield tag

    @staticmethod
    def _contains_term(term, text):
        """Return True if ``term`` occurs literally in ``text`` as a whole unit.

        The term is escaped, so characters such as ``+``, ``(`` or ``.`` are
        matched as themselves instead of being interpreted as regex syntax.
        The lookarounds require that the term is not glued to a neighbouring
        word character; for ordinary words this is exactly ``\\b...\\b``, but
        it also behaves sensibly for terms that start or end with
        punctuation (e.g. "c++").
        """
        pattern = r"(?<!\w){}(?!\w)".format(re.escape(term))
        return bool(re.search(pattern, text, flags=re.IGNORECASE))

    def matches(self, text):
        """Return True when ``text`` satisfies this tag's matching rule.

        An empty (or whitespace-only) ``match`` never matches.

        Raises:
            NotImplementedError: ``matching_algorithm`` is not one of the
                ``MATCH_*`` constants.
            re.error: the algorithm is ``MATCH_REGEX`` and ``match`` is not
                a valid regular expression.
        """
        # Check that match is not empty
        if self.match.strip() == "":
            return False

        # split() without an argument collapses runs of whitespace, so a
        # doubled space cannot produce an empty "word" that would match
        # practically any text.
        if self.matching_algorithm == self.MATCH_ALL:
            return all(
                self._contains_term(word, text)
                for word in self.match.split()
            )

        if self.matching_algorithm == self.MATCH_ANY:
            return any(
                self._contains_term(word, text)
                for word in self.match.split()
            )

        if self.matching_algorithm == self.MATCH_LITERAL:
            return self._contains_term(self.match, text)

        if self.matching_algorithm == self.MATCH_REGEX:
            # The pattern is stored as entered (see save()), so case is
            # ignored here instead of by lower-casing the pattern.
            return bool(re.search(self.match, text, flags=re.IGNORECASE))

        raise NotImplementedError("Unsupported matching algorithm")

    def save(self, *args, **kwargs):
        """Normalise ``match`` and persist the tag.

        Plain-text rules are stored lower-cased.  Regular expressions are
        left untouched: lower-casing would turn escapes such as ``\\D``,
        ``\\W``, ``\\S`` or ``\\B`` into their opposites.
        """
        if self.matching_algorithm != self.MATCH_REGEX:
            self.match = self.match.lower()
        super(Tag, self).save(*args, **kwargs)
|
|
|
|
|
|
class Document(models.Model):
    """A stored document with its extracted text, sender and tags."""

    TYPE_PDF = "pdf"
    TYPE_PNG = "png"
    TYPE_JPG = "jpg"
    TYPE_GIF = "gif"
    TYPE_TIF = "tiff"
    TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,)

    sender = models.ForeignKey(
        Sender, blank=True, null=True, related_name="documents")
    title = models.CharField(max_length=128, blank=True, db_index=True)
    content = models.TextField(db_index=True)
    file_type = models.CharField(
        max_length=4,
        editable=False,
        choices=tuple([(t, t.upper()) for t in TYPES])
    )
    tags = models.ManyToManyField(
        Tag, related_name="documents", blank=True)
    created = models.DateTimeField(default=timezone.now, editable=False)
    modified = models.DateTimeField(auto_now=True, editable=False)

    class Meta(object):
        ordering = ("sender", "title")

    def __str__(self):
        """Creation date, followed by whichever of sender/title are set."""
        stamp = self.created.strftime("%Y-%m-%d")
        sender, title = self.sender, self.title

        if not (sender or title):
            return str(stamp)
        if sender and title:
            return "{}: {}, {}".format(stamp, sender, title)
        return "{}: {}".format(stamp, sender or title)

    @property
    def source_path(self):
        """Path of the ``.gpg`` file under ``MEDIA_ROOT/documents``.

        The file name is the zero-padded primary key plus the file type.
        """
        documents_dir = os.path.join(settings.MEDIA_ROOT, "documents")
        stored_name = "{:07}.{}.gpg".format(self.pk, self.file_type)
        return os.path.join(documents_dir, stored_name)

    @property
    def source_file(self):
        """A freshly opened binary read handle on ``source_path``.

        Every access opens a new handle; closing it is up to the caller.
        """
        path = self.source_path
        return open(path, "rb")

    @property
    def file_name(self):
        """Descriptive file name built from sender, title and tag slugs.

        Falls back to the base name of ``source_path`` unless both a sender
        and a title are present.
        """
        if not (self.sender and self.title):
            return os.path.basename(self.source_path)

        slugs = ",".join(tag.slug for tag in self.tags.all())
        if not slugs:
            return "{} - {}.{}".format(
                self.sender, self.title, self.file_type)
        return "{} - {} - {}.{}".format(
            self.sender, self.title, slugs, self.file_type)

    @property
    def download_url(self):
        """URL of the "fetch" route for this document's primary key."""
        route_kwargs = {"pk": self.pk}
        return reverse("fetch", kwargs=route_kwargs)
|
|
|
|
|
|
class Log(models.Model):
    """A single log message stored in the database.

    Entries are ordered newest-modified first by default.
    """

    # Severity choices: the standard library's numeric logging levels paired
    # with a display label.
    LEVELS = (
        (logging.DEBUG, "Debugging"),
        (logging.INFO, "Informational"),
        (logging.WARNING, "Warning"),
        (logging.ERROR, "Error"),
        (logging.CRITICAL, "Critical"),
    )

    # Identifiers for the part of the application that emitted the message.
    COMPONENT_CONSUMER = 1
    COMPONENT_MAIL = 2
    COMPONENTS = (
        (COMPONENT_CONSUMER, "Consumer"),
        (COMPONENT_MAIL, "Mail Fetcher")
    )

    # NOTE(review): blank=True without null=True only relaxes form
    # validation; the column itself stays NOT NULL, so a value presumably
    # has to be supplied on every insert -- confirm this is intended.
    # Presumably the UUID ties together messages from one run; verify
    # against the code that writes log entries.
    group = models.UUIDField(blank=True)
    message = models.TextField()
    level = models.PositiveIntegerField(choices=LEVELS, default=logging.INFO)
    component = models.PositiveIntegerField(choices=COMPONENTS)
    created = models.DateTimeField(auto_now_add=True)
    modified = models.DateTimeField(auto_now=True)

    # Custom default manager imported from .managers (behaviour not visible
    # from this file).
    objects = LogManager()

    class Meta(object):
        ordering = ("-modified",)

    def __str__(self):
        """Return the raw log message."""
        return self.message
|