Merge remote-tracking branch 'upstream/dev' into feature/remote-user

This commit is contained in:
Michael Shamoon
2021-01-03 00:38:10 -08:00
93 changed files with 5386 additions and 741 deletions

View File

@@ -1,34 +1,30 @@
from django.apps import AppConfig
from django.utils.translation import gettext_lazy as _
class DocumentsConfig(AppConfig):
name = "documents"
def ready(self):
verbose_name = _("Documents")
from .signals import document_consumption_started
def ready(self):
from .signals import document_consumption_finished
from .signals.handlers import (
add_inbox_tags,
run_pre_consume_script,
run_post_consume_script,
set_log_entry,
set_correspondent,
set_document_type,
set_tags,
add_to_index
)
document_consumption_started.connect(run_pre_consume_script)
document_consumption_finished.connect(add_inbox_tags)
document_consumption_finished.connect(set_correspondent)
document_consumption_finished.connect(set_document_type)
document_consumption_finished.connect(set_tags)
document_consumption_finished.connect(set_log_entry)
document_consumption_finished.connect(add_to_index)
document_consumption_finished.connect(run_post_consume_script)
AppConfig.ready(self)

View File

@@ -1,7 +1,7 @@
import datetime
import hashlib
import logging
import os
from subprocess import Popen
import magic
from django.conf import settings
@@ -9,6 +9,7 @@ from django.db import transaction
from django.db.models import Q
from django.utils import timezone
from filelock import FileLock
from rest_framework.reverse import reverse
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
from .file_handling import create_source_path_directory, \
@@ -66,6 +67,39 @@ class Consumer(LoggingMixin):
os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
os.makedirs(settings.ARCHIVE_DIR, exist_ok=True)
def run_pre_consume_script(self):
    """
    Run the configured pre-consume script, if any, and wait for it to exit.

    Does nothing when ``settings.PRE_CONSUME_SCRIPT`` is not set. Otherwise
    the script is started with ``self.path`` as its only argument and this
    method blocks until the process has finished. The exit status of the
    script is not inspected.

    Raises:
        ConsumerError: if starting or waiting for the script raises.
    """
    if not settings.PRE_CONSUME_SCRIPT:
        return

    try:
        Popen((settings.PRE_CONSUME_SCRIPT, self.path)).wait()
    except Exception as e:
        # Chain the original exception explicitly so the underlying cause
        # stays attached to the ConsumerError.
        raise ConsumerError(
            f"Error while executing pre-consume script: {e}"
        ) from e
def run_post_consume_script(self, document):
    """
    Run the configured post-consume script, if any, and wait for it to exit.

    Does nothing when ``settings.POST_CONSUME_SCRIPT`` is not set. Otherwise
    the script is started with these positional arguments, in order:

    1. the document's primary key (as a string)
    2. ``document.get_public_filename()``
    3. the normalised ``document.source_path``
    4. the normalised ``document.thumbnail_path``
    5. the reversed ``document-download`` URL
    6. the reversed ``document-thumb`` URL
    7. ``str(document.correspondent)``
    8. the names of the document's tags, joined with commas

    The exit status of the script is not inspected.

    Raises:
        ConsumerError: if building the arguments, starting the script or
            waiting for it raises.
    """
    if not settings.POST_CONSUME_SCRIPT:
        return

    try:
        Popen((
            settings.POST_CONSUME_SCRIPT,
            str(document.pk),
            document.get_public_filename(),
            os.path.normpath(document.source_path),
            os.path.normpath(document.thumbnail_path),
            reverse("document-download", kwargs={"pk": document.pk}),
            reverse("document-thumb", kwargs={"pk": document.pk}),
            str(document.correspondent),
            ",".join(document.tags.all().values_list(
                "name", flat=True))
        )).wait()
    except Exception as e:
        # The message previously said "pre-consume", which pointed at the
        # wrong script when the post-consume hook failed.
        raise ConsumerError(
            f"Error while executing post-consume script: {e}"
        ) from e
def try_consume_file(self,
path,
override_filename=None,
@@ -119,6 +153,8 @@ class Consumer(LoggingMixin):
logging_group=self.logging_group
)
self.run_pre_consume_script()
# This doesn't parse the document yet, but gives us a parser.
document_parser = parser_class(self.logging_group)
@@ -130,7 +166,7 @@ class Consumer(LoggingMixin):
try:
self.log("debug", "Parsing {}...".format(self.filename))
document_parser.parse(self.path, mime_type)
document_parser.parse(self.path, mime_type, self.filename)
self.log("debug", f"Generating thumbnail for {self.filename}...")
thumbnail = document_parser.get_optimised_thumbnail(
@@ -158,7 +194,7 @@ class Consumer(LoggingMixin):
try:
classifier = DocumentClassifier()
classifier.reload()
except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
except (OSError, EOFError, IncompatibleClassifierVersionError) as e:
self.log(
"warning",
f"Cannot classify documents: {e}.")
@@ -215,6 +251,9 @@ class Consumer(LoggingMixin):
# Delete the file only if it was successfully consumed
self.log("debug", "Deleting file {}".format(self.path))
os.unlink(self.path)
self.run_post_consume_script(document)
except Exception as e:
self.log(
"error",

View File

@@ -100,7 +100,9 @@ def generate_filename(doc, counter=0):
many_to_dictionary(doc.tags))
tag_list = pathvalidate.sanitize_filename(
",".join([tag.name for tag in doc.tags.all()]),
",".join(sorted(
[tag.name for tag in doc.tags.all()]
)),
replacement_text="-"
)

View File

@@ -73,7 +73,7 @@ class Command(Renderable, BaseCommand):
classifier = DocumentClassifier()
try:
classifier.reload()
except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
except (OSError, EOFError, IncompatibleClassifierVersionError) as e:
logging.getLogger(__name__).warning(
f"Cannot classify documents: {e}.")
classifier = None

View File

@@ -0,0 +1,68 @@
import logging
import multiprocessing
import shutil
import tqdm
from django import db
from django.core.management.base import BaseCommand
from documents.models import Document
from ...mixins import Renderable
from ...parsers import get_parser_class_for_mime_type
def _process_document(doc_in):
    """
    Regenerate the thumbnail of the document with id ``doc_in``.

    The document is loaded, a parser for its mime type is instantiated with
    ``logging_group=None``, and the thumbnail that parser produces is moved
    to ``document.thumbnail_path``. ``parser.cleanup()`` is always called,
    whether or not thumbnail generation succeeded.
    """
    doc = Document.objects.get(id=doc_in)

    parser_class = get_parser_class_for_mime_type(doc.mime_type)
    thumbnail_parser = parser_class(logging_group=None)

    try:
        new_thumbnail = thumbnail_parser.get_optimised_thumbnail(
            doc.source_path, doc.mime_type)
        shutil.move(new_thumbnail, doc.thumbnail_path)
    finally:
        thumbnail_parser.cleanup()
class Command(Renderable, BaseCommand):
    """
    Management command that regenerates document thumbnails, either for a
    single document (``-d/--document``) or for all documents.
    """

    # NOTE(review): whitespace in this view looks collapsed; confirm that the
    # replace() pattern only strips the literal's indentation rather than
    # every space of the help text.
    help = """
This will regenerate the thumbnails for all documents.
""".replace(" ", "")

    def __init__(self, *args, **kwargs):
        self.verbosity = 0
        BaseCommand.__init__(self, *args, **kwargs)

    def add_arguments(self, parser):
        """Register the optional ``-d/--document`` integer argument."""
        parser.add_argument(
            "-d", "--document",
            default=None,
            type=int,
            required=False,
            help="Specify the ID of a document, and this command will only "
                 "run on this specific document."
        )

    def handle(self, *args, **options):
        """
        Collect the ids of the selected documents and regenerate their
        thumbnails in a ``multiprocessing.Pool``, showing a tqdm progress
        bar.
        """
        self.verbosity = options["verbosity"]

        # Raise the level of the first root-logger handler to ERROR
        # (presumably so log output does not interleave with the progress
        # bar). Guarded because the root logger may have no handlers at
        # all, in which case indexing [0] would raise IndexError.
        root_handlers = logging.getLogger().handlers
        if root_handlers:
            root_handlers[0].level = logging.ERROR

        # Compare against None rather than relying on truthiness, so that
        # an explicit "-d 0" is not silently treated as "all documents".
        if options["document"] is not None:
            documents = Document.objects.filter(pk=options["document"])
        else:
            documents = Document.objects.all()

        ids = [doc.id for doc in documents]

        # Note to future self: this prevents django from reusing database
        # connections between processes, which is bad and does not work
        # with postgres.
        db.connections.close_all()

        with multiprocessing.Pool() as pool:
            # list() drains the lazy iterator so every document is processed
            # while tqdm counts the completed items.
            list(tqdm.tqdm(
                pool.imap_unordered(_process_document, ids), total=len(ids)
            ))

View File

@@ -0,0 +1,18 @@
# Generated by Django 3.1.4 on 2021-01-01 21:59
from django.db import migrations, models
class Migration(migrations.Migration):
    # Alters SavedViewFilterRule.value to a CharField(max_length=128) that
    # allows both blank and NULL values.

    # Must run after migration 1009 of the documents app.
    dependencies = [
        ('documents', '1009_auto_20201216_2005'),
    ]

    operations = [
        migrations.AlterField(
            model_name='savedviewfilterrule',
            name='value',
            field=models.CharField(blank=True, max_length=128, null=True),
        ),
    ]

View File

@@ -0,0 +1,250 @@
# Generated by Django 3.1.4 on 2021-01-01 23:40
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
class Migration(migrations.Migration):
    # Sets verbose_name / verbose_name_plural in the model options of the
    # documents app models and re-declares their fields; every field altered
    # here carries an explicit verbose_name.

    # Depends on the (swappable) user model, referenced by SavedView.user,
    # and on migration 1010 of the documents app.
    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('documents', '1010_auto_20210101_2159'),
    ]

    operations = [
        # --- Model options (ordering and verbose names) ---
        migrations.AlterModelOptions(
            name='correspondent',
            options={'ordering': ('name',), 'verbose_name': 'correspondent', 'verbose_name_plural': 'correspondents'},
        ),
        migrations.AlterModelOptions(
            name='document',
            options={'ordering': ('-created',), 'verbose_name': 'document', 'verbose_name_plural': 'documents'},
        ),
        migrations.AlterModelOptions(
            name='documenttype',
            options={'verbose_name': 'document type', 'verbose_name_plural': 'document types'},
        ),
        migrations.AlterModelOptions(
            name='log',
            options={'ordering': ('-created',), 'verbose_name': 'log', 'verbose_name_plural': 'logs'},
        ),
        migrations.AlterModelOptions(
            name='savedview',
            options={'ordering': ('name',), 'verbose_name': 'saved view', 'verbose_name_plural': 'saved views'},
        ),
        migrations.AlterModelOptions(
            name='savedviewfilterrule',
            options={'verbose_name': 'filter rule', 'verbose_name_plural': 'filter rules'},
        ),
        migrations.AlterModelOptions(
            name='tag',
            options={'verbose_name': 'tag', 'verbose_name_plural': 'tags'},
        ),
        # --- Correspondent fields ---
        migrations.AlterField(
            model_name='correspondent',
            name='is_insensitive',
            field=models.BooleanField(default=True, verbose_name='is insensitive'),
        ),
        migrations.AlterField(
            model_name='correspondent',
            name='match',
            field=models.CharField(blank=True, max_length=256, verbose_name='match'),
        ),
        migrations.AlterField(
            model_name='correspondent',
            name='matching_algorithm',
            field=models.PositiveIntegerField(choices=[(1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm'),
        ),
        migrations.AlterField(
            model_name='correspondent',
            name='name',
            field=models.CharField(max_length=128, unique=True, verbose_name='name'),
        ),
        # --- Document fields ---
        migrations.AlterField(
            model_name='document',
            name='added',
            field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, editable=False, verbose_name='added'),
        ),
        migrations.AlterField(
            model_name='document',
            name='archive_checksum',
            field=models.CharField(blank=True, editable=False, help_text='The checksum of the archived document.', max_length=32, null=True, verbose_name='archive checksum'),
        ),
        migrations.AlterField(
            model_name='document',
            name='archive_serial_number',
            field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True, verbose_name='archive serial number'),
        ),
        migrations.AlterField(
            model_name='document',
            name='checksum',
            field=models.CharField(editable=False, help_text='The checksum of the original document.', max_length=32, unique=True, verbose_name='checksum'),
        ),
        migrations.AlterField(
            model_name='document',
            name='content',
            field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.', verbose_name='content'),
        ),
        migrations.AlterField(
            model_name='document',
            name='correspondent',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.correspondent', verbose_name='correspondent'),
        ),
        migrations.AlterField(
            model_name='document',
            name='created',
            field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, verbose_name='created'),
        ),
        migrations.AlterField(
            model_name='document',
            name='document_type',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.documenttype', verbose_name='document type'),
        ),
        migrations.AlterField(
            model_name='document',
            name='filename',
            field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True, verbose_name='filename'),
        ),
        migrations.AlterField(
            model_name='document',
            name='mime_type',
            field=models.CharField(editable=False, max_length=256, verbose_name='mime type'),
        ),
        migrations.AlterField(
            model_name='document',
            name='modified',
            field=models.DateTimeField(auto_now=True, db_index=True, verbose_name='modified'),
        ),
        migrations.AlterField(
            model_name='document',
            name='storage_type',
            field=models.CharField(choices=[('unencrypted', 'Unencrypted'), ('gpg', 'Encrypted with GNU Privacy Guard')], default='unencrypted', editable=False, max_length=11, verbose_name='storage type'),
        ),
        migrations.AlterField(
            model_name='document',
            name='tags',
            field=models.ManyToManyField(blank=True, related_name='documents', to='documents.Tag', verbose_name='tags'),
        ),
        migrations.AlterField(
            model_name='document',
            name='title',
            field=models.CharField(blank=True, db_index=True, max_length=128, verbose_name='title'),
        ),
        # --- DocumentType fields ---
        migrations.AlterField(
            model_name='documenttype',
            name='is_insensitive',
            field=models.BooleanField(default=True, verbose_name='is insensitive'),
        ),
        migrations.AlterField(
            model_name='documenttype',
            name='match',
            field=models.CharField(blank=True, max_length=256, verbose_name='match'),
        ),
        migrations.AlterField(
            model_name='documenttype',
            name='matching_algorithm',
            field=models.PositiveIntegerField(choices=[(1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm'),
        ),
        migrations.AlterField(
            model_name='documenttype',
            name='name',
            field=models.CharField(max_length=128, unique=True, verbose_name='name'),
        ),
        # --- Log fields ---
        migrations.AlterField(
            model_name='log',
            name='created',
            field=models.DateTimeField(auto_now_add=True, verbose_name='created'),
        ),
        migrations.AlterField(
            model_name='log',
            name='group',
            field=models.UUIDField(blank=True, null=True, verbose_name='group'),
        ),
        migrations.AlterField(
            model_name='log',
            name='level',
            field=models.PositiveIntegerField(choices=[(10, 'debug'), (20, 'information'), (30, 'warning'), (40, 'error'), (50, 'critical')], default=20, verbose_name='level'),
        ),
        migrations.AlterField(
            model_name='log',
            name='message',
            field=models.TextField(verbose_name='message'),
        ),
        # --- SavedView fields ---
        migrations.AlterField(
            model_name='savedview',
            name='name',
            field=models.CharField(max_length=128, verbose_name='name'),
        ),
        migrations.AlterField(
            model_name='savedview',
            name='show_in_sidebar',
            field=models.BooleanField(verbose_name='show in sidebar'),
        ),
        migrations.AlterField(
            model_name='savedview',
            name='show_on_dashboard',
            field=models.BooleanField(verbose_name='show on dashboard'),
        ),
        migrations.AlterField(
            model_name='savedview',
            name='sort_field',
            field=models.CharField(max_length=128, verbose_name='sort field'),
        ),
        migrations.AlterField(
            model_name='savedview',
            name='sort_reverse',
            field=models.BooleanField(default=False, verbose_name='sort reverse'),
        ),
        migrations.AlterField(
            model_name='savedview',
            name='user',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL, verbose_name='user'),
        ),
        # --- SavedViewFilterRule fields ---
        migrations.AlterField(
            model_name='savedviewfilterrule',
            name='rule_type',
            field=models.PositiveIntegerField(choices=[(0, 'title contains'), (1, 'content contains'), (2, 'ASN is'), (3, 'correspondent is'), (4, 'document type is'), (5, 'is in inbox'), (6, 'has tag'), (7, 'has any tag'), (8, 'created before'), (9, 'created after'), (10, 'created year is'), (11, 'created month is'), (12, 'created day is'), (13, 'added before'), (14, 'added after'), (15, 'modified before'), (16, 'modified after'), (17, 'does not have tag')], verbose_name='rule type'),
        ),
        migrations.AlterField(
            model_name='savedviewfilterrule',
            name='saved_view',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='filter_rules', to='documents.savedview', verbose_name='saved view'),
        ),
        migrations.AlterField(
            model_name='savedviewfilterrule',
            name='value',
            field=models.CharField(blank=True, max_length=128, null=True, verbose_name='value'),
        ),
        # --- Tag fields ---
        migrations.AlterField(
            model_name='tag',
            name='colour',
            field=models.PositiveIntegerField(choices=[(1, '#a6cee3'), (2, '#1f78b4'), (3, '#b2df8a'), (4, '#33a02c'), (5, '#fb9a99'), (6, '#e31a1c'), (7, '#fdbf6f'), (8, '#ff7f00'), (9, '#cab2d6'), (10, '#6a3d9a'), (11, '#b15928'), (12, '#000000'), (13, '#cccccc')], default=1, verbose_name='color'),
        ),
        migrations.AlterField(
            model_name='tag',
            name='is_inbox_tag',
            field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.', verbose_name='is inbox tag'),
        ),
        migrations.AlterField(
            model_name='tag',
            name='is_insensitive',
            field=models.BooleanField(default=True, verbose_name='is insensitive'),
        ),
        migrations.AlterField(
            model_name='tag',
            name='match',
            field=models.CharField(blank=True, max_length=256, verbose_name='match'),
        ),
        migrations.AlterField(
            model_name='tag',
            name='matching_algorithm',
            field=models.PositiveIntegerField(choices=[(1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm'),
        ),
        migrations.AlterField(
            model_name='tag',
            name='name',
            field=models.CharField(max_length=128, unique=True, verbose_name='name'),
        ),
    ]

View File

@@ -13,6 +13,8 @@ from django.contrib.auth.models import User
from django.db import models
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from documents.file_handling import archive_name_from_filename
from documents.parsers import get_default_file_extension
@@ -27,36 +29,31 @@ class MatchingModel(models.Model):
MATCH_AUTO = 6
MATCHING_ALGORITHMS = (
(MATCH_ANY, "Any"),
(MATCH_ALL, "All"),
(MATCH_LITERAL, "Literal"),
(MATCH_REGEX, "Regular Expression"),
(MATCH_FUZZY, "Fuzzy Match"),
(MATCH_AUTO, "Automatic Classification"),
(MATCH_ANY, _("Any word")),
(MATCH_ALL, _("All words")),
(MATCH_LITERAL, _("Exact match")),
(MATCH_REGEX, _("Regular expression")),
(MATCH_FUZZY, _("Fuzzy word")),
(MATCH_AUTO, _("Automatic")),
)
name = models.CharField(max_length=128, unique=True)
name = models.CharField(
_("name"),
max_length=128, unique=True)
match = models.CharField(
_("match"),
max_length=256, blank=True)
match = models.CharField(max_length=256, blank=True)
matching_algorithm = models.PositiveIntegerField(
_("matching algorithm"),
choices=MATCHING_ALGORITHMS,
default=MATCH_ANY,
help_text=(
"Which algorithm you want to use when matching text to the OCR'd "
"PDF. Here, \"any\" looks for any occurrence of any word "
"provided in the PDF, while \"all\" requires that every word "
"provided appear in the PDF, albeit not in the order provided. A "
"\"literal\" match means that the text you enter must appear in "
"the PDF exactly as you've entered it, and \"regular expression\" "
"uses a regex to match the PDF. (If you don't know what a regex "
"is, you probably don't want this option.) Finally, a \"fuzzy "
"match\" looks for words or phrases that are mostly—but not "
"exactly—the same, which can be useful for matching against "
"documents containg imperfections that foil accurate OCR."
)
default=MATCH_ANY
)
is_insensitive = models.BooleanField(default=True)
is_insensitive = models.BooleanField(
_("is insensitive"),
default=True)
class Meta:
abstract = True
@@ -80,6 +77,8 @@ class Correspondent(MatchingModel):
class Meta:
ordering = ("name",)
verbose_name = _("correspondent")
verbose_name_plural = _("correspondents")
class Tag(MatchingModel):
@@ -100,18 +99,27 @@ class Tag(MatchingModel):
(13, "#cccccc")
)
colour = models.PositiveIntegerField(choices=COLOURS, default=1)
colour = models.PositiveIntegerField(
_("color"),
choices=COLOURS, default=1)
is_inbox_tag = models.BooleanField(
_("is inbox tag"),
default=False,
help_text="Marks this tag as an inbox tag: All newly consumed "
"documents will be tagged with inbox tags."
help_text=_("Marks this tag as an inbox tag: All newly consumed "
"documents will be tagged with inbox tags.")
)
class Meta:
verbose_name = _("tag")
verbose_name_plural = _("tags")
class DocumentType(MatchingModel):
pass
class Meta:
verbose_name = _("document type")
verbose_name_plural = _("document types")
class Document(models.Model):
@@ -119,8 +127,8 @@ class Document(models.Model):
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
STORAGE_TYPE_GPG = "gpg"
STORAGE_TYPES = (
(STORAGE_TYPE_UNENCRYPTED, "Unencrypted"),
(STORAGE_TYPE_GPG, "Encrypted with GNU Privacy Guard")
(STORAGE_TYPE_UNENCRYPTED, _("Unencrypted")),
(STORAGE_TYPE_GPG, _("Encrypted with GNU Privacy Guard"))
)
correspondent = models.ForeignKey(
@@ -128,55 +136,68 @@ class Document(models.Model):
blank=True,
null=True,
related_name="documents",
on_delete=models.SET_NULL
on_delete=models.SET_NULL,
verbose_name=_("correspondent")
)
title = models.CharField(max_length=128, blank=True, db_index=True)
title = models.CharField(
_("title"),
max_length=128, blank=True, db_index=True)
document_type = models.ForeignKey(
DocumentType,
blank=True,
null=True,
related_name="documents",
on_delete=models.SET_NULL
on_delete=models.SET_NULL,
verbose_name=_("document type")
)
content = models.TextField(
_("content"),
blank=True,
help_text="The raw, text-only data of the document. This field is "
"primarily used for searching."
help_text=_("The raw, text-only data of the document. This field is "
"primarily used for searching.")
)
mime_type = models.CharField(
_("mime type"),
max_length=256,
editable=False
)
tags = models.ManyToManyField(
Tag, related_name="documents", blank=True)
Tag, related_name="documents", blank=True,
verbose_name=_("tags")
)
checksum = models.CharField(
_("checksum"),
max_length=32,
editable=False,
unique=True,
help_text="The checksum of the original document."
help_text=_("The checksum of the original document.")
)
archive_checksum = models.CharField(
_("archive checksum"),
max_length=32,
editable=False,
blank=True,
null=True,
help_text="The checksum of the archived document."
help_text=_("The checksum of the archived document.")
)
created = models.DateTimeField(
_("created"),
default=timezone.now, db_index=True)
modified = models.DateTimeField(
_("modified"),
auto_now=True, editable=False, db_index=True)
storage_type = models.CharField(
_("storage type"),
max_length=11,
choices=STORAGE_TYPES,
default=STORAGE_TYPE_UNENCRYPTED,
@@ -184,27 +205,32 @@ class Document(models.Model):
)
added = models.DateTimeField(
_("added"),
default=timezone.now, editable=False, db_index=True)
filename = models.FilePathField(
_("filename"),
max_length=1024,
editable=False,
default=None,
null=True,
help_text="Current filename in storage"
help_text=_("Current filename in storage")
)
archive_serial_number = models.IntegerField(
_("archive serial number"),
blank=True,
null=True,
unique=True,
db_index=True,
help_text="The position of this document in your physical document "
"archive."
help_text=_("The position of this document in your physical document "
"archive.")
)
class Meta:
ordering = ("-created",)
verbose_name = _("document")
verbose_name_plural = _("documents")
def __str__(self):
created = datetime.date.isoformat(self.created)
@@ -286,20 +312,29 @@ class Document(models.Model):
class Log(models.Model):
LEVELS = (
(logging.DEBUG, "Debugging"),
(logging.INFO, "Informational"),
(logging.WARNING, "Warning"),
(logging.ERROR, "Error"),
(logging.CRITICAL, "Critical"),
(logging.DEBUG, _("debug")),
(logging.INFO, _("information")),
(logging.WARNING, _("warning")),
(logging.ERROR, _("error")),
(logging.CRITICAL, _("critical")),
)
group = models.UUIDField(blank=True, null=True)
message = models.TextField()
level = models.PositiveIntegerField(choices=LEVELS, default=logging.INFO)
created = models.DateTimeField(auto_now_add=True)
group = models.UUIDField(
_("group"),
blank=True, null=True)
message = models.TextField(_("message"))
level = models.PositiveIntegerField(
_("level"),
choices=LEVELS, default=logging.INFO)
created = models.DateTimeField(_("created"), auto_now_add=True)
class Meta:
ordering = ("-created",)
verbose_name = _("log")
verbose_name_plural = _("logs")
def __str__(self):
return self.message
@@ -310,48 +345,72 @@ class SavedView(models.Model):
class Meta:
ordering = ("name",)
verbose_name = _("saved view")
verbose_name_plural = _("saved views")
user = models.ForeignKey(User, on_delete=models.CASCADE)
name = models.CharField(max_length=128)
user = models.ForeignKey(User, on_delete=models.CASCADE,
verbose_name=_("user"))
name = models.CharField(
_("name"),
max_length=128)
show_on_dashboard = models.BooleanField()
show_in_sidebar = models.BooleanField()
show_on_dashboard = models.BooleanField(
_("show on dashboard"),
)
show_in_sidebar = models.BooleanField(
_("show in sidebar"),
)
sort_field = models.CharField(max_length=128)
sort_reverse = models.BooleanField(default=False)
sort_field = models.CharField(
_("sort field"),
max_length=128)
sort_reverse = models.BooleanField(
_("sort reverse"),
default=False)
class SavedViewFilterRule(models.Model):
RULE_TYPES = [
(0, "Title contains"),
(1, "Content contains"),
(2, "ASN is"),
(3, "Correspondent is"),
(4, "Document type is"),
(5, "Is in inbox"),
(6, "Has tag"),
(7, "Has any tag"),
(8, "Created before"),
(9, "Created after"),
(10, "Created year is"),
(11, "Created month is"),
(12, "Created day is"),
(13, "Added before"),
(14, "Added after"),
(15, "Modified before"),
(16, "Modified after"),
(17, "Does not have tag"),
(0, _("title contains")),
(1, _("content contains")),
(2, _("ASN is")),
(3, _("correspondent is")),
(4, _("document type is")),
(5, _("is in inbox")),
(6, _("has tag")),
(7, _("has any tag")),
(8, _("created before")),
(9, _("created after")),
(10, _("created year is")),
(11, _("created month is")),
(12, _("created day is")),
(13, _("added before")),
(14, _("added after")),
(15, _("modified before")),
(16, _("modified after")),
(17, _("does not have tag")),
]
saved_view = models.ForeignKey(
SavedView,
on_delete=models.CASCADE,
related_name="filter_rules"
related_name="filter_rules",
verbose_name=_("saved view")
)
rule_type = models.PositiveIntegerField(choices=RULE_TYPES)
rule_type = models.PositiveIntegerField(
_("rule type"),
choices=RULE_TYPES)
value = models.CharField(max_length=128)
value = models.CharField(
_("value"),
max_length=128,
blank=True,
null=True)
class Meta:
verbose_name = _("filter rule")
verbose_name_plural = _("filter rules")
# TODO: why is this in the models file?

View File

@@ -117,6 +117,7 @@ def run_convert(input_file,
trim=False,
type=None,
depth=None,
auto_orient=False,
extra=None,
logging_group=None):
@@ -134,6 +135,7 @@ def run_convert(input_file,
args += ['-trim'] if trim else []
args += ['-type', str(type)] if type else []
args += ['-depth', str(depth)] if depth else []
args += ['-auto-orient'] if auto_orient else []
args += [input_file, output_file]
logger.debug("Execute: " + " ".join(args), extra={'group': logging_group})
@@ -142,6 +144,53 @@ def run_convert(input_file,
raise ParseError("Convert failed at {}".format(args))
def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None):
    """
    Create a PNG thumbnail of the first page of the PDF at ``in_path``.

    The image is written to ``convert.png`` inside ``temp_dir`` and that
    path is returned. ``run_convert`` is tried on the PDF directly first; if
    it raises ``ParseError``, the PDF is rendered to ``gs_out.png`` with
    Ghostscript and ``run_convert`` is run on that image instead.

    Raises ``ParseError`` if Ghostscript exits with a non-zero status, or if
    the second ``run_convert`` call fails.
    """
    out_path = os.path.join(temp_dir, "convert.png")

    def convert_to_thumbnail(source):
        # Both the direct attempt and the Ghostscript fallback use exactly
        # the same conversion settings; only the input file differs.
        run_convert(input_file=source,
                    output_file=out_path,
                    density=300,
                    scale="500x5000>",
                    alpha="remove",
                    strip=True,
                    trim=False,
                    auto_orient=True,
                    logging_group=logging_group)

    try:
        # First attempt: convert the PDF itself ("[0]" is appended to the
        # input path for this call only).
        convert_to_thumbnail("{}[0]".format(in_path))
    except ParseError:
        # Converting the PDF directly failed, so render it to a PNG with
        # Ghostscript and convert that image instead.
        logger.warning(
            "Thumbnail generation with ImageMagick failed, falling back "
            "to ghostscript. Check your /etc/ImageMagick-x/policy.xml!",
            extra={'group': logging_group}
        )
        gs_out_path = os.path.join(temp_dir, "gs_out.png")
        cmd = [
            settings.GS_BINARY,
            "-q",
            "-sDEVICE=pngalpha",
            "-o", gs_out_path,
            in_path,
        ]

        exit_status = subprocess.Popen(cmd).wait()
        if exit_status != 0:
            raise ParseError("Thumbnail (gs) failed at {}".format(cmd))

        convert_to_thumbnail(gs_out_path)

    return out_path
def parse_date(filename, text):
"""
Returns the date of the document.
@@ -219,7 +268,7 @@ class DocumentParser(LoggingMixin):
def extract_metadata(self, document_path, mime_type):
return []
def parse(self, document_path, mime_type):
def parse(self, document_path, mime_type, file_name=None):
raise NotImplementedError()
def get_archive_path(self):

View File

@@ -11,7 +11,6 @@ from django.db.models import Q
from django.dispatch import receiver
from django.utils import timezone
from filelock import FileLock
from rest_framework.reverse import reverse
from .. import index, matching
from ..file_handling import delete_empty_directories, \
@@ -147,32 +146,6 @@ def set_tags(sender,
document.tags.add(*relevant_tags)
def run_pre_consume_script(sender, filename, **kwargs):
if not settings.PRE_CONSUME_SCRIPT:
return
Popen((settings.PRE_CONSUME_SCRIPT, filename)).wait()
def run_post_consume_script(sender, document, **kwargs):
if not settings.POST_CONSUME_SCRIPT:
return
Popen((
settings.POST_CONSUME_SCRIPT,
str(document.pk),
document.get_public_filename(),
os.path.normpath(document.source_path),
os.path.normpath(document.thumbnail_path),
reverse("document-download", kwargs={"pk": document.pk}),
reverse("document-thumb", kwargs={"pk": document.pk}),
str(document.correspondent),
str(",".join(document.tags.all().values_list("name", flat=True)))
)).wait()
@receiver(models.signals.post_delete, sender=Document)
def cleanup_document_deletion(sender, instance, using, **kwargs):
with FileLock(settings.MEDIA_LOCK):
@@ -276,13 +249,6 @@ def update_filename_and_move_files(sender, instance, **kwargs):
Document.objects.filter(pk=instance.pk).update(
filename=new_filename)
logging.getLogger(__name__).debug(
f"Moved file {old_source_path} to {new_source_path}.")
if instance.archive_checksum:
logging.getLogger(__name__).debug(
f"Moved file {old_archive_path} to {new_archive_path}.")
except OSError as e:
instance.filename = old_filename
# this happens when we can't move a file. If that's the case for

View File

@@ -35,9 +35,9 @@ def train_classifier():
try:
# load the classifier, since we might not have to train it again.
classifier.reload()
except (FileNotFoundError, IncompatibleClassifierVersionError):
except (OSError, EOFError, IncompatibleClassifierVersionError):
# This is what we're going to fix here.
pass
classifier = DocumentClassifier()
try:
if classifier.train():
@@ -94,7 +94,10 @@ def bulk_update_documents(document_ids):
documents = Document.objects.filter(id__in=document_ids)
ix = index.open_index()
for doc in documents:
post_save.send(Document, instance=doc, created=False)
with AsyncWriter(ix) as writer:
for doc in documents:
index.update_document(writer, doc)
post_save.send(Document, instance=doc, created=False)

View File

@@ -12,11 +12,13 @@
<meta name="full_name" content="{{full_name}}">
<meta name="cookie_prefix" content="{{cookie_prefix}}">
<link rel="icon" type="image/x-icon" href="favicon.ico">
<link rel="stylesheet" href="{% static 'frontend/styles.css' %}"></head>
<link rel="manifest" href="{% static webmanifest %}">
<link rel="stylesheet" href="{% static styles_css %}">
</head>
<body>
<app-root>Loading...</app-root>
<script src="{% static 'frontend/runtime.js' %}" defer></script>
<script src="{% static 'frontend/polyfills.js' %}" defer></script>
<script src="{% static 'frontend/main.js' %}" defer></script>
<script src="{% static runtime_js %}" defer></script>
<script src="{% static polyfills_js %}" defer></script>
<script src="{% static main_js %}" defer></script>
</body>
</html>

View File

@@ -177,7 +177,7 @@ class DummyParser(DocumentParser):
def get_optimised_thumbnail(self, document_path, mime_type):
return self.fake_thumb
def parse(self, document_path, mime_type):
def parse(self, document_path, mime_type, file_name=None):
self.text = "The Text"
@@ -194,7 +194,7 @@ class FaultyParser(DocumentParser):
def get_optimised_thumbnail(self, document_path, mime_type):
return self.fake_thumb
def parse(self, document_path, mime_type):
def parse(self, document_path, mime_type, file_name=None):
raise ParseError("Does not compute.")
@@ -466,3 +466,53 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(dst))
self.assertRaises(ConsumerError, self.consumer.try_consume_file, dst)
self.assertTrue(os.path.isfile(dst))
class PostConsumeTestCase(TestCase):
    """
    Tests for ``Consumer.run_post_consume_script`` with ``Popen`` patched
    out in ``documents.consumer``.
    """

    @mock.patch("documents.consumer.Popen")
    @override_settings(POST_CONSUME_SCRIPT=None)
    def test_no_post_consume_script(self, popen_mock):
        """Without a configured script, no process is started."""
        document = Document.objects.create(
            title="Test", mime_type="application/pdf")
        first_tag = Tag.objects.create(name="a")
        second_tag = Tag.objects.create(name="b")
        document.tags.add(first_tag)
        document.tags.add(second_tag)

        Consumer().run_post_consume_script(document)

        popen_mock.assert_not_called()

    @mock.patch("documents.consumer.Popen")
    @override_settings(POST_CONSUME_SCRIPT="script")
    def test_post_consume_script_simple(self, popen_mock):
        """With a configured script, exactly one process is started."""
        document = Document.objects.create(
            title="Test", mime_type="application/pdf")

        Consumer().run_post_consume_script(document)

        popen_mock.assert_called_once()

    @mock.patch("documents.consumer.Popen")
    @override_settings(POST_CONSUME_SCRIPT="script")
    def test_post_consume_script_with_correspondent(self, popen_mock):
        """The script receives pk, URLs, correspondent and tag names."""
        bank = Correspondent.objects.create(name="my_bank")
        document = Document.objects.create(
            title="Test", mime_type="application/pdf", correspondent=bank)
        first_tag = Tag.objects.create(name="a")
        second_tag = Tag.objects.create(name="b")
        document.tags.add(first_tag)
        document.tags.add(second_tag)

        Consumer().run_post_consume_script(document)

        popen_mock.assert_called_once()

        # call_args is an (args, kwargs) pair; the command tuple is the
        # first positional argument passed to Popen.
        command = popen_mock.call_args[0][0]

        self.assertEqual(command[0], "script")
        self.assertEqual(command[1], str(document.pk))
        self.assertEqual(
            command[5], f"/api/documents/{document.pk}/download/")
        self.assertEqual(
            command[6], f"/api/documents/{document.pk}/thumb/")
        self.assertEqual(command[7], "my_bank")
        # Tag order is not guaranteed, so compare without regard to order.
        self.assertCountEqual(command[8].split(","), ["a", "b"])

View File

@@ -1,56 +0,0 @@
from unittest import mock
from django.test import TestCase, override_settings
from documents.models import Document, Tag, Correspondent
from documents.signals.handlers import run_post_consume_script
class PostConsumeTestCase(TestCase):
    """Tests for the ``run_post_consume_script`` signal handler with ``Popen`` mocked."""

    @mock.patch("documents.signals.handlers.Popen")
    @override_settings(POST_CONSUME_SCRIPT=None)
    def test_no_post_consume_script(self, m):
        # No script configured: the handler must not spawn a process.
        document = Document.objects.create(
            title="Test", mime_type="application/pdf")
        first_tag = Tag.objects.create(name="a")
        second_tag = Tag.objects.create(name="b")
        for tag in (first_tag, second_tag):
            document.tags.add(tag)

        run_post_consume_script(None, document)

        m.assert_not_called()

    @mock.patch("documents.signals.handlers.Popen")
    @override_settings(POST_CONSUME_SCRIPT="script")
    def test_post_consume_script_simple(self, m):
        # A configured script is launched exactly once.
        document = Document.objects.create(
            title="Test", mime_type="application/pdf")

        run_post_consume_script(None, document)

        m.assert_called_once()

    @mock.patch("documents.signals.handlers.Popen")
    @override_settings(POST_CONSUME_SCRIPT="script")
    def test_post_consume_script_with_correspondent(self, m):
        # Checks selected positions of the command handed to Popen.
        bank = Correspondent.objects.create(name="my_bank")
        document = Document.objects.create(
            title="Test", mime_type="application/pdf", correspondent=bank)
        first_tag = Tag.objects.create(name="a")
        second_tag = Tag.objects.create(name="b")
        for tag in (first_tag, second_tag):
            document.tags.add(tag)

        run_post_consume_script(None, document)

        m.assert_called_once()

        # The command is the first positional argument of the Popen call.
        positional, _keyword = m.call_args
        command = positional[0]

        pk = document.pk
        self.assertEqual(command[0], "script")
        self.assertEqual(command[1], str(pk))
        self.assertEqual(command[5], f"/api/documents/{pk}/download/")
        self.assertEqual(command[6], f"/api/documents/{pk}/thumb/")
        self.assertEqual(command[7], "my_bank")
        # Tag names are joined with commas; their order is not asserted.
        self.assertCountEqual(command[8].split(","), ["a", "b"])

View File

@@ -7,6 +7,7 @@ from django.conf import settings
from django.db.models import Count, Max, Case, When, IntegerField
from django.db.models.functions import Lower
from django.http import HttpResponse, HttpResponseBadRequest, Http404
from django.utils.translation import get_language
from django.views.decorators.cache import cache_control
from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend
@@ -56,11 +57,29 @@ from .serialisers import (
class IndexView(TemplateView):
    """Render ``index.html`` with user details and locale-specific asset paths."""

    template_name = "index.html"

    def get_language(self):
        """Return the active language code in the form Angular uses.

        Django identifies languages in the form "en-us", whereas Angular
        generates locales as "en-US". This upper-cases everything after the
        first "-"; codes without a "-" (e.g. "de") are returned unchanged.
        """
        lang = get_language()
        if lang is None:
            # django.utils.translation.get_language() returns None while
            # translations are deactivated; fall back to the configured
            # default instead of failing on the string operations below.
            lang = settings.LANGUAGE_CODE
        # partition() yields an empty separator and tail when no "-" is
        # present, so the code is then reassembled unchanged.
        first, separator, second = lang.partition("-")
        return f"{first}{separator}{second.upper()}"

    def get_context_data(self, **kwargs):
        """Extend the template context with user info and frontend asset paths.

        The asset entries are paths of the form
        ``frontend/<locale>/<file>``, where ``<locale>`` comes from
        :meth:`get_language`.
        """
        context = super().get_context_data(**kwargs)
        user = self.request.user
        context['cookie_prefix'] = settings.COOKIE_PREFIX
        context['username'] = user.username
        context['full_name'] = user.get_full_name()

        # Resolve the locale directory once per request rather than once
        # per asset.
        frontend_dir = f"frontend/{self.get_language()}"
        context['styles_css'] = f"{frontend_dir}/styles.css"
        context['runtime_js'] = f"{frontend_dir}/runtime.js"
        context['polyfills_js'] = f"{frontend_dir}/polyfills.js"
        context['main_js'] = f"{frontend_dir}/main.js"
        context['manifest'] = f"{frontend_dir}/manifest.webmanifest"
        return context

View File

@@ -0,0 +1,567 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
# Translators:
# Jonas Winkler <dev@jpwinkler.de>, 2021
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2021-01-02 00:26+0000\n"
"PO-Revision-Date: 2020-12-30 19:27+0000\n"
"Last-Translator: Jonas Winkler <dev@jpwinkler.de>, 2021\n"
"Language-Team: German (https://www.transifex.com/paperless/teams/115905/de/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Language: de\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
#: documents/apps.py:10
msgid "Documents"
msgstr "Dokumente"
#: documents/models.py:32
msgid "Any word"
msgstr "Irgendein Wort"
#: documents/models.py:33
msgid "All words"
msgstr "Alle Wörter"
#: documents/models.py:34
msgid "Exact match"
msgstr "Exakte Übereinstimmung"
#: documents/models.py:35
msgid "Regular expression"
msgstr "Regulärer Ausdruck"
#: documents/models.py:36
msgid "Fuzzy word"
msgstr "Ungenaues Wort"
#: documents/models.py:37
msgid "Automatic"
msgstr "Automatisch"
#: documents/models.py:41 documents/models.py:354 paperless_mail/models.py:25
#: paperless_mail/models.py:100
msgid "name"
msgstr "Name"
#: documents/models.py:45
msgid "match"
msgstr "Zuweisungsmuster"
#: documents/models.py:49
msgid "matching algorithm"
msgstr "Zuweisungsalgorithmus"
#: documents/models.py:55
msgid "is insensitive"
msgstr "Groß-/Kleinschreibung irrelevant"
#: documents/models.py:80 documents/models.py:140
msgid "correspondent"
msgstr "Korrespondent"
#: documents/models.py:81
msgid "correspondents"
msgstr "Korrespondenten"
#: documents/models.py:103
msgid "color"
msgstr "Farbe"
#: documents/models.py:107
msgid "is inbox tag"
msgstr "Posteingangs-Tag"
#: documents/models.py:109
msgid ""
"Marks this tag as an inbox tag: All newly consumed documents will be tagged "
"with inbox tags."
msgstr ""
"Markiert das Tag als Posteingangs-Tag. Neue Dokumente werden immer mit "
"diesem Tag versehen."
#: documents/models.py:114
msgid "tag"
msgstr "Tag"
#: documents/models.py:115 documents/models.py:171
msgid "tags"
msgstr "Tags"
#: documents/models.py:121 documents/models.py:153
msgid "document type"
msgstr "Dokumenttyp"
#: documents/models.py:122
msgid "document types"
msgstr "Dokumenttypen"
#: documents/models.py:130
msgid "Unencrypted"
msgstr "Nicht verschlüsselt"
#: documents/models.py:131
msgid "Encrypted with GNU Privacy Guard"
msgstr "Verschlüsselt mit GNU Privacy Guard"
#: documents/models.py:144
msgid "title"
msgstr "Titel"
#: documents/models.py:157
msgid "content"
msgstr "Inhalt"
#: documents/models.py:159
msgid ""
"The raw, text-only data of the document. This field is primarily used for "
"searching."
msgstr ""
"Der Inhalt des Dokuments in Textform. Dieses Feld wird primär für die Suche "
"verwendet."
#: documents/models.py:164
msgid "mime type"
msgstr "MIME-Typ"
#: documents/models.py:175
msgid "checksum"
msgstr "Prüfsumme"
#: documents/models.py:179
msgid "The checksum of the original document."
msgstr "Die Prüfsumme des originalen Dokuments."
#: documents/models.py:183
msgid "archive checksum"
msgstr "Archiv-Prüfsumme"
#: documents/models.py:188
msgid "The checksum of the archived document."
msgstr "Die Prüfsumme des archivierten Dokuments."
#: documents/models.py:192 documents/models.py:332
msgid "created"
msgstr "Erstellt"
#: documents/models.py:196
msgid "modified"
msgstr "Geändert"
#: documents/models.py:200
msgid "storage type"
msgstr "Speichertyp"
#: documents/models.py:208
msgid "added"
msgstr "Hinzugefügt"
#: documents/models.py:212
msgid "filename"
msgstr "Dateiname"
#: documents/models.py:217
msgid "Current filename in storage"
msgstr "Aktueller Dateiname im Datenspeicher"
#: documents/models.py:221
msgid "archive serial number"
msgstr "Archiv-Seriennummer"
#: documents/models.py:226
msgid "The position of this document in your physical document archive."
msgstr "Die Position dieses Dokuments in Ihrem physischen Dokumentenarchiv."
#: documents/models.py:232
msgid "document"
msgstr "Dokument"
#: documents/models.py:233
msgid "documents"
msgstr "Dokumente"
#: documents/models.py:315
msgid "debug"
msgstr "Debug"
#: documents/models.py:316
msgid "information"
msgstr "Information"
#: documents/models.py:317
msgid "warning"
msgstr "Warnung"
#: documents/models.py:318
msgid "error"
msgstr "Fehler"
#: documents/models.py:319
msgid "critical"
msgstr "Kritisch"
#: documents/models.py:323
msgid "group"
msgstr "Gruppe"
#: documents/models.py:326
msgid "message"
msgstr "Nachricht"
#: documents/models.py:329
msgid "level"
msgstr "Level"
#: documents/models.py:336
msgid "log"
msgstr "Protokoll"
#: documents/models.py:337
msgid "logs"
msgstr "Protokolle"
#: documents/models.py:348 documents/models.py:398
msgid "saved view"
msgstr "Gespeicherte Ansicht"
#: documents/models.py:349
msgid "saved views"
msgstr "Gespeicherte Ansichten"
#: documents/models.py:352
msgid "user"
msgstr "Benutzer"
#: documents/models.py:358
msgid "show on dashboard"
msgstr "Auf Startseite zeigen"
#: documents/models.py:361
msgid "show in sidebar"
msgstr "In Seitenleiste zeigen"
#: documents/models.py:365
msgid "sort field"
msgstr "Sortierfeld"
#: documents/models.py:368
msgid "sort reverse"
msgstr "Umgekehrte Sortierung"
#: documents/models.py:374
msgid "title contains"
msgstr "Titel enthält"
#: documents/models.py:375
msgid "content contains"
msgstr "Inhalt enthält"
#: documents/models.py:376
msgid "ASN is"
msgstr "ASN ist"
#: documents/models.py:377
msgid "correspondent is"
msgstr "Korrespondent ist"
#: documents/models.py:378
msgid "document type is"
msgstr "Dokumenttyp ist"
#: documents/models.py:379
msgid "is in inbox"
msgstr "Ist im Posteingang"
#: documents/models.py:380
msgid "has tag"
msgstr "Hat Tag"
#: documents/models.py:381
msgid "has any tag"
msgstr "Hat irgendein Tag"
#: documents/models.py:382
msgid "created before"
msgstr "Erstellt vor"
#: documents/models.py:383
msgid "created after"
msgstr "Erstellt nach"
#: documents/models.py:384
msgid "created year is"
msgstr "Erstellt im Jahr"
#: documents/models.py:385
msgid "created month is"
msgstr "Erstellt im Monat"
#: documents/models.py:386
msgid "created day is"
msgstr "Erstellt am Tag"
#: documents/models.py:387
msgid "added before"
msgstr "Hinzugefügt vor"
#: documents/models.py:388
msgid "added after"
msgstr "Hinzugefügt nach"
#: documents/models.py:389
msgid "modified before"
msgstr "Geändert vor"
#: documents/models.py:390
msgid "modified after"
msgstr "Geändert nach"
#: documents/models.py:391
msgid "does not have tag"
msgstr "Hat nicht folgendes Tag"
#: documents/models.py:402
msgid "rule type"
msgstr "Regeltyp"
#: documents/models.py:406
msgid "value"
msgstr "Wert"
#: documents/models.py:412
msgid "filter rule"
msgstr "Filterregel"
#: documents/models.py:413
msgid "filter rules"
msgstr "Filterregeln"
#: paperless/settings.py:254
msgid "English"
msgstr "Englisch"
#: paperless/settings.py:255
msgid "German"
msgstr "Deutsch"
#: paperless/urls.py:108
msgid "Paperless-ng administration"
msgstr "Paperless-ng Administration"
#: paperless_mail/admin.py:24
msgid "Filter"
msgstr "Filter"
#: paperless_mail/admin.py:26
msgid ""
"Paperless will only process mails that match ALL of the filters given below."
msgstr ""
"Paperless wird nur E-Mails verarbeiten, für die alle der hier angegebenen "
"Filter zutreffen."
#: paperless_mail/admin.py:34
msgid "Actions"
msgstr "Aktionen"
#: paperless_mail/admin.py:36
msgid ""
"The action applied to the mail. This action is only performed when documents"
" were consumed from the mail. Mails without attachments will remain entirely"
" untouched."
msgstr ""
"Die Aktion, die auf E-Mails angewendet werden soll. Diese Aktion wird nur "
"auf E-Mails angewendet, aus denen Anhänge verarbeitet wurden. E-Mails ohne "
"Anhänge werden vollständig ignoriert."
#: paperless_mail/admin.py:43
msgid "Metadata"
msgstr "Metadaten"
#: paperless_mail/admin.py:45
msgid ""
"Assign metadata to documents consumed from this rule automatically. If you "
"do not assign tags, types or correspondents here, paperless will still "
"process all matching rules that you have defined."
msgstr ""
"Folgende Metadaten werden Dokumenten dieser Regel automatisch zugewiesen. "
"Wenn Sie hier nichts auswählen, wird Paperless weiterhin alle "
"Zuweisungsalgorithmen ausführen und Metadaten auf Basis des Dokumentinhalts "
"zuweisen."
#: paperless_mail/apps.py:9
msgid "Paperless mail"
msgstr "Paperless E-Mail"
#: paperless_mail/models.py:11
msgid "mail account"
msgstr "E-Mail-Konto"
#: paperless_mail/models.py:12
msgid "mail accounts"
msgstr "E-Mail-Konten"
#: paperless_mail/models.py:19
msgid "No encryption"
msgstr "Keine Verschlüsselung"
#: paperless_mail/models.py:20
msgid "Use SSL"
msgstr "SSL benutzen"
#: paperless_mail/models.py:21
msgid "Use STARTTLS"
msgstr "STARTTLS benutzen"
#: paperless_mail/models.py:29
msgid "IMAP server"
msgstr "IMAP-Server"
#: paperless_mail/models.py:33
msgid "IMAP port"
msgstr "IMAP-Port"
#: paperless_mail/models.py:36
msgid ""
"This is usually 143 for unencrypted and STARTTLS connections, and 993 for "
"SSL connections."
msgstr ""
"Dies ist in der Regel 143 für unverschlüsselte und STARTTLS-Verbindungen und"
" 993 für SSL-Verbindungen."
#: paperless_mail/models.py:40
msgid "IMAP security"
msgstr "IMAP-Sicherheit"
#: paperless_mail/models.py:46
msgid "username"
msgstr "Benutzername"
#: paperless_mail/models.py:50
msgid "password"
msgstr "Passwort"
#: paperless_mail/models.py:60
msgid "mail rule"
msgstr "E-Mail-Regel"
#: paperless_mail/models.py:61
msgid "mail rules"
msgstr "E-Mail-Regeln"
#: paperless_mail/models.py:69
msgid "Mark as read, don't process read mails"
msgstr "Als gelesen markieren, gelesene E-Mails nicht verarbeiten"
#: paperless_mail/models.py:70
msgid "Flag the mail, don't process flagged mails"
msgstr "Als wichtig markieren, markierte E-Mails nicht verarbeiten"
#: paperless_mail/models.py:71
msgid "Move to specified folder"
msgstr "In angegebenen Ordner verschieben"
#: paperless_mail/models.py:72
msgid "Delete"
msgstr "Löschen"
#: paperless_mail/models.py:79
msgid "Use subject as title"
msgstr "Betreff als Titel verwenden"
#: paperless_mail/models.py:80
msgid "Use attachment filename as title"
msgstr "Dateiname des Anhangs als Titel verwenden"
#: paperless_mail/models.py:90
msgid "Do not assign a correspondent"
msgstr "Keinen Korrespondenten zuweisen"
#: paperless_mail/models.py:92
msgid "Use mail address"
msgstr "E-Mail-Adresse benutzen"
#: paperless_mail/models.py:94
msgid "Use name (or mail address if not available)"
msgstr "Absendername benutzen (oder E-Mail-Adresse, wenn nicht verfügbar)"
#: paperless_mail/models.py:96
msgid "Use correspondent selected below"
msgstr "Nachfolgend ausgewählten Korrespondenten verwenden"
#: paperless_mail/models.py:104
msgid "order"
msgstr "Reihenfolge"
#: paperless_mail/models.py:111
msgid "account"
msgstr "Konto"
#: paperless_mail/models.py:115
msgid "folder"
msgstr "Ordner"
#: paperless_mail/models.py:119
msgid "filter from"
msgstr "Absender filtern"
#: paperless_mail/models.py:122
msgid "filter subject"
msgstr "Betreff filtern"
#: paperless_mail/models.py:125
msgid "filter body"
msgstr "Nachrichteninhalt filtern"
#: paperless_mail/models.py:129
msgid "maximum age"
msgstr "Maximales Alter"
#: paperless_mail/models.py:131
msgid "Specified in days."
msgstr "Angegeben in Tagen."
#: paperless_mail/models.py:134
msgid "action"
msgstr "Aktion"
#: paperless_mail/models.py:140
msgid "action parameter"
msgstr "Parameter für Aktion"
#: paperless_mail/models.py:142
msgid ""
"Additional parameter for the action selected above, i.e., the target folder "
"of the move to folder action."
msgstr ""
"Zusätzlicher Parameter für die oben ausgewählte Aktion, zum Beispiel der "
"Zielordner für die Aktion \"In angegebenen Ordner verschieben\""
#: paperless_mail/models.py:148
msgid "assign title from"
msgstr "Titel zuweisen von"
#: paperless_mail/models.py:158
msgid "assign this tag"
msgstr "Dieses Tag zuweisen"
#: paperless_mail/models.py:166
msgid "assign this document type"
msgstr "Diesen Dokumenttyp zuweisen"
#: paperless_mail/models.py:170
msgid "assign correspondent from"
msgstr "Korrespondent zuweisen von"
#: paperless_mail/models.py:180
msgid "assign this correspondent"
msgstr "Diesen Korrespondenten zuweisen"

View File

@@ -0,0 +1,546 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2021-01-02 00:26+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"Language: \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
#: documents/apps.py:10
msgid "Documents"
msgstr ""
#: documents/models.py:32
msgid "Any word"
msgstr ""
#: documents/models.py:33
msgid "All words"
msgstr ""
#: documents/models.py:34
msgid "Exact match"
msgstr ""
#: documents/models.py:35
msgid "Regular expression"
msgstr ""
#: documents/models.py:36
msgid "Fuzzy word"
msgstr ""
#: documents/models.py:37
msgid "Automatic"
msgstr ""
#: documents/models.py:41 documents/models.py:354 paperless_mail/models.py:25
#: paperless_mail/models.py:100
msgid "name"
msgstr ""
#: documents/models.py:45
msgid "match"
msgstr ""
#: documents/models.py:49
msgid "matching algorithm"
msgstr ""
#: documents/models.py:55
msgid "is insensitive"
msgstr ""
#: documents/models.py:80 documents/models.py:140
msgid "correspondent"
msgstr ""
#: documents/models.py:81
msgid "correspondents"
msgstr ""
#: documents/models.py:103
msgid "color"
msgstr ""
#: documents/models.py:107
msgid "is inbox tag"
msgstr ""
#: documents/models.py:109
msgid ""
"Marks this tag as an inbox tag: All newly consumed documents will be tagged "
"with inbox tags."
msgstr ""
#: documents/models.py:114
msgid "tag"
msgstr ""
#: documents/models.py:115 documents/models.py:171
msgid "tags"
msgstr ""
#: documents/models.py:121 documents/models.py:153
msgid "document type"
msgstr ""
#: documents/models.py:122
msgid "document types"
msgstr ""
#: documents/models.py:130
msgid "Unencrypted"
msgstr ""
#: documents/models.py:131
msgid "Encrypted with GNU Privacy Guard"
msgstr ""
#: documents/models.py:144
msgid "title"
msgstr ""
#: documents/models.py:157
msgid "content"
msgstr ""
#: documents/models.py:159
msgid ""
"The raw, text-only data of the document. This field is primarily used for "
"searching."
msgstr ""
#: documents/models.py:164
msgid "mime type"
msgstr ""
#: documents/models.py:175
msgid "checksum"
msgstr ""
#: documents/models.py:179
msgid "The checksum of the original document."
msgstr ""
#: documents/models.py:183
msgid "archive checksum"
msgstr ""
#: documents/models.py:188
msgid "The checksum of the archived document."
msgstr ""
#: documents/models.py:192 documents/models.py:332
msgid "created"
msgstr ""
#: documents/models.py:196
msgid "modified"
msgstr ""
#: documents/models.py:200
msgid "storage type"
msgstr ""
#: documents/models.py:208
msgid "added"
msgstr ""
#: documents/models.py:212
msgid "filename"
msgstr ""
#: documents/models.py:217
msgid "Current filename in storage"
msgstr ""
#: documents/models.py:221
msgid "archive serial number"
msgstr ""
#: documents/models.py:226
msgid "The position of this document in your physical document archive."
msgstr ""
#: documents/models.py:232
msgid "document"
msgstr ""
#: documents/models.py:233
msgid "documents"
msgstr ""
#: documents/models.py:315
msgid "debug"
msgstr ""
#: documents/models.py:316
msgid "information"
msgstr ""
#: documents/models.py:317
msgid "warning"
msgstr ""
#: documents/models.py:318
msgid "error"
msgstr ""
#: documents/models.py:319
msgid "critical"
msgstr ""
#: documents/models.py:323
msgid "group"
msgstr ""
#: documents/models.py:326
msgid "message"
msgstr ""
#: documents/models.py:329
msgid "level"
msgstr ""
#: documents/models.py:336
msgid "log"
msgstr ""
#: documents/models.py:337
msgid "logs"
msgstr ""
#: documents/models.py:348 documents/models.py:398
msgid "saved view"
msgstr ""
#: documents/models.py:349
msgid "saved views"
msgstr ""
#: documents/models.py:352
msgid "user"
msgstr ""
#: documents/models.py:358
msgid "show on dashboard"
msgstr ""
#: documents/models.py:361
msgid "show in sidebar"
msgstr ""
#: documents/models.py:365
msgid "sort field"
msgstr ""
#: documents/models.py:368
msgid "sort reverse"
msgstr ""
#: documents/models.py:374
msgid "title contains"
msgstr ""
#: documents/models.py:375
msgid "content contains"
msgstr ""
#: documents/models.py:376
msgid "ASN is"
msgstr ""
#: documents/models.py:377
msgid "correspondent is"
msgstr ""
#: documents/models.py:378
msgid "document type is"
msgstr ""
#: documents/models.py:379
msgid "is in inbox"
msgstr ""
#: documents/models.py:380
msgid "has tag"
msgstr ""
#: documents/models.py:381
msgid "has any tag"
msgstr ""
#: documents/models.py:382
msgid "created before"
msgstr ""
#: documents/models.py:383
msgid "created after"
msgstr ""
#: documents/models.py:384
msgid "created year is"
msgstr ""
#: documents/models.py:385
msgid "created month is"
msgstr ""
#: documents/models.py:386
msgid "created day is"
msgstr ""
#: documents/models.py:387
msgid "added before"
msgstr ""
#: documents/models.py:388
msgid "added after"
msgstr ""
#: documents/models.py:389
msgid "modified before"
msgstr ""
#: documents/models.py:390
msgid "modified after"
msgstr ""
#: documents/models.py:391
msgid "does not have tag"
msgstr ""
#: documents/models.py:402
msgid "rule type"
msgstr ""
#: documents/models.py:406
msgid "value"
msgstr ""
#: documents/models.py:412
msgid "filter rule"
msgstr ""
#: documents/models.py:413
msgid "filter rules"
msgstr ""
#: paperless/settings.py:254
msgid "English"
msgstr ""
#: paperless/settings.py:255
msgid "German"
msgstr ""
#: paperless/urls.py:108
msgid "Paperless-ng administration"
msgstr ""
#: paperless_mail/admin.py:24
msgid "Filter"
msgstr ""
#: paperless_mail/admin.py:26
msgid ""
"Paperless will only process mails that match ALL of the filters given below."
msgstr ""
#: paperless_mail/admin.py:34
msgid "Actions"
msgstr ""
#: paperless_mail/admin.py:36
msgid ""
"The action applied to the mail. This action is only performed when documents "
"were consumed from the mail. Mails without attachments will remain entirely "
"untouched."
msgstr ""
#: paperless_mail/admin.py:43
msgid "Metadata"
msgstr ""
#: paperless_mail/admin.py:45
msgid ""
"Assign metadata to documents consumed from this rule automatically. If you "
"do not assign tags, types or correspondents here, paperless will still "
"process all matching rules that you have defined."
msgstr ""
#: paperless_mail/apps.py:9
msgid "Paperless mail"
msgstr ""
#: paperless_mail/models.py:11
msgid "mail account"
msgstr ""
#: paperless_mail/models.py:12
msgid "mail accounts"
msgstr ""
#: paperless_mail/models.py:19
msgid "No encryption"
msgstr ""
#: paperless_mail/models.py:20
msgid "Use SSL"
msgstr ""
#: paperless_mail/models.py:21
msgid "Use STARTTLS"
msgstr ""
#: paperless_mail/models.py:29
msgid "IMAP server"
msgstr ""
#: paperless_mail/models.py:33
msgid "IMAP port"
msgstr ""
#: paperless_mail/models.py:36
msgid ""
"This is usually 143 for unencrypted and STARTTLS connections, and 993 for "
"SSL connections."
msgstr ""
#: paperless_mail/models.py:40
msgid "IMAP security"
msgstr ""
#: paperless_mail/models.py:46
msgid "username"
msgstr ""
#: paperless_mail/models.py:50
msgid "password"
msgstr ""
#: paperless_mail/models.py:60
msgid "mail rule"
msgstr ""
#: paperless_mail/models.py:61
msgid "mail rules"
msgstr ""
#: paperless_mail/models.py:69
msgid "Mark as read, don't process read mails"
msgstr ""
#: paperless_mail/models.py:70
msgid "Flag the mail, don't process flagged mails"
msgstr ""
#: paperless_mail/models.py:71
msgid "Move to specified folder"
msgstr ""
#: paperless_mail/models.py:72
msgid "Delete"
msgstr ""
#: paperless_mail/models.py:79
msgid "Use subject as title"
msgstr ""
#: paperless_mail/models.py:80
msgid "Use attachment filename as title"
msgstr ""
#: paperless_mail/models.py:90
msgid "Do not assign a correspondent"
msgstr ""
#: paperless_mail/models.py:92
msgid "Use mail address"
msgstr ""
#: paperless_mail/models.py:94
msgid "Use name (or mail address if not available)"
msgstr ""
#: paperless_mail/models.py:96
msgid "Use correspondent selected below"
msgstr ""
#: paperless_mail/models.py:104
msgid "order"
msgstr ""
#: paperless_mail/models.py:111
msgid "account"
msgstr ""
#: paperless_mail/models.py:115
msgid "folder"
msgstr ""
#: paperless_mail/models.py:119
msgid "filter from"
msgstr ""
#: paperless_mail/models.py:122
msgid "filter subject"
msgstr ""
#: paperless_mail/models.py:125
msgid "filter body"
msgstr ""
#: paperless_mail/models.py:129
msgid "maximum age"
msgstr ""
#: paperless_mail/models.py:131
msgid "Specified in days."
msgstr ""
#: paperless_mail/models.py:134
msgid "action"
msgstr ""
#: paperless_mail/models.py:140
msgid "action parameter"
msgstr ""
#: paperless_mail/models.py:142
msgid ""
"Additional parameter for the action selected above, i.e., the target folder "
"of the move to folder action."
msgstr ""
#: paperless_mail/models.py:148
msgid "assign title from"
msgstr ""
#: paperless_mail/models.py:158
msgid "assign this tag"
msgstr ""
#: paperless_mail/models.py:166
msgid "assign this document type"
msgstr ""
#: paperless_mail/models.py:170
msgid "assign correspondent from"
msgstr ""
#: paperless_mail/models.py:180
msgid "assign this correspondent"
msgstr ""

View File

@@ -6,6 +6,8 @@ import re
from dotenv import load_dotenv
from django.utils.translation import gettext_lazy as _
# Tap paperless.conf if it's available
if os.path.exists("../paperless.conf"):
load_dotenv("../paperless.conf")
@@ -87,6 +89,7 @@ INSTALLED_APPS = [
"documents.apps.DocumentsConfig",
"paperless_tesseract.apps.PaperlessTesseractConfig",
"paperless_text.apps.PaperlessTextConfig",
"paperless_tika.apps.PaperlessTikaConfig",
"paperless_mail.apps.PaperlessMailConfig",
"django.contrib.admin",
@@ -124,6 +127,7 @@ MIDDLEWARE = [
'whitenoise.middleware.WhiteNoiseMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'corsheaders.middleware.CorsMiddleware',
'django.middleware.locale.LocaleMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
@@ -253,6 +257,15 @@ if os.getenv("PAPERLESS_DBHOST"):
LANGUAGE_CODE = 'en-us'
LANGUAGES = [
("en-us", _("English")),
("de", _("German"))
]
LOCALE_PATHS = [
os.path.join(BASE_DIR, "locale")
]
TIME_ZONE = os.getenv("PAPERLESS_TIME_ZONE", "UTC")
USE_I18N = True
@@ -431,3 +444,10 @@ for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
THUMBNAIL_FONT_NAME = os.getenv("PAPERLESS_THUMBNAIL_FONT_NAME", "/usr/share/fonts/liberation/LiberationSerif-Regular.ttf")
# Tika settings
PAPERLESS_TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
PAPERLESS_TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
PAPERLESS_TIKA_GOTENBERG_ENDPOINT = os.getenv(
"PAPERLESS_TIKA_GOTENBERG_ENDPOINT", "http://localhost:3000"
)

View File

@@ -7,6 +7,8 @@ from django.views.generic import RedirectView
from rest_framework.authtoken import views
from rest_framework.routers import DefaultRouter
from django.utils.translation import gettext_lazy as _
from documents.views import (
CorrespondentViewSet,
DocumentViewSet,
@@ -88,7 +90,8 @@ urlpatterns = [
# Frontend assets TODO: this is pretty bad, but it works.
path('assets/<path:path>',
RedirectView.as_view(url='/static/frontend/assets/%(path)s')),
RedirectView.as_view(url='/static/frontend/en-US/assets/%(path)s')),
# TODO: with localization, this is even worse! :/
# login, logout
path('accounts/', include('django.contrib.auth.urls')),
@@ -102,4 +105,4 @@ admin.site.site_header = 'Paperless-ng'
# Text at the end of each page's <title>.
admin.site.site_title = 'Paperless-ng'
# Text at the top of the admin index page.
admin.site.index_title = 'Paperless-ng administration'
admin.site.index_title = _('Paperless-ng administration')

View File

@@ -1 +1 @@
__version__ = (0, 9, 10)
__version__ = (0, 9, 11)

View File

@@ -1,6 +1,8 @@
from django.contrib import admin
from paperless_mail.models import MailAccount, MailRule
from django.utils.translation import gettext_lazy as _
class MailAccountAdmin(admin.ModelAdmin):
@@ -19,31 +21,31 @@ class MailRuleAdmin(admin.ModelAdmin):
(None, {
'fields': ('name', 'order', 'account', 'folder')
}),
("Filter", {
(_("Filter"), {
'description':
"Paperless will only process mails that match ALL of the "
"filters given below.",
_("Paperless will only process mails that match ALL of the "
"filters given below."),
'fields':
('filter_from',
'filter_subject',
'filter_body',
'maximum_age')
}),
("Actions", {
(_("Actions"), {
'description':
"The action applied to the mail. This action is only "
"performed when documents were consumed from the mail. Mails "
"without attachments will remain entirely untouched.",
_("The action applied to the mail. This action is only "
"performed when documents were consumed from the mail. "
"Mails without attachments will remain entirely untouched."),
'fields': (
'action',
'action_parameter')
}),
("Metadata", {
(_("Metadata"), {
'description':
"Assign metadata to documents consumed from this rule "
"automatically. If you do not assign tags, types or "
"correspondents here, paperless will still process all "
"matching rules that you have defined.",
_("Assign metadata to documents consumed from this rule "
"automatically. If you do not assign tags, types or "
"correspondents here, paperless will still process all "
"matching rules that you have defined."),
"fields": (
'assign_title_from',
'assign_tag',

View File

@@ -1,7 +1,9 @@
from django.apps import AppConfig
from django.utils.translation import gettext_lazy as _
class PaperlessMailConfig(AppConfig):
name = 'paperless_mail'
verbose_name = 'Paperless Mail'
verbose_name = _('Paperless mail')

View File

@@ -0,0 +1,128 @@
# Generated by Django 3.1.4 on 2021-01-01 23:40
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """
    Schema-state migration for the paperless_mail models.

    Sets verbose names on the ``mailaccount`` and ``mailrule`` model options
    and re-declares their fields with ``verbose_name`` (and, where present,
    ``help_text``) values.
    """

    dependencies = [
        ('documents', '1011_auto_20210101_2340'),
        ('paperless_mail', '0005_help_texts'),
    ]

    operations = [
        # --- model options -------------------------------------------------
        migrations.AlterModelOptions(
            name='mailaccount',
            options={
                'verbose_name': 'mail account',
                'verbose_name_plural': 'mail accounts',
            },
        ),
        migrations.AlterModelOptions(
            name='mailrule',
            options={
                'verbose_name': 'mail rule',
                'verbose_name_plural': 'mail rules',
            },
        ),

        # --- mailaccount fields --------------------------------------------
        migrations.AlterField(
            model_name='mailaccount',
            name='imap_port',
            field=models.IntegerField(
                blank=True,
                help_text='This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections.',  # NOQA: E501
                null=True,
                verbose_name='IMAP port',
            ),
        ),
        migrations.AlterField(
            model_name='mailaccount',
            name='imap_security',
            field=models.PositiveIntegerField(
                choices=[
                    (1, 'No encryption'),
                    (2, 'Use SSL'),
                    (3, 'Use STARTTLS'),
                ],
                default=2,
                verbose_name='IMAP security',
            ),
        ),
        migrations.AlterField(
            model_name='mailaccount',
            name='imap_server',
            field=models.CharField(
                max_length=256,
                verbose_name='IMAP server',
            ),
        ),
        migrations.AlterField(
            model_name='mailaccount',
            name='name',
            field=models.CharField(
                max_length=256,
                unique=True,
                verbose_name='name',
            ),
        ),
        migrations.AlterField(
            model_name='mailaccount',
            name='password',
            field=models.CharField(
                max_length=256,
                verbose_name='password',
            ),
        ),
        migrations.AlterField(
            model_name='mailaccount',
            name='username',
            field=models.CharField(
                max_length=256,
                verbose_name='username',
            ),
        ),

        # --- mailrule fields -----------------------------------------------
        migrations.AlterField(
            model_name='mailrule',
            name='account',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='rules',
                to='paperless_mail.mailaccount',
                verbose_name='account',
            ),
        ),
        migrations.AlterField(
            model_name='mailrule',
            name='action',
            field=models.PositiveIntegerField(
                choices=[
                    (3, "Mark as read, don't process read mails"),
                    (4, "Flag the mail, don't process flagged mails"),
                    (2, 'Move to specified folder'),
                    (1, 'Delete'),
                ],
                default=3,
                verbose_name='action',
            ),
        ),
        migrations.AlterField(
            model_name='mailrule',
            name='action_parameter',
            field=models.CharField(
                blank=True,
                help_text='Additional parameter for the action selected above, i.e., the target folder of the move to folder action.',  # NOQA: E501
                max_length=256,
                null=True,
                verbose_name='action parameter',
            ),
        ),
        migrations.AlterField(
            model_name='mailrule',
            name='assign_correspondent',
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                to='documents.correspondent',
                verbose_name='assign this correspondent',
            ),
        ),
        migrations.AlterField(
            model_name='mailrule',
            name='assign_correspondent_from',
            field=models.PositiveIntegerField(
                choices=[
                    (1, 'Do not assign a correspondent'),
                    (2, 'Use mail address'),
                    (3, 'Use name (or mail address if not available)'),
                    (4, 'Use correspondent selected below'),
                ],
                default=1,
                verbose_name='assign correspondent from',
            ),
        ),
        migrations.AlterField(
            model_name='mailrule',
            name='assign_document_type',
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                to='documents.documenttype',
                verbose_name='assign this document type',
            ),
        ),
        migrations.AlterField(
            model_name='mailrule',
            name='assign_tag',
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                to='documents.tag',
                verbose_name='assign this tag',
            ),
        ),
        migrations.AlterField(
            model_name='mailrule',
            name='assign_title_from',
            field=models.PositiveIntegerField(
                choices=[
                    (1, 'Use subject as title'),
                    (2, 'Use attachment filename as title'),
                ],
                default=1,
                verbose_name='assign title from',
            ),
        ),
        migrations.AlterField(
            model_name='mailrule',
            name='filter_body',
            field=models.CharField(
                blank=True,
                max_length=256,
                null=True,
                verbose_name='filter body',
            ),
        ),
        migrations.AlterField(
            model_name='mailrule',
            name='filter_from',
            field=models.CharField(
                blank=True,
                max_length=256,
                null=True,
                verbose_name='filter from',
            ),
        ),
        migrations.AlterField(
            model_name='mailrule',
            name='filter_subject',
            field=models.CharField(
                blank=True,
                max_length=256,
                null=True,
                verbose_name='filter subject',
            ),
        ),
        migrations.AlterField(
            model_name='mailrule',
            name='folder',
            field=models.CharField(
                default='INBOX',
                max_length=256,
                verbose_name='folder',
            ),
        ),
        migrations.AlterField(
            model_name='mailrule',
            name='maximum_age',
            field=models.PositiveIntegerField(
                default=30,
                help_text='Specified in days.',
                verbose_name='maximum age',
            ),
        ),
        migrations.AlterField(
            model_name='mailrule',
            name='name',
            field=models.CharField(
                max_length=256,
                unique=True,
                verbose_name='name',
            ),
        ),
        migrations.AlterField(
            model_name='mailrule',
            name='order',
            field=models.IntegerField(
                default=0,
                verbose_name='order',
            ),
        ),
    ]

View File

@@ -2,37 +2,53 @@ from django.db import models
import documents.models as document_models
from django.utils.translation import gettext_lazy as _
class MailAccount(models.Model):
class Meta:
verbose_name = _("mail account")
verbose_name_plural = _("mail accounts")
IMAP_SECURITY_NONE = 1
IMAP_SECURITY_SSL = 2
IMAP_SECURITY_STARTTLS = 3
IMAP_SECURITY_OPTIONS = (
(IMAP_SECURITY_NONE, "No encryption"),
(IMAP_SECURITY_SSL, "Use SSL"),
(IMAP_SECURITY_STARTTLS, "Use STARTTLS"),
(IMAP_SECURITY_NONE, _("No encryption")),
(IMAP_SECURITY_SSL, _("Use SSL")),
(IMAP_SECURITY_STARTTLS, _("Use STARTTLS")),
)
name = models.CharField(max_length=256, unique=True)
name = models.CharField(
_("name"),
max_length=256, unique=True)
imap_server = models.CharField(max_length=256)
imap_server = models.CharField(
_("IMAP server"),
max_length=256)
imap_port = models.IntegerField(
_("IMAP port"),
blank=True,
null=True,
help_text="This is usually 143 for unencrypted and STARTTLS "
"connections, and 993 for SSL connections.")
help_text=_("This is usually 143 for unencrypted and STARTTLS "
"connections, and 993 for SSL connections."))
imap_security = models.PositiveIntegerField(
_("IMAP security"),
choices=IMAP_SECURITY_OPTIONS,
default=IMAP_SECURITY_SSL
)
username = models.CharField(max_length=256)
username = models.CharField(
_("username"),
max_length=256)
password = models.CharField(max_length=256)
password = models.CharField(
_("password"),
max_length=256)
def __str__(self):
return self.name
@@ -40,24 +56,28 @@ class MailAccount(models.Model):
class MailRule(models.Model):
class Meta:
verbose_name = _("mail rule")
verbose_name_plural = _("mail rules")
ACTION_DELETE = 1
ACTION_MOVE = 2
ACTION_MARK_READ = 3
ACTION_FLAG = 4
ACTIONS = (
(ACTION_MARK_READ, "Mark as read, don't process read mails"),
(ACTION_FLAG, "Flag the mail, don't process flagged mails"),
(ACTION_MOVE, "Move to specified folder"),
(ACTION_DELETE, "Delete"),
(ACTION_MARK_READ, _("Mark as read, don't process read mails")),
(ACTION_FLAG, _("Flag the mail, don't process flagged mails")),
(ACTION_MOVE, _("Move to specified folder")),
(ACTION_DELETE, _("Delete")),
)
TITLE_FROM_SUBJECT = 1
TITLE_FROM_FILENAME = 2
TITLE_SELECTOR = (
(TITLE_FROM_SUBJECT, "Use subject as title"),
(TITLE_FROM_FILENAME, "Use attachment filename as title")
(TITLE_FROM_SUBJECT, _("Use subject as title")),
(TITLE_FROM_FILENAME, _("Use attachment filename as title"))
)
CORRESPONDENT_FROM_NOTHING = 1
@@ -67,47 +87,65 @@ class MailRule(models.Model):
CORRESPONDENT_SELECTOR = (
(CORRESPONDENT_FROM_NOTHING,
"Do not assign a correspondent"),
_("Do not assign a correspondent")),
(CORRESPONDENT_FROM_EMAIL,
"Use mail address"),
_("Use mail address")),
(CORRESPONDENT_FROM_NAME,
"Use name (or mail address if not available)"),
_("Use name (or mail address if not available)")),
(CORRESPONDENT_FROM_CUSTOM,
"Use correspondent selected below")
_("Use correspondent selected below"))
)
name = models.CharField(max_length=256, unique=True)
name = models.CharField(
_("name"),
max_length=256, unique=True)
order = models.IntegerField(default=0)
order = models.IntegerField(
_("order"),
default=0)
account = models.ForeignKey(
MailAccount,
related_name="rules",
on_delete=models.CASCADE
on_delete=models.CASCADE,
verbose_name=_("account")
)
folder = models.CharField(default='INBOX', max_length=256)
folder = models.CharField(
_("folder"),
default='INBOX', max_length=256)
filter_from = models.CharField(max_length=256, null=True, blank=True)
filter_subject = models.CharField(max_length=256, null=True, blank=True)
filter_body = models.CharField(max_length=256, null=True, blank=True)
filter_from = models.CharField(
_("filter from"),
max_length=256, null=True, blank=True)
filter_subject = models.CharField(
_("filter subject"),
max_length=256, null=True, blank=True)
filter_body = models.CharField(
_("filter body"),
max_length=256, null=True, blank=True)
maximum_age = models.PositiveIntegerField(
_("maximum age"),
default=30,
help_text="Specified in days.")
help_text=_("Specified in days."))
action = models.PositiveIntegerField(
_("action"),
choices=ACTIONS,
default=ACTION_MARK_READ,
)
action_parameter = models.CharField(
_("action parameter"),
max_length=256, blank=True, null=True,
help_text="Additional parameter for the action selected above, i.e., "
"the target folder of the move to folder action."
help_text=_("Additional parameter for the action selected above, "
"i.e., "
"the target folder of the move to folder action.")
)
assign_title_from = models.PositiveIntegerField(
_("assign title from"),
choices=TITLE_SELECTOR,
default=TITLE_FROM_SUBJECT
)
@@ -116,17 +154,20 @@ class MailRule(models.Model):
document_models.Tag,
null=True,
blank=True,
on_delete=models.SET_NULL
on_delete=models.SET_NULL,
verbose_name=_("assign this tag"),
)
assign_document_type = models.ForeignKey(
document_models.DocumentType,
null=True,
blank=True,
on_delete=models.SET_NULL
on_delete=models.SET_NULL,
verbose_name=_("assign this document type"),
)
assign_correspondent_from = models.PositiveIntegerField(
_("assign correspondent from"),
choices=CORRESPONDENT_SELECTOR,
default=CORRESPONDENT_FROM_NOTHING
)
@@ -135,7 +176,8 @@ class MailRule(models.Model):
document_models.Correspondent,
null=True,
blank=True,
on_delete=models.SET_NULL
on_delete=models.SET_NULL,
verbose_name=_("assign this correspondent")
)
def __str__(self):

View File

@@ -1,7 +1,6 @@
import json
import os
import re
import subprocess
import ocrmypdf
import pdftotext
@@ -10,7 +9,8 @@ from PIL import Image
from django.conf import settings
from ocrmypdf import InputFileError, EncryptedPdfError
from documents.parsers import DocumentParser, ParseError, run_convert
from documents.parsers import DocumentParser, ParseError, \
make_thumbnail_from_pdf
class RasterisedDocumentParser(DocumentParser):
@@ -47,48 +47,8 @@ class RasterisedDocumentParser(DocumentParser):
return result
def get_thumbnail(self, document_path, mime_type):
"""
The thumbnail of a PDF is just a 500px wide image of the first page.
"""
out_path = os.path.join(self.tempdir, "convert.png")
# Run convert to get a decent thumbnail
try:
run_convert(density=300,
scale="500x5000>",
alpha="remove",
strip=True,
trim=False,
input_file="{}[0]".format(document_path),
output_file=out_path,
logging_group=self.logging_group)
except ParseError:
# if convert fails, fall back to extracting
# the first PDF page as a PNG using Ghostscript
self.log(
'warning',
"Thumbnail generation with ImageMagick failed, falling back "
"to ghostscript. Check your /etc/ImageMagick-x/policy.xml!")
gs_out_path = os.path.join(self.tempdir, "gs_out.png")
cmd = [settings.GS_BINARY,
"-q",
"-sDEVICE=pngalpha",
"-o", gs_out_path,
document_path]
if not subprocess.Popen(cmd).wait() == 0:
raise ParseError("Thumbnail (gs) failed at {}".format(cmd))
# then run convert on the output from gs
run_convert(density=300,
scale="500x5000>",
alpha="remove",
strip=True,
trim=False,
input_file=gs_out_path,
output_file=out_path,
logging_group=self.logging_group)
return out_path
return make_thumbnail_from_pdf(
document_path, self.tempdir, self.logging_group)
def is_image(self, mime_type):
return mime_type in [
@@ -128,7 +88,7 @@ class RasterisedDocumentParser(DocumentParser):
f"Error while calculating DPI for image {image}: {e}")
return None
def parse(self, document_path, mime_type):
def parse(self, document_path, mime_type, file_name=None):
mode = settings.OCR_MODE
text_original = get_text_from_pdf(document_path)

View File

@@ -78,7 +78,7 @@ class TestParser(DirectoriesMixin, TestCase):
parser.get_thumbnail(os.path.join(self.SAMPLE_FILES, 'simple-digital.pdf'), "application/pdf")
# dont really know how to test it, just call it and assert that it does not raise anything.
@mock.patch("paperless_tesseract.parsers.run_convert")
@mock.patch("documents.parsers.run_convert")
def test_thumbnail_fallback(self, m):
def call_convert(input_file, output_file, **kwargs):

View File

@@ -32,6 +32,6 @@ class TextDocumentParser(DocumentParser):
return out_path
def parse(self, document_path, mime_type):
def parse(self, document_path, mime_type, file_name=None):
with open(document_path, 'r') as f:
self.text = f.read()

View File

@@ -0,0 +1,14 @@
from django.apps import AppConfig
from django.conf import settings
from paperless_tika.signals import tika_consumer_declaration
class PaperlessTikaConfig(AppConfig):
    """Django application configuration for the ``paperless_tika`` app."""

    name = "paperless_tika"

    def ready(self):
        """
        Register the Tika consumer declaration with the
        ``document_consumer_declaration`` signal, but only when
        ``settings.PAPERLESS_TIKA_ENABLED`` is truthy, then defer to
        ``AppConfig.ready``.
        """
        # Imported at call time rather than at module level.
        from documents.signals import document_consumer_declaration

        tika_enabled = settings.PAPERLESS_TIKA_ENABLED
        if tika_enabled:
            document_consumer_declaration.connect(tika_consumer_declaration)

        AppConfig.ready(self)

View File

@@ -0,0 +1,87 @@
import os
import requests
import dateutil.parser
from django.conf import settings
from documents.parsers import DocumentParser, ParseError, \
make_thumbnail_from_pdf
from tika import parser
class TikaDocumentParser(DocumentParser):
    """
    This parser sends documents to a local tika server for text and
    metadata extraction, and to a Gotenberg server for conversion to PDF.
    """

    def get_thumbnail(self, document_path, mime_type):
        """
        Return the path of a thumbnail generated from the PDF rendition of
        the document.

        If no archive PDF has been produced yet, the document is converted
        first and the result is stored in ``self.archive_path``.
        """
        if not self.archive_path:
            # No original file name is known here; convert_to_pdf falls back
            # to the base name of document_path.
            self.archive_path = self.convert_to_pdf(document_path)

        return make_thumbnail_from_pdf(
            self.archive_path, self.tempdir, self.logging_group)

    def extract_metadata(self, document_path, mime_type):
        """
        Return the metadata reported by Tika as a list of dicts with the keys
        ``namespace``, ``prefix``, ``key`` and ``value``.

        Returns an empty list when the Tika request fails (a warning is
        logged) or when Tika reports no metadata.
        """
        tika_server = settings.PAPERLESS_TIKA_ENDPOINT
        try:
            parsed = parser.from_file(document_path, tika_server)
        except Exception as e:
            self.log("warning", f"Error while fetching document metadata for "
                                f"{document_path}: {e}")
            return []

        # Tika may report no metadata at all (None); treat that as empty.
        metadata = parsed.get("metadata") or {}

        return [
            {
                "namespace": "",
                "prefix": "",
                "key": key,
                "value": value
            } for key, value in metadata.items()
        ]

    def parse(self, document_path, mime_type, file_name=None):
        """
        Extract text and creation date via Tika and create a PDF rendition.

        Sets ``self.text``, ``self.date`` (only when a creation date could be
        parsed) and ``self.archive_path``.

        Raises:
            ParseError: if the Tika request or the PDF conversion fails.
        """
        self.log("info", f"Sending {document_path} to Tika server")
        tika_server = settings.PAPERLESS_TIKA_ENDPOINT

        try:
            parsed = parser.from_file(document_path, tika_server)
        except Exception as err:
            raise ParseError(
                f"Could not parse {document_path} with tika server at "
                f"{tika_server}: {err}"
            ) from err

        # Tika returns None as content for documents without any text.
        self.text = (parsed.get("content") or "").strip()

        try:
            self.date = dateutil.parser.isoparse(
                parsed["metadata"]["Creation-Date"])
        except Exception as e:
            # Best effort: a missing or malformed date must not abort parsing.
            self.log("warning", f"Unable to extract date for document "
                                f"{document_path}: {e}")

        self.archive_path = self.convert_to_pdf(document_path, file_name)

    def convert_to_pdf(self, document_path, file_name=None):
        """
        Convert the document to PDF using the Gotenberg server and return the
        path of the resulting file inside ``self.tempdir``.

        ``file_name`` is the name submitted along with the upload; when it is
        empty or omitted, the base name of ``document_path`` is used.

        Raises:
            ParseError: if the request fails or returns an error status.
        """
        pdf_path = os.path.join(self.tempdir, "convert.pdf")
        gotenberg_server = settings.PAPERLESS_TIKA_GOTENBERG_ENDPOINT
        url = gotenberg_server + "/convert/office"

        self.log("info", f"Converting {document_path} to PDF as {pdf_path}")
        upload_name = file_name or os.path.basename(document_path)
        headers = {}

        # Keep the input open only for the duration of the upload so the
        # handle is always released, even when the request fails.
        with open(document_path, "rb") as document_handle:
            files = {"files": (upload_name, document_handle)}
            try:
                response = requests.post(url, files=files, headers=headers)
                response.raise_for_status()  # ensure we notice bad responses
            except Exception as err:
                raise ParseError(
                    f"Error while converting document to PDF: {err}"
                ) from err

        with open(pdf_path, "wb") as pdf_handle:
            pdf_handle.write(response.content)

        return pdf_path

View File

@@ -0,0 +1,20 @@
from .parsers import TikaDocumentParser
def tika_consumer_declaration(sender, **kwargs):
    """
    Signal receiver that declares the Tika parser.

    Returns a dict with the parser class, its weight, and a mapping from each
    supported MIME type to the file extension associated with it.
    """
    office_mime_types = {
        "application/msword": ".doc",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",  # NOQA: E501
        "application/vnd.ms-excel": ".xls",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",  # NOQA: E501
        "application/vnd.ms-powerpoint": ".ppt",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",  # NOQA: E501
        "application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",  # NOQA: E501
        "application/vnd.oasis.opendocument.presentation": ".odp",
        "application/vnd.oasis.opendocument.spreadsheet": ".ods",
        "application/vnd.oasis.opendocument.text": ".odt",
    }

    declaration = {
        "parser": TikaDocumentParser,
        "weight": 10,
        "mime_types": office_mime_types,
    }
    return declaration

View File

@@ -0,0 +1,60 @@
import datetime
import os
from pathlib import Path
from unittest import mock
from django.test import TestCase
from requests import Response
from paperless_tika.parsers import TikaDocumentParser
class TestTikaParser(TestCase):
    """Tests for TikaDocumentParser with the Tika and HTTP calls mocked."""

    def setUp(self) -> None:
        self.parser = TikaDocumentParser(logging_group=None)

    def tearDown(self) -> None:
        self.parser.cleanup()

    @mock.patch("paperless_tika.parsers.parser.from_file")
    @mock.patch("paperless_tika.parsers.requests.post")
    def test_parse(self, post, from_file):
        """parse() stores the text, the date and the converted PDF."""
        from_file.return_value = {
            "content": "the content",
            "metadata": {
                "Creation-Date": "2020-11-21"
            }
        }

        # Canned successful HTTP response standing in for the PDF conversion.
        pdf_response = Response()
        pdf_response._content = b"PDF document"
        pdf_response.status_code = 200
        post.return_value = pdf_response

        sample_path = os.path.join(self.parser.tempdir, "input.odt")
        Path(sample_path).touch()

        self.parser.parse(
            sample_path, "application/vnd.oasis.opendocument.text")

        self.assertEqual(self.parser.text, "the content")
        self.assertIsNotNone(self.parser.archive_path)
        with open(self.parser.archive_path, "rb") as archive:
            archived_bytes = archive.read()
            self.assertEqual(archived_bytes, b"PDF document")

        expected_date = datetime.datetime(2020, 11, 21)
        self.assertEqual(self.parser.date, expected_date)

    @mock.patch("paperless_tika.parsers.parser.from_file")
    def test_metadata(self, from_file):
        """extract_metadata() exposes every key reported by Tika."""
        from_file.return_value = {
            "metadata": {
                "Creation-Date": "2020-11-21",
                "Some-key": "value"
            }
        }

        sample_path = os.path.join(self.parser.tempdir, "input.odt")
        Path(sample_path).touch()

        metadata = self.parser.extract_metadata(
            sample_path, "application/vnd.oasis.opendocument.text")
        reported_keys = [entry['key'] for entry in metadata]

        self.assertIn("Creation-Date", reported_keys)
        self.assertIn("Some-key", reported_keys)