Merge branch 'dev' into feature-autocolor

This commit is contained in:
jonaswinkler
2021-02-24 23:10:59 +01:00
378 changed files with 37340 additions and 5324 deletions

BIN
src/clash.pdf Normal file

Binary file not shown.

View File

@@ -1,10 +1,7 @@
from django.contrib import admin
from django.utils.html import format_html, format_html_join
from django.utils.safestring import mark_safe
from whoosh.writing import AsyncWriter
from . import index
from .models import Correspondent, Document, DocumentType, Log, Tag
from .models import Correspondent, Document, DocumentType, Tag, \
SavedView, SavedViewFilterRule
class CorrespondentAdmin(admin.ModelAdmin):
@@ -17,8 +14,6 @@ class CorrespondentAdmin(admin.ModelAdmin):
list_filter = ("matching_algorithm",)
list_editable = ("match", "matching_algorithm")
readonly_fields = ("slug",)
class TagAdmin(admin.ModelAdmin):
@@ -31,8 +26,6 @@ class TagAdmin(admin.ModelAdmin):
list_filter = ("colour", "matching_algorithm")
list_editable = ("colour", "match", "matching_algorithm")
readonly_fields = ("slug", )
class DocumentTypeAdmin(admin.ModelAdmin):
@@ -44,32 +37,40 @@ class DocumentTypeAdmin(admin.ModelAdmin):
list_filter = ("matching_algorithm",)
list_editable = ("match", "matching_algorithm")
readonly_fields = ("slug",)
class DocumentAdmin(admin.ModelAdmin):
search_fields = ("correspondent__name", "title", "content", "tags__name")
readonly_fields = ("added", "mime_type", "storage_type", "filename")
readonly_fields = (
"added",
"modified",
"mime_type",
"storage_type",
"filename",
"checksum",
"archive_filename",
"archive_checksum"
)
list_display_links = ("title",)
list_display = (
"correspondent",
"id",
"title",
"tags_",
"created",
"mime_type",
"filename",
"archive_filename"
)
list_filter = (
"document_type",
"tags",
"correspondent"
("mime_type"),
("archive_serial_number", admin.EmptyFieldListFilter),
("archive_filename", admin.EmptyFieldListFilter),
)
filter_horizontal = ("tags",)
ordering = ["-created", "correspondent"]
ordering = ["-id"]
date_hierarchy = "created"
@@ -81,59 +82,40 @@ class DocumentAdmin(admin.ModelAdmin):
created_.short_description = "Created"
def delete_queryset(self, request, queryset):
ix = index.open_index()
with AsyncWriter(ix) as writer:
from documents import index
with index.open_index_writer() as writer:
for o in queryset:
index.remove_document(writer, o)
super(DocumentAdmin, self).delete_queryset(request, queryset)
def delete_model(self, request, obj):
from documents import index
index.remove_document_from_index(obj)
super(DocumentAdmin, self).delete_model(request, obj)
def save_model(self, request, obj, form, change):
from documents import index
index.add_or_update_document(obj)
super(DocumentAdmin, self).save_model(request, obj, form, change)
@mark_safe
def tags_(self, obj):
r = ""
for tag in obj.tags.all():
r += self._html_tag(
"span",
tag.slug + ", "
)
return r
@staticmethod
def _html_tag(kind, inside=None, **kwargs):
attributes = format_html_join(' ', '{}="{}"', kwargs.items())
if inside is not None:
return format_html("<{kind} {attributes}>{inside}</{kind}>",
kind=kind, attributes=attributes, inside=inside)
return format_html("<{} {}/>", kind, attributes)
class RuleInline(admin.TabularInline):
model = SavedViewFilterRule
class LogAdmin(admin.ModelAdmin):
class SavedViewAdmin(admin.ModelAdmin):
def has_add_permission(self, request):
return False
list_display = ("name", "user")
def has_change_permission(self, request, obj=None):
return False
list_display = ("created", "message", "level",)
list_filter = ("level", "created",)
ordering = ('-created',)
list_display_links = ("created", "message")
inlines = [
RuleInline
]
admin.site.register(Correspondent, CorrespondentAdmin)
admin.site.register(Tag, TagAdmin)
admin.site.register(DocumentType, DocumentTypeAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(Log, LogAdmin)
admin.site.register(SavedView, SavedViewAdmin)

View File

@@ -1,34 +1,30 @@
from django.apps import AppConfig
from django.utils.translation import gettext_lazy as _
class DocumentsConfig(AppConfig):
name = "documents"
def ready(self):
verbose_name = _("Documents")
from .signals import document_consumption_started
def ready(self):
from .signals import document_consumption_finished
from .signals.handlers import (
add_inbox_tags,
run_pre_consume_script,
run_post_consume_script,
set_log_entry,
set_correspondent,
set_document_type,
set_tags,
add_to_index
)
document_consumption_started.connect(run_pre_consume_script)
document_consumption_finished.connect(add_inbox_tags)
document_consumption_finished.connect(set_correspondent)
document_consumption_finished.connect(set_document_type)
document_consumption_finished.connect(set_tags)
document_consumption_finished.connect(set_log_entry)
document_consumption_finished.connect(add_to_index)
document_consumption_finished.connect(run_post_consume_script)
AppConfig.ready(self)

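For reference, the receivers wired up above are ordinary Django signal handlers. A minimal sketch of attaching a custom receiver to the same signal (the handler name is illustrative, not part of this commit):

    from documents.signals import document_consumption_finished

    def notify_on_consume(sender, document=None, logging_group=None, **kwargs):
        # Receivers get the finished Document instance as a keyword argument.
        print(f"Consumed: {document}")

    document_consumption_finished.connect(notify_on_consume)
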
View File

@@ -0,0 +1,60 @@
from zipfile import ZipFile
from documents.models import Document
class BulkArchiveStrategy:
def __init__(self, zipf: ZipFile):
self.zipf = zipf
def make_unique_filename(self,
doc: Document,
archive: bool = False,
folder: str = ""):
counter = 0
while True:
filename = folder + doc.get_public_filename(archive, counter)
if filename in self.zipf.namelist():
counter += 1
else:
return filename
def add_document(self, doc: Document):
raise NotImplementedError() # pragma: no cover
class OriginalsOnlyStrategy(BulkArchiveStrategy):
def add_document(self, doc: Document):
self.zipf.write(doc.source_path, self.make_unique_filename(doc))
class ArchiveOnlyStrategy(BulkArchiveStrategy):
def __init__(self, zipf):
super(ArchiveOnlyStrategy, self).__init__(zipf)
def add_document(self, doc: Document):
if doc.has_archive_version:
self.zipf.write(doc.archive_path,
self.make_unique_filename(doc, archive=True))
else:
self.zipf.write(doc.source_path,
self.make_unique_filename(doc))
class OriginalAndArchiveStrategy(BulkArchiveStrategy):
def add_document(self, doc: Document):
if doc.has_archive_version:
self.zipf.write(
doc.archive_path, self.make_unique_filename(
doc, archive=True, folder="archive/"
)
)
self.zipf.write(
doc.source_path,
self.make_unique_filename(doc, folder="originals/")
)

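A minimal usage sketch for the strategies above, assuming the new module lands at documents.bulk_download (the caller is illustrative, not part of this commit):

    from zipfile import ZipFile

    from documents.bulk_download import OriginalAndArchiveStrategy
    from documents.models import Document

    with ZipFile("/tmp/export.zip", "w") as zipf:
        strategy = OriginalAndArchiveStrategy(zipf)
        for doc in Document.objects.filter(id__in=[1, 2, 3]):
            # Writes archive versions under archive/ and originals under
            # originals/, deduplicating names via make_unique_filename().
            strategy.add_document(doc)
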
src/documents/bulk_edit.py Normal file
View File

@@ -0,0 +1,106 @@
import itertools
from django.db.models import Q
from django_q.tasks import async_task
from documents.models import Document, Correspondent, DocumentType
def set_correspondent(doc_ids, correspondent):
if correspondent:
correspondent = Correspondent.objects.get(id=correspondent)
qs = Document.objects.filter(
Q(id__in=doc_ids) & ~Q(correspondent=correspondent))
affected_docs = [doc.id for doc in qs]
qs.update(correspondent=correspondent)
async_task(
"documents.tasks.bulk_update_documents", document_ids=affected_docs)
return "OK"
def set_document_type(doc_ids, document_type):
if document_type:
document_type = DocumentType.objects.get(id=document_type)
qs = Document.objects.filter(
Q(id__in=doc_ids) & ~Q(document_type=document_type))
affected_docs = [doc.id for doc in qs]
qs.update(document_type=document_type)
async_task(
"documents.tasks.bulk_update_documents", document_ids=affected_docs)
return "OK"
def add_tag(doc_ids, tag):
qs = Document.objects.filter(Q(id__in=doc_ids) & ~Q(tags__id=tag))
affected_docs = [doc.id for doc in qs]
DocumentTagRelationship = Document.tags.through
DocumentTagRelationship.objects.bulk_create([
DocumentTagRelationship(
document_id=doc, tag_id=tag) for doc in affected_docs
])
async_task(
"documents.tasks.bulk_update_documents", document_ids=affected_docs)
return "OK"
def remove_tag(doc_ids, tag):
qs = Document.objects.filter(Q(id__in=doc_ids) & Q(tags__id=tag))
affected_docs = [doc.id for doc in qs]
DocumentTagRelationship = Document.tags.through
DocumentTagRelationship.objects.filter(
Q(document_id__in=affected_docs) &
Q(tag_id=tag)
).delete()
async_task(
"documents.tasks.bulk_update_documents", document_ids=affected_docs)
return "OK"
def modify_tags(doc_ids, add_tags, remove_tags):
qs = Document.objects.filter(id__in=doc_ids)
affected_docs = [doc.id for doc in qs]
DocumentTagRelationship = Document.tags.through
DocumentTagRelationship.objects.filter(
document_id__in=affected_docs,
tag_id__in=remove_tags,
).delete()
DocumentTagRelationship.objects.bulk_create([DocumentTagRelationship(
document_id=doc, tag_id=tag) for (doc, tag) in itertools.product(
affected_docs, add_tags)
], ignore_conflicts=True)
async_task(
"documents.tasks.bulk_update_documents", document_ids=affected_docs)
return "OK"
def delete(doc_ids):
Document.objects.filter(id__in=doc_ids).delete()
from documents import index
with index.open_index_writer() as writer:
for id in doc_ids:
index.remove_document_by_id(writer, id)
return "OK"

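Each helper above mutates the documents in bulk and then queues a single documents.tasks.bulk_update_documents task for the affected ids, rather than reindexing per document. An assumed call-site sketch (ids are made up):

    from documents import bulk_edit

    doc_ids = [10, 11, 12]
    bulk_edit.set_correspondent(doc_ids, correspondent=4)  # pk of a Correspondent
    bulk_edit.modify_tags(doc_ids, add_tags=[1], remove_tags=[2])
    bulk_edit.delete([13])  # also removes the documents from the search index
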
View File

@@ -2,6 +2,7 @@ import textwrap
from django.conf import settings
from django.core.checks import Error, register
from django.core.exceptions import FieldError
from django.db.utils import OperationalError, ProgrammingError
from documents.signals import document_consumer_declaration
@@ -16,7 +17,7 @@ def changed_password_check(app_configs, **kwargs):
try:
encrypted_doc = Document.objects.filter(
storage_type=Document.STORAGE_TYPE_GPG).first()
except (OperationalError, ProgrammingError):
except (OperationalError, ProgrammingError, FieldError):
return [] # No documents table yet
if encrypted_doc:
@@ -50,6 +51,6 @@ def parser_check(app_configs, **kwargs):
if len(parsers) == 0:
return [Error("No parsers found. This is a bug. The consumer won't be "
"able to onsume any documents without parsers.")]
"able to consume any documents without parsers.")]
else:
return []

View File

@@ -5,10 +5,6 @@ import pickle
import re
from django.conf import settings
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
from sklearn.utils.multiclass import type_of_target
from documents.models import Document, MatchingModel
@@ -17,7 +13,7 @@ class IncompatibleClassifierVersionError(Exception):
pass
logger = logging.getLogger(__name__)
logger = logging.getLogger("paperless.classifier")
def preprocess_content(content):
@@ -26,15 +22,40 @@ def preprocess_content(content):
return content
def load_classifier():
if not os.path.isfile(settings.MODEL_FILE):
logger.debug(
f"Document classification model does not exist (yet), not "
f"performing automatic matching."
)
return None
classifier = DocumentClassifier()
try:
classifier.load()
except (EOFError, IncompatibleClassifierVersionError) as e:
# there's something wrong with the model file.
logger.exception(
f"Unrecoverable error while loading document "
f"classification model, deleting model file."
)
os.unlink(settings.MODEL_FILE)
classifier = None
except OSError as e:
logger.error(
f"Error while loading document classification model: {str(e)}"
)
classifier = None
return classifier
class DocumentClassifier(object):
FORMAT_VERSION = 6
def __init__(self):
# mtime of the model file on disk. used to prevent reloading when
# nothing has changed.
self.classifier_version = 0
# hash of the training data. used to prevent re-training when the
# training data has not changed.
self.data_hash = None
@@ -45,30 +66,23 @@ class DocumentClassifier(object):
self.correspondent_classifier = None
self.document_type_classifier = None
def reload(self):
if os.path.getmtime(settings.MODEL_FILE) > self.classifier_version:
with open(settings.MODEL_FILE, "rb") as f:
schema_version = pickle.load(f)
def load(self):
with open(settings.MODEL_FILE, "rb") as f:
schema_version = pickle.load(f)
if schema_version != self.FORMAT_VERSION:
raise IncompatibleClassifierVersionError(
"Cannor load classifier, incompatible versions.")
else:
if self.classifier_version > 0:
# Don't be confused by this check. It's simply here
# so that we won't log anything on initial reload.
logger.info("Classifier updated on disk, "
"reloading classifier models")
self.data_hash = pickle.load(f)
self.data_vectorizer = pickle.load(f)
self.tags_binarizer = pickle.load(f)
if schema_version != self.FORMAT_VERSION:
raise IncompatibleClassifierVersionError(
"Cannor load classifier, incompatible versions.")
else:
self.data_hash = pickle.load(f)
self.data_vectorizer = pickle.load(f)
self.tags_binarizer = pickle.load(f)
self.tags_classifier = pickle.load(f)
self.correspondent_classifier = pickle.load(f)
self.document_type_classifier = pickle.load(f)
self.classifier_version = os.path.getmtime(settings.MODEL_FILE)
self.tags_classifier = pickle.load(f)
self.correspondent_classifier = pickle.load(f)
self.document_type_classifier = pickle.load(f)
def save_classifier(self):
def save(self):
with open(settings.MODEL_FILE, "wb") as f:
pickle.dump(self.FORMAT_VERSION, f)
pickle.dump(self.data_hash, f)
@@ -81,13 +95,14 @@ class DocumentClassifier(object):
pickle.dump(self.document_type_classifier, f)
def train(self):
data = list()
labels_tags = list()
labels_correspondent = list()
labels_document_type = list()
# Step 1: Extract and preprocess training data from the database.
logging.getLogger(__name__).debug("Gathering data from database...")
logger.debug("Gathering data from database...")
m = hashlib.sha1()
for doc in Document.objects.order_by('pk').exclude(tags__is_inbox_tag=True): # NOQA: E501
preprocessed_content = preprocess_content(doc.content)
@@ -134,7 +149,7 @@ class DocumentClassifier(object):
num_correspondents = len(set(labels_correspondent) | {-1}) - 1
num_document_types = len(set(labels_document_type) | {-1}) - 1
logging.getLogger(__name__).debug(
logger.debug(
"{} documents, {} tag(s), {} correspondent(s), "
"{} document type(s).".format(
len(data),
@@ -144,8 +159,12 @@ class DocumentClassifier(object):
)
)
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
# Step 2: vectorize data
logging.getLogger(__name__).debug("Vectorizing data...")
logger.debug("Vectorizing data...")
self.data_vectorizer = CountVectorizer(
analyzer="word",
ngram_range=(1, 2),
@@ -155,7 +174,7 @@ class DocumentClassifier(object):
# Step 3: train the classifiers
if num_tags > 0:
logging.getLogger(__name__).debug("Training tags classifier...")
logger.debug("Training tags classifier...")
if num_tags == 1:
# Special case where only one tag has auto:
@@ -174,12 +193,12 @@ class DocumentClassifier(object):
self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
else:
self.tags_classifier = None
logging.getLogger(__name__).debug(
logger.debug(
"There are no tags. Not training tags classifier."
)
if num_correspondents > 0:
logging.getLogger(__name__).debug(
logger.debug(
"Training correspondent classifier..."
)
self.correspondent_classifier = MLPClassifier(tol=0.01)
@@ -189,13 +208,13 @@ class DocumentClassifier(object):
)
else:
self.correspondent_classifier = None
logging.getLogger(__name__).debug(
logger.debug(
"There are no correspondents. Not training correspondent "
"classifier."
)
if num_document_types > 0:
logging.getLogger(__name__).debug(
logger.debug(
"Training document type classifier..."
)
self.document_type_classifier = MLPClassifier(tol=0.01)
@@ -205,7 +224,7 @@ class DocumentClassifier(object):
)
else:
self.document_type_classifier = None
logging.getLogger(__name__).debug(
logger.debug(
"There are no document types. Not training document type "
"classifier."
)
@@ -237,6 +256,8 @@ class DocumentClassifier(object):
return None
def predict_tags(self, content):
from sklearn.utils.multiclass import type_of_target
if self.tags_classifier:
X = self.data_vectorizer.transform([preprocess_content(content)])
y = self.tags_classifier.predict(X)

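With reload()/save_classifier() replaced by load()/save(), the new load_classifier() helper becomes the single entry point and returns None when no usable model exists. A sketch of the expected lifecycle (assumed call site, not part of this commit):

    from documents.classifier import DocumentClassifier, load_classifier

    classifier = load_classifier()
    if classifier is None:
        # No model file yet, or it was incompatible/corrupt and got deleted.
        classifier = DocumentClassifier()
        classifier.train()
        classifier.save()
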
View File

@@ -1,20 +1,25 @@
import datetime
import hashlib
import logging
import os
import uuid
from subprocess import Popen
import magic
from asgiref.sync import async_to_sync
from channels.layers import get_channel_layer
from django.conf import settings
from django.db import transaction
from django.db.models import Q
from django.utils import timezone
from filelock import FileLock
from rest_framework.reverse import reverse
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
from .file_handling import create_source_path_directory
from .classifier import load_classifier
from .file_handling import create_source_path_directory, \
generate_unique_filename
from .loggers import LoggingMixin
from .models import Document, FileInfo, Correspondent, DocumentType, Tag
from .parsers import ParseError, get_parser_class_for_mime_type, \
get_supported_file_extensions, parse_date
from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
from .signals import (
document_consumption_finished,
document_consumption_started
@@ -25,8 +30,45 @@ class ConsumerError(Exception):
pass
MESSAGE_DOCUMENT_ALREADY_EXISTS = "document_already_exists"
MESSAGE_FILE_NOT_FOUND = "file_not_found"
MESSAGE_PRE_CONSUME_SCRIPT_NOT_FOUND = "pre_consume_script_not_found"
MESSAGE_PRE_CONSUME_SCRIPT_ERROR = "pre_consume_script_error"
MESSAGE_POST_CONSUME_SCRIPT_NOT_FOUND = "post_consume_script_not_found"
MESSAGE_POST_CONSUME_SCRIPT_ERROR = "post_consume_script_error"
MESSAGE_NEW_FILE = "new_file"
MESSAGE_UNSUPPORTED_TYPE = "unsupported_type"
MESSAGE_PARSING_DOCUMENT = "parsing_document"
MESSAGE_GENERATING_THUMBNAIL = "generating_thumbnail"
MESSAGE_PARSE_DATE = "parse_date"
MESSAGE_SAVE_DOCUMENT = "save_document"
MESSAGE_FINISHED = "finished"
class Consumer(LoggingMixin):
logging_name = "paperless.consumer"
def _send_progress(self, current_progress, max_progress, status,
message=None, document_id=None):
payload = {
'filename': os.path.basename(self.filename) if self.filename else None, # NOQA: E501
'task_id': self.task_id,
'current_progress': current_progress,
'max_progress': max_progress,
'status': status,
'message': message,
'document_id': document_id
}
async_to_sync(self.channel_layer.group_send)("status_updates",
{'type': 'status_update',
'data': payload})
def _fail(self, message, log_message=None):
self._send_progress(100, 100, 'FAILED', message)
self.log("error", log_message or message)
raise ConsumerError(f"{self.filename}: {log_message or message}")
def __init__(self):
super().__init__()
self.path = None
@@ -35,11 +77,16 @@ class Consumer(LoggingMixin):
self.override_correspondent_id = None
self.override_tag_ids = None
self.override_document_type_id = None
self.task_id = None
self.channel_layer = get_channel_layer()
def pre_check_file_exists(self):
if not os.path.isfile(self.path):
raise ConsumerError("Cannot consume {}: It is not a file".format(
self.path))
self._fail(
MESSAGE_FILE_NOT_FOUND,
f"Cannot consume {self.path}: File not found."
)
def pre_check_duplicate(self):
with open(self.path, "rb") as f:
@@ -47,8 +94,9 @@ class Consumer(LoggingMixin):
if Document.objects.filter(Q(checksum=checksum) | Q(archive_checksum=checksum)).exists(): # NOQA: E501
if settings.CONSUMER_DELETE_DUPLICATES:
os.unlink(self.path)
raise ConsumerError(
"Not consuming {}: It is a duplicate.".format(self.filename)
self._fail(
MESSAGE_DOCUMENT_ALREADY_EXISTS,
f"Not consuming {self.filename}: It is a duplicate."
)
def pre_check_directories(self):
@@ -57,13 +105,62 @@ class Consumer(LoggingMixin):
os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
os.makedirs(settings.ARCHIVE_DIR, exist_ok=True)
def run_pre_consume_script(self):
if not settings.PRE_CONSUME_SCRIPT:
return
if not os.path.isfile(settings.PRE_CONSUME_SCRIPT):
self._fail(
MESSAGE_PRE_CONSUME_SCRIPT_NOT_FOUND,
f"Configured pre-consume script "
f"{settings.PRE_CONSUME_SCRIPT} does not exist.")
try:
Popen((settings.PRE_CONSUME_SCRIPT, self.path)).wait()
except Exception as e:
self._fail(
MESSAGE_PRE_CONSUME_SCRIPT_ERROR,
f"Error while executing pre-consume script: {e}"
)
def run_post_consume_script(self, document):
if not settings.POST_CONSUME_SCRIPT:
return
if not os.path.isfile(settings.POST_CONSUME_SCRIPT):
self._fail(
MESSAGE_POST_CONSUME_SCRIPT_NOT_FOUND,
f"Configured post-consume script "
f"{settings.POST_CONSUME_SCRIPT} does not exist."
)
try:
Popen((
settings.POST_CONSUME_SCRIPT,
str(document.pk),
document.get_public_filename(),
os.path.normpath(document.source_path),
os.path.normpath(document.thumbnail_path),
reverse("document-download", kwargs={"pk": document.pk}),
reverse("document-thumb", kwargs={"pk": document.pk}),
str(document.correspondent),
str(",".join(document.tags.all().values_list(
"name", flat=True)))
)).wait()
except Exception as e:
self._fail(
MESSAGE_POST_CONSUME_SCRIPT_ERROR,
f"Error while executing post-consume script: {e}"
)
def try_consume_file(self,
path,
override_filename=None,
override_title=None,
override_correspondent_id=None,
override_document_type_id=None,
override_tag_ids=None):
override_tag_ids=None,
task_id=None):
"""
Return the document object if it was successfully created.
"""
@@ -74,6 +171,9 @@ class Consumer(LoggingMixin):
self.override_correspondent_id = override_correspondent_id
self.override_document_type_id = override_document_type_id
self.override_tag_ids = override_tag_ids
self.task_id = task_id or str(uuid.uuid4())
self._send_progress(0, 100, 'STARTING', MESSAGE_NEW_FILE)
# this is for grouping logging entries for this particular file
# together.
@@ -86,19 +186,20 @@ class Consumer(LoggingMixin):
self.pre_check_directories()
self.pre_check_duplicate()
self.log("info", "Consuming {}".format(self.filename))
self.log("info", f"Consuming {self.filename}")
# Determine the parser class.
mime_type = magic.from_file(self.path, mime=True)
self.log("debug", f"Detected mime type: {mime_type}")
parser_class = get_parser_class_for_mime_type(mime_type)
if not parser_class:
raise ConsumerError(f"No parsers abvailable for {self.filename}")
else:
self.log("debug",
f"Parser: {parser_class.__name__} "
f"based on mime type {mime_type}")
self._fail(
MESSAGE_UNSUPPORTED_TYPE,
f"Unsupported mime type {mime_type}"
)
# Notify all listeners that we're going to do some work.
@@ -108,35 +209,54 @@ class Consumer(LoggingMixin):
logging_group=self.logging_group
)
self.run_pre_consume_script()
def progress_callback(current_progress, max_progress):
# recalculate progress to be within 20 and 80
p = int((current_progress / max_progress) * 50 + 20)
self._send_progress(p, 100, "WORKING")
# This doesn't parse the document yet, but gives us a parser.
document_parser = parser_class(self.logging_group)
document_parser = parser_class(self.logging_group, progress_callback)
self.log("debug", f"Parser: {type(document_parser).__name__}")
# However, this already created working directories which we have to
# clean up.
# Parse the document. This may take some time.
text = None
date = None
thumbnail = None
archive_path = None
try:
self._send_progress(20, 100, 'WORKING', MESSAGE_PARSING_DOCUMENT)
self.log("debug", "Parsing {}...".format(self.filename))
document_parser.parse(self.path, mime_type)
document_parser.parse(self.path, mime_type, self.filename)
self.log("debug", f"Generating thumbnail for {self.filename}...")
self._send_progress(70, 100, 'WORKING',
MESSAGE_GENERATING_THUMBNAIL)
thumbnail = document_parser.get_optimised_thumbnail(
self.path, mime_type)
self.path, mime_type, self.filename)
text = document_parser.get_text()
date = document_parser.get_date()
if not date:
self._send_progress(90, 100, 'WORKING',
MESSAGE_PARSE_DATE)
date = parse_date(self.filename, text)
archive_path = document_parser.get_archive_path()
except ParseError as e:
document_parser.cleanup()
self.log(
"error",
f"Error while consuming document {self.filename}: {e}")
raise ConsumerError(e)
self._fail(
str(e),
f"Error while consuming document {self.filename}: {e}"
)
# Prepare the document classifier.
@@ -144,14 +264,9 @@ class Consumer(LoggingMixin):
# reloading the classifier multiple times, since there are multiple
# post-consume hooks that all require the classifier.
try:
classifier = DocumentClassifier()
classifier.reload()
except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
logging.getLogger(__name__).warning(
"Cannot classify documents: {}.".format(e))
classifier = None
classifier = load_classifier()
self._send_progress(95, 100, 'WORKING', MESSAGE_SAVE_DOCUMENT)
# now that everything is done, we can start to store the document
# in the system. This will be a transaction and reasonably fast.
try:
@@ -176,51 +291,55 @@ class Consumer(LoggingMixin):
# After everything is in the database, copy the files into
# place. If this fails, we'll also rollback the transaction.
with FileLock(settings.MEDIA_LOCK):
document.filename = generate_unique_filename(document)
create_source_path_directory(document.source_path)
# TODO: not required, since this is done by the file handling
# logic
create_source_path_directory(document.source_path)
self._write(document.storage_type,
self.path, document.source_path)
self._write(document.storage_type,
thumbnail, document.thumbnail_path)
if archive_path and os.path.isfile(archive_path):
self._write(document.storage_type,
archive_path, document.archive_path)
self.path, document.source_path)
with open(archive_path, 'rb') as f:
document.archive_checksum = hashlib.md5(
f.read()).hexdigest()
document.save()
self._write(document.storage_type,
thumbnail, document.thumbnail_path)
# After performing all database operations and moving files
# into place, tell paperless where the file is.
document.filename = os.path.basename(document.source_path)
# Saving the document now will trigger the filename handling
# logic.
if archive_path and os.path.isfile(archive_path):
document.archive_filename = generate_unique_filename(
document,
archive_filename=True
)
create_source_path_directory(document.archive_path)
self._write(document.storage_type,
archive_path, document.archive_path)
with open(archive_path, 'rb') as f:
document.archive_checksum = hashlib.md5(
f.read()).hexdigest()
# Don't save with the lock active. Saving will cause the file
# renaming logic to acquire the lock as well.
document.save()
# Delete the file only if it was successfully consumed
self.log("debug", "Deleting file {}".format(self.path))
os.unlink(self.path)
except Exception as e:
self.log(
"error",
self._fail(
str(e),
f"The following error occured while consuming "
f"{self.filename}: {e}"
)
raise ConsumerError(e)
finally:
document_parser.cleanup()
self.run_post_consume_script(document)
self.log(
"info",
"Document {} consumption finished".format(document)
)
self._send_progress(100, 100, 'SUCCESS', MESSAGE_FINISHED, document.id)
return document
def _store(self, text, date, mime_type):
@@ -240,8 +359,7 @@ class Consumer(LoggingMixin):
with open(self.path, "rb") as f:
document = Document.objects.create(
correspondent=file_info.correspondent,
title=file_info.title,
title=(self.override_title or file_info.title)[:127],
content=text,
mime_type=mime_type,
checksum=hashlib.md5(f.read()).hexdigest(),
@@ -250,20 +368,13 @@ class Consumer(LoggingMixin):
storage_type=storage_type
)
relevant_tags = set(file_info.tags)
if relevant_tags:
tag_names = ", ".join([t.slug for t in relevant_tags])
self.log("debug", "Tagging with {}".format(tag_names))
document.tags.add(*relevant_tags)
self.apply_overrides(document)
document.save()
return document
def apply_overrides(self, document):
if self.override_title:
document.title = self.override_title
if self.override_correspondent_id:
document.correspondent = Correspondent.objects.get(
pk=self.override_correspondent_id)

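The new task_id parameter ties the websocket progress payloads sent by _send_progress() to the upload or task that queued the file. An assumed invocation sketch (path and filename are made up):

    import uuid

    from documents.consumer import Consumer

    consumer = Consumer()
    document = consumer.try_consume_file(
        "/tmp/scan.pdf",
        override_filename="scan.pdf",
        task_id=str(uuid.uuid4()),  # echoed back in every status update
    )
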
View File

@@ -1,11 +1,22 @@
import datetime
import logging
import os
from collections import defaultdict
import pathvalidate
from django.conf import settings
from django.template.defaultfilters import slugify
logger = logging.getLogger("paperless.filehandling")
class defaultdictNoStr(defaultdict):
def __str__(self):
raise ValueError("Don't use {tags} directly.")
def create_source_path_directory(source_path):
os.makedirs(os.path.dirname(source_path), exist_ok=True)
@@ -68,44 +79,119 @@ def many_to_dictionary(field):
return mydictionary
def generate_filename(doc):
def generate_unique_filename(doc,
archive_filename=False):
"""
Generates a unique filename for doc in settings.ORIGINALS_DIR.
The returned filename is guaranteed to be either the current filename
of the document if unchanged, or a new filename that does not correspond
to any existing files. The function will append _01, _02, etc. to the
filename before the extension to avoid conflicts.
If archive_filename is True, return a unique archive filename instead.
"""
if archive_filename:
old_filename = doc.archive_filename
root = settings.ARCHIVE_DIR
else:
old_filename = doc.filename
root = settings.ORIGINALS_DIR
# If generating archive filenames, try to make a name that is similar to
# the original filename first.
if archive_filename and doc.filename:
new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
if new_filename == old_filename or not os.path.exists(os.path.join(root, new_filename)): # NOQA: E501
return new_filename
counter = 0
while True:
new_filename = generate_filename(
doc, counter, archive_filename=archive_filename)
if new_filename == old_filename:
# still the same as before.
return new_filename
if os.path.exists(os.path.join(root, new_filename)):
counter += 1
else:
return new_filename
def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
path = ""
try:
if settings.PAPERLESS_FILENAME_FORMAT is not None:
tags = defaultdict(lambda: slugify(None),
many_to_dictionary(doc.tags))
path = settings.PAPERLESS_FILENAME_FORMAT.format(
correspondent=slugify(doc.correspondent),
title=slugify(doc.title),
created=slugify(doc.created),
created_year=doc.created.year if doc.created else "none",
created_month=doc.created.month if doc.created else "none",
created_day=doc.created.day if doc.created else "none",
added=slugify(doc.added),
added_year=doc.added.year if doc.added else "none",
added_month=doc.added.month if doc.added else "none",
added_day=doc.added.day if doc.added else "none",
tags=tags,
tags = defaultdictNoStr(lambda: slugify(None),
many_to_dictionary(doc.tags))
tag_list = pathvalidate.sanitize_filename(
",".join(sorted(
[tag.name for tag in doc.tags.all()]
)),
replacement_text="-"
)
if doc.correspondent:
correspondent = pathvalidate.sanitize_filename(
doc.correspondent.name, replacement_text="-"
)
else:
correspondent = "none"
if doc.document_type:
document_type = pathvalidate.sanitize_filename(
doc.document_type.name, replacement_text="-"
)
else:
document_type = "none"
if doc.archive_serial_number:
asn = str(doc.archive_serial_number)
else:
asn = "none"
path = settings.PAPERLESS_FILENAME_FORMAT.format(
title=pathvalidate.sanitize_filename(
doc.title, replacement_text="-"),
correspondent=correspondent,
document_type=document_type,
created=datetime.date.isoformat(doc.created),
created_year=doc.created.year if doc.created else "none",
created_month=f"{doc.created.month:02}" if doc.created else "none", # NOQA: E501
created_day=f"{doc.created.day:02}" if doc.created else "none",
added=datetime.date.isoformat(doc.added),
added_year=doc.added.year if doc.added else "none",
added_month=f"{doc.added.month:02}" if doc.added else "none",
added_day=f"{doc.added.day:02}" if doc.added else "none",
asn=asn,
tags=tags,
tag_list=tag_list
).strip()
path = path.strip(os.sep)
except (ValueError, KeyError, IndexError):
logging.getLogger(__name__).warning(
logger.warning(
f"Invalid PAPERLESS_FILENAME_FORMAT: "
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
# Always append the primary key to guarantee uniqueness of filename
counter_str = f"_{counter:02}" if counter else ""
filetype_str = ".pdf" if archive_filename else doc.file_type
if len(path) > 0:
filename = "%s-%07i%s" % (path, doc.pk, doc.file_type)
filename = f"{path}{counter_str}{filetype_str}"
else:
filename = "%07i%s" % (doc.pk, doc.file_type)
filename = f"{doc.pk:07}{counter_str}{filetype_str}"
# Append .gpg for encrypted files
if doc.storage_type == doc.STORAGE_TYPE_GPG:
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
filename += ".gpg"
return filename
def archive_name_from_filename(filename):
return os.path.splitext(filename)[0] + ".pdf"

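A worked illustration of the counter suffix logic above (values are made up, not part of this commit): generate_unique_filename() keeps bumping the counter until generate_filename() yields a name no existing file uses.

    path = "2021/ACME/Invoice 12"           # formatted via PAPERLESS_FILENAME_FORMAT
    counter = 2                             # first two candidates were taken
    counter_str = f"_{counter:02}" if counter else ""
    filename = f"{path}{counter_str}.pdf"   # "2021/ACME/Invoice 12_02.pdf"
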
View File

@@ -4,7 +4,7 @@ from .models import Correspondent, Document, Tag, DocumentType, Log
CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"]
ID_KWARGS = ["in", "exact"]
INT_KWARGS = ["exact", "gt", "gte", "lt", "lte"]
INT_KWARGS = ["exact", "gt", "gte", "lt", "lte", "isnull"]
DATE_KWARGS = ["year", "month", "day", "date__gt", "gt", "date__lt", "lt"]
@@ -37,6 +37,10 @@ class DocumentTypeFilterSet(FilterSet):
class TagsFilter(Filter):
def __init__(self, exclude=False):
super(TagsFilter, self).__init__()
self.exclude = exclude
def filter(self, qs, value):
if not value:
return qs
@@ -47,7 +51,10 @@ class TagsFilter(Filter):
return qs
for tag_id in tag_ids:
qs = qs.filter(tags__id=tag_id)
if self.exclude:
qs = qs.exclude(tags__id=tag_id)
else:
qs = qs.filter(tags__id=tag_id)
return qs
@@ -74,6 +81,8 @@ class DocumentFilterSet(FilterSet):
tags__id__all = TagsFilter()
tags__id__none = TagsFilter(exclude=True)
is_in_inbox = InboxFilter()
class Meta:
@@ -89,12 +98,14 @@ class DocumentFilterSet(FilterSet):
"added": DATE_KWARGS,
"modified": DATE_KWARGS,
"correspondent": ["isnull"],
"correspondent__id": ID_KWARGS,
"correspondent__name": CHAR_KWARGS,
"tags__id": ID_KWARGS,
"tags__name": CHAR_KWARGS,
"document_type": ["isnull"],
"document_type__id": ID_KWARGS,
"document_type__name": CHAR_KWARGS,

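The new tags__id__none filter is the mirror image of tags__id__all: it excludes documents carrying any of the listed tags. A hedged usage sketch (tag ids are made up, and the comma-separated id format is assumed from the filter's parsing above):

    from documents.filters import DocumentFilterSet
    from documents.models import Document

    fs = DocumentFilterSet(
        {"tags__id__all": "1,2", "tags__id__none": "3"},
        queryset=Document.objects.all(),
    )
    docs = fs.qs  # documents tagged 1 and 2, but not 3
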
View File

@@ -1,59 +0,0 @@
import os
import tempfile
from datetime import datetime
from time import mktime
import magic
from django import forms
from django.conf import settings
from django_q.tasks import async_task
from pathvalidate import validate_filename, ValidationError
from documents.parsers import is_mime_type_supported
class UploadForm(forms.Form):
document = forms.FileField()
def clean_document(self):
document_name = self.cleaned_data.get("document").name
try:
validate_filename(document_name)
except ValidationError:
raise forms.ValidationError("That filename is suspicious.")
document_data = self.cleaned_data.get("document").read()
mime_type = magic.from_buffer(document_data, mime=True)
if not is_mime_type_supported(mime_type):
raise forms.ValidationError("This mime type is not supported.")
return document_name, document_data
def save(self):
"""
Since the consumer already does a lot of work, it's easier just to save
to-be-consumed files to the consumption directory rather than have the
form do that as well. Think of it as a poor-man's queue server.
"""
original_filename, data = self.cleaned_data.get("document")
t = int(mktime(datetime.now().timetuple()))
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
dir=settings.SCRATCH_DIR,
delete=False) as f:
f.write(data)
os.utime(f.name, times=(t, t))
async_task("documents.tasks.consume_file",
f.name,
override_filename=original_filename,
task_name=os.path.basename(original_filename)[:100])

View File

@@ -3,7 +3,7 @@ import os
from contextlib import contextmanager
from django.conf import settings
from whoosh import highlight
from whoosh import highlight, classify, query
from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME
from whoosh.highlight import Formatter, get_text
from whoosh.index import create_in, exists_in, open_dir
@@ -12,7 +12,7 @@ from whoosh.qparser.dateparse import DateParserPlugin
from whoosh.writing import AsyncWriter
logger = logging.getLogger(__name__)
logger = logging.getLogger("paperless.index")
class JsonFormatter(Formatter):
@@ -20,32 +20,37 @@ class JsonFormatter(Formatter):
self.seen = {}
def format_token(self, text, token, replace=False):
seen = self.seen
ttext = self._text(get_text(text, token, replace))
if ttext in seen:
termnum = seen[ttext]
else:
termnum = len(seen)
seen[ttext] = termnum
return {'text': ttext, 'term': termnum}
return {'text': ttext, 'highlight': 'true'}
def format_fragment(self, fragment, replace=False):
output = []
index = fragment.startchar
text = fragment.text
amend_token = None
for t in fragment.matches:
if t.startchar is None:
continue
if t.startchar < index:
continue
if t.startchar > index:
output.append({'text': text[index:t.startchar]})
output.append(self.format_token(text, t, replace))
text_inbetween = text[index:t.startchar]
if amend_token and t.startchar - index < 10:
amend_token['text'] += text_inbetween
else:
output.append({'text': text_inbetween,
'highlight': False})
amend_token = None
token = self.format_token(text, t, replace)
if amend_token:
amend_token['text'] += token['text']
else:
output.append(token)
amend_token = token
index = t.endchar
if index < fragment.endchar:
output.append({'text': text[index:fragment.endchar]})
output.append({'text': text[index:fragment.endchar],
'highlight': False})
return output
def format(self, fragments, replace=False):
@@ -73,16 +78,31 @@ def open_index(recreate=False):
try:
if exists_in(settings.INDEX_DIR) and not recreate:
return open_dir(settings.INDEX_DIR, schema=get_schema())
except Exception as e:
logger.error(f"Error while opening the index: {e}, recreating.")
except Exception:
logger.exception(f"Error while opening the index, recreating.")
if not os.path.isdir(settings.INDEX_DIR):
os.makedirs(settings.INDEX_DIR, exist_ok=True)
return create_in(settings.INDEX_DIR, get_schema())
@contextmanager
def open_index_writer(ix=None, optimize=False):
if ix:
writer = AsyncWriter(ix)
else:
writer = AsyncWriter(open_index())
try:
yield writer
except Exception as e:
logger.exception(str(e))
writer.cancel()
finally:
writer.commit(optimize=optimize)
def update_document(writer, doc):
logger.debug("Indexing {}...".format(doc))
tags = ",".join([t.name for t in doc.tags.all()])
writer.update_document(
id=doc.pk,
@@ -98,39 +118,60 @@ def update_document(writer, doc):
def remove_document(writer, doc):
logger.debug("Removing {} from index...".format(doc))
writer.delete_by_term('id', doc.pk)
remove_document_by_id(writer, doc.pk)
def remove_document_by_id(writer, doc_id):
writer.delete_by_term('id', doc_id)
def add_or_update_document(document):
ix = open_index()
with AsyncWriter(ix) as writer:
with open_index_writer() as writer:
update_document(writer, document)
def remove_document_from_index(document):
ix = open_index()
with AsyncWriter(ix) as writer:
with open_index_writer() as writer:
remove_document(writer, document)
@contextmanager
def query_page(ix, querystring, page):
def query_page(ix, page, querystring, more_like_doc_id, more_like_doc_content):
searcher = ix.searcher()
try:
qp = MultifieldParser(
["content", "title", "correspondent", "tag", "type"],
ix.schema)
qp.add_plugin(DateParserPlugin())
if querystring:
qp = MultifieldParser(
["content", "title", "correspondent", "tag", "type"],
ix.schema)
qp.add_plugin(DateParserPlugin())
str_q = qp.parse(querystring)
corrected = searcher.correct_query(str_q, querystring)
else:
str_q = None
corrected = None
if more_like_doc_id:
docnum = searcher.document_number(id=more_like_doc_id)
kts = searcher.key_terms_from_text(
'content', more_like_doc_content, numterms=20,
model=classify.Bo1Model, normalize=False)
more_like_q = query.Or(
[query.Term('content', word, boost=weight)
for word, weight in kts])
result_page = searcher.search_page(
more_like_q, page, filter=str_q, mask={docnum})
elif str_q:
result_page = searcher.search_page(str_q, page)
else:
raise ValueError(
"Either querystring or more_like_doc_id is required."
)
q = qp.parse(querystring)
result_page = searcher.search_page(q, page)
result_page.results.fragmenter = highlight.ContextFragmenter(
surround=50)
result_page.results.formatter = JsonFormatter()
corrected = searcher.correct_query(q, querystring)
if corrected.query != q:
if corrected and corrected.query != str_q:
corrected_query = corrected.string
else:
corrected_query = None

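A sketch of the new writer contextmanager in use (assumed call site, not part of this commit): it replaces the repeated AsyncWriter(open_index()) pattern, cancels the writer on error, and commits (optionally optimizing) on exit.

    from documents import index

    with index.open_index_writer(optimize=True) as writer:
        for doc_id in (1, 2, 3):  # made-up ids
            index.remove_document_by_id(writer, doc_id)
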
View File

@@ -4,34 +4,25 @@ import uuid
from django.conf import settings
class PaperlessHandler(logging.Handler):
def emit(self, record):
if settings.DISABLE_DBHANDLER:
return
# We have to do the import here or Django will barf when it tries to
# load this because the apps aren't loaded at that point
from .models import Log
kwargs = {"message": record.msg, "level": record.levelno}
if hasattr(record, "group"):
kwargs["group"] = record.group
Log.objects.create(**kwargs)
class LoggingMixin:
logging_group = None
logging_name = None
def renew_logging_group(self):
self.logging_group = uuid.uuid4()
def log(self, level, message):
target = ".".join([self.__class__.__module__, self.__class__.__name__])
logger = logging.getLogger(target)
def log(self, level, message, **kwargs):
if self.logging_name:
logger = logging.getLogger(self.logging_name)
else:
name = ".".join([
self.__class__.__module__,
self.__class__.__name__
])
logger = logging.getLogger(name)
getattr(logger, level)(message, extra={
"group": self.logging_group
})
}, **kwargs)

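With the database log handler gone, setting logging_name on a subclass routes messages to a fixed logger instead of one derived from the class's module path. A minimal sketch (hypothetical subclass, not part of this commit):

    from documents.loggers import LoggingMixin

    class MyWorker(LoggingMixin):
        logging_name = "paperless.myworker"  # assumed example name

    w = MyWorker()
    w.renew_logging_group()
    w.log("info", "hello")  # emitted with extra={"group": <uuid4>}
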
View File

@@ -2,7 +2,6 @@ import os
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from termcolor import colored as coloured
from documents.models import Document
from paperless.db import GnuPG
@@ -26,16 +25,14 @@ class Command(BaseCommand):
def handle(self, *args, **options):
try:
print(coloured(
print(
"\n\nWARNING: This script is going to work directly on your "
"document originals, so\nWARNING: you probably shouldn't run "
"this unless you've got a recent backup\nWARNING: handy. It "
"*should* work without a hitch, but be safe and backup your\n"
"WARNING: stuff first.\n\nHit Ctrl+C to exit now, or Enter to "
"continue.\n\n",
"yellow",
attrs=("bold",)
))
"continue.\n\n"
)
__ = input()
except KeyboardInterrupt:
return
@@ -57,8 +54,8 @@ class Command(BaseCommand):
for document in encrypted_files:
print(coloured("Decrypting {}".format(
document).encode('utf-8'), "green"))
print("Decrypting {}".format(
document).encode('utf-8'))
old_paths = [document.source_path, document.thumbnail_path]
@@ -82,7 +79,8 @@ class Command(BaseCommand):
with open(document.thumbnail_path, "wb") as f:
f.write(raw_thumb)
document.save(update_fields=("storage_type", "filename"))
Document.objects.filter(id=document.id).update(
storage_type=document.storage_type, filename=document.filename)
for path in old_paths:
os.unlink(path)

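The switch from document.save(update_fields=...) to a queryset update() above avoids firing post_save handlers (and with them the file renaming logic) mid-decryption. The same pattern in isolation (assumed id, not part of this commit):

    from documents.models import Document

    doc = Document.objects.get(id=1)
    Document.objects.filter(id=doc.id).update(
        storage_type=Document.STORAGE_TYPE_UNENCRYPTED,
        filename=doc.filename,
    )  # bypasses Document.save() and any signal receivers
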
View File

@@ -5,59 +5,83 @@ import logging
import os
import shutil
import uuid
from time import sleep
import tqdm
from django import db
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db import transaction
from filelock import FileLock
from whoosh.writing import AsyncWriter
from documents.models import Document
from ... import index
from ...file_handling import create_source_path_directory
from ...mixins import Renderable
from ...file_handling import create_source_path_directory, \
generate_unique_filename
from ...parsers import get_parser_class_for_mime_type
logger = logging.getLogger(__name__)
logger = logging.getLogger("paperless.management.archiver")
def handle_document(document):
def handle_document(document_id):
document = Document.objects.get(id=document_id)
mime_type = document.mime_type
parser_class = get_parser_class_for_mime_type(mime_type)
if not parser_class:
logger.error(f"No parser found for mime type {mime_type}, cannot "
f"archive document {document} (ID: {document_id})")
return
parser = parser_class(logging_group=uuid.uuid4())
try:
parser.parse(document.source_path, mime_type)
parser.parse(
document.source_path,
mime_type,
document.get_public_filename())
thumbnail = parser.get_optimised_thumbnail(
document.source_path,
mime_type,
document.get_public_filename()
)
if parser.get_archive_path():
with transaction.atomic():
with open(parser.get_archive_path(), 'rb') as f:
checksum = hashlib.md5(f.read()).hexdigest()
# i'm going to save first so that in case the file move
# I'm going to save first so that in case the file move
# fails, the database is rolled back.
# we also don't use save() since that triggers the filehandling
# We also don't use save() since that triggers the filehandling
# logic, and we don't want that yet (file not yet in place)
document.archive_filename = generate_unique_filename(
document, archive_filename=True)
Document.objects.filter(pk=document.pk).update(
archive_checksum=checksum,
content=parser.get_text()
content=parser.get_text(),
archive_filename=document.archive_filename
)
create_source_path_directory(document.archive_path)
shutil.move(parser.get_archive_path(), document.archive_path)
with FileLock(settings.MEDIA_LOCK):
create_source_path_directory(document.archive_path)
shutil.move(parser.get_archive_path(),
document.archive_path)
shutil.move(thumbnail, document.thumbnail_path)
with AsyncWriter(index.open_index()) as writer:
index.update_document(writer, document)
with index.open_index_writer() as writer:
index.update_document(writer, document)
except Exception as e:
logger.error(f"Error while parsing document {document}: {str(e)}")
logger.exception(f"Error while parsing document {document} "
f"(ID: {document_id})")
finally:
parser.cleanup()
class Command(Renderable, BaseCommand):
class Command(BaseCommand):
help = """
Using the current classification model, assigns correspondents, tags
@@ -66,10 +90,6 @@ class Command(Renderable, BaseCommand):
modified) after their initial import.
""".replace(" ", "")
def __init__(self, *args, **kwargs):
self.verbosity = 0
BaseCommand.__init__(self, *args, **kwargs)
def add_arguments(self, parser):
parser.add_argument(
"-f", "--overwrite",
@@ -98,17 +118,29 @@ class Command(Renderable, BaseCommand):
else:
documents = Document.objects.all()
documents_to_process = list(filter(
lambda d: overwrite or not d.archive_checksum,
documents
document_ids = list(map(
lambda doc: doc.id,
filter(
lambda d: overwrite or not d.has_archive_version,
documents
)
))
logging.getLogger().handlers[0].level = logging.ERROR
with multiprocessing.Pool(processes=settings.TASK_WORKERS) as pool:
list(tqdm.tqdm(
pool.imap_unordered(
handle_document,
documents_to_process
),
total=len(documents_to_process)
))
# Note to future self: this prevents django from reusing database
# connections between processes, which is bad and does not work
# with postgres.
db.connections.close_all()
try:
logging.getLogger().handlers[0].level = logging.ERROR
with multiprocessing.Pool(processes=settings.TASK_WORKERS) as pool:
list(tqdm.tqdm(
pool.imap_unordered(
handle_document,
document_ids
),
total=len(document_ids)
))
except KeyboardInterrupt:
print("Aborting...")

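The close_all() call above matters because forked pool workers would otherwise inherit the parent's database connections. A stripped-down sketch of the pattern (worker body is illustrative, not part of this commit):

    import multiprocessing

    from django import db

    def handle_one(doc_id):
        from documents.models import Document
        return Document.objects.get(id=doc_id).id  # worker opens its own connection

    db.connections.close_all()  # do this before forking
    with multiprocessing.Pool(processes=2) as pool:
        pool.map(handle_one, [1, 2, 3])
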
View File

@@ -1,11 +1,11 @@
import logging
import os
from pathlib import Path
from threading import Thread
from time import sleep
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.utils.text import slugify
from django_q.tasks import async_task
from watchdog.events import FileSystemEventHandler
from watchdog.observers.polling import PollingObserver
@@ -18,21 +18,20 @@ try:
except ImportError:
INotify = flags = None
logger = logging.getLogger(__name__)
logger = logging.getLogger("paperless.management.consumer")
def _tags_from_path(filepath):
"""Walk up the directory tree from filepath to CONSUMPTION_DIr
"""Walk up the directory tree from filepath to CONSUMPTION_DIR
and get or create Tag IDs for every directory.
"""
tag_ids = set()
path_parts = Path(filepath).relative_to(
settings.CONSUMPTION_DIR).parent.parts
for part in path_parts:
tag_ids.add(Tag.objects.get_or_create(
slug=slugify(part),
defaults={"name": part},
)[0].pk)
tag_ids.add(Tag.objects.get_or_create(name__iexact=part, defaults={
"name": part
})[0].pk)
return tag_ids
@@ -47,7 +46,7 @@ def _consume(filepath):
return
if not is_file_ext_supported(os.path.splitext(filepath)[1]):
logger.debug(
logger.warning(
f"Not consuming file {filepath}: Unknown file extension.")
return
@@ -56,10 +55,10 @@ def _consume(filepath):
if settings.CONSUMER_SUBDIRS_AS_TAGS:
tag_ids = _tags_from_path(filepath)
except Exception as e:
logger.error(
"Error creating tags from path: {}".format(e))
logger.exception("Error creating tags from path")
try:
logger.info(f"Adding {filepath} to the task queue.")
async_task("documents.tasks.consume_file",
filepath,
override_tag_ids=tag_ids if tag_ids else None,
@@ -68,14 +67,14 @@ def _consume(filepath):
# Catch all so that the consumer won't crash.
# This is also what the test case is listening for to check for
# errors.
logger.error(
"Error while consuming document: {}".format(e))
logger.exception("Error while consuming document")
def _consume_wait_unmodified(file, num_tries=20, wait_time=1):
def _consume_wait_unmodified(file):
logger.debug(f"Waiting for file {file} to remain unmodified")
mtime = -1
current_try = 0
while current_try < num_tries:
while current_try < settings.CONSUMER_POLLING_RETRY_COUNT:
try:
new_mtime = os.stat(file).st_mtime
except FileNotFoundError:
@@ -86,7 +85,7 @@ def _consume_wait_unmodified(file, num_tries=20, wait_time=1):
_consume(file)
return
mtime = new_mtime
sleep(wait_time)
sleep(settings.CONSUMER_POLLING_DELAY)
current_try += 1
logger.error(f"Timeout while waiting on file {file} to remain unmodified.")
@@ -95,10 +94,14 @@ def _consume_wait_unmodified(file, num_tries=20, wait_time=1):
class Handler(FileSystemEventHandler):
def on_created(self, event):
_consume_wait_unmodified(event.src_path)
Thread(
target=_consume_wait_unmodified, args=(event.src_path,)
).start()
def on_moved(self, event):
_consume_wait_unmodified(event.dest_path)
Thread(
target=_consume_wait_unmodified, args=(event.dest_path,)
).start()
class Command(BaseCommand):
@@ -110,12 +113,7 @@ class Command(BaseCommand):
# This is here primarily for the tests and is irrelevant in production.
stop_flag = False
def __init__(self, *args, **kwargs):
self.logger = logging.getLogger(__name__)
BaseCommand.__init__(self, *args, **kwargs)
self.observer = None
observer = None
def add_arguments(self, parser):
parser.add_argument(
@@ -163,7 +161,7 @@ class Command(BaseCommand):
logger.debug("Consumer exiting.")
def handle_polling(self, directory, recursive):
logging.getLogger(__name__).info(
logger.info(
f"Polling directory for changes: {directory}")
self.observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
self.observer.schedule(Handler(), directory, recursive=recursive)
@@ -178,7 +176,7 @@ class Command(BaseCommand):
self.observer.join()
def handle_inotify(self, directory, recursive):
logging.getLogger(__name__).info(
logger.info(
f"Using inotify to watch directory for changes: {directory}")
inotify = INotify()

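Watchdog invokes event handlers synchronously on the observer thread, so the wait-until-unmodified loop is now pushed onto its own Thread to keep the observer responsive. The shape of the pattern (simplified, not part of this commit):

    from threading import Thread

    from watchdog.events import FileSystemEventHandler

    def wait_then_consume(path):
        ...  # poll st_mtime until stable, then queue the file

    class Handler(FileSystemEventHandler):
        def on_created(self, event):
            # Return immediately; polling happens off the observer thread.
            Thread(target=wait_then_consume, args=(event.src_path,)).start()
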
View File

@@ -1,10 +1,9 @@
from django.core.management.base import BaseCommand
from ...mixins import Renderable
from ...tasks import train_classifier
class Command(Renderable, BaseCommand):
class Command(BaseCommand):
help = """
Trains the classifier on your data and saves the resulting models to a

View File

@@ -1,19 +1,24 @@
import hashlib
import json
import os
import shutil
import time
import tqdm
from django.conf import settings
from django.core import serializers
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from filelock import FileLock
from documents.models import Document, Correspondent, Tag, DocumentType
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
EXPORTER_ARCHIVE_NAME
from paperless.db import GnuPG
from ...mixins import Renderable
from ...file_handling import generate_filename, delete_empty_directories
class Command(Renderable, BaseCommand):
class Command(BaseCommand):
help = """
Decrypt and rename all files in our collection into a given target
@@ -24,13 +29,47 @@ class Command(Renderable, BaseCommand):
def add_arguments(self, parser):
parser.add_argument("target")
parser.add_argument(
"-c", "--compare-checksums",
default=False,
action="store_true",
help="Compare file checksums when determining whether to export "
"a file or not. If not specified, file size and time "
"modified is used instead."
)
parser.add_argument(
"-f", "--use-filename-format",
default=False,
action="store_true",
help="Use PAPERLESS_FILENAME_FORMAT for storing files in the "
"export directory, if configured."
)
parser.add_argument(
"-d", "--delete",
default=False,
action="store_true",
help="After exporting, delete files in the export directory that "
"do not belong to the current export, such as files from "
"deleted documents."
)
def __init__(self, *args, **kwargs):
BaseCommand.__init__(self, *args, **kwargs)
self.target = None
self.files_in_export_dir = []
self.exported_files = []
self.compare_checksums = False
self.use_filename_format = False
self.delete = False
def handle(self, *args, **options):
self.target = options["target"]
self.compare_checksums = options['compare_checksums']
self.use_filename_format = options['use_filename_format']
self.delete = options['delete']
if not os.path.exists(self.target):
raise CommandError("That path doesn't exist")
@@ -38,72 +77,148 @@ class Command(Renderable, BaseCommand):
if not os.access(self.target, os.W_OK):
raise CommandError("That path doesn't appear to be writable")
self.dump()
with FileLock(settings.MEDIA_LOCK):
self.dump()
def dump(self):
# 1. Take a snapshot of what files exist in the current export folder
for root, dirs, files in os.walk(self.target):
self.files_in_export_dir.extend(
map(lambda f: os.path.abspath(os.path.join(root, f)), files)
)
documents = Document.objects.all()
document_map = {d.pk: d for d in documents}
manifest = json.loads(serializers.serialize("json", documents))
# 2. Create manifest, containing all correspondents, types, tags and
# documents
with transaction.atomic():
manifest = json.loads(
serializers.serialize("json", Correspondent.objects.all()))
for index, document_dict in enumerate(manifest):
manifest += json.loads(serializers.serialize(
"json", Tag.objects.all()))
# Force output to unencrypted as that will be the current state.
# The importer will make the decision to encrypt or not.
manifest[index]["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501
manifest += json.loads(serializers.serialize(
"json", DocumentType.objects.all()))
documents = Document.objects.order_by("id")
document_map = {d.pk: d for d in documents}
document_manifest = json.loads(
serializers.serialize("json", documents))
manifest += document_manifest
# 3. Export files from each document
for index, document_dict in tqdm.tqdm(enumerate(document_manifest),
total=len(document_manifest)):
# 3.1. store files unencrypted
document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501
document = document_map[document_dict["pk"]]
unique_filename = f"{document.pk:07}_{document.file_name}"
file_target = os.path.join(self.target, unique_filename)
# 3.2. generate a unique filename
filename_counter = 0
while True:
if self.use_filename_format:
base_name = generate_filename(
document, counter=filename_counter,
append_gpg=False)
else:
base_name = document.get_public_filename(
counter=filename_counter)
thumbnail_name = unique_filename + "-thumbnail.png"
if base_name not in self.exported_files:
self.exported_files.append(base_name)
break
else:
filename_counter += 1
# 3.3. write filenames into manifest
original_name = base_name
original_target = os.path.join(self.target, original_name)
document_dict[EXPORTER_FILE_NAME] = original_name
thumbnail_name = base_name + "-thumbnail.png"
thumbnail_target = os.path.join(self.target, thumbnail_name)
document_dict[EXPORTER_FILE_NAME] = unique_filename
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
if os.path.exists(document.archive_path):
archive_name = \
f"{document.pk:07}_archive_{document.archive_file_name}"
if document.has_archive_version:
archive_name = base_name + "-archive.pdf"
archive_target = os.path.join(self.target, archive_name)
document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
else:
archive_target = None
print(f"Exporting: {file_target}")
# 3.4. write files to target folder
t = int(time.mktime(document.created.timetuple()))
if document.storage_type == Document.STORAGE_TYPE_GPG:
with open(file_target, "wb") as f:
os.makedirs(os.path.dirname(original_target), exist_ok=True)
with open(original_target, "wb") as f:
f.write(GnuPG.decrypted(document.source_file))
os.utime(file_target, times=(t, t))
os.utime(original_target, times=(t, t))
os.makedirs(os.path.dirname(thumbnail_target), exist_ok=True)
with open(thumbnail_target, "wb") as f:
f.write(GnuPG.decrypted(document.thumbnail_file))
os.utime(thumbnail_target, times=(t, t))
if archive_target:
os.makedirs(os.path.dirname(archive_target), exist_ok=True)
with open(archive_target, "wb") as f:
f.write(GnuPG.decrypted(document.archive_path))
os.utime(archive_target, times=(t, t))
else:
self.check_and_copy(document.source_path,
document.checksum,
original_target)
shutil.copy(document.source_path, file_target)
shutil.copy(document.thumbnail_path, thumbnail_target)
self.check_and_copy(document.thumbnail_path,
None,
thumbnail_target)
if archive_target:
shutil.copy(document.archive_path, archive_target)
self.check_and_copy(document.archive_path,
document.archive_checksum,
archive_target)
manifest += json.loads(
serializers.serialize("json", Correspondent.objects.all()))
# 4. write manifest to target folder
manifest_path = os.path.abspath(
os.path.join(self.target, "manifest.json"))
manifest += json.loads(serializers.serialize(
"json", Tag.objects.all()))
manifest += json.loads(serializers.serialize(
"json", DocumentType.objects.all()))
with open(os.path.join(self.target, "manifest.json"), "w") as f:
with open(manifest_path, "w") as f:
json.dump(manifest, f, indent=2)
if self.delete:
# 5. Remove files which we did not explicitly export in this run
if manifest_path in self.files_in_export_dir:
self.files_in_export_dir.remove(manifest_path)
for f in self.files_in_export_dir:
os.remove(f)
delete_empty_directories(os.path.abspath(os.path.dirname(f)),
os.path.abspath(self.target))
def check_and_copy(self, source, source_checksum, target):
if os.path.abspath(target) in self.files_in_export_dir:
self.files_in_export_dir.remove(os.path.abspath(target))
perform_copy = False
if os.path.exists(target):
source_stat = os.stat(source)
target_stat = os.stat(target)
if self.compare_checksums and source_checksum:
with open(target, "rb") as f:
target_checksum = hashlib.md5(f.read()).hexdigest()
perform_copy = target_checksum != source_checksum
elif source_stat.st_mtime != target_stat.st_mtime:
perform_copy = True
elif source_stat.st_size != target_stat.st_size:
perform_copy = True
else:
# Copy if it does not exist
perform_copy = True
if perform_copy:
os.makedirs(os.path.dirname(target), exist_ok=True)
shutil.copy2(source, target)
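A minimal sketch of the copy decision above, on hypothetical paths: because shutil.copy2 preserves the modification time, an unchanged file fails none of the mtime/size checks on the next run and is skipped.
# First export: target missing, so the file is copied (mtime preserved).
# Second export: mtime and size match, so perform_copy stays False.
exporter.check_and_copy("/data/originals/0000042.pdf",   # hypothetical source
                        None,                            # no checksum compare
                        "/export/0000042.pdf")           # hypothetical target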

View File

@@ -1,19 +1,33 @@
import json
import logging
import os
import shutil
from contextlib import contextmanager
import tqdm
from django.conf import settings
from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError
from django.db.models.signals import post_save, m2m_changed
from filelock import FileLock
from documents.models import Document
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
EXPORTER_ARCHIVE_NAME
from ...file_handling import generate_filename, create_source_path_directory
from ...mixins import Renderable
from ...file_handling import create_source_path_directory
from ...signals.handlers import update_filename_and_move_files
class Command(Renderable, BaseCommand):
@contextmanager
def disable_signal(sig, receiver, sender):
try:
sig.disconnect(receiver=receiver, sender=sender)
yield
finally:
sig.connect(receiver=receiver, sender=sender)
class Command(BaseCommand):
help = """
Using a manifest.json file, load the data from there, and import the
@@ -30,6 +44,8 @@ class Command(Renderable, BaseCommand):
def handle(self, *args, **options):
logging.getLogger().handlers[0].level = logging.ERROR
self.source = options["source"]
if not os.path.exists(self.source):
@@ -45,11 +61,19 @@ class Command(Renderable, BaseCommand):
self.manifest = json.load(f)
self._check_manifest()
with disable_signal(post_save,
receiver=update_filename_and_move_files,
sender=Document):
with disable_signal(m2m_changed,
receiver=update_filename_and_move_files,
sender=Document.tags.through):
# Fill up the database with whatever is in the manifest
call_command("loaddata", manifest_path)
# Fill up the database with whatever is in the manifest
call_command("loaddata", manifest_path)
self._import_files_from_manifest()
self._import_files_from_manifest()
print("Updating search index...")
call_command('document_index', 'reindex')
@staticmethod
def _check_manifest_exists(path):
@@ -93,10 +117,13 @@ class Command(Renderable, BaseCommand):
os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True)
os.makedirs(settings.ARCHIVE_DIR, exist_ok=True)
for record in self.manifest:
print("Copy files into paperless...")
if not record["model"] == "documents.document":
continue
manifest_documents = list(filter(
lambda r: r["model"] == "documents.document",
self.manifest))
for record in tqdm.tqdm(manifest_documents):
document = Document.objects.get(pk=record["pk"])
@@ -114,17 +141,19 @@ class Command(Renderable, BaseCommand):
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.filename = generate_filename(document)
with FileLock(settings.MEDIA_LOCK):
if os.path.isfile(document.source_path):
raise FileExistsError(document.source_path)
if os.path.isfile(document.source_path):
raise FileExistsError(document.source_path)
create_source_path_directory(document.source_path)
create_source_path_directory(document.source_path)
print(f"Moving {document_path} to {document.source_path}")
shutil.copy(document_path, document.source_path)
shutil.copy(thumbnail_path, document.thumbnail_path)
if archive_path:
shutil.copy(archive_path, document.archive_path)
shutil.copy2(document_path, document.source_path)
shutil.copy2(thumbnail_path, document.thumbnail_path)
if archive_path:
create_source_path_directory(document.archive_path)
# TODO: this assumes that the export is valid and
# archive_filename is present on all documents with
# archived files
shutil.copy2(archive_path, document.archive_path)
document.save()
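A minimal usage sketch of the disable_signal helper defined above, with the receiver and sender taken from this file's imports:
with disable_signal(post_save,
                    receiver=update_filename_and_move_files,
                    sender=Document):
    # the handler stays disconnected inside this block and is
    # reconnected on exit, even if an exception is raised
    document.save()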

View File

@@ -1,25 +1,19 @@
from django.core.management import BaseCommand
from django.db import transaction
from documents.mixins import Renderable
from documents.tasks import index_reindex, index_optimize
class Command(Renderable, BaseCommand):
class Command(BaseCommand):
help = "Manages the document index."
def __init__(self, *args, **kwargs):
self.verbosity = 0
BaseCommand.__init__(self, *args, **kwargs)
def add_arguments(self, parser):
parser.add_argument("command", choices=['reindex', 'optimize'])
def handle(self, *args, **options):
self.verbosity = options["verbosity"]
if options['command'] == 'reindex':
index_reindex()
elif options['command'] == 'optimize':
index_optimize()
with transaction.atomic():
if options['command'] == 'reindex':
index_reindex()
elif options['command'] == 'optimize':
index_optimize()
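The same two operations are available programmatically; a short sketch via Django's call_command:
from django.core.management import call_command

call_command("document_index", "reindex")   # rebuild the whole index
call_command("document_index", "optimize")  # optimize the existing index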

View File

@@ -1,12 +0,0 @@
from django.core.management.base import BaseCommand
from documents.models import Log
class Command(BaseCommand):
help = "A quick & dirty way to see what's in the logs"
def handle(self, *args, **options):
for log in Log.objects.order_by("pk"):
print(log)

View File

@@ -1,23 +1,21 @@
import logging
import tqdm
from django.core.management.base import BaseCommand
from django.db.models.signals import post_save
from documents.models import Document
from ...mixins import Renderable
class Command(Renderable, BaseCommand):
class Command(BaseCommand):
help = """
This will rename all documents to match the latest filename format.
""".replace(" ", "")
def __init__(self, *args, **kwargs):
self.verbosity = 0
BaseCommand.__init__(self, *args, **kwargs)
def handle(self, *args, **options):
self.verbosity = options["verbosity"]
logging.getLogger().handlers[0].level = logging.ERROR
for document in Document.objects.all():
# Saving the document again will generate a new filename and rename
document.save()
for document in tqdm.tqdm(Document.objects.all()):
post_save.send(Document, instance=document)

View File

@@ -2,14 +2,15 @@ import logging
from django.core.management.base import BaseCommand
from documents.classifier import DocumentClassifier, \
IncompatibleClassifierVersionError
from documents.classifier import load_classifier
from documents.models import Document
from ...mixins import Renderable
from ...signals.handlers import set_correspondent, set_document_type, set_tags
class Command(Renderable, BaseCommand):
logger = logging.getLogger("paperless.management.retagger")
class Command(BaseCommand):
help = """
Using the current classification model, assigns correspondents, tags
@@ -18,10 +19,6 @@ class Command(Renderable, BaseCommand):
modified) after their initial import.
""".replace(" ", "")
def __init__(self, *args, **kwargs):
self.verbosity = 0
BaseCommand.__init__(self, *args, **kwargs)
def add_arguments(self, parser):
parser.add_argument(
"-c", "--correspondent",
@@ -62,24 +59,16 @@ class Command(Renderable, BaseCommand):
def handle(self, *args, **options):
self.verbosity = options["verbosity"]
if options["inbox_only"]:
queryset = Document.objects.filter(tags__is_inbox_tag=True)
else:
queryset = Document.objects.all()
documents = queryset.distinct()
classifier = DocumentClassifier()
try:
classifier.reload()
except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
logging.getLogger(__name__).warning(
f"Cannot classify documents: {e}.")
classifier = None
classifier = load_classifier()
for document in documents:
logging.getLogger(__name__).info(
logger.info(
f"Processing document {document.title}")
if options['correspondent']:

View File

@@ -0,0 +1,15 @@
from django.core.management.base import BaseCommand
from documents.sanity_checker import check_sanity
class Command(BaseCommand):
help = """
This command checks your document archive for issues.
""".replace(" ", "")
def handle(self, *args, **options):
messages = check_sanity(progress=True)
messages.log_messages()
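Assuming this command is registered as document_sanity_checker (the file name is not visible in this diff), it can be invoked the same way:
from django.core.management import call_command

call_command("document_sanity_checker")  # command name is an assumption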

View File

@@ -0,0 +1,69 @@
import logging
import multiprocessing
import shutil
import tqdm
from django import db
from django.core.management.base import BaseCommand
from documents.models import Document
from ...parsers import get_parser_class_for_mime_type
def _process_document(doc_in):
document = Document.objects.get(id=doc_in)
parser_class = get_parser_class_for_mime_type(document.mime_type)
if parser_class:
parser = parser_class(logging_group=None)
else:
print(f"{document} No parser for mime type {document.mime_type}")
return
try:
thumb = parser.get_optimised_thumbnail(
document.source_path,
document.mime_type,
document.get_public_filename()
)
shutil.move(thumb, document.thumbnail_path)
finally:
parser.cleanup()
class Command(BaseCommand):
help = """
This will regenerate the thumbnails for all documents.
""".replace(" ", "")
def add_arguments(self, parser):
parser.add_argument(
"-d", "--document",
default=None,
type=int,
required=False,
help="Specify the ID of a document, and this command will only "
"run on this specific document."
)
def handle(self, *args, **options):
logging.getLogger().handlers[0].level = logging.ERROR
if options['document']:
documents = Document.objects.filter(pk=options['document'])
else:
documents = Document.objects.all()
ids = [doc.id for doc in documents]
# Note to future self: this prevents django from reusing database
# connections between processes, which is bad and does not work
# with postgres.
db.connections.close_all()
with multiprocessing.Pool() as pool:
list(tqdm.tqdm(
pool.imap_unordered(_process_document, ids), total=len(ids)
))
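A hedged invocation sketch for the -d/--document option defined above; the command name is an assumption, since the file name is not visible in this diff:
from django.core.management import call_command

call_command("document_thumbnails")               # all documents, in parallel
call_command("document_thumbnails", document=42)  # one hypothetical document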

View File

@@ -1,53 +1,63 @@
import logging
import re
from fuzzywuzzy import fuzz
from documents.models import MatchingModel, Correspondent, DocumentType, Tag
def match_correspondents(document_content, classifier):
logger = logging.getLogger("paperless.matching")
def log_reason(matching_model, document, reason):
class_name = type(matching_model).__name__
logger.debug(
f"{class_name} {matching_model.name} matched on document "
f"{document} because {reason}")
def match_correspondents(document, classifier):
if classifier:
pred_id = classifier.predict_correspondent(document_content)
pred_id = classifier.predict_correspondent(document.content)
else:
pred_id = None
correspondents = Correspondent.objects.all()
return list(filter(
lambda o: matches(o, document_content) or o.pk == pred_id,
lambda o: matches(o, document) or o.pk == pred_id,
correspondents))
def match_document_types(document_content, classifier):
def match_document_types(document, classifier):
if classifier:
pred_id = classifier.predict_document_type(document_content)
pred_id = classifier.predict_document_type(document.content)
else:
pred_id = None
document_types = DocumentType.objects.all()
return list(filter(
lambda o: matches(o, document_content) or o.pk == pred_id,
lambda o: matches(o, document) or o.pk == pred_id,
document_types))
def match_tags(document_content, classifier):
def match_tags(document, classifier):
if classifier:
predicted_tag_ids = classifier.predict_tags(document_content)
predicted_tag_ids = classifier.predict_tags(document.content)
else:
predicted_tag_ids = []
tags = Tag.objects.all()
return list(filter(
lambda o: matches(o, document_content) or o.pk in predicted_tag_ids,
lambda o: matches(o, document) or o.pk in predicted_tag_ids,
tags))
def matches(matching_model, document_content):
def matches(matching_model, document):
search_kwargs = {}
document_content = document_content.lower()
document_content = document.content.lower()
# Check that match is not empty
if matching_model.match.strip() == "":
@@ -62,35 +72,73 @@ def matches(matching_model, document_content):
rf"\b{word}\b", document_content, **search_kwargs)
if not search_result:
return False
log_reason(
matching_model, document,
f"it contains all of these words: {matching_model.match}"
)
return True
elif matching_model.matching_algorithm == MatchingModel.MATCH_ANY:
for word in _split_match(matching_model):
if re.search(rf"\b{word}\b", document_content, **search_kwargs):
log_reason(
matching_model, document,
f"it contains this word: {word}"
)
return True
return False
elif matching_model.matching_algorithm == MatchingModel.MATCH_LITERAL:
return bool(re.search(
result = bool(re.search(
rf"\b{matching_model.match}\b",
document_content,
**search_kwargs
))
if result:
log_reason(
matching_model, document,
f"it contains this string: \"{matching_model.match}\""
)
return result
elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
return bool(re.search(
re.compile(matching_model.match, **search_kwargs),
document_content
))
try:
match = re.search(
re.compile(matching_model.match, **search_kwargs),
document_content
)
except re.error:
logger.error(
f"Error while processing regular expression "
f"{matching_model.match}"
)
return False
if match:
log_reason(
matching_model, document,
f"the string {match.group()} matches the regular expression "
f"{matching_model.match}"
)
return bool(match)
elif matching_model.matching_algorithm == MatchingModel.MATCH_FUZZY:
from fuzzywuzzy import fuzz
match = re.sub(r'[^\w\s]', '', matching_model.match)
text = re.sub(r'[^\w\s]', '', document_content)
if matching_model.is_insensitive:
match = match.lower()
text = text.lower()
return fuzz.partial_ratio(match, text) >= 90
if fuzz.partial_ratio(match, text) >= 90:
# TODO: make this better
log_reason(
matching_model, document,
f"parts of the document content somehow match the string "
f"{matching_model.match}"
)
return True
else:
return False
elif matching_model.matching_algorithm == MatchingModel.MATCH_AUTO:
# this is done elsewhere.
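A small sketch of the word-boundary matching above, using a hypothetical match value; note the real code escapes each word via _split_match, which this sketch omits:
import re

content = "Invoice from ACME Power & Light".lower()
words = "invoice acme".split()
# MATCH_ALL: every word must occur on a word boundary
print(all(re.search(rf"\b{w}\b", content) for w in words))  # True
# MATCH_ANY: a single hit is enough
print(any(re.search(rf"\b{w}\b", content) for w in words))  # True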

View File

@@ -6,13 +6,18 @@ import magic
from django.conf import settings
from django.db import migrations, models
from paperless.db import GnuPG
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
STORAGE_TYPE_GPG = "gpg"
def source_path(self):
if self.filename:
fname = str(self.filename)
else:
fname = "{:07}.{}".format(self.pk, self.file_type)
if self.storage_type == self.STORAGE_TYPE_GPG:
if self.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg"
return os.path.join(
@@ -26,9 +31,18 @@ def add_mime_types(apps, schema_editor):
documents = Document.objects.all()
for d in documents:
d.mime_type = magic.from_file(source_path(d), mime=True)
f = open(source_path(d), "rb")
if d.storage_type == STORAGE_TYPE_GPG:
data = GnuPG.decrypted(f)
else:
data = f.read(1024)
d.mime_type = magic.from_buffer(data, mime=True)
d.save()
f.close()
def add_file_extensions(apps, schema_editor):
Document = apps.get_model("documents", "Document")

View File

@@ -0,0 +1,25 @@
# Generated by Django 3.1.4 on 2020-12-08 22:09
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('documents', '1005_checksums'),
]
operations = [
migrations.RemoveField(
model_name='correspondent',
name='slug',
),
migrations.RemoveField(
model_name='documenttype',
name='slug',
),
migrations.RemoveField(
model_name='tag',
name='slug',
),
]

View File

@@ -0,0 +1,37 @@
# Generated by Django 3.1.4 on 2020-12-12 14:41
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('documents', '1006_auto_20201208_2209'),
]
operations = [
migrations.CreateModel(
name='SavedView',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128)),
('show_on_dashboard', models.BooleanField()),
('show_in_sidebar', models.BooleanField()),
('sort_field', models.CharField(max_length=128)),
('sort_reverse', models.BooleanField(default=False)),
('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
],
),
migrations.CreateModel(
name='SavedViewFilterRule',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('rule_type', models.PositiveIntegerField(choices=[(0, 'Title contains'), (1, 'Content contains'), (2, 'ASN is'), (3, 'Correspondent is'), (4, 'Document type is'), (5, 'Is in inbox'), (6, 'Has tag'), (7, 'Has any tag'), (8, 'Created before'), (9, 'Created after'), (10, 'Created year is'), (11, 'Created month is'), (12, 'Created day is'), (13, 'Added before'), (14, 'Added after'), (15, 'Modified before'), (16, 'Modified after'), (17, 'Does not have tag')])),
('value', models.CharField(max_length=128)),
('saved_view', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='filter_rules', to='documents.savedview')),
],
),
]
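A hedged sketch of the data these two models hold (rule_type 5 is "Is in inbox" per the choices above; the value format is an assumption):
view = SavedView.objects.create(
    user=user,                 # hypothetical User instance
    name="Inbox",
    show_on_dashboard=True,
    show_in_sidebar=True,
    sort_field="created",
    sort_reverse=True,
)
SavedViewFilterRule.objects.create(
    saved_view=view, rule_type=5, value="true")  # value format assumed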

View File

@@ -0,0 +1,34 @@
# Generated by Django 3.1.4 on 2020-12-16 17:36
from django.db import migrations
import django.db.models.functions.text
class Migration(migrations.Migration):
dependencies = [
('documents', '1007_savedview_savedviewfilterrule'),
]
operations = [
migrations.AlterModelOptions(
name='correspondent',
options={'ordering': (django.db.models.functions.text.Lower('name'),)},
),
migrations.AlterModelOptions(
name='document',
options={'ordering': ('-created',)},
),
migrations.AlterModelOptions(
name='documenttype',
options={'ordering': (django.db.models.functions.text.Lower('name'),)},
),
migrations.AlterModelOptions(
name='savedview',
options={'ordering': (django.db.models.functions.text.Lower('name'),)},
),
migrations.AlterModelOptions(
name='tag',
options={'ordering': (django.db.models.functions.text.Lower('name'),)},
),
]

View File

@@ -0,0 +1,29 @@
# Generated by Django 3.1.4 on 2020-12-16 20:05
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
('documents', '1008_auto_20201216_1736'),
]
operations = [
migrations.AlterModelOptions(
name='correspondent',
options={'ordering': ('name',)},
),
migrations.AlterModelOptions(
name='documenttype',
options={'ordering': ('name',)},
),
migrations.AlterModelOptions(
name='savedview',
options={'ordering': ('name',)},
),
migrations.AlterModelOptions(
name='tag',
options={'ordering': ('name',)},
),
]

View File

@@ -0,0 +1,18 @@
# Generated by Django 3.1.4 on 2021-01-01 21:59
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('documents', '1009_auto_20201216_2005'),
]
operations = [
migrations.AlterField(
model_name='savedviewfilterrule',
name='value',
field=models.CharField(blank=True, max_length=128, null=True),
),
]

View File

@@ -0,0 +1,250 @@
# Generated by Django 3.1.4 on 2021-01-01 23:40
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('documents', '1010_auto_20210101_2159'),
]
operations = [
migrations.AlterModelOptions(
name='correspondent',
options={'ordering': ('name',), 'verbose_name': 'correspondent', 'verbose_name_plural': 'correspondents'},
),
migrations.AlterModelOptions(
name='document',
options={'ordering': ('-created',), 'verbose_name': 'document', 'verbose_name_plural': 'documents'},
),
migrations.AlterModelOptions(
name='documenttype',
options={'verbose_name': 'document type', 'verbose_name_plural': 'document types'},
),
migrations.AlterModelOptions(
name='log',
options={'ordering': ('-created',), 'verbose_name': 'log', 'verbose_name_plural': 'logs'},
),
migrations.AlterModelOptions(
name='savedview',
options={'ordering': ('name',), 'verbose_name': 'saved view', 'verbose_name_plural': 'saved views'},
),
migrations.AlterModelOptions(
name='savedviewfilterrule',
options={'verbose_name': 'filter rule', 'verbose_name_plural': 'filter rules'},
),
migrations.AlterModelOptions(
name='tag',
options={'verbose_name': 'tag', 'verbose_name_plural': 'tags'},
),
migrations.AlterField(
model_name='correspondent',
name='is_insensitive',
field=models.BooleanField(default=True, verbose_name='is insensitive'),
),
migrations.AlterField(
model_name='correspondent',
name='match',
field=models.CharField(blank=True, max_length=256, verbose_name='match'),
),
migrations.AlterField(
model_name='correspondent',
name='matching_algorithm',
field=models.PositiveIntegerField(choices=[(1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm'),
),
migrations.AlterField(
model_name='correspondent',
name='name',
field=models.CharField(max_length=128, unique=True, verbose_name='name'),
),
migrations.AlterField(
model_name='document',
name='added',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, editable=False, verbose_name='added'),
),
migrations.AlterField(
model_name='document',
name='archive_checksum',
field=models.CharField(blank=True, editable=False, help_text='The checksum of the archived document.', max_length=32, null=True, verbose_name='archive checksum'),
),
migrations.AlterField(
model_name='document',
name='archive_serial_number',
field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True, verbose_name='archive serial number'),
),
migrations.AlterField(
model_name='document',
name='checksum',
field=models.CharField(editable=False, help_text='The checksum of the original document.', max_length=32, unique=True, verbose_name='checksum'),
),
migrations.AlterField(
model_name='document',
name='content',
field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.', verbose_name='content'),
),
migrations.AlterField(
model_name='document',
name='correspondent',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.correspondent', verbose_name='correspondent'),
),
migrations.AlterField(
model_name='document',
name='created',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, verbose_name='created'),
),
migrations.AlterField(
model_name='document',
name='document_type',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.documenttype', verbose_name='document type'),
),
migrations.AlterField(
model_name='document',
name='filename',
field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True, verbose_name='filename'),
),
migrations.AlterField(
model_name='document',
name='mime_type',
field=models.CharField(editable=False, max_length=256, verbose_name='mime type'),
),
migrations.AlterField(
model_name='document',
name='modified',
field=models.DateTimeField(auto_now=True, db_index=True, verbose_name='modified'),
),
migrations.AlterField(
model_name='document',
name='storage_type',
field=models.CharField(choices=[('unencrypted', 'Unencrypted'), ('gpg', 'Encrypted with GNU Privacy Guard')], default='unencrypted', editable=False, max_length=11, verbose_name='storage type'),
),
migrations.AlterField(
model_name='document',
name='tags',
field=models.ManyToManyField(blank=True, related_name='documents', to='documents.Tag', verbose_name='tags'),
),
migrations.AlterField(
model_name='document',
name='title',
field=models.CharField(blank=True, db_index=True, max_length=128, verbose_name='title'),
),
migrations.AlterField(
model_name='documenttype',
name='is_insensitive',
field=models.BooleanField(default=True, verbose_name='is insensitive'),
),
migrations.AlterField(
model_name='documenttype',
name='match',
field=models.CharField(blank=True, max_length=256, verbose_name='match'),
),
migrations.AlterField(
model_name='documenttype',
name='matching_algorithm',
field=models.PositiveIntegerField(choices=[(1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm'),
),
migrations.AlterField(
model_name='documenttype',
name='name',
field=models.CharField(max_length=128, unique=True, verbose_name='name'),
),
migrations.AlterField(
model_name='log',
name='created',
field=models.DateTimeField(auto_now_add=True, verbose_name='created'),
),
migrations.AlterField(
model_name='log',
name='group',
field=models.UUIDField(blank=True, null=True, verbose_name='group'),
),
migrations.AlterField(
model_name='log',
name='level',
field=models.PositiveIntegerField(choices=[(10, 'debug'), (20, 'information'), (30, 'warning'), (40, 'error'), (50, 'critical')], default=20, verbose_name='level'),
),
migrations.AlterField(
model_name='log',
name='message',
field=models.TextField(verbose_name='message'),
),
migrations.AlterField(
model_name='savedview',
name='name',
field=models.CharField(max_length=128, verbose_name='name'),
),
migrations.AlterField(
model_name='savedview',
name='show_in_sidebar',
field=models.BooleanField(verbose_name='show in sidebar'),
),
migrations.AlterField(
model_name='savedview',
name='show_on_dashboard',
field=models.BooleanField(verbose_name='show on dashboard'),
),
migrations.AlterField(
model_name='savedview',
name='sort_field',
field=models.CharField(max_length=128, verbose_name='sort field'),
),
migrations.AlterField(
model_name='savedview',
name='sort_reverse',
field=models.BooleanField(default=False, verbose_name='sort reverse'),
),
migrations.AlterField(
model_name='savedview',
name='user',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL, verbose_name='user'),
),
migrations.AlterField(
model_name='savedviewfilterrule',
name='rule_type',
field=models.PositiveIntegerField(choices=[(0, 'title contains'), (1, 'content contains'), (2, 'ASN is'), (3, 'correspondent is'), (4, 'document type is'), (5, 'is in inbox'), (6, 'has tag'), (7, 'has any tag'), (8, 'created before'), (9, 'created after'), (10, 'created year is'), (11, 'created month is'), (12, 'created day is'), (13, 'added before'), (14, 'added after'), (15, 'modified before'), (16, 'modified after'), (17, 'does not have tag')], verbose_name='rule type'),
),
migrations.AlterField(
model_name='savedviewfilterrule',
name='saved_view',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='filter_rules', to='documents.savedview', verbose_name='saved view'),
),
migrations.AlterField(
model_name='savedviewfilterrule',
name='value',
field=models.CharField(blank=True, max_length=128, null=True, verbose_name='value'),
),
migrations.AlterField(
model_name='tag',
name='colour',
field=models.PositiveIntegerField(choices=[(1, '#a6cee3'), (2, '#1f78b4'), (3, '#b2df8a'), (4, '#33a02c'), (5, '#fb9a99'), (6, '#e31a1c'), (7, '#fdbf6f'), (8, '#ff7f00'), (9, '#cab2d6'), (10, '#6a3d9a'), (11, '#b15928'), (12, '#000000'), (13, '#cccccc')], default=1, verbose_name='color'),
),
migrations.AlterField(
model_name='tag',
name='is_inbox_tag',
field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.', verbose_name='is inbox tag'),
),
migrations.AlterField(
model_name='tag',
name='is_insensitive',
field=models.BooleanField(default=True, verbose_name='is insensitive'),
),
migrations.AlterField(
model_name='tag',
name='match',
field=models.CharField(blank=True, max_length=256, verbose_name='match'),
),
migrations.AlterField(
model_name='tag',
name='matching_algorithm',
field=models.PositiveIntegerField(choices=[(1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm'),
),
migrations.AlterField(
model_name='tag',
name='name',
field=models.CharField(max_length=128, unique=True, verbose_name='name'),
),
]

View File

@@ -0,0 +1,330 @@
# Generated by Django 3.1.6 on 2021-02-07 22:26
import datetime
import hashlib
import logging
import os
import shutil
from time import sleep
import pathvalidate
from django.conf import settings
from django.db import migrations, models
from django.template.defaultfilters import slugify
from documents.file_handling import defaultdictNoStr, many_to_dictionary
logger = logging.getLogger("paperless.migrations")
###############################################################################
# This is code copied straight from paperless before the change.
###############################################################################
def archive_name_from_filename(filename):
return os.path.splitext(filename)[0] + ".pdf"
def archive_path_old(doc):
if doc.filename:
fname = archive_name_from_filename(doc.filename)
else:
fname = "{:07}.pdf".format(doc.pk)
return os.path.join(
settings.ARCHIVE_DIR,
fname
)
STORAGE_TYPE_GPG = "gpg"
def archive_path_new(doc):
if doc.archive_filename is not None:
return os.path.join(
settings.ARCHIVE_DIR,
str(doc.archive_filename)
)
else:
return None
def source_path(doc):
if doc.filename:
fname = str(doc.filename)
else:
fname = "{:07}{}".format(doc.pk, doc.file_type)
if doc.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg" # pragma: no cover
return os.path.join(
settings.ORIGINALS_DIR,
fname
)
def generate_unique_filename(doc, archive_filename=False):
if archive_filename:
old_filename = doc.archive_filename
root = settings.ARCHIVE_DIR
else:
old_filename = doc.filename
root = settings.ORIGINALS_DIR
counter = 0
while True:
new_filename = generate_filename(
doc, counter, archive_filename=archive_filename)
if new_filename == old_filename:
# still the same as before.
return new_filename
if os.path.exists(os.path.join(root, new_filename)):
counter += 1
else:
return new_filename
def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
path = ""
try:
if settings.PAPERLESS_FILENAME_FORMAT is not None:
tags = defaultdictNoStr(lambda: slugify(None),
many_to_dictionary(doc.tags))
tag_list = pathvalidate.sanitize_filename(
",".join(sorted(
[tag.name for tag in doc.tags.all()]
)),
replacement_text="-"
)
if doc.correspondent:
correspondent = pathvalidate.sanitize_filename(
doc.correspondent.name, replacement_text="-"
)
else:
correspondent = "none"
if doc.document_type:
document_type = pathvalidate.sanitize_filename(
doc.document_type.name, replacement_text="-"
)
else:
document_type = "none"
path = settings.PAPERLESS_FILENAME_FORMAT.format(
title=pathvalidate.sanitize_filename(
doc.title, replacement_text="-"),
correspondent=correspondent,
document_type=document_type,
created=datetime.date.isoformat(doc.created),
created_year=doc.created.year if doc.created else "none",
created_month=f"{doc.created.month:02}" if doc.created else "none", # NOQA: E501
created_day=f"{doc.created.day:02}" if doc.created else "none",
added=datetime.date.isoformat(doc.added),
added_year=doc.added.year if doc.added else "none",
added_month=f"{doc.added.month:02}" if doc.added else "none",
added_day=f"{doc.added.day:02}" if doc.added else "none",
tags=tags,
tag_list=tag_list
).strip()
path = path.strip(os.sep)
except (ValueError, KeyError, IndexError):
logger.warning(
f"Invalid PAPERLESS_FILENAME_FORMAT: "
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
counter_str = f"_{counter:02}" if counter else ""
filetype_str = ".pdf" if archive_filename else doc.file_type
if len(path) > 0:
filename = f"{path}{counter_str}{filetype_str}"
else:
filename = f"{doc.pk:07}{counter_str}{filetype_str}"
# Append .gpg for encrypted files
if append_gpg and doc.storage_type == STORAGE_TYPE_GPG:
filename += ".gpg"
return filename
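A hedged illustration of the placeholders consumed above; the format string itself is a made-up example:
path = "{created_year}/{correspondent}/{title}".format(
    created_year=2020, correspondent="ACME", title="Invoice 42")
# -> "2020/ACME/Invoice 42"; generate_filename() then appends the
#    counter, the file type, and ".gpg" for encrypted documents.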
###############################################################################
# This code performs bidirectional archive file transformation.
###############################################################################
def parse_wrapper(parser, path, mime_type, file_name):
# this is here so that I can mock this out for testing.
parser.parse(path, mime_type, file_name)
def create_archive_version(doc, retry_count=3):
from documents.parsers import get_parser_class_for_mime_type, \
DocumentParser, \
ParseError
logger.info(
f"Regenerating archive document for document ID:{doc.id}"
)
parser_class = get_parser_class_for_mime_type(doc.mime_type)
for try_num in range(retry_count):
parser: DocumentParser = parser_class(None, None)
try:
parse_wrapper(parser, source_path(doc), doc.mime_type,
os.path.basename(doc.filename))
doc.content = parser.get_text()
if parser.get_archive_path() and os.path.isfile(
parser.get_archive_path()):
doc.archive_filename = generate_unique_filename(
doc, archive_filename=True)
with open(parser.get_archive_path(), "rb") as f:
doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
os.makedirs(os.path.dirname(archive_path_new(doc)),
exist_ok=True)
shutil.copy2(parser.get_archive_path(), archive_path_new(doc))
else:
doc.archive_checksum = None
logger.error(
f"Parser did not return an archive document for document "
f"ID:{doc.id}. Removing archive document."
)
doc.save()
return
except ParseError:
if try_num + 1 == retry_count:
logger.exception(
f"Unable to regenerate archive document for ID:{doc.id}. You "
f"need to invoke the document_archiver management command "
f"manually for that document."
)
doc.archive_checksum = None
doc.save()
return
else:
# This is mostly here for the tika parser in docker
# environments. The servers for parsing need to come up first,
# and the docker setup doesn't ensure that tika is running
# before attempting migrations.
logger.error("Parse error, will try again in 5 seconds...")
sleep(5)
finally:
parser.cleanup()
def move_old_to_new_locations(apps, schema_editor):
Document = apps.get_model("documents", "Document")
affected_document_ids = set()
old_archive_path_to_id = {}
# check for documents that have incorrect archive versions
for doc in Document.objects.filter(archive_checksum__isnull=False):
old_path = archive_path_old(doc)
if old_path in old_archive_path_to_id:
affected_document_ids.add(doc.id)
affected_document_ids.add(old_archive_path_to_id[old_path])
else:
old_archive_path_to_id[old_path] = doc.id
# check that archive files of all unaffected documents are in place
for doc in Document.objects.filter(archive_checksum__isnull=False):
old_path = archive_path_old(doc)
if doc.id not in affected_document_ids and not os.path.isfile(old_path):
raise ValueError(
f"Archived document ID:{doc.id} does not exist at: "
f"{old_path}")
# check that we can regenerate affected archive versions
for doc_id in affected_document_ids:
from documents.parsers import get_parser_class_for_mime_type
doc = Document.objects.get(id=doc_id)
parser_class = get_parser_class_for_mime_type(doc.mime_type)
if not parser_class:
raise ValueError(
f"Document ID:{doc.id} has an invalid archived document, "
f"but no parsers are available. Cannot migrate.")
for doc in Document.objects.filter(archive_checksum__isnull=False):
if doc.id in affected_document_ids:
old_path = archive_path_old(doc)
# remove affected archive versions
if os.path.isfile(old_path):
logger.debug(
f"Removing {old_path}"
)
os.unlink(old_path)
else:
# Set archive path for unaffected files
doc.archive_filename = archive_name_from_filename(doc.filename)
Document.objects.filter(id=doc.id).update(
archive_filename=doc.archive_filename
)
# regenerate archive documents
for doc_id in affected_document_ids:
doc = Document.objects.get(id=doc_id)
create_archive_version(doc)
def move_new_to_old_locations(apps, schema_editor):
Document = apps.get_model("documents", "Document")
old_archive_paths = set()
for doc in Document.objects.filter(archive_checksum__isnull=False):
new_archive_path = archive_path_new(doc)
old_archive_path = archive_path_old(doc)
if old_archive_path in old_archive_paths:
raise ValueError(
f"Cannot migrate: Archive file name {old_archive_path} of "
f"document {doc.filename} would clash with another archive "
f"filename.")
old_archive_paths.add(old_archive_path)
if new_archive_path != old_archive_path and os.path.isfile(old_archive_path):
raise ValueError(
f"Cannot migrate: Cannot move {new_archive_path} to "
f"{old_archive_path}: file already exists."
)
for doc in Document.objects.filter(archive_checksum__isnull=False):
new_archive_path = archive_path_new(doc)
old_archive_path = archive_path_old(doc)
if new_archive_path != old_archive_path:
logger.debug(f"Moving {new_archive_path} to {old_archive_path}")
shutil.move(new_archive_path, old_archive_path)
class Migration(migrations.Migration):
dependencies = [
('documents', '1011_auto_20210101_2340'),
]
operations = [
migrations.AddField(
model_name='document',
name='archive_filename',
field=models.FilePathField(default=None, editable=False, help_text='Current archive filename in storage', max_length=1024, null=True, unique=True, verbose_name='archive filename'),
),
migrations.AlterField(
model_name='document',
name='filename',
field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True, unique=True, verbose_name='filename'),
),
migrations.RunPython(
move_old_to_new_locations,
move_new_to_old_locations
),
]

View File

@@ -1,9 +0,0 @@
class Renderable:
"""
A handy mixin to make it easier/cleaner to print output based on a
verbosity value.
"""
def _render(self, text, verbosity):
if self.verbosity >= verbosity:
print(text)

View File

@@ -1,18 +1,22 @@
# coding=utf-8
import datetime
import logging
import os
import re
from collections import OrderedDict
import pathvalidate
import dateutil.parser
from colorhash import ColorHash
from django.conf import settings
from django.contrib.auth.models import User
from django.db import models
from django.utils import timezone
from django.utils.text import slugify
from django.utils.timezone import is_aware
from django.utils.translation import gettext_lazy as _
from documents.file_handling import archive_name_from_filename
from documents.parsers import get_default_file_extension
@@ -26,37 +30,31 @@ class MatchingModel(models.Model):
MATCH_AUTO = 6
MATCHING_ALGORITHMS = (
(MATCH_ANY, "Any"),
(MATCH_ALL, "All"),
(MATCH_LITERAL, "Literal"),
(MATCH_REGEX, "Regular Expression"),
(MATCH_FUZZY, "Fuzzy Match"),
(MATCH_AUTO, "Automatic Classification"),
(MATCH_ANY, _("Any word")),
(MATCH_ALL, _("All words")),
(MATCH_LITERAL, _("Exact match")),
(MATCH_REGEX, _("Regular expression")),
(MATCH_FUZZY, _("Fuzzy word")),
(MATCH_AUTO, _("Automatic")),
)
name = models.CharField(max_length=128, unique=True)
slug = models.SlugField(blank=True, editable=False)
name = models.CharField(
_("name"),
max_length=128, unique=True)
match = models.CharField(
_("match"),
max_length=256, blank=True)
match = models.CharField(max_length=256, blank=True)
matching_algorithm = models.PositiveIntegerField(
_("matching algorithm"),
choices=MATCHING_ALGORITHMS,
default=MATCH_ANY,
help_text=(
"Which algorithm you want to use when matching text to the OCR'd "
"PDF. Here, \"any\" looks for any occurrence of any word "
"provided in the PDF, while \"all\" requires that every word "
"provided appear in the PDF, albeit not in the order provided. A "
"\"literal\" match means that the text you enter must appear in "
"the PDF exactly as you've entered it, and \"regular expression\" "
"uses a regex to match the PDF. (If you don't know what a regex "
"is, you probably don't want this option.) Finally, a \"fuzzy "
"match\" looks for words or phrases that are mostly—but not "
"exactly—the same, which can be useful for matching against "
"documents containg imperfections that foil accurate OCR."
)
default=MATCH_ANY
)
is_insensitive = models.BooleanField(default=True)
is_insensitive = models.BooleanField(
_("is insensitive"),
default=True)
class Meta:
abstract = True
@@ -65,13 +63,6 @@ class MatchingModel(models.Model):
def __str__(self):
return self.name
def save(self, *args, **kwargs):
self.match = self.match.lower()
self.slug = slugify(self.name)
models.Model.save(self, *args, **kwargs)
class Correspondent(MatchingModel):
@@ -81,18 +72,27 @@ class Correspondent(MatchingModel):
class Meta:
ordering = ("name",)
verbose_name = _("correspondent")
verbose_name_plural = _("correspondents")
class Tag(MatchingModel):
colour = models.CharField(blank=True, max_length=7)
colour = models.CharField(
_("color"),
blank=True, max_length=7)
is_inbox_tag = models.BooleanField(
_("is inbox tag"),
default=False,
help_text="Marks this tag as an inbox tag: All newly consumed "
"documents will be tagged with inbox tags."
help_text=_("Marks this tag as an inbox tag: All newly consumed "
"documents will be tagged with inbox tags.")
)
class Meta:
verbose_name = _("tag")
verbose_name_plural = _("tags")
def save(self, *args, **kwargs):
if self.colour == "":
self.colour = ColorHash(
@@ -105,7 +105,9 @@ class Tag(MatchingModel):
class DocumentType(MatchingModel):
pass
class Meta:
verbose_name = _("document type")
verbose_name_plural = _("document types")
class Document(models.Model):
@@ -113,8 +115,8 @@ class Document(models.Model):
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
STORAGE_TYPE_GPG = "gpg"
STORAGE_TYPES = (
(STORAGE_TYPE_UNENCRYPTED, "Unencrypted"),
(STORAGE_TYPE_GPG, "Encrypted with GNU Privacy Guard")
(STORAGE_TYPE_UNENCRYPTED, _("Unencrypted")),
(STORAGE_TYPE_GPG, _("Encrypted with GNU Privacy Guard"))
)
correspondent = models.ForeignKey(
@@ -122,54 +124,68 @@ class Document(models.Model):
blank=True,
null=True,
related_name="documents",
on_delete=models.SET_NULL
on_delete=models.SET_NULL,
verbose_name=_("correspondent")
)
title = models.CharField(max_length=128, blank=True, db_index=True)
title = models.CharField(
_("title"),
max_length=128, blank=True, db_index=True)
document_type = models.ForeignKey(
DocumentType,
blank=True,
null=True,
related_name="documents",
on_delete=models.SET_NULL
on_delete=models.SET_NULL,
verbose_name=_("document type")
)
content = models.TextField(
_("content"),
blank=True,
help_text="The raw, text-only data of the document. This field is "
"primarily used for searching."
help_text=_("The raw, text-only data of the document. This field is "
"primarily used for searching.")
)
mime_type = models.CharField(
_("mime type"),
max_length=256,
editable=False
)
tags = models.ManyToManyField(
Tag, related_name="documents", blank=True)
Tag, related_name="documents", blank=True,
verbose_name=_("tags")
)
checksum = models.CharField(
_("checksum"),
max_length=32,
editable=False,
unique=True,
help_text="The checksum of the original document."
help_text=_("The checksum of the original document.")
)
archive_checksum = models.CharField(
_("archive checksum"),
max_length=32,
editable=False,
blank=True,
null=True,
help_text="The checksum of the archived document."
help_text=_("The checksum of the archived document.")
)
created = models.DateTimeField(
_("created"),
default=timezone.now, db_index=True)
modified = models.DateTimeField(
_("modified"),
auto_now=True, editable=False, db_index=True)
storage_type = models.CharField(
_("storage type"),
max_length=11,
choices=STORAGE_TYPES,
default=STORAGE_TYPE_UNENCRYPTED,
@@ -177,36 +193,53 @@ class Document(models.Model):
)
added = models.DateTimeField(
_("added"),
default=timezone.now, editable=False, db_index=True)
filename = models.FilePathField(
_("filename"),
max_length=1024,
editable=False,
default=None,
unique=True,
null=True,
help_text="Current filename in storage"
help_text=_("Current filename in storage")
)
archive_filename = models.FilePathField(
_("archive filename"),
max_length=1024,
editable=False,
default=None,
unique=True,
null=True,
help_text=_("Current archive filename in storage")
)
archive_serial_number = models.IntegerField(
_("archive serial number"),
blank=True,
null=True,
unique=True,
db_index=True,
help_text="The position of this document in your physical document "
"archive."
help_text=_("The position of this document in your physical document "
"archive.")
)
class Meta:
ordering = ("correspondent", "title")
ordering = ("-created",)
verbose_name = _("document")
verbose_name_plural = _("documents")
def __str__(self):
created = self.created.strftime("%Y%m%d")
if is_aware(self.created):
created = timezone.localdate(self.created).isoformat()
else:
created = datetime.date.isoformat(self.created)
if self.correspondent and self.title:
return "{}: {} - {}".format(
created, self.correspondent, self.title)
if self.correspondent or self.title:
return "{}: {}".format(created, self.correspondent or self.title)
return str(created)
return f"{created} {self.correspondent} {self.title}"
else:
return f"{created} {self.title}"
@property
def source_path(self):
@@ -215,7 +248,7 @@ class Document(models.Model):
else:
fname = "{:07}{}".format(self.pk, self.file_type)
if self.storage_type == self.STORAGE_TYPE_GPG:
fname += ".gpg"
fname += ".gpg" # pragma: no cover
return os.path.join(
settings.ORIGINALS_DIR,
@@ -227,28 +260,38 @@ class Document(models.Model):
return open(self.source_path, "rb")
@property
def archive_path(self):
if self.filename:
fname = archive_name_from_filename(self.filename)
else:
fname = "{:07}.pdf".format(self.pk)
def has_archive_version(self):
return self.archive_filename is not None
return os.path.join(
settings.ARCHIVE_DIR,
fname
)
@property
def archive_path(self):
if self.has_archive_version:
return os.path.join(
settings.ARCHIVE_DIR,
str(self.archive_filename)
)
else:
return None
@property
def archive_file(self):
return open(self.archive_path, "rb")
@property
def file_name(self):
return slugify(str(self)) + self.file_type
def get_public_filename(self, archive=False, counter=0, suffix=None):
result = str(self)
@property
def archive_file_name(self):
return slugify(str(self)) + ".pdf"
if counter:
result += f"_{counter:02}"
if suffix:
result += suffix
if archive:
result += ".pdf"
else:
result += self.file_type
return pathvalidate.sanitize_filename(result, replacement_text="-")
@property
def file_type(self):
@@ -273,76 +316,116 @@ class Document(models.Model):
class Log(models.Model):
LEVELS = (
(logging.DEBUG, "Debugging"),
(logging.INFO, "Informational"),
(logging.WARNING, "Warning"),
(logging.ERROR, "Error"),
(logging.CRITICAL, "Critical"),
(logging.DEBUG, _("debug")),
(logging.INFO, _("information")),
(logging.WARNING, _("warning")),
(logging.ERROR, _("error")),
(logging.CRITICAL, _("critical")),
)
group = models.UUIDField(blank=True, null=True)
message = models.TextField()
level = models.PositiveIntegerField(choices=LEVELS, default=logging.INFO)
created = models.DateTimeField(auto_now_add=True)
group = models.UUIDField(
_("group"),
blank=True, null=True)
message = models.TextField(_("message"))
level = models.PositiveIntegerField(
_("level"),
choices=LEVELS, default=logging.INFO)
created = models.DateTimeField(_("created"), auto_now_add=True)
class Meta:
ordering = ("-created",)
verbose_name = _("log")
verbose_name_plural = _("logs")
def __str__(self):
return self.message
class SavedView(models.Model):
class Meta:
ordering = ("name",)
verbose_name = _("saved view")
verbose_name_plural = _("saved views")
user = models.ForeignKey(User, on_delete=models.CASCADE,
verbose_name=_("user"))
name = models.CharField(
_("name"),
max_length=128)
show_on_dashboard = models.BooleanField(
_("show on dashboard"),
)
show_in_sidebar = models.BooleanField(
_("show in sidebar"),
)
sort_field = models.CharField(
_("sort field"),
max_length=128)
sort_reverse = models.BooleanField(
_("sort reverse"),
default=False)
class SavedViewFilterRule(models.Model):
RULE_TYPES = [
(0, _("title contains")),
(1, _("content contains")),
(2, _("ASN is")),
(3, _("correspondent is")),
(4, _("document type is")),
(5, _("is in inbox")),
(6, _("has tag")),
(7, _("has any tag")),
(8, _("created before")),
(9, _("created after")),
(10, _("created year is")),
(11, _("created month is")),
(12, _("created day is")),
(13, _("added before")),
(14, _("added after")),
(15, _("modified before")),
(16, _("modified after")),
(17, _("does not have tag")),
]
saved_view = models.ForeignKey(
SavedView,
on_delete=models.CASCADE,
related_name="filter_rules",
verbose_name=_("saved view")
)
rule_type = models.PositiveIntegerField(
_("rule type"),
choices=RULE_TYPES)
value = models.CharField(
_("value"),
max_length=128,
blank=True,
null=True)
class Meta:
verbose_name = _("filter rule")
verbose_name_plural = _("filter rules")
# TODO: why is this in the models file?
class FileInfo:
# This epic regex *almost* worked for our needs, so I'm keeping it here for
# posterity, in the hopes that we might find a way to make it work one day.
ALMOST_REGEX = re.compile(
r"^((?P<date>\d\d\d\d\d\d\d\d\d\d\d\d\d\dZ){separator})?"
r"((?P<correspondent>{non_separated_word}+){separator})??"
r"(?P<title>{non_separated_word}+)"
r"({separator}(?P<tags>[a-z,0-9-]+))?"
r"\.(?P<extension>[a-zA-Z.-]+)$".format(
separator=r"\s+-\s+",
non_separated_word=r"([\w,. ]|([^\s]-))"
)
)
REGEXES = OrderedDict([
("created-correspondent-title-tags", re.compile(
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
r"(?P<correspondent>.*) - "
r"(?P<title>.*) - "
r"(?P<tags>[a-z0-9\-,]*)$",
flags=re.IGNORECASE
)),
("created-title-tags", re.compile(
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
r"(?P<title>.*) - "
r"(?P<tags>[a-z0-9\-,]*)$",
flags=re.IGNORECASE
)),
("created-correspondent-title", re.compile(
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
r"(?P<correspondent>.*) - "
r"(?P<title>.*)$",
flags=re.IGNORECASE
)),
("created-title", re.compile(
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
r"(?P<title>.*)$",
flags=re.IGNORECASE
)),
("correspondent-title-tags", re.compile(
r"(?P<correspondent>.*) - "
r"(?P<title>.*) - "
r"(?P<tags>[a-z0-9\-,]*)$",
flags=re.IGNORECASE
)),
("correspondent-title", re.compile(
r"(?P<correspondent>.*) - "
r"(?P<title>.*)?$",
flags=re.IGNORECASE
)),
("title", re.compile(
r"(?P<title>.*)$",
flags=re.IGNORECASE
@@ -365,28 +448,10 @@ class FileInfo:
except ValueError:
return None
@classmethod
def _get_correspondent(cls, name):
if not name:
return None
return Correspondent.objects.get_or_create(name=name, defaults={
"slug": slugify(name)
})[0]
@classmethod
def _get_title(cls, title):
return title
@classmethod
def _get_tags(cls, tags):
r = []
for t in tags.split(","):
r.append(Tag.objects.get_or_create(
slug=slugify(t),
defaults={"name": t}
)[0])
return tuple(r)
@classmethod
def _mangle_property(cls, properties, name):
if name in properties:
@@ -396,15 +461,6 @@ class FileInfo:
@classmethod
def from_filename(cls, filename):
"""
We use a crude naming convention to make handling the correspondent,
title, and tags easier:
"<date> - <correspondent> - <title> - <tags>"
"<correspondent> - <title> - <tags>"
"<correspondent> - <title>"
"<title>"
"""
# Mutate filename in-place before parsing its components
# by applying at most one of the configured transformations.
for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS:
@@ -435,7 +491,5 @@ class FileInfo:
if m:
properties = m.groupdict()
cls._mangle_property(properties, "created")
cls._mangle_property(properties, "correspondent")
cls._mangle_property(properties, "title")
cls._mangle_property(properties, "tags")
return cls(**properties)
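A short sketch of the first pattern above, on a hypothetical filename that has already had its extension stripped:
m = FileInfo.REGEXES["created-correspondent-title"].match(
    "20200117Z - ACME - Invoice 42")
print(m.groupdict())
# {'created': '20200117Z', 'correspondent': 'ACME', 'title': 'Invoice 42'}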

View File

@@ -6,7 +6,6 @@ import shutil
import subprocess
import tempfile
import dateparser
import magic
from django.conf import settings
from django.utils import timezone
@@ -36,7 +35,7 @@ DATE_REGEX = re.compile(
)
logger = logging.getLogger(__name__)
logger = logging.getLogger("paperless.parsing")
def is_mime_type_supported(mime_type):
@@ -117,6 +116,7 @@ def run_convert(input_file,
trim=False,
type=None,
depth=None,
auto_orient=False,
extra=None,
logging_group=None):
@@ -134,6 +134,7 @@ def run_convert(input_file,
args += ['-trim'] if trim else []
args += ['-type', str(type)] if type else []
args += ['-depth', str(depth)] if depth else []
args += ['-auto-orient'] if auto_orient else []
args += [input_file, output_file]
logger.debug("Execute: " + " ".join(args), extra={'group': logging_group})
@@ -142,6 +143,53 @@ def run_convert(input_file,
raise ParseError("Convert failed at {}".format(args))
def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None):
"""
The thumbnail of a PDF is just a 500px wide image of the first page.
"""
out_path = os.path.join(temp_dir, "convert.png")
# Run convert to get a decent thumbnail
try:
run_convert(density=300,
scale="500x5000>",
alpha="remove",
strip=True,
trim=False,
auto_orient=True,
input_file="{}[0]".format(in_path),
output_file=out_path,
logging_group=logging_group)
except ParseError:
# if convert fails, fall back to extracting
# the first PDF page as a PNG using Ghostscript
logger.warning(
"Thumbnail generation with ImageMagick failed, falling back "
"to ghostscript. Check your /etc/ImageMagick-x/policy.xml!",
extra={'group': logging_group}
)
gs_out_path = os.path.join(temp_dir, "gs_out.png")
cmd = [settings.GS_BINARY,
"-q",
"-sDEVICE=pngalpha",
"-o", gs_out_path,
in_path]
if not subprocess.Popen(cmd).wait() == 0:
raise ParseError("Thumbnail (gs) failed at {}".format(cmd))
# then run convert on the output from gs
run_convert(density=300,
scale="500x5000>",
alpha="remove",
strip=True,
trim=False,
auto_orient=True,
input_file=gs_out_path,
output_file=out_path,
logging_group=logging_group)
return out_path
def parse_date(filename, text):
"""
Returns the date of the document.
@@ -151,6 +199,8 @@ def parse_date(filename, text):
"""
Call dateparser.parse with a particular date ordering
"""
import dateparser
return dateparser.parse(
ds,
settings={
@@ -161,9 +211,14 @@ def parse_date(filename, text):
}
)
date = None
def __filter(date):
if date and date.year > 1900 and \
date <= timezone.now() and \
date.date() not in settings.IGNORE_DATES:
return date
return None
next_year = timezone.now().year + 5 # Arbitrary 5 year future limit
date = None
# if filename date parsing is enabled, search there first:
if settings.FILENAME_DATE_ORDER:
@@ -176,7 +231,8 @@ def parse_date(filename, text):
# Skip all matches that do not parse to a proper date
continue
if date is not None and next_year > date.year > 1900:
date = __filter(date)
if date is not None:
return date
# Iterate through all regex matches in text and try to parse the date
@@ -189,10 +245,9 @@ def parse_date(filename, text):
# Skip all matches that do not parse to a proper date
continue
if date is not None and next_year > date.year > 1900:
date = __filter(date)
if date is not None:
break
else:
date = None
return date
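A brief note on the __filter gate above, with a hypothetical setting:
import datetime
# With settings.IGNORE_DATES = {datetime.date(2020, 1, 1)}, a candidate
# parsed from "01.01.2020" fails __filter and the scan continues with the
# next regex match; dates before 1900 or in the future are dropped too.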
@@ -207,30 +262,44 @@ class DocumentParser(LoggingMixin):
`paperless_tesseract.parsers` for inspiration.
"""
def __init__(self, logging_group):
logging_name = "paperless.parsing"
def __init__(self, logging_group, progress_callback=None):
super().__init__()
self.logging_group = logging_group
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
self.tempdir = tempfile.mkdtemp(
prefix="paperless-", dir=settings.SCRATCH_DIR)
self.archive_path = None
self.text = None
self.date = None
self.progress_callback = progress_callback
def parse(self, document_path, mime_type):
def progress(self, current_progress, max_progress):
if self.progress_callback:
self.progress_callback(current_progress, max_progress)
def extract_metadata(self, document_path, mime_type):
return []
def parse(self, document_path, mime_type, file_name=None):
raise NotImplementedError()
def get_archive_path(self):
return self.archive_path
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
"""
Returns the path to a file we can use as a thumbnail for this document.
"""
raise NotImplementedError()
def get_optimised_thumbnail(self, document_path, mime_type):
thumbnail = self.get_thumbnail(document_path, mime_type)
def get_optimised_thumbnail(self,
document_path,
mime_type,
file_name=None):
thumbnail = self.get_thumbnail(document_path, mime_type, file_name)
if settings.OPTIMIZE_THUMBNAILS:
out_path = os.path.join(self.tempdir, "thumb_optipng.png")
@@ -253,5 +322,5 @@ class DocumentParser(LoggingMixin):
return self.date
def cleanup(self):
self.log("debug", "Deleting directory {}".format(self.tempdir))
self.log("debug", f"Deleting directory {self.tempdir}")
shutil.rmtree(self.tempdir)
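Concrete parsers subclass DocumentParser, fill in self.text (and optionally self.archive_path), and report progress through the new callback. A minimal hedged sketch against the signatures introduced here; the class itself is not part of this commit:

class PlainTextParser(DocumentParser):

    def parse(self, document_path, mime_type, file_name=None):
        self.progress(50, 100)
        with open(document_path) as f:
            self.text = f.read()
        self.progress(100, 100)

    def get_thumbnail(self, document_path, mime_type, file_name=None):
        # A real parser renders a PNG into self.tempdir and returns its path.
        raise NotImplementedError()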

View File

@@ -1,117 +1,145 @@
import hashlib
import logging
import os
from django.conf import settings
from tqdm import tqdm
from documents.models import Document
class SanityMessage:
message = None
class SanityCheckMessages:
def __init__(self):
self._messages = []
def error(self, message):
self._messages.append({"level": logging.ERROR, "message": message})
def warning(self, message):
self._messages.append({"level": logging.WARNING, "message": message})
def info(self, message):
self._messages.append({"level": logging.INFO, "message": message})
def log_messages(self):
logger = logging.getLogger("paperless.sanity_checker")
if len(self._messages) == 0:
logger.info("Sanity checker detected no issues.")
else:
for msg in self._messages:
logger.log(msg['level'], msg['message'])
def __len__(self):
return len(self._messages)
def __getitem__(self, item):
return self._messages[item]
def has_error(self):
return any([msg['level'] == logging.ERROR for msg in self._messages])
def has_warning(self):
return any([msg['level'] == logging.WARNING for msg in self._messages])
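A quick usage sketch of the new message container (messages are illustrative):

messages = SanityCheckMessages()
messages.error("Thumbnail of document 42 does not exist.")
messages.warning("Orphaned file in media dir: /media/stray.pdf")
messages.log_messages()   # routes each entry through the paperless.sanity_checker logger
if messages.has_error():
    pass                  # callers such as the sanity_check task decide how to react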
class SanityWarning(SanityMessage):
def __init__(self, message):
self.message = message
def __str__(self):
return f"Warning: {self.message}"
class SanityCheckFailedException(Exception):
pass
class SanityError(SanityMessage):
def __init__(self, message):
self.message = message
def __str__(self):
return f"ERROR: {self.message}"
class SanityFailedError(Exception):
def __init__(self, messages):
self.messages = messages
def __str__(self):
message_string = "\n".join([str(m) for m in self.messages])
return (
f"The following issuse were found by the sanity checker:\n"
f"{message_string}\n\n===============\n\n")
def check_sanity():
messages = []
def check_sanity(progress=False):
messages = SanityCheckMessages()
present_files = []
for root, subdirs, files in os.walk(settings.MEDIA_ROOT):
for f in files:
present_files.append(os.path.normpath(os.path.join(root, f)))
for doc in Document.objects.all():
lockfile = os.path.normpath(settings.MEDIA_LOCK)
if lockfile in present_files:
present_files.remove(lockfile)
if progress:
docs = tqdm(Document.objects.all())
else:
docs = Document.objects.all()
for doc in docs:
# Check sanity of the thumbnail
if not os.path.isfile(doc.thumbnail_path):
messages.append(SanityError(
f"Thumbnail of document {doc.pk} does not exist."))
messages.error(f"Thumbnail of document {doc.pk} does not exist.")
else:
present_files.remove(os.path.normpath(doc.thumbnail_path))
if os.path.normpath(doc.thumbnail_path) in present_files:
present_files.remove(os.path.normpath(doc.thumbnail_path))
try:
with doc.thumbnail_file as f:
f.read()
except OSError as e:
messages.append(SanityError(
messages.error(
f"Cannot read thumbnail file of document {doc.pk}: {e}"
))
)
# Check sanity of the original file
# TODO: extract method
if not os.path.isfile(doc.source_path):
messages.append(SanityError(
f"Original of document {doc.pk} does not exist."))
messages.error(f"Original of document {doc.pk} does not exist.")
else:
present_files.remove(os.path.normpath(doc.source_path))
if os.path.normpath(doc.source_path) in present_files:
present_files.remove(os.path.normpath(doc.source_path))
try:
with doc.source_file as f:
checksum = hashlib.md5(f.read()).hexdigest()
except OSError as e:
messages.append(SanityError(
f"Cannot read original file of document {doc.pk}: {e}"))
messages.error(
f"Cannot read original file of document {doc.pk}: {e}")
else:
if not checksum == doc.checksum:
messages.append(SanityError(
messages.error(
f"Checksum mismatch of document {doc.pk}. "
f"Stored: {doc.checksum}, actual: {checksum}."
))
)
# Check sanity of the archive file.
if doc.archive_checksum:
if doc.archive_checksum and not doc.archive_filename:
messages.error(
f"Document {doc.pk} has an archive file checksum, but no "
f"archive filename."
)
elif not doc.archive_checksum and doc.archive_filename:
messages.error(
f"Document {doc.pk} has an archive file, but its checksum is "
f"missing."
)
elif doc.has_archive_version:
if not os.path.isfile(doc.archive_path):
messages.append(SanityError(
messages.error(
f"Archived version of document {doc.pk} does not exist."
))
)
else:
present_files.remove(os.path.normpath(doc.archive_path))
if os.path.normpath(doc.archive_path) in present_files:
present_files.remove(os.path.normpath(doc.archive_path))
try:
with doc.archive_file as f:
checksum = hashlib.md5(f.read()).hexdigest()
except OSError as e:
messages.append(SanityError(
messages.error(
f"Cannot read archive file of document {doc.pk}: {e}"
))
)
else:
if not checksum == doc.archive_checksum:
messages.append(SanityError(
f"Checksum mismatch of archive {doc.pk}. "
f"Stored: {doc.checksum}, actual: {checksum}."
))
messages.error(
f"Checksum mismatch of archived document "
f"{doc.pk}. "
f"Stored: {doc.archive_checksum}, "
f"actual: {checksum}."
)
# other document checks
if not doc.content:
messages.append(SanityWarning(
f"Document {doc.pk} has no content."
))
messages.info(f"Document {doc.pk} has no content.")
for extra_file in present_files:
messages.append(SanityWarning(
f"Orphaned file in media dir: {extra_file}"
))
messages.warning(f"Orphaned file in media dir: {extra_file}")
return messages

View File

@@ -1,12 +1,62 @@
import re
import magic
from django.utils.text import slugify
from rest_framework import serializers
from rest_framework.fields import SerializerMethodField
from .models import Correspondent, Tag, Document, Log, DocumentType
from . import bulk_edit
from .models import Correspondent, Tag, Document, DocumentType, \
SavedView, SavedViewFilterRule, MatchingModel
from .parsers import is_mime_type_supported
from django.utils.translation import gettext as _
class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
# https://www.django-rest-framework.org/api-guide/serializers/#example
class DynamicFieldsModelSerializer(serializers.ModelSerializer):
"""
A ModelSerializer that takes an additional `fields` argument that
controls which fields should be displayed.
"""
def __init__(self, *args, **kwargs):
# Don't pass the 'fields' arg up to the superclass
fields = kwargs.pop('fields', None)
# Instantiate the superclass normally
super(DynamicFieldsModelSerializer, self).__init__(*args, **kwargs)
if fields is not None:
# Drop any fields that are not specified in the `fields` argument.
allowed = set(fields)
existing = set(self.fields)
for field_name in existing - allowed:
self.fields.pop(field_name)
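Usage sketch: any subclass can now be asked for a subset of its declared fields, which keeps list payloads small (field names illustrative):

# Only 'id' and 'title' survive; every other declared field is popped.
serializer = DocumentSerializer(doc, fields=("id", "title"))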
class MatchingModelSerializer(serializers.ModelSerializer):
document_count = serializers.IntegerField(read_only=True)
def get_slug(self, obj):
return slugify(obj.name)
slug = SerializerMethodField()
def validate_match(self, match):
if 'matching_algorithm' in self.initial_data and self.initial_data['matching_algorithm'] == MatchingModel.MATCH_REGEX: # NOQA: E501
try:
re.compile(match)
except Exception as e:
raise serializers.ValidationError(
_("Invalid regular expresssion: %(error)s") %
{'error': str(e)}
)
return match
class CorrespondentSerializer(MatchingModelSerializer):
last_correspondence = serializers.DateTimeField(read_only=True)
class Meta:
@@ -23,9 +73,7 @@ class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
)
class DocumentTypeSerializer(serializers.HyperlinkedModelSerializer):
document_count = serializers.IntegerField(read_only=True)
class DocumentTypeSerializer(MatchingModelSerializer):
class Meta:
model = DocumentType
@@ -40,9 +88,7 @@ class DocumentTypeSerializer(serializers.HyperlinkedModelSerializer):
)
class TagSerializer(serializers.HyperlinkedModelSerializer):
document_count = serializers.IntegerField(read_only=True)
class TagSerializer(MatchingModelSerializer):
class Meta:
model = Tag
@@ -74,13 +120,23 @@ class DocumentTypeField(serializers.PrimaryKeyRelatedField):
return DocumentType.objects.all()
class DocumentSerializer(serializers.ModelSerializer):
class DocumentSerializer(DynamicFieldsModelSerializer):
correspondent_id = CorrespondentField(
allow_null=True, source='correspondent')
tags_id = TagsField(many=True, source='tags')
document_type_id = DocumentTypeField(
allow_null=True, source='document_type')
correspondent = CorrespondentField(allow_null=True)
tags = TagsField(many=True)
document_type = DocumentTypeField(allow_null=True)
original_file_name = SerializerMethodField()
archived_file_name = SerializerMethodField()
def get_original_file_name(self, obj):
return obj.get_public_filename()
def get_archived_file_name(self, obj):
if obj.has_archive_version:
return obj.get_public_filename(archive=True)
else:
return None
class Meta:
model = Document
@@ -88,28 +144,280 @@ class DocumentSerializer(serializers.ModelSerializer):
fields = (
"id",
"correspondent",
"correspondent_id",
"document_type",
"document_type_id",
"title",
"content",
"tags",
"tags_id",
"created",
"modified",
"added",
"archive_serial_number"
"archive_serial_number",
"original_file_name",
"archived_file_name",
)
class LogSerializer(serializers.ModelSerializer):
class SavedViewFilterRuleSerializer(serializers.ModelSerializer):
class Meta:
model = Log
fields = (
"id",
"created",
"message",
"group",
"level"
)
model = SavedViewFilterRule
fields = ["rule_type", "value"]
class SavedViewSerializer(serializers.ModelSerializer):
filter_rules = SavedViewFilterRuleSerializer(many=True)
class Meta:
model = SavedView
depth = 1
fields = ["id", "name", "show_on_dashboard", "show_in_sidebar",
"sort_field", "sort_reverse", "filter_rules"]
def update(self, instance, validated_data):
if 'filter_rules' in validated_data:
rules_data = validated_data.pop('filter_rules')
else:
rules_data = None
super(SavedViewSerializer, self).update(instance, validated_data)
if rules_data is not None:
SavedViewFilterRule.objects.filter(saved_view=instance).delete()
for rule_data in rules_data:
SavedViewFilterRule.objects.create(
saved_view=instance, **rule_data)
return instance
def create(self, validated_data):
rules_data = validated_data.pop('filter_rules')
saved_view = SavedView.objects.create(**validated_data)
for rule_data in rules_data:
SavedViewFilterRule.objects.create(
saved_view=saved_view, **rule_data)
return saved_view
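Because filter_rules is a nested serializer, create() and update() manage the rules by hand, and update() replaces them wholesale. A sketch of the expected payload shape (the rule_type id and the save(user=...) pass-through are illustrative assumptions):

payload = {
    "name": "Inbox",
    "show_on_dashboard": True,
    "show_in_sidebar": True,
    "sort_field": "created",
    "sort_reverse": True,
    "filter_rules": [{"rule_type": 6, "value": "1"}],
}
serializer = SavedViewSerializer(data=payload)
serializer.is_valid(raise_exception=True)
view = serializer.save(user=request.user)  # assumes the view injects the user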
class DocumentListSerializer(serializers.Serializer):
documents = serializers.ListField(
required=True,
label="Documents",
write_only=True,
child=serializers.IntegerField()
)
def _validate_document_id_list(self, documents, name="documents"):
if not type(documents) == list:
raise serializers.ValidationError(f"{name} must be a list")
if not all([type(i) == int for i in documents]):
raise serializers.ValidationError(
f"{name} must be a list of integers")
count = Document.objects.filter(id__in=documents).count()
if not count == len(documents):
raise serializers.ValidationError(
f"Some documents in {name} don't exist or were "
f"specified twice.")
def validate_documents(self, documents):
self._validate_document_id_list(documents)
return documents
class BulkEditSerializer(DocumentListSerializer):
method = serializers.ChoiceField(
choices=[
"set_correspondent",
"set_document_type",
"add_tag",
"remove_tag",
"modify_tags",
"delete"
],
label="Method",
write_only=True,
)
parameters = serializers.DictField(allow_empty=True)
def _validate_tag_id_list(self, tags, name="tags"):
if not type(tags) == list:
raise serializers.ValidationError(f"{name} must be a list")
if not all([type(i) == int for i in tags]):
raise serializers.ValidationError(
f"{name} must be a list of integers")
count = Tag.objects.filter(id__in=tags).count()
if not count == len(tags):
raise serializers.ValidationError(
f"Some tags in {name} don't exist or were specified twice.")
def validate_method(self, method):
if method == "set_correspondent":
return bulk_edit.set_correspondent
elif method == "set_document_type":
return bulk_edit.set_document_type
elif method == "add_tag":
return bulk_edit.add_tag
elif method == "remove_tag":
return bulk_edit.remove_tag
elif method == "modify_tags":
return bulk_edit.modify_tags
elif method == "delete":
return bulk_edit.delete
else:
raise serializers.ValidationError("Unsupported method.")
def _validate_parameters_tags(self, parameters):
if 'tag' in parameters:
tag_id = parameters['tag']
try:
Tag.objects.get(id=tag_id)
except Tag.DoesNotExist:
raise serializers.ValidationError("Tag does not exist")
else:
raise serializers.ValidationError("tag not specified")
def _validate_parameters_document_type(self, parameters):
if 'document_type' in parameters:
document_type_id = parameters['document_type']
if document_type_id is None:
# None is ok
return
try:
DocumentType.objects.get(id=document_type_id)
except DocumentType.DoesNotExist:
raise serializers.ValidationError(
"Document type does not exist")
else:
raise serializers.ValidationError("document_type not specified")
def _validate_parameters_correspondent(self, parameters):
if 'correspondent' in parameters:
correspondent_id = parameters['correspondent']
if correspondent_id is None:
return
try:
Correspondent.objects.get(id=correspondent_id)
except Correspondent.DoesNotExist:
raise serializers.ValidationError(
"Correspondent does not exist")
else:
raise serializers.ValidationError("correspondent not specified")
def _validate_parameters_modify_tags(self, parameters):
if "add_tags" in parameters:
self._validate_tag_id_list(parameters['add_tags'], "add_tags")
else:
raise serializers.ValidationError("add_tags not specified")
if "remove_tags" in parameters:
self._validate_tag_id_list(parameters['remove_tags'],
"remove_tags")
else:
raise serializers.ValidationError("remove_tags not specified")
def validate(self, attrs):
method = attrs['method']
parameters = attrs['parameters']
if method == bulk_edit.set_correspondent:
self._validate_parameters_correspondent(parameters)
elif method == bulk_edit.set_document_type:
self._validate_parameters_document_type(parameters)
elif method == bulk_edit.add_tag or method == bulk_edit.remove_tag:
self._validate_parameters_tags(parameters)
elif method == bulk_edit.modify_tags:
self._validate_parameters_modify_tags(parameters)
return attrs
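Note that validate_method swaps the method name for the bulk_edit function itself, so the view can call validated_data['method'] directly. A sketch of a valid payload (ids illustrative):

payload = {
    "documents": [11, 12, 13],
    "method": "modify_tags",
    "parameters": {"add_tags": [2], "remove_tags": [7]},
}
serializer = BulkEditSerializer(data=payload)
serializer.is_valid(raise_exception=True)
# validated_data['method'] is now bulk_edit.modify_tags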
class PostDocumentSerializer(serializers.Serializer):
document = serializers.FileField(
label="Document",
write_only=True,
)
title = serializers.CharField(
label="Title",
write_only=True,
required=False,
)
correspondent = serializers.PrimaryKeyRelatedField(
queryset=Correspondent.objects.all(),
label="Correspondent",
allow_null=True,
write_only=True,
required=False,
)
document_type = serializers.PrimaryKeyRelatedField(
queryset=DocumentType.objects.all(),
label="Document type",
allow_null=True,
write_only=True,
required=False,
)
tags = serializers.PrimaryKeyRelatedField(
many=True,
queryset=Tag.objects.all(),
label="Tags",
write_only=True,
required=False,
)
def validate_document(self, document):
document_data = document.file.read()
mime_type = magic.from_buffer(document_data, mime=True)
if not is_mime_type_supported(mime_type):
raise serializers.ValidationError(
_("File type %(type)s not supported") %
{'type': mime_type}
)
return document.name, document_data
def validate_correspondent(self, correspondent):
if correspondent:
return correspondent.id
else:
return None
def validate_document_type(self, document_type):
if document_type:
return document_type.id
else:
return None
def validate_tags(self, tags):
if tags:
return [tag.id for tag in tags]
else:
return None
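validate_document returns a (name, bytes) tuple after MIME-sniffing the upload, so the endpoint never trusts the file extension. A client-side sketch, assuming the upload endpoint path and the requests package (both assumptions, not part of this diff):

import requests

with open("invoice.pdf", "rb") as f:
    requests.post(
        "http://localhost:8000/api/documents/post_document/",  # assumed path
        headers={"Authorization": "Token <token>"},
        files={"document": f},
        data={"title": "Invoice", "tags": [1, 2]},
    )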
class BulkDownloadSerializer(DocumentListSerializer):
content = serializers.ChoiceField(
choices=["archive", "originals", "both"],
default="archive"
)
compression = serializers.ChoiceField(
choices=["none", "deflated", "bzip2", "lzma"],
default="none"
)
def validate_compression(self, compression):
import zipfile
return {
"none": zipfile.ZIP_STORED,
"deflated": zipfile.ZIP_DEFLATED,
"bzip2": zipfile.ZIP_BZIP2,
"lzma": zipfile.ZIP_LZMA
}[compression]
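The compression choice maps straight onto a zipfile constant, so the bulk-download view can hand it to ZipFile unchanged; a small sketch:

import zipfile

# "deflated" from the serializer arrives here as zipfile.ZIP_DEFLATED.
with zipfile.ZipFile("documents.zip", "w", compression=zipfile.ZIP_DEFLATED) as z:
    z.write("invoice.pdf")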

View File

@@ -1,24 +1,24 @@
import logging
import os
from subprocess import Popen
from django.conf import settings
from django.contrib.admin.models import ADDITION, LogEntry
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError
from django.db.models import Q
from django.dispatch import receiver
from django.utils import timezone
from rest_framework.reverse import reverse
from filelock import FileLock
from .. import index, matching
from ..file_handling import delete_empty_directories, generate_filename, \
create_source_path_directory, archive_name_from_filename
from .. import matching
from ..file_handling import delete_empty_directories, \
create_source_path_directory, \
generate_unique_filename
from ..models import Document, Tag
def logger(message, group):
logging.getLogger(__name__).debug(message, extra={"group": group})
logger = logging.getLogger("paperless.handlers")
def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
@@ -36,7 +36,7 @@ def set_correspondent(sender,
if document.correspondent and not replace:
return
potential_correspondents = matching.match_correspondents(document.content,
potential_correspondents = matching.match_correspondents(document,
classifier)
potential_count = len(potential_correspondents)
@@ -46,23 +46,23 @@ def set_correspondent(sender,
selected = None
if potential_count > 1:
if use_first:
logger(
logger.info(
f"Detected {potential_count} potential correspondents, "
f"so we've opted for {selected}",
logging_group
extra={'group': logging_group}
)
else:
logger(
logger.info(
f"Detected {potential_count} potential correspondents, "
f"not assigning any correspondent",
logging_group
extra={'group': logging_group}
)
return
if selected or replace:
logger(
logger.info(
f"Assigning correspondent {selected} to {document}",
logging_group
extra={'group': logging_group}
)
document.correspondent = selected
@@ -79,7 +79,7 @@ def set_document_type(sender,
if document.document_type and not replace:
return
potential_document_type = matching.match_document_types(document.content,
potential_document_type = matching.match_document_types(document,
classifier)
potential_count = len(potential_document_type)
@@ -90,23 +90,23 @@ def set_document_type(sender,
if potential_count > 1:
if use_first:
logger(
logger.info(
f"Detected {potential_count} potential document types, "
f"so we've opted for {selected}",
logging_group
extra={'group': logging_group}
)
else:
logger(
logger.info(
f"Detected {potential_count} potential document types, "
f"not assigning any document type",
logging_group
extra={'group': logging_group}
)
return
if selected or replace:
logger(
logger.info(
f"Assigning document type {selected} to {document}",
logging_group
extra={'group': logging_group}
)
document.document_type = selected
@@ -119,13 +119,16 @@ def set_tags(sender,
classifier=None,
replace=False,
**kwargs):
if replace:
document.tags.clear()
current_tags = set([])
else:
current_tags = set(document.tags.all())
matched_tags = matching.match_tags(document.content, classifier)
if replace:
Document.tags.through.objects.filter(document=document).exclude(
Q(tag__is_inbox_tag=True)).exclude(
Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO)
).delete()
current_tags = set(document.tags.all())
matched_tags = matching.match_tags(document, classifier)
relevant_tags = set(matched_tags) - current_tags
@@ -133,82 +136,60 @@ def set_tags(sender,
return
message = 'Tagging "{}" with "{}"'
logger(
message.format(document, ", ".join([t.slug for t in relevant_tags])),
logging_group
logger.info(
message.format(document, ", ".join([t.name for t in relevant_tags])),
extra={'group': logging_group}
)
document.tags.add(*relevant_tags)
def run_pre_consume_script(sender, filename, **kwargs):
if not settings.PRE_CONSUME_SCRIPT:
return
Popen((settings.PRE_CONSUME_SCRIPT, filename)).wait()
def run_post_consume_script(sender, document, **kwargs):
if not settings.POST_CONSUME_SCRIPT:
return
Popen((
settings.POST_CONSUME_SCRIPT,
str(document.pk),
document.file_name,
os.path.normpath(document.source_path),
os.path.normpath(document.thumbnail_path),
reverse("document-download", kwargs={"pk": document.pk}),
reverse("document-thumb", kwargs={"pk": document.pk}),
str(document.correspondent),
str(",".join(document.tags.all().values_list("slug", flat=True)))
)).wait()
@receiver(models.signals.post_delete, sender=Document)
def cleanup_document_deletion(sender, instance, using, **kwargs):
for f in (instance.source_path,
instance.archive_path,
instance.thumbnail_path):
if os.path.isfile(f):
try:
os.unlink(f)
logging.getLogger(__name__).debug(
f"Deleted file {f}.")
except OSError as e:
logging.getLogger(__name__).warning(
f"While deleting document {instance.file_name}, the file "
f"{f} could not be deleted: {e}"
)
with FileLock(settings.MEDIA_LOCK):
for filename in (instance.source_path,
instance.archive_path,
instance.thumbnail_path):
if filename and os.path.isfile(filename):
try:
os.unlink(filename)
logger.debug(
f"Deleted file {filename}.")
except OSError as e:
logger.warning(
f"While deleting document {str(instance)}, the file "
f"{filename} could not be deleted: {e}"
)
delete_empty_directories(
os.path.dirname(instance.source_path),
root=settings.ORIGINALS_DIR
)
delete_empty_directories(
os.path.dirname(instance.source_path),
root=settings.ORIGINALS_DIR
)
delete_empty_directories(
os.path.dirname(instance.archive_path),
root=settings.ARCHIVE_DIR
)
if instance.has_archive_version:
delete_empty_directories(
os.path.dirname(instance.archive_path),
root=settings.ARCHIVE_DIR
)
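Deletion and renaming are now serialized through settings.MEDIA_LOCK via the filelock package, so consumers and signal handlers cannot race each other on the media directory. The pattern in isolation, as a hedged sketch (lock path illustrative):

from filelock import FileLock

with FileLock("/usr/src/paperless/media/media.lock"):
    pass  # rename or unlink media files safely here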
class CannotMoveFilesException(Exception):
pass
def validate_move(instance, old_path, new_path):
if not os.path.isfile(old_path):
# Can't do anything if the old file does not exist anymore.
logging.getLogger(__name__).fatal(
logger.fatal(
f"Document {str(instance)}: File {old_path} has gone.")
return False
raise CannotMoveFilesException()
if os.path.isfile(new_path):
# Can't do anything if the new file already exists. Skip updating file.
logging.getLogger(__name__).warning(
logger.warning(
f"Document {str(instance)}: Cannot rename file "
f"since target path {new_path} already exists.")
return False
return True
raise CannotMoveFilesException()
@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@@ -226,81 +207,86 @@ def update_filename_and_move_files(sender, instance, **kwargs):
# This will in turn cause this logic to move the file where it belongs.
return
old_filename = instance.filename
new_filename = generate_filename(instance)
if new_filename == instance.filename:
# Don't do anything if it's the same.
return
old_source_path = instance.source_path
new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename)
if not validate_move(instance, old_source_path, new_source_path):
return
# archive files are optional, archive checksum tells us if we have one,
# since this is None for documents without archived files.
if instance.archive_checksum:
new_archive_filename = archive_name_from_filename(new_filename)
old_archive_path = instance.archive_path
new_archive_path = os.path.join(settings.ARCHIVE_DIR,
new_archive_filename)
if not validate_move(instance, old_archive_path, new_archive_path):
return
create_source_path_directory(new_archive_path)
else:
old_archive_path = None
new_archive_path = None
create_source_path_directory(new_source_path)
try:
os.rename(old_source_path, new_source_path)
if instance.archive_checksum:
os.rename(old_archive_path, new_archive_path)
instance.filename = new_filename
# Don't save here to prevent infinite recursion.
Document.objects.filter(pk=instance.pk).update(filename=new_filename)
logging.getLogger(__name__).debug(
f"Moved file {old_source_path} to {new_source_path}.")
if instance.archive_checksum:
logging.getLogger(__name__).debug(
f"Moved file {old_archive_path} to {new_archive_path}.")
except OSError as e:
instance.filename = old_filename
# this happens when we can't move a file. If that's the case for the
# archive file, we try our best to revert the changes.
with FileLock(settings.MEDIA_LOCK):
try:
os.rename(new_source_path, old_source_path)
os.rename(new_archive_path, old_archive_path)
except Exception as e:
# This is fine, since:
# A: if we managed to move source from A to B, we will also manage
# to move it from B to A. If not, we have a serious issue
# that's going to get caught by the sanity checker.
# all files remain in place and will never be overwritten,
# so this is not the end of the world.
# B: if moving the original file failed, nothing has changed anyway.
pass
except DatabaseError as e:
os.rename(new_source_path, old_source_path)
if instance.archive_checksum:
os.rename(new_archive_path, old_archive_path)
instance.filename = old_filename
old_filename = instance.filename
old_source_path = instance.source_path
if not os.path.isfile(old_source_path):
delete_empty_directories(os.path.dirname(old_source_path),
root=settings.ORIGINALS_DIR)
instance.filename = generate_unique_filename(instance)
move_original = old_filename != instance.filename
if old_archive_path and not os.path.isfile(old_archive_path):
delete_empty_directories(os.path.dirname(old_archive_path),
root=settings.ARCHIVE_DIR)
old_archive_filename = instance.archive_filename
old_archive_path = instance.archive_path
if instance.has_archive_version:
instance.archive_filename = generate_unique_filename(
instance, archive_filename=True
)
move_archive = old_archive_filename != instance.archive_filename # NOQA: E501
else:
move_archive = False
if not move_original and not move_archive:
# Don't do anything if filenames did not change.
return
if move_original:
validate_move(instance, old_source_path, instance.source_path)
create_source_path_directory(instance.source_path)
os.rename(old_source_path, instance.source_path)
if move_archive:
validate_move(
instance, old_archive_path, instance.archive_path)
create_source_path_directory(instance.archive_path)
os.rename(old_archive_path, instance.archive_path)
# Don't save() here to prevent infinite recursion.
Document.objects.filter(pk=instance.pk).update(
filename=instance.filename,
archive_filename=instance.archive_filename,
)
except (OSError, DatabaseError, CannotMoveFilesException):
# This happens when either:
# - moving the files failed due to file system errors
# - saving to the database failed due to database errors
# In both cases, we need to revert to the original state.
# Try to move files to their original location.
try:
if move_original and os.path.isfile(instance.source_path):
os.rename(instance.source_path, old_source_path)
if move_archive and os.path.isfile(instance.archive_path):
os.rename(instance.archive_path, old_archive_path)
except Exception as e:
# This is fine, since:
# A: if we managed to move source from A to B, we will also
# manage to move it from B to A. If not, we have a serious
# issue that's going to get caught by the sanity checker.
# All files remain in place and will never be overwritten,
# so this is not the end of the world.
# B: if moving the original file failed, nothing has changed
# anyway.
pass
# restore old values on the instance
instance.filename = old_filename
instance.archive_filename = old_archive_filename
# finally, remove any empty sub folders. This will do nothing if
# something has failed above.
if not os.path.isfile(old_source_path):
delete_empty_directories(os.path.dirname(old_source_path),
root=settings.ORIGINALS_DIR)
if instance.has_archive_version and not os.path.isfile(old_archive_path): # NOQA: E501
delete_empty_directories(os.path.dirname(old_archive_path),
root=settings.ARCHIVE_DIR)
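The rewritten handler computes both new names first, moves whichever files changed, persists via a queryset update() to avoid re-triggering save(), and on any failure renames the files back and restores the instance fields. The core move-with-revert idea in isolation (persist_filename is a hypothetical stand-in for the .update() call):

import os

def rename_with_revert(old_path, new_path, persist_filename):
    os.rename(old_path, new_path)
    try:
        persist_filename()                 # hypothetical database write
    except Exception:
        os.rename(new_path, old_path)      # undo the move on failure
        raise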
def set_log_entry(sender, document=None, logging_group=None, **kwargs):
@@ -319,4 +305,6 @@ def set_log_entry(sender, document=None, logging_group=None, **kwargs):
def add_to_index(sender, document, **kwargs):
from documents import index
index.add_or_update_document(document)

View File

@@ -1,14 +1,17 @@
import logging
import tqdm
from django.conf import settings
from django.db.models.signals import post_save
from whoosh.writing import AsyncWriter
from documents import index, sanity_checker
from documents.classifier import DocumentClassifier, \
IncompatibleClassifierVersionError
from documents.classifier import DocumentClassifier, load_classifier
from documents.consumer import Consumer, ConsumerError
from documents.models import Document
from documents.sanity_checker import SanityFailedError
from documents.models import Document, Tag, DocumentType, Correspondent
from documents.sanity_checker import SanityCheckFailedException
logger = logging.getLogger("paperless.tasks")
def index_optimize():
@@ -23,34 +26,39 @@ def index_reindex():
ix = index.open_index(recreate=True)
with AsyncWriter(ix) as writer:
for document in documents:
for document in tqdm.tqdm(documents):
index.update_document(writer, document)
def train_classifier():
classifier = DocumentClassifier()
if (not Tag.objects.filter(
matching_algorithm=Tag.MATCH_AUTO).exists() and
not DocumentType.objects.filter(
matching_algorithm=Tag.MATCH_AUTO).exists() and
not Correspondent.objects.filter(
matching_algorithm=Tag.MATCH_AUTO).exists()):
try:
# load the classifier, since we might not have to train it again.
classifier.reload()
except (FileNotFoundError, IncompatibleClassifierVersionError):
# This is what we're going to fix here.
pass
return
classifier = load_classifier()
if not classifier:
classifier = DocumentClassifier()
try:
if classifier.train():
logging.getLogger(__name__).info(
logger.info(
"Saving updated classifier model to {}...".format(
settings.MODEL_FILE)
)
classifier.save_classifier()
classifier.save()
else:
logging.getLogger(__name__).debug(
logger.debug(
"Training data unchanged."
)
except Exception as e:
logging.getLogger(__name__).error(
logger.warning(
"Classifier error: " + str(e)
)
@@ -60,7 +68,8 @@ def consume_file(path,
override_title=None,
override_correspondent_id=None,
override_document_type_id=None,
override_tag_ids=None):
override_tag_ids=None,
task_id=None):
document = Consumer().try_consume_file(
path,
@@ -68,7 +77,9 @@ def consume_file(path,
override_title=override_title,
override_correspondent_id=override_correspondent_id,
override_document_type_id=override_document_type_id,
override_tag_ids=override_tag_ids)
override_tag_ids=override_tag_ids,
task_id=task_id
)
if document:
return "Success. New document id {} created".format(
@@ -82,7 +93,27 @@ def consume_file(path,
def sanity_check():
messages = sanity_checker.check_sanity()
if len(messages) > 0:
raise SanityFailedError(messages)
messages.log_messages()
if messages.has_error():
raise SanityCheckFailedException(
"Sanity check failed with errors. See log.")
elif messages.has_warning():
return "Sanity check exited with warnings. See log."
elif len(messages) > 0:
return "Sanity check exited with infos. See log."
else:
return "No issues detected."
def bulk_update_documents(document_ids):
documents = Document.objects.filter(id__in=document_ids)
ix = index.open_index()
for doc in documents:
post_save.send(Document, instance=doc, created=False)
with AsyncWriter(ix) as writer:
for doc in documents:
index.update_document(writer, doc)

View File

@@ -1,19 +1,26 @@
<!doctype html>
{% load static %}
{% load i18n %}
<html lang="en">
<head>
<meta charset="utf-8">
<title>PaperlessUi</title>
<title>Paperless-ng</title>
<base href="/">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="username" content="{{username}}">
<meta name="full_name" content="{{full_name}}">
<meta name="cookie_prefix" content="{{cookie_prefix}}">
<link rel="icon" type="image/x-icon" href="favicon.ico">
<link rel="stylesheet" href="{% static 'frontend/styles.css' %}"></head>
<link rel="manifest" href="{% static webmanifest %}">
<link rel="stylesheet" href="{% static styles_css %}">
<link rel="apple-touch-icon" href="apple-touch-icon.png">
</head>
<body>
<app-root>Loading...</app-root>
<script src="{% static 'frontend/runtime.js' %}" defer></script>
<script src="{% static 'frontend/polyfills.js' %}" defer></script>
<script src="{% static 'frontend/main.js' %}" defer></script>
<app-root>{% translate "Paperless-ng is loading..." %}</app-root>
<script src="{% static runtime_js %}" defer></script>
<script src="{% static polyfills_js %}" defer></script>
<script src="{% static main_js %}" defer></script>
</body>
</html>

View File

@@ -1,6 +1,7 @@
<!doctype html>
{% load static %}
{% load i18n %}
<html lang="en">
<head>
@@ -9,7 +10,7 @@
<meta name="description" content="">
<meta name="author" content="Mark Otto, Jacob Thornton, and Bootstrap contributors">
<meta name="generator" content="Jekyll v4.1.1">
<title>Paperless Sign In</title>
<title>{% translate "Paperless-ng signed out" %}</title>
<!-- Bootstrap core CSS -->
<link href="{% static 'bootstrap.min.css' %}" rel="stylesheet">
@@ -36,9 +37,9 @@
<body class="text-center">
<div class="form-signin">
<img class="mb-4" src="{% static 'frontend/assets/logo.svg' %}" alt="" width="300">
<p>You have been successfully logged out. Bye!</p>
<a href="/">Sign in again</a>
<img class="mb-4" src="{% static 'frontend/en-US/assets/logo.svg' %}" alt="" width="300">
<p>{% translate "You have been successfully logged out. Bye!" %}</p>
<a href="/">{% translate "Sign in again" %}</a>
</div>
</body>
</html>

View File

@@ -1,6 +1,7 @@
<!doctype html>
{% load static %}
{% load i18n %}
<html lang="en">
<head>
@@ -9,7 +10,7 @@
<meta name="description" content="">
<meta name="author" content="Mark Otto, Jacob Thornton, and Bootstrap contributors">
<meta name="generator" content="Jekyll v4.1.1">
<title>Paperless Sign In</title>
<title>{% translate "Paperless-ng sign in" %}</title>
<!-- Bootstrap core CSS -->
<link href="{% static 'bootstrap.min.css' %}" rel="stylesheet">
@@ -37,18 +38,20 @@
<body class="text-center">
<form class="form-signin" method="post">
{% csrf_token %}
<img class="mb-4" src="{% static 'frontend/assets/logo.svg' %}" alt="" width="300">
<p>Please sign in.</p>
<img class="mb-4" src="{% static 'frontend/en-US/assets/logo.svg' %}" alt="" width="300">
<p>{% translate "Please sign in." %}</p>
{% if form.errors %}
<div class="alert alert-danger" role="alert">
Your username and password didn't match. Please try again.
{% translate "Your username and password didn't match. Please try again." %}
</div>
{% endif %}
<label for="inputUsername" class="sr-only">Username</label>
<input type="text" name="username" id="inputUsername" class="form-control" placeholder="Username" required autofocus>
<label for="inputPassword" class="sr-only">Password</label>
<input type="password" name="password" id="inputPassword" class="form-control" placeholder="Password" required>
<button class="btn btn-lg btn-primary btn-block" type="submit">Sign in</button>
{% translate "Username" as i18n_username %}
{% translate "Password" as i18n_password %}
<label for="inputUsername" class="sr-only">{{ i18n_username }}</label>
<input type="text" name="username" id="inputUsername" class="form-control" placeholder="{{ i18n_username }}" required autofocus>
<label for="inputPassword" class="sr-only">{{ i18n_password }}</label>
<input type="password" name="password" id="inputPassword" class="form-control" placeholder="{{ i18n_password }}" required>
<button class="btn btn-lg btn-primary btn-block" type="submit">{% translate "Sign in" %}</button>
</form>
</body>
</html>

Binary files not shown (five new image assets added: 7.7 KiB, 7.7 KiB, 6.3 KiB, 17 KiB, 7.7 KiB).

View File

@@ -0,0 +1 @@
This is a test file.

Binary file not shown.

View File

@@ -0,0 +1,63 @@
from unittest import mock
from django.contrib.admin.sites import AdminSite
from django.test import TestCase
from django.utils import timezone
from documents import index
from documents.admin import DocumentAdmin
from documents.models import Document
from documents.tests.utils import DirectoriesMixin
class TestDocumentAdmin(DirectoriesMixin, TestCase):
def get_document_from_index(self, doc):
ix = index.open_index()
with ix.searcher() as searcher:
return searcher.document(id=doc.id)
def setUp(self) -> None:
super(TestDocumentAdmin, self).setUp()
self.doc_admin = DocumentAdmin(model=Document, admin_site=AdminSite())
def test_save_model(self):
doc = Document.objects.create(title="test")
doc.title = "new title"
self.doc_admin.save_model(None, doc, None, None)
self.assertEqual(Document.objects.get(id=doc.id).title, "new title")
self.assertEqual(self.get_document_from_index(doc)['title'], "new title")
def test_delete_model(self):
doc = Document.objects.create(title="test")
index.add_or_update_document(doc)
self.assertIsNotNone(self.get_document_from_index(doc))
self.doc_admin.delete_model(None, doc)
self.assertRaises(Document.DoesNotExist, Document.objects.get, id=doc.id)
self.assertIsNone(self.get_document_from_index(doc))
def test_delete_queryset(self):
docs = []
for i in range(42):
doc = Document.objects.create(title="Many documents with the same title", checksum=f"{i:02}")
docs.append(doc)
index.add_or_update_document(doc)
self.assertEqual(Document.objects.count(), 42)
for doc in docs:
self.assertIsNotNone(self.get_document_from_index(doc))
self.doc_admin.delete_queryset(None, Document.objects.all())
self.assertEqual(Document.objects.count(), 0)
for doc in docs:
self.assertIsNone(self.get_document_from_index(doc))
def test_created(self):
doc = Document.objects.create(title="test", created=timezone.datetime(2020, 4, 12))
self.assertEqual(self.doc_admin.created_(doc), "2020-04-12")

File diff suppressed because it is too large.

View File

@@ -1,9 +1,12 @@
import unittest
from unittest import mock
from django.core.checks import Error
from django.test import TestCase
from .factories import DocumentFactory
from ..checks import changed_password_check
from .. import document_consumer_declaration
from ..checks import changed_password_check, parser_check
from ..models import Document
@@ -15,3 +18,13 @@ class ChecksTestCase(TestCase):
def test_changed_password_check_no_encryption(self):
DocumentFactory.create(storage_type=Document.STORAGE_TYPE_UNENCRYPTED)
self.assertEqual(changed_password_check(None), [])
def test_parser_check(self):
self.assertEqual(parser_check(None), [])
with mock.patch('documents.checks.document_consumer_declaration.send') as m:
m.return_value = []
self.assertEqual(parser_check(None), [Error("No parsers found. This is a bug. The consumer won't be "
"able to consume any documents without parsers.")])

View File

@@ -1,10 +1,13 @@
import os
import tempfile
from time import sleep
from pathlib import Path
from unittest import mock
import pytest
from django.conf import settings
from django.test import TestCase, override_settings
from documents.classifier import DocumentClassifier, IncompatibleClassifierVersionError
from documents.classifier import DocumentClassifier, IncompatibleClassifierVersionError, load_classifier
from documents.models import Correspondent, Document, Tag, DocumentType
from documents.tests.utils import DirectoriesMixin
@@ -82,37 +85,19 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.assertTrue(self.classifier.train())
self.assertFalse(self.classifier.train())
self.classifier.save_classifier()
self.classifier.save()
classifier2 = DocumentClassifier()
current_ver = DocumentClassifier.FORMAT_VERSION
with mock.patch("documents.classifier.DocumentClassifier.FORMAT_VERSION", current_ver+1):
# assure that we won't load old classifiers.
self.assertRaises(IncompatibleClassifierVersionError, classifier2.reload)
self.assertRaises(IncompatibleClassifierVersionError, classifier2.load)
self.classifier.save_classifier()
self.classifier.save()
# assure that we can load the classifier after saving it.
classifier2.reload()
def testReload(self):
self.generate_test_data()
self.assertTrue(self.classifier.train())
self.classifier.save_classifier()
classifier2 = DocumentClassifier()
classifier2.reload()
v1 = classifier2.classifier_version
# change the classifier after some time.
sleep(1)
self.classifier.save_classifier()
classifier2.reload()
v2 = classifier2.classifier_version
self.assertNotEqual(v1, v2)
classifier2.load()
@override_settings(DATA_DIR=tempfile.mkdtemp())
def testSaveClassifier(self):
@@ -121,12 +106,21 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.classifier.train()
self.classifier.save_classifier()
self.classifier.save()
new_classifier = DocumentClassifier()
new_classifier.reload()
new_classifier.load()
self.assertFalse(new_classifier.train())
@override_settings(MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"))
def test_load_and_classify(self):
self.generate_test_data()
new_classifier = DocumentClassifier()
new_classifier.load()
self.assertCountEqual(new_classifier.predict_tags(self.doc2.content), [45, 12])
def test_one_correspondent_predict(self):
c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=c1, checksum="A")
@@ -235,3 +229,42 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.classifier.train()
self.assertListEqual(self.classifier.predict_tags(doc1.content), [t1.pk])
self.assertListEqual(self.classifier.predict_tags(doc2.content), [])
def test_load_classifier_not_exists(self):
self.assertFalse(os.path.exists(settings.MODEL_FILE))
self.assertIsNone(load_classifier())
@mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier(self, load):
Path(settings.MODEL_FILE).touch()
self.assertIsNotNone(load_classifier())
load.assert_called_once()
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'}})
@override_settings(MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"))
@pytest.mark.skip(reason="Disabled caching due to high memory usage - need to investigate.")
def test_load_classifier_cached(self):
classifier = load_classifier()
self.assertIsNotNone(classifier)
with mock.patch("documents.classifier.DocumentClassifier.load") as load:
classifier2 = load_classifier()
load.assert_not_called()
@mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_incompatible_version(self, load):
Path(settings.MODEL_FILE).touch()
self.assertTrue(os.path.exists(settings.MODEL_FILE))
load.side_effect = IncompatibleClassifierVersionError()
self.assertIsNone(load_classifier())
self.assertFalse(os.path.exists(settings.MODEL_FILE))
@mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_os_error(self, load):
Path(settings.MODEL_FILE).touch()
self.assertTrue(os.path.exists(settings.MODEL_FILE))
load.side_effect = OSError()
self.assertIsNone(load_classifier())
self.assertTrue(os.path.exists(settings.MODEL_FILE))

View File

@@ -5,12 +5,14 @@ import tempfile
from unittest import mock
from unittest.mock import MagicMock
from django.conf import settings
from django.test import TestCase, override_settings
from .utils import DirectoriesMixin
from ..consumer import Consumer, ConsumerError
from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
from ..parsers import DocumentParser, ParseError
from ..tasks import sanity_check
class TestAttributes(TestCase):
@@ -27,83 +29,8 @@ class TestAttributes(TestCase):
self.assertEqual(file_info.title, title, filename)
self.assertEqual(tuple([t.slug for t in file_info.tags]), tags, filename)
self.assertEqual(tuple([t.name for t in file_info.tags]), tags, filename)
def test_guess_attributes_from_name0(self):
self._test_guess_attributes_from_name(
"Sender - Title.pdf", "Sender", "Title", ())
def test_guess_attributes_from_name1(self):
self._test_guess_attributes_from_name(
"Spaced Sender - Title.pdf", "Spaced Sender", "Title", ())
def test_guess_attributes_from_name2(self):
self._test_guess_attributes_from_name(
"Sender - Spaced Title.pdf", "Sender", "Spaced Title", ())
def test_guess_attributes_from_name3(self):
self._test_guess_attributes_from_name(
"Dashed-Sender - Title.pdf", "Dashed-Sender", "Title", ())
def test_guess_attributes_from_name4(self):
self._test_guess_attributes_from_name(
"Sender - Dashed-Title.pdf", "Sender", "Dashed-Title", ())
def test_guess_attributes_from_name5(self):
self._test_guess_attributes_from_name(
"Sender - Title - tag1,tag2,tag3.pdf",
"Sender",
"Title",
self.TAGS
)
def test_guess_attributes_from_name6(self):
self._test_guess_attributes_from_name(
"Spaced Sender - Title - tag1,tag2,tag3.pdf",
"Spaced Sender",
"Title",
self.TAGS
)
def test_guess_attributes_from_name7(self):
self._test_guess_attributes_from_name(
"Sender - Spaced Title - tag1,tag2,tag3.pdf",
"Sender",
"Spaced Title",
self.TAGS
)
def test_guess_attributes_from_name8(self):
self._test_guess_attributes_from_name(
"Dashed-Sender - Title - tag1,tag2,tag3.pdf",
"Dashed-Sender",
"Title",
self.TAGS
)
def test_guess_attributes_from_name9(self):
self._test_guess_attributes_from_name(
"Sender - Dashed-Title - tag1,tag2,tag3.pdf",
"Sender",
"Dashed-Title",
self.TAGS
)
def test_guess_attributes_from_name10(self):
self._test_guess_attributes_from_name(
"Σενδερ - Τιτλε - tag1,tag2,tag3.pdf",
"Σενδερ",
"Τιτλε",
self.TAGS
)
def test_guess_attributes_from_name_when_correspondent_empty(self):
self._test_guess_attributes_from_name(
' - weird empty correspondent but should not break.pdf',
None,
'weird empty correspondent but should not break',
()
)
def test_guess_attributes_from_name_when_title_starts_with_dash(self):
self._test_guess_attributes_from_name(
@@ -121,28 +48,6 @@ class TestAttributes(TestCase):
()
)
def test_guess_attributes_from_name_when_title_is_empty(self):
self._test_guess_attributes_from_name(
'weird correspondent but should not break - .pdf',
'weird correspondent but should not break',
'',
()
)
def test_case_insensitive_tag_creation(self):
"""
Tags should be detected and created as lower case.
:return:
"""
filename = "Title - Correspondent - tAg1,TAG2.pdf"
self.assertEqual(len(FileInfo.from_filename(filename).tags), 2)
path = "Title - Correspondent - tag1,tag2.pdf"
self.assertEqual(len(FileInfo.from_filename(filename).tags), 2)
self.assertEqual(Tag.objects.all().count(), 2)
class TestFieldPermutations(TestCase):
@@ -188,7 +93,7 @@ class TestFieldPermutations(TestCase):
self.assertEqual(info.tags, (), filename)
else:
self.assertEqual(
[t.slug for t in info.tags], tags.split(','),
[t.name for t in info.tags], tags.split(','),
filename
)
@@ -199,69 +104,7 @@ class TestFieldPermutations(TestCase):
filename = template.format(**spec)
self._test_guessed_attributes(filename, **spec)
def test_title_and_correspondent(self):
template = '{correspondent} - {title}.pdf'
for correspondent in self.valid_correspondents:
for title in self.valid_titles:
spec = dict(correspondent=correspondent, title=title)
filename = template.format(**spec)
self._test_guessed_attributes(filename, **spec)
def test_title_and_correspondent_and_tags(self):
template = '{correspondent} - {title} - {tags}.pdf'
for correspondent in self.valid_correspondents:
for title in self.valid_titles:
for tags in self.valid_tags:
spec = dict(correspondent=correspondent, title=title,
tags=tags)
filename = template.format(**spec)
self._test_guessed_attributes(filename, **spec)
def test_created_and_correspondent_and_title_and_tags(self):
template = (
"{created} - "
"{correspondent} - "
"{title} - "
"{tags}.pdf"
)
for created in self.valid_dates:
for correspondent in self.valid_correspondents:
for title in self.valid_titles:
for tags in self.valid_tags:
spec = {
"created": created,
"correspondent": correspondent,
"title": title,
"tags": tags,
}
self._test_guessed_attributes(
template.format(**spec), **spec)
def test_created_and_correspondent_and_title(self):
template = "{created} - {correspondent} - {title}.pdf"
for created in self.valid_dates:
for correspondent in self.valid_correspondents:
for title in self.valid_titles:
# Skip cases where title looks like a tag as we can't
# accommodate such cases.
if title.lower() == title:
continue
spec = {
"created": created,
"correspondent": correspondent,
"title": title
}
self._test_guessed_attributes(
template.format(**spec), **spec)
def test_created_and_title(self):
template = "{created} - {title}.pdf"
for created in self.valid_dates:
@@ -273,21 +116,6 @@ class TestFieldPermutations(TestCase):
self._test_guessed_attributes(
template.format(**spec), **spec)
def test_created_and_title_and_tags(self):
template = "{created} - {title} - {tags}.pdf"
for created in self.valid_dates:
for title in self.valid_titles:
for tags in self.valid_tags:
spec = {
"created": created,
"title": title,
"tags": tags
}
self._test_guessed_attributes(
template.format(**spec), **spec)
def test_invalid_date_format(self):
info = FileInfo.from_filename("06112017Z - title.pdf")
self.assertEqual(info.title, "title")
@@ -336,54 +164,46 @@ class TestFieldPermutations(TestCase):
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "anotherall")
# Complex transformation without date in replacement string
with self.settings(
FILENAME_PARSE_TRANSFORMS=[(exact_patt, repl1)]):
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "0001")
self.assertEqual(len(info.tags), 2)
self.assertEqual(info.tags[0].slug, "tag1")
self.assertEqual(info.tags[1].slug, "tag2")
self.assertIsNone(info.created)
# Complex transformation with date in replacement string
with self.settings(
FILENAME_PARSE_TRANSFORMS=[
(none_patt, "none.gif"),
(exact_patt, repl2), # <-- matches
(exact_patt, repl1),
(all_patt, "all.gif")]):
info = FileInfo.from_filename(filename)
self.assertEqual(info.title, "0001")
self.assertEqual(len(info.tags), 2)
self.assertEqual(info.tags[0].slug, "tag1")
self.assertEqual(info.tags[1].slug, "tag2")
self.assertEqual(info.created.year, 2019)
self.assertEqual(info.created.month, 9)
self.assertEqual(info.created.day, 8)
class DummyParser(DocumentParser):
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
# not important during tests
raise NotImplementedError()
def __init__(self, logging_group, scratch_dir, archive_path):
super(DummyParser, self).__init__(logging_group)
super(DummyParser, self).__init__(logging_group, None)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
self.archive_path = archive_path
def get_optimised_thumbnail(self, document_path, mime_type):
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def parse(self, document_path, mime_type):
def parse(self, document_path, mime_type, file_name=None):
self.text = "The Text"
class CopyParser(DocumentParser):
def get_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def __init__(self, logging_group, progress_callback=None):
super(CopyParser, self).__init__(logging_group, progress_callback)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=self.tempdir)
def parse(self, document_path, mime_type, file_name=None):
self.text = "The text"
self.archive_path = os.path.join(self.tempdir, "archive.pdf")
shutil.copy(document_path, self.archive_path)
class FaultyParser(DocumentParser):
def get_thumbnail(self, document_path, mime_type):
def get_thumbnail(self, document_path, mime_type, file_name=None):
# not important during tests
raise NotImplementedError()
@@ -391,10 +211,10 @@ class FaultyParser(DocumentParser):
super(FaultyParser, self).__init__(logging_group)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
def get_optimised_thumbnail(self, document_path, mime_type):
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
return self.fake_thumb
def parse(self, document_path, mime_type):
def parse(self, document_path, mime_type, file_name=None):
raise ParseError("Does not compute.")
@@ -403,6 +223,8 @@ def fake_magic_from_file(file, mime=False):
if mime:
if os.path.splitext(file)[1] == ".pdf":
return "application/pdf"
elif os.path.splitext(file)[1] == ".png":
return "image/png"
else:
return "unknown"
else:
@@ -412,10 +234,24 @@ def fake_magic_from_file(file, mime=False):
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
class TestConsumer(DirectoriesMixin, TestCase):
def make_dummy_parser(self, logging_group):
def _assert_first_last_send_progress(self, first_status="STARTING", last_status="SUCCESS", first_progress=0, first_progress_max=100, last_progress=100, last_progress_max=100):
self._send_progress.assert_called()
args, kwargs = self._send_progress.call_args_list[0]
self.assertEqual(args[0], first_progress)
self.assertEqual(args[1], first_progress_max)
self.assertEqual(args[2], first_status)
args, kwargs = self._send_progress.call_args_list[len(self._send_progress.call_args_list) - 1]
self.assertEqual(args[0], last_progress)
self.assertEqual(args[1], last_progress_max)
self.assertEqual(args[2], last_status)
def make_dummy_parser(self, logging_group, progress_callback=None):
return DummyParser(logging_group, self.dirs.scratch_dir, self.get_test_archive_file())
def make_faulty_parser(self, logging_group):
def make_faulty_parser(self, logging_group, progress_callback=None):
return FaultyParser(logging_group, self.dirs.scratch_dir)
def setUp(self):
@@ -428,7 +264,11 @@ class TestConsumer(DirectoriesMixin, TestCase):
"mime_types": {"application/pdf": ".pdf"},
"weight": 0
})]
self.addCleanup(patcher.stop)
# this prevents websocket message reports during testing.
patcher = mock.patch("documents.consumer.Consumer._send_progress")
self._send_progress = patcher.start()
self.addCleanup(patcher.stop)
self.consumer = Consumer()
@@ -456,6 +296,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertIsNone(document.correspondent)
self.assertIsNone(document.document_type)
self.assertEqual(document.filename, "0000001.pdf")
self.assertEqual(document.archive_filename, "0000001.pdf")
self.assertTrue(os.path.isfile(
document.source_path
@@ -474,31 +315,36 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertFalse(os.path.isfile(filename))
self._assert_first_last_send_progress()
def testOverrideFilename(self):
filename = self.get_test_file()
override_filename = "My Bank - Statement for November.pdf"
override_filename = "Statement for November.pdf"
document = self.consumer.try_consume_file(filename, override_filename=override_filename)
self.assertEqual(document.correspondent.name, "My Bank")
self.assertEqual(document.title, "Statement for November")
def testOverrideTitle(self):
self._assert_first_last_send_progress()
def testOverrideTitle(self):
document = self.consumer.try_consume_file(self.get_test_file(), override_title="Override Title")
self.assertEqual(document.title, "Override Title")
self._assert_first_last_send_progress()
def testOverrideCorrespondent(self):
c = Correspondent.objects.create(name="test")
document = self.consumer.try_consume_file(self.get_test_file(), override_correspondent_id=c.pk)
self.assertEqual(document.correspondent.id, c.id)
self._assert_first_last_send_progress()
def testOverrideDocumentType(self):
dt = DocumentType.objects.create(name="test")
document = self.consumer.try_consume_file(self.get_test_file(), override_document_type_id=dt.pk)
self.assertEqual(document.document_type.id, dt.id)
self._assert_first_last_send_progress()
def testOverrideTags(self):
t1 = Tag.objects.create(name="t1")
@@ -509,37 +355,42 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertIn(t1, document.tags.all())
self.assertNotIn(t2, document.tags.all())
self.assertIn(t3, document.tags.all())
self._assert_first_last_send_progress()
def testNotAFile(self):
try:
self.consumer.try_consume_file("non-existing-file")
except ConsumerError as e:
self.assertTrue(str(e).endswith('It is not a file'))
return
self.fail("Should throw exception")
self.assertRaisesMessage(
ConsumerError,
"File not found",
self.consumer.try_consume_file,
"non-existing-file"
)
self._assert_first_last_send_progress(last_status="FAILED")
def testDuplicates1(self):
self.consumer.try_consume_file(self.get_test_file())
try:
self.consumer.try_consume_file(self.get_test_file())
except ConsumerError as e:
self.assertTrue(str(e).endswith("It is a duplicate."))
return
self.assertRaisesMessage(
ConsumerError,
"It is a duplicate",
self.consumer.try_consume_file,
self.get_test_file()
)
self.fail("Should throw exception")
self._assert_first_last_send_progress(last_status="FAILED")
def testDuplicates2(self):
self.consumer.try_consume_file(self.get_test_file())
try:
self.consumer.try_consume_file(self.get_test_archive_file())
except ConsumerError as e:
self.assertTrue(str(e).endswith("It is a duplicate."))
return
self.assertRaisesMessage(
ConsumerError,
"It is a duplicate",
self.consumer.try_consume_file,
self.get_test_archive_file()
)
self.fail("Should throw exception")
self._assert_first_last_send_progress(last_status="FAILED")
def testDuplicates3(self):
self.consumer.try_consume_file(self.get_test_archive_file())
@@ -549,13 +400,15 @@ class TestConsumer(DirectoriesMixin, TestCase):
def testNoParsers(self, m):
m.return_value = []
try:
self.consumer.try_consume_file(self.get_test_file())
except ConsumerError as e:
self.assertTrue("No parsers abvailable for" in str(e))
return
self.assertRaisesMessage(
ConsumerError,
"sample.pdf: Unsupported mime type application/pdf",
self.consumer.try_consume_file,
self.get_test_file()
)
self._assert_first_last_send_progress(last_status="FAILED")
self.fail("Should throw exception")
@mock.patch("documents.parsers.document_consumer_declaration.send")
def testFaultyParser(self, m):
@@ -565,24 +418,28 @@ class TestConsumer(DirectoriesMixin, TestCase):
"weight": 0
})]
try:
self.consumer.try_consume_file(self.get_test_file())
except ConsumerError as e:
self.assertEqual(str(e), "Does not compute.")
return
self.assertRaisesMessage(
ConsumerError,
"sample.pdf: Error while consuming document sample.pdf: Does not compute.",
self.consumer.try_consume_file,
self.get_test_file()
)
self.fail("Should throw exception.")
self._assert_first_last_send_progress(last_status="FAILED")
@mock.patch("documents.consumer.Consumer._write")
def testPostSaveError(self, m):
filename = self.get_test_file()
m.side_effect = OSError("NO.")
try:
self.consumer.try_consume_file(filename)
except ConsumerError as e:
self.assertEqual(str(e), "NO.")
else:
self.fail("Should raise exception")
self.assertRaisesMessage(
ConsumerError,
"sample.pdf: The following error occured while consuming sample.pdf: NO.",
self.consumer.try_consume_file,
filename
)
self._assert_first_last_send_progress(last_status="FAILED")
# file not deleted
self.assertTrue(os.path.isfile(filename))
@@ -594,14 +451,16 @@ class TestConsumer(DirectoriesMixin, TestCase):
def testFilenameHandling(self):
filename = self.get_test_file()
document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs")
document = self.consumer.try_consume_file(filename, override_title="new docs")
self.assertEqual(document.title, "new docs")
self.assertEqual(document.correspondent.name, "Bank")
self.assertEqual(document.filename, "bank/new-docs-0000001.pdf")
self.assertEqual(document.filename, "none/new docs.pdf")
self.assertEqual(document.archive_filename, "none/new docs.pdf")
self._assert_first_last_send_progress()
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@mock.patch("documents.signals.handlers.generate_filename")
@mock.patch("documents.signals.handlers.generate_unique_filename")
def testFilenameHandlingUnstableFormat(self, m):
filenames = ["this", "that", "now this", "i cant decide"]
@@ -611,20 +470,22 @@ class TestConsumer(DirectoriesMixin, TestCase):
filenames.insert(0, f)
return f
m.side_effect = lambda f: get_filename()
m.side_effect = lambda f, archive_filename = False: get_filename()
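# The mocked generator rotates through the filenames list above, so consecutive calls yield different names, simulating a filename format whose output is unstable.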
filename = self.get_test_file()
Tag.objects.create(name="test", is_inbox_tag=True)
document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs")
document = self.consumer.try_consume_file(filename, override_title="new docs")
self.assertEqual(document.title, "new docs")
self.assertEqual(document.correspondent.name, "Bank")
self.assertIsNotNone(os.path.isfile(document.title))
self.assertTrue(os.path.isfile(document.source_path))
self.assertTrue(os.path.isfile(document.archive_path))
@mock.patch("documents.consumer.DocumentClassifier")
self._assert_first_last_send_progress()
@mock.patch("documents.consumer.load_classifier")
def testClassifyDocument(self, m):
correspondent = Correspondent.objects.create(name="test")
dtype = DocumentType.objects.create(name="test")
@@ -642,3 +503,161 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertEqual(document.document_type, dtype)
self.assertIn(t1, document.tags.all())
self.assertNotIn(t2, document.tags.all())
self._assert_first_last_send_progress()
@override_settings(CONSUMER_DELETE_DUPLICATES=True)
def test_delete_duplicate(self):
dst = self.get_test_file()
self.assertTrue(os.path.isfile(dst))
doc = self.consumer.try_consume_file(dst)
self._assert_first_last_send_progress()
self.assertFalse(os.path.isfile(dst))
self.assertIsNotNone(doc)
self._send_progress.reset_mock()
dst = self.get_test_file()
self.assertTrue(os.path.isfile(dst))
self.assertRaises(ConsumerError, self.consumer.try_consume_file, dst)
self.assertFalse(os.path.isfile(dst))
self._assert_first_last_send_progress(last_status="FAILED")
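# With CONSUMER_DELETE_DUPLICATES enabled, the duplicate upload is removed from disk even though consumption fails; the next test checks the opposite setting.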
@override_settings(CONSUMER_DELETE_DUPLICATES=False)
def test_no_delete_duplicate(self):
dst = self.get_test_file()
self.assertTrue(os.path.isfile(dst))
doc = self.consumer.try_consume_file(dst)
self.assertFalse(os.path.isfile(dst))
self.assertIsNotNone(doc)
dst = self.get_test_file()
self.assertTrue(os.path.isfile(dst))
self.assertRaises(ConsumerError, self.consumer.try_consume_file, dst)
self.assertTrue(os.path.isfile(dst))
self._assert_first_last_send_progress(last_status="FAILED")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test_similar_filenames(self, m):
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.png"), os.path.join(settings.CONSUMPTION_DIR, "simple.png"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png"), os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"))
m.return_value = [(None, {
"parser": CopyParser,
"mime_types": {"application/pdf": ".pdf", "image/png": ".png"},
"weight": 0
})]
doc1 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.png"))
doc2 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"))
doc3 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"))
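# doc1 is consumed first and its PDF archive takes "simple.pdf", so doc2's archive has to be deduplicated with a _01 suffix; the originals keep their distinct names.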
self.assertEqual(doc1.filename, "simple.png")
self.assertEqual(doc1.archive_filename, "simple.pdf")
self.assertEqual(doc2.filename, "simple.pdf")
self.assertEqual(doc2.archive_filename, "simple_01.pdf")
self.assertEqual(doc3.filename, "simple.png.pdf")
self.assertEqual(doc3.archive_filename, "simple.png.pdf")
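# The sanity checker is expected to find no mismatch between the files on disk and the database records after all three documents were consumed.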
sanity_check()
class PreConsumeTestCase(TestCase):
@mock.patch("documents.consumer.Popen")
@override_settings(PRE_CONSUME_SCRIPT=None)
def test_no_pre_consume_script(self, m):
c = Consumer()
c.path = "path-to-file"
c.run_pre_consume_script()
m.assert_not_called()
@mock.patch("documents.consumer.Popen")
@mock.patch("documents.consumer.Consumer._send_progress")
@override_settings(PRE_CONSUME_SCRIPT="does-not-exist")
def test_pre_consume_script_not_found(self, m, m2):
c = Consumer()
c.filename = "somefile.pdf"
c.path = "path-to-file"
self.assertRaises(ConsumerError, c.run_pre_consume_script)
@mock.patch("documents.consumer.Popen")
def test_pre_consume_script(self, m):
with tempfile.NamedTemporaryFile() as script:
with override_settings(PRE_CONSUME_SCRIPT=script.name):
c = Consumer()
c.path = "path-to-file"
c.run_pre_consume_script()
m.assert_called_once()
args, kwargs = m.call_args
command = args[0]
self.assertEqual(command[0], script.name)
self.assertEqual(command[1], "path-to-file")
class PostConsumeTestCase(TestCase):
@mock.patch("documents.consumer.Popen")
@override_settings(POST_CONSUME_SCRIPT=None)
def test_no_post_consume_script(self, m):
doc = Document.objects.create(title="Test", mime_type="application/pdf")
tag1 = Tag.objects.create(name="a")
tag2 = Tag.objects.create(name="b")
doc.tags.add(tag1)
doc.tags.add(tag2)
Consumer().run_post_consume_script(doc)
m.assert_not_called()
@override_settings(POST_CONSUME_SCRIPT="does-not-exist")
@mock.patch("documents.consumer.Consumer._send_progress")
def test_post_consume_script_not_found(self, m):
doc = Document.objects.create(title="Test", mime_type="application/pdf")
c = Consumer()
c.filename = "somefile.pdf"
self.assertRaises(ConsumerError, c.run_post_consume_script, doc)
@mock.patch("documents.consumer.Popen")
def test_post_consume_script_simple(self, m):
with tempfile.NamedTemporaryFile() as script:
with override_settings(POST_CONSUME_SCRIPT=script.name):
doc = Document.objects.create(title="Test", mime_type="application/pdf")
Consumer().run_post_consume_script(doc)
m.assert_called_once()
@mock.patch("documents.consumer.Popen")
def test_post_consume_script_with_correspondent(self, m):
with tempfile.NamedTemporaryFile() as script:
with override_settings(POST_CONSUME_SCRIPT=script.name):
c = Correspondent.objects.create(name="my_bank")
doc = Document.objects.create(title="Test", mime_type="application/pdf", correspondent=c)
tag1 = Tag.objects.create(name="a")
tag2 = Tag.objects.create(name="b")
doc.tags.add(tag1)
doc.tags.add(tag2)
Consumer().run_post_consume_script(doc)
m.assert_called_once()
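# Asserted argument layout of the post-consume script: argv[1] is the document id, argv[5] and argv[6] the download and thumbnail URLs, argv[7] the correspondent name, and argv[8] a comma-separated tag list.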
args, kwargs = m.call_args
command = args[0]
self.assertEqual(command[0], script.name)
self.assertEqual(command[1], str(doc.pk))
self.assertEqual(command[5], f"/api/documents/{doc.pk}/download/")
self.assertEqual(command[6], f"/api/documents/{doc.pk}/thumb/")
self.assertEqual(command[7], "my_bank")
self.assertCountEqual(command[8].split(","), ["a", "b"])

View File

@@ -1,7 +1,6 @@
import datetime
import os
import shutil
from unittest import mock
from uuid import uuid4
from dateutil import tz
@@ -9,7 +8,6 @@ from django.conf import settings
from django.test import TestCase, override_settings
from documents.parsers import parse_date
from paperless_tesseract.parsers import RasterisedDocumentParser
class TestDate(TestCase):
@@ -138,3 +136,18 @@ class TestDate(TestCase):
@override_settings(FILENAME_DATE_ORDER="YMD")
def test_filename_date_parse_invalid(self, *args):
self.assertIsNone(parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"))
@override_settings(IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)))
def test_ignored_dates(self, *args):
text = (
"lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem "
"ipsum"
)
date = parse_date("", text)
self.assertEqual(
date,
datetime.datetime(
2018, 2, 13, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
)

View File

@@ -1,10 +1,10 @@
import shutil
import tempfile
from datetime import datetime
from pathlib import Path
from unittest import mock
from django.test import TestCase, override_settings
from django.utils import timezone
from ..models import Document, Correspondent
@@ -47,20 +47,20 @@ class TestDocument(TestCase):
def test_file_name(self):
doc = Document(mime_type="application/pdf", title="test", created=datetime(2020, 12, 25))
self.assertEqual(doc.file_name, "20201225-test.pdf")
doc = Document(mime_type="application/pdf", title="test", created=timezone.datetime(2020, 12, 25))
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf")
def test_file_name_jpg(self):
doc = Document(mime_type="image/jpeg", title="test", created=datetime(2020, 12, 25))
self.assertEqual(doc.file_name, "20201225-test.jpg")
doc = Document(mime_type="image/jpeg", title="test", created=timezone.datetime(2020, 12, 25))
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.jpg")
def test_file_name_unknown(self):
doc = Document(mime_type="application/zip", title="test", created=datetime(2020, 12, 25))
self.assertEqual(doc.file_name, "20201225-test.zip")
doc = Document(mime_type="application/zip", title="test", created=timezone.datetime(2020, 12, 25))
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.zip")
def test_file_name_invalid(self):
def test_file_name_invalid_type(self):
doc = Document(mime_type="image/jpegasd", title="test", created=datetime(2020, 12, 25))
self.assertEqual(doc.file_name, "20201225-test")
doc = Document(mime_type="image/jpegasd", title="test", created=timezone.datetime(2020, 12, 25))
self.assertEqual(doc.get_public_filename(), "2020-12-25 test")

View File

@@ -1,15 +1,20 @@
import datetime
import hashlib
import os
import shutil
import random
import uuid
from pathlib import Path
from unittest import mock
from django.conf import settings
from django.db import DatabaseError
from django.test import TestCase, override_settings
from django.utils import timezone
from .utils import DirectoriesMixin
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories
from ..models import Document, Correspondent
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories, \
generate_unique_filename
from ..models import Document, Correspondent, Tag, DocumentType
class TestFileHandling(DirectoriesMixin, TestCase):
@@ -40,13 +45,13 @@ class TestFileHandling(DirectoriesMixin, TestCase):
document.filename = generate_filename(document)
# Ensure that filename is properly generated
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
self.assertEqual(document.filename, "none/none.pdf")
# Enable encryption and check again
document.storage_type = Document.STORAGE_TYPE_GPG
document.filename = generate_filename(document)
self.assertEqual(document.filename,
"none/none-{:07d}.pdf.gpg".format(document.pk))
"none/none.pdf.gpg")
document.save()
@@ -62,7 +67,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Check proper handling of files
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test-{:07d}.pdf.gpg".format(document.pk)), True)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_file_renaming_missing_permissions(self):
@@ -74,12 +79,12 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk))
"none/none.pdf")
create_source_path_directory(document.source_path)
Path(document.source_path).touch()
# Test source_path
self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk))
self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf")
# Make the folder read- and execute-only (no writing and no renaming)
os.chmod(settings.ORIGINALS_DIR + "/none", 0o555)
@@ -89,8 +94,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
document.save()
# Check proper handling of files
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True)
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True)
self.assertEqual(document.filename, "none/none.pdf")
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
@@ -108,7 +113,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk))
"none/none.pdf")
create_source_path_directory(document.source_path)
Path(document.source_path).touch()
@@ -125,8 +130,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Check proper handling of files
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk)), True)
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True)
self.assertEqual(document.filename, "none/none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_document_delete(self):
@@ -138,7 +143,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk))
"none/none.pdf")
create_source_path_directory(document.source_path)
Path(document.source_path).touch()
@@ -146,7 +151,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure file deletion after delete
pk = document.pk
document.delete()
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(pk)), False)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@@ -168,7 +173,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk))
"none/none.pdf")
create_source_path_directory(document.source_path)
@@ -185,6 +190,24 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True)
self.assertTrue(os.path.isfile(important_file))
@override_settings(PAPERLESS_FILENAME_FORMAT="{document_type} - {title}")
def test_document_type(self):
dt = DocumentType.objects.create(name="my_doc_type")
d = Document.objects.create(title="the_doc", mime_type="application/pdf")
self.assertEqual(generate_filename(d), "none - the_doc.pdf")
d.document_type = dt
self.assertEqual(generate_filename(d), "my_doc_type - the_doc.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{asn} - {title}")
def test_asn(self):
d1 = Document.objects.create(title="the_doc", mime_type="application/pdf", archive_serial_number=652, checksum="A")
d2 = Document.objects.create(title="the_doc", mime_type="application/pdf", archive_serial_number=None, checksum="B")
self.assertEqual(generate_filename(d1), "652 - the_doc.pdf")
self.assertEqual(generate_filename(d2), "none - the_doc.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_underscore(self):
document = Document()
@@ -199,7 +222,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk))
"demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_dash(self):
@@ -215,7 +238,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk))
"demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_malformed(self):
@@ -231,7 +254,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document),
"none-{:07d}.pdf".format(document.pk))
"none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
def test_tags_all(self):
@@ -246,7 +269,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk))
"demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}")
def test_tags_out_of_bounds(self):
@@ -261,7 +284,58 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document),
"none-{:07d}.pdf".format(document.pk))
"none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags}")
def test_tags_without_args(self):
document = Document()
document.mime_type = "application/pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
self.assertEqual(generate_filename(document), f"{document.pk:07}.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title} {tag_list}")
def test_tag_list(self):
doc = Document.objects.create(title="doc1", mime_type="application/pdf")
doc.tags.create(name="tag2")
doc.tags.create(name="tag1")
self.assertEqual(generate_filename(doc), "doc1 tag1,tag2.pdf")
doc = Document.objects.create(title="doc2", checksum="B", mime_type="application/pdf")
self.assertEqual(generate_filename(doc), "doc2.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="//etc/something/{title}")
def test_filename_relative(self):
doc = Document.objects.create(title="doc1", mime_type="application/pdf")
doc.filename = generate_filename(doc)
doc.save()
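# The leading slashes in the format string are dropped, so the generated path stays inside ORIGINALS_DIR.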
self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "etc", "something", "doc1.pdf"))
@override_settings(PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}")
def test_created_year_month_day(self):
d1 = timezone.make_aware(datetime.datetime(2020, 3, 6, 1, 1, 1))
doc1 = Document.objects.create(title="doc1", mime_type="application/pdf", created=d1)
self.assertEqual(generate_filename(doc1), "2020-03-06.pdf")
doc1.created = timezone.make_aware(datetime.datetime(2020, 11, 16, 1, 1, 1))
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}")
def test_added_year_month_day(self):
d1 = timezone.make_aware(datetime.datetime(232, 1, 9, 1, 1, 1))
doc1 = Document.objects.create(title="doc1", mime_type="application/pdf", added=d1)
self.assertEqual(generate_filename(doc1), "232-01-09.pdf")
doc1.added = timezone.make_aware(datetime.datetime(2020, 11, 16, 1, 1, 1))
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}")
def test_nested_directory_cleanup(self):
@@ -272,7 +346,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
document.filename = generate_filename(document)
self.assertEqual(document.filename, "none/none/none-{:07d}.pdf".format(document.pk))
self.assertEqual(document.filename, "none/none/none.pdf")
create_source_path_directory(document.source_path)
Path(document.source_path).touch()
@@ -282,7 +356,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
pk = document.pk
document.delete()
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none-{:07d}.pdf".format(pk)), False)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)
@@ -330,6 +404,60 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(document), "0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_duplicates(self):
document = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="A", pk=1)
document2 = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="B", pk=2)
Path(document.source_path).touch()
Path(document2.source_path).touch()
document.filename = "0000001.pdf"
document.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document.filename, "qwe.pdf")
document2.filename = "0000002.pdf"
document2.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document2.filename, "qwe_01.pdf")
# saving should not change the file names.
document.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document.filename, "qwe.pdf")
document2.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document2.filename, "qwe_01.pdf")
document.delete()
self.assertFalse(os.path.isfile(document.source_path))
# filename free, should remove _01 suffix
document2.save()
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document2.filename, "qwe.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@mock.patch("documents.signals.handlers.Document.objects.filter")
def test_no_update_without_change(self, m):
doc = Document.objects.create(title="document", filename="document.pdf", archive_filename="document.pdf", checksum="A", archive_checksum="B", mime_type="application/pdf")
Path(doc.source_path).touch()
Path(doc.archive_path).touch()
doc.save()
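# Filename and checksums already match, so the post-save handler should have no reason to touch the patched queryset lookup.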
m.assert_not_called()
class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
@@ -339,7 +467,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", filename="0000001.pdf", checksum="A", archive_filename="0000001.pdf", archive_checksum="B")
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
@@ -352,22 +480,21 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
self.assertFalse(os.path.isfile(original))
self.assertFalse(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc-0000001.pdf"))
self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf"))
self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"))
self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
def test_move_archive_gone(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
#Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
self.assertTrue(os.path.isfile(original))
self.assertFalse(os.path.isfile(archive))
@@ -378,14 +505,49 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
def test_move_archive_exists(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
existing_archive_file = os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")
Path(original).touch()
Path(archive).touch()
os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none"))
Path(os.path.join(settings.ARCHIVE_DIR, "none", "my_doc-0000001.pdf")).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
Path(existing_archive_file).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
self.assertFalse(os.path.isfile(original))
self.assertFalse(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
self.assertTrue(os.path.isfile(existing_archive_file))
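# "none/my_doc.pdf" is already occupied by an unrelated file, so the moved archive receives a _01 suffix.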
self.assertEqual(doc.archive_filename, "none/my_doc_01.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_move_original_only(self):
original = os.path.join(settings.ORIGINALS_DIR, "document_01.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "document.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="document", filename="document_01.pdf", checksum="A",
archive_checksum="B", archive_filename="document.pdf")
self.assertEqual(doc.filename, "document.pdf")
self.assertEqual(doc.archive_filename, "document.pdf")
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_move_archive_only(self):
original = os.path.join(settings.ORIGINALS_DIR, "document.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "document_01.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="document", filename="document.pdf", checksum="A",
archive_checksum="B", archive_filename="document_01.pdf")
self.assertEqual(doc.filename, "document.pdf")
self.assertEqual(doc.archive_filename, "document.pdf")
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@@ -406,8 +568,9 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
m.assert_called()
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
@@ -419,7 +582,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
#Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", archive_filename="0000001.pdf", checksum="A", archive_checksum="B")
self.assertFalse(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
@@ -443,19 +606,21 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", archive_filename="0000001.pdf", checksum="A", archive_checksum="B")
m.assert_called()
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="")
def test_archive_deleted(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
self.assertTrue(os.path.isfile(original))
self.assertTrue(os.path.isfile(archive))
@@ -469,6 +634,28 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertFalse(os.path.isfile(doc.source_path))
self.assertFalse(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_archive_deleted2(self):
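# doc2's original shares the bare name 0000001.pdf with doc1's archive; deleting doc2 must remove only doc2's original, never doc1's archive.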
original = os.path.join(settings.ORIGINALS_DIR, "document.png")
original2 = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(original2).touch()
Path(archive).touch()
doc1 = Document.objects.create(mime_type="image/png", title="document", filename="document.png", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
doc2 = Document.objects.create(mime_type="application/pdf", title="0000001", filename="0000001.pdf", checksum="C")
self.assertTrue(os.path.isfile(doc1.source_path))
self.assertTrue(os.path.isfile(doc1.archive_path))
self.assertTrue(os.path.isfile(doc2.source_path))
doc2.delete()
self.assertTrue(os.path.isfile(doc1.source_path))
self.assertTrue(os.path.isfile(doc1.archive_path))
self.assertFalse(os.path.isfile(doc2.source_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
def test_database_error(self):
@@ -476,7 +663,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
Path(original).touch()
Path(archive).touch()
doc = Document(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B")
doc = Document(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_filename="0000001.pdf", archive_checksum="B")
with mock.patch("documents.signals.handlers.Document.objects.filter") as m:
m.side_effect = DatabaseError()
doc.save()
@@ -485,3 +672,45 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(archive))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
class TestFilenameGeneration(TestCase):
@override_settings(
PAPERLESS_FILENAME_FORMAT="{title}"
)
def test_invalid_characters(self):
doc = Document.objects.create(title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1")
self.assertEqual(generate_filename(doc), "This. is the title.pdf")
doc = Document.objects.create(title="my\\invalid/../title:yay", mime_type="application/pdf", pk=2, checksum="2")
self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay.pdf")
@override_settings(
PAPERLESS_FILENAME_FORMAT="{created}"
)
def test_date(self):
doc = Document.objects.create(title="does not matter", created=timezone.make_aware(datetime.datetime(2020,5,21, 7,36,51, 153)), mime_type="application/pdf", pk=2, checksum="2")
self.assertEqual(generate_filename(doc), "2020-05-21.pdf")
def run():
doc = Document.objects.create(checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow")
doc.filename = generate_unique_filename(doc)
Path(doc.thumbnail_path).touch()
with open(doc.source_path, "w") as f:
f.write(str(uuid.uuid4()))
with open(doc.source_path, "rb") as f:
doc.checksum = hashlib.md5(f.read()).hexdigest()
with open(doc.archive_path, "w") as f:
f.write(str(uuid.uuid4()))
with open(doc.archive_path, "rb") as f:
doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
doc.save()
for i in range(30):
doc.title = str(random.randrange(1, 5))
doc.save()

View File

@@ -1,6 +1,9 @@
from django.test import TestCase
from documents import index
from documents.index import JsonFormatter
from documents.models import Document
from documents.tests.utils import DirectoriesMixin
class JsonFormatterTest(TestCase):
@@ -12,3 +15,21 @@ class JsonFormatterTest(TestCase):
self.assertListEqual(self.formatter.format([]), [])
class TestAutoComplete(DirectoriesMixin, TestCase):
def test_auto_complete(self):
doc1 = Document.objects.create(title="doc1", checksum="A", content="test test2 test3")
doc2 = Document.objects.create(title="doc2", checksum="B", content="test test2")
doc3 = Document.objects.create(title="doc3", checksum="C", content="test2")
index.add_or_update_document(doc1)
index.add_or_update_document(doc2)
index.add_or_update_document(doc3)
ix = index.open_index()
self.assertListEqual(index.autocomplete(ix, "tes"), [b"test3", b"test", b"test2"])
self.assertListEqual(index.autocomplete(ix, "tes", limit=3), [b"test3", b"test", b"test2"])
self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])

View File

@@ -1,66 +0,0 @@
import logging
import uuid
from unittest import mock
from django.test import TestCase, override_settings
from ..models import Log
class TestPaperlessLog(TestCase):
def __init__(self, *args, **kwargs):
TestCase.__init__(self, *args, **kwargs)
self.logger = logging.getLogger(
"documents.management.commands.document_consumer")
@override_settings(DISABLE_DBHANDLER=False)
def test_that_it_saves_at_all(self):
kw = {"group": uuid.uuid4()}
self.assertEqual(Log.objects.all().count(), 0)
with mock.patch("logging.StreamHandler.emit") as __:
# Debug messages are ignored by default
self.logger.debug("This is a debugging message", extra=kw)
self.assertEqual(Log.objects.all().count(), 1)
self.logger.info("This is an informational message", extra=kw)
self.assertEqual(Log.objects.all().count(), 2)
self.logger.warning("This is an warning message", extra=kw)
self.assertEqual(Log.objects.all().count(), 3)
self.logger.error("This is an error message", extra=kw)
self.assertEqual(Log.objects.all().count(), 4)
self.logger.critical("This is a critical message", extra=kw)
self.assertEqual(Log.objects.all().count(), 5)
@override_settings(DISABLE_DBHANDLER=False)
def test_groups(self):
kw1 = {"group": uuid.uuid4()}
kw2 = {"group": uuid.uuid4()}
self.assertEqual(Log.objects.all().count(), 0)
with mock.patch("logging.StreamHandler.emit") as __:
self.logger.info("This is an informational message", extra=kw2)
self.assertEqual(Log.objects.all().count(), 1)
self.assertEqual(Log.objects.filter(group=kw2["group"]).count(), 1)
self.logger.warning("This is an warning message", extra=kw1)
self.assertEqual(Log.objects.all().count(), 2)
self.assertEqual(Log.objects.filter(group=kw1["group"]).count(), 1)
self.logger.error("This is an error message", extra=kw2)
self.assertEqual(Log.objects.all().count(), 3)
self.assertEqual(Log.objects.filter(group=kw2["group"]).count(), 2)
self.logger.critical("This is a critical message", extra=kw1)
self.assertEqual(Log.objects.all().count(), 4)
self.assertEqual(Log.objects.filter(group=kw1["group"]).count(), 2)

View File

@@ -0,0 +1,193 @@
import hashlib
import tempfile
import filecmp
import os
import shutil
from pathlib import Path
from unittest import mock
from django.test import TestCase, override_settings
from django.core.management import call_command
from documents.file_handling import generate_filename
from documents.management.commands.document_archiver import handle_document
from documents.models import Document
from documents.tests.utils import DirectoriesMixin
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
class TestArchiver(DirectoriesMixin, TestCase):
def make_models(self):
return Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf")
def test_archiver(self):
doc = self.make_models()
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))
call_command('document_archiver')
def test_handle_document(self):
doc = self.make_models()
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))
handle_document(doc.pk)
doc = Document.objects.get(id=doc.id)
self.assertIsNotNone(doc.checksum)
self.assertIsNotNone(doc.archive_checksum)
self.assertTrue(os.path.isfile(doc.archive_path))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(filecmp.cmp(sample_file, doc.source_path))
self.assertEqual(doc.archive_filename, "none/A.pdf")
def test_unknown_mime_type(self):
doc = self.make_models()
doc.mime_type = "sdgfh"
doc.save()
shutil.copy(sample_file, doc.source_path)
handle_document(doc.pk)
doc = Document.objects.get(id=doc.id)
self.assertIsNotNone(doc.checksum)
self.assertIsNone(doc.archive_checksum)
self.assertIsNone(doc.archive_filename)
self.assertTrue(os.path.isfile(doc.source_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
def test_naming_priorities(self):
doc1 = Document.objects.create(checksum="A", title="document", content="first document", mime_type="application/pdf", filename="document.pdf")
doc2 = Document.objects.create(checksum="B", title="document", content="second document", mime_type="application/pdf", filename="document_01.pdf")
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document.pdf"))
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document_01.pdf"))
handle_document(doc2.pk)
handle_document(doc1.pk)
doc1 = Document.objects.get(id=doc1.id)
doc2 = Document.objects.get(id=doc2.id)
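# Processing order is irrelevant here: each archive inherits the name of its original, so doc1 keeps "document.pdf" although doc2 was archived first.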
self.assertEqual(doc1.archive_filename, "document.pdf")
self.assertEqual(doc2.archive_filename, "document_01.pdf")
class TestDecryptDocuments(TestCase):
@override_settings(
ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
PASSPHRASE="test",
PAPERLESS_FILENAME_FORMAT=None
)
@mock.patch("documents.management.commands.decrypt_documents.input")
def test_decrypt(self, m):
media_dir = tempfile.mkdtemp()
originals_dir = os.path.join(media_dir, "documents", "originals")
thumb_dir = os.path.join(media_dir, "documents", "thumbnails")
os.makedirs(originals_dir, exist_ok=True)
os.makedirs(thumb_dir, exist_ok=True)
override_settings(
ORIGINALS_DIR=originals_dir,
THUMBNAIL_DIR=thumb_dir,
PASSPHRASE="test"
).enable()
doc = Document.objects.create(checksum="82186aaa94f0b98697d704b90fd1c072", title="wow", filename="0000004.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000004.pdf.gpg"), os.path.join(originals_dir, "0000004.pdf.gpg"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000004.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"))
call_command('decrypt_documents')
doc.refresh_from_db()
self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
self.assertEqual(doc.filename, "0000004.pdf")
self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000004.pdf")))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png")))
self.assertTrue(os.path.isfile(doc.thumbnail_path))
with doc.source_file as f:
checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(checksum, doc.checksum)
class TestMakeIndex(TestCase):
@mock.patch("documents.management.commands.document_index.index_reindex")
def test_reindex(self, m):
call_command("document_index", "reindex")
m.assert_called_once()
@mock.patch("documents.management.commands.document_index.index_optimize")
def test_optimize(self, m):
call_command("document_index", "optimize")
m.assert_called_once()
class TestRenamer(DirectoriesMixin, TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT="")
def test_rename(self):
doc = Document.objects.create(title="test", mime_type="image/jpeg")
doc.filename = generate_filename(doc)
doc.archive_filename = generate_filename(doc, archive_filename=True)
doc.save()
Path(doc.source_path).touch()
Path(doc.archive_path).touch()
with override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}"):
call_command("document_renamer")
doc2 = Document.objects.get(id=doc.id)
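# The renamed original keeps its .jpg extension while the archived copy is stored as a PDF.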
self.assertEqual(doc2.filename, "none/test.jpg")
self.assertEqual(doc2.archive_filename, "none/test.pdf")
self.assertFalse(os.path.isfile(doc.source_path))
self.assertFalse(os.path.isfile(doc.archive_path))
self.assertTrue(os.path.isfile(doc2.source_path))
self.assertTrue(os.path.isfile(doc2.archive_path))
class TestCreateClassifier(TestCase):
@mock.patch("documents.management.commands.document_create_classifier.train_classifier")
def test_create_classifier(self, m):
call_command("document_create_classifier")
m.assert_called_once()
class TestSanityChecker(DirectoriesMixin, TestCase):
def test_no_issues(self):
with self.assertLogs() as capture:
call_command("document_sanity_checker")
self.assertEqual(len(capture.output), 1)
self.assertIn("Sanity checker detected no issues.", capture.output[0])
def test_errors(self):
doc = Document.objects.create(title="test", content="test", filename="test.pdf", checksum="abc")
Path(doc.source_path).touch()
Path(doc.thumbnail_path).touch()
with self.assertLogs() as capture:
call_command("document_sanity_checker")
self.assertEqual(len(capture.output), 1)
self.assertIn("Checksum mismatch of document", capture.output[0])

View File

@@ -1,42 +0,0 @@
import filecmp
import os
import shutil
from django.core.management import call_command
from django.test import TestCase
from documents.management.commands.document_archiver import handle_document
from documents.models import Document
from documents.tests.utils import DirectoriesMixin
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
class TestArchiver(DirectoriesMixin, TestCase):
def make_models(self):
self.d1 = Document.objects.create(checksum="A", title="A", content="first document", pk=1, mime_type="application/pdf")
#self.d2 = Document.objects.create(checksum="B", title="B", content="second document")
#self.d3 = Document.objects.create(checksum="C", title="C", content="unrelated document")
def test_archiver(self):
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, "0000001.pdf"))
self.make_models()
call_command('document_archiver')
def test_handle_document(self):
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, "0000001.pdf"))
self.make_models()
handle_document(self.d1)
doc = Document.objects.get(id=self.d1.id)
self.assertIsNotNone(doc.checksum)
self.assertTrue(os.path.isfile(doc.archive_path))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(filecmp.cmp(sample_file, doc.source_path))

View File

@@ -203,7 +203,7 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot')
@override_settings(CONSUMER_POLLING=1)
@override_settings(CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20)
class TestConsumerPolling(TestConsumer):
# just do all the tests with polling
pass
@@ -215,8 +215,7 @@ class TestConsumerRecursive(TestConsumer):
pass
@override_settings(CONSUMER_RECURSIVE=True)
@override_settings(CONSUMER_POLLING=1)
@override_settings(CONSUMER_RECURSIVE=True, CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20)
class TestConsumerRecursivePolling(TestConsumer):
# just do all the tests with polling and recursive
pass
@@ -230,7 +229,7 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
tag_names = ("existingTag", "Space Tag")
# Create a Tag before consuming a file that references it in its path
tag_ids = [Tag.objects.create(name=tag_names[0]).pk,]
tag_ids = [Tag.objects.create(name="existingtag").pk,]
self.t_start()
@@ -257,6 +256,6 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
# their order.
self.assertCountEqual(kwargs["override_tag_ids"], tag_ids)
@override_settings(CONSUMER_POLLING=1)
@override_settings(CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20)
def test_consume_file_with_path_tags_polling(self):
self.test_consume_file_with_path_tags()

View File

@@ -1,57 +0,0 @@
import hashlib
import json
import os
import shutil
import tempfile
from unittest import mock
from django.core.management import call_command
from django.test import TestCase, override_settings
from documents.management.commands import document_exporter
from documents.models import Document, Tag, DocumentType, Correspondent
class TestDecryptDocuments(TestCase):
@override_settings(
ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
PASSPHRASE="test",
PAPERLESS_FILENAME_FORMAT=None
)
@mock.patch("documents.management.commands.decrypt_documents.input")
def test_decrypt(self, m):
media_dir = tempfile.mkdtemp()
originals_dir = os.path.join(media_dir, "documents", "originals")
thumb_dir = os.path.join(media_dir, "documents", "thumbnails")
os.makedirs(originals_dir, exist_ok=True)
os.makedirs(thumb_dir, exist_ok=True)
override_settings(
ORIGINALS_DIR=originals_dir,
THUMBNAIL_DIR=thumb_dir,
PASSPHRASE="test"
).enable()
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000002.png.gpg"), os.path.join(thumb_dir, "0000002.png.gpg"))
Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", id=2, mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
call_command('decrypt_documents')
doc = Document.objects.get(id=2)
self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
self.assertEqual(doc.filename, "0000002.pdf")
self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf")))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(os.path.join(thumb_dir, "0000002.png")))
self.assertTrue(os.path.isfile(doc.thumbnail_path))
with doc.source_file as f:
checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(checksum, doc.checksum)

View File

@@ -3,59 +3,224 @@ import json
import os
import shutil
import tempfile
from pathlib import Path
from unittest import mock
from django.core.management import call_command
from django.test import TestCase, override_settings
from documents.management.commands import document_exporter
from documents.models import Document, Tag, DocumentType, Correspondent
from documents.tests.utils import DirectoriesMixin
from documents.sanity_checker import check_sanity
from documents.settings import EXPORTER_FILE_NAME
from documents.tests.utils import DirectoriesMixin, paperless_environment
class TestExporter(DirectoriesMixin, TestCase):
class TestExportImport(DirectoriesMixin, TestCase):
def setUp(self) -> None:
self.target = tempfile.mkdtemp()
self.addCleanup(shutil.rmtree, self.target)
self.d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow1", filename="0000001.pdf", mime_type="application/pdf", archive_filename="0000001.pdf")
self.d2 = Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow2", filename="0000002.pdf", mime_type="application/pdf")
self.d3 = Document.objects.create(content="Content", checksum="d38d7ed02e988e072caf924e0f3fcb76", title="wow2", filename="0000003.pdf", mime_type="application/pdf")
self.d4 = Document.objects.create(content="Content", checksum="82186aaa94f0b98697d704b90fd1c072", title="wow_dec", filename="0000004.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
self.t1 = Tag.objects.create(name="t")
self.dt1 = DocumentType.objects.create(name="dt")
self.c1 = Correspondent.objects.create(name="c")
self.d1.tags.add(self.t1)
self.d1.correspondent = self.c1
self.d1.document_type = self.dt1
self.d1.save()
super(TestExportImport, self).setUp()
def _get_document_from_manifest(self, manifest, id):
f = list(filter(lambda d: d['model'] == "documents.document" and d['pk'] == id, manifest))
if len(f) == 1:
return f[0]
else:
raise ValueError(f"document with id {id} does not exist in manifest")
@override_settings(
PASSPHRASE="test"
)
def test_exporter(self):
def _do_export(self, use_filename_format=False, compare_checksums=False, delete=False):
args = ['document_exporter', self.target]
if use_filename_format:
args += ["--use-filename-format"]
if compare_checksums:
args += ["--compare-checksums"]
if delete:
args += ["--delete"]
call_command(*args)
with open(os.path.join(self.target, "manifest.json")) as f:
manifest = json.load(f)
return manifest
def test_exporter(self, use_filename_format=False):
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
file = os.path.join(self.dirs.originals_dir, "0000001.pdf")
manifest = self._do_export(use_filename_format=use_filename_format)
Document.objects.create(checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow", filename="0000001.pdf", id=1, mime_type="application/pdf")
Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", id=2, mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
Tag.objects.create(name="t")
DocumentType.objects.create(name="dt")
Correspondent.objects.create(name="c")
self.assertEqual(len(manifest), 7)
self.assertEqual(len(list(filter(lambda e: e['model'] == 'documents.document', manifest))), 4)
target = tempfile.mkdtemp()
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
call_command('document_exporter', target)
with open(os.path.join(target, "manifest.json")) as f:
manifest = json.load(f)
self.assertEqual(len(manifest), 5)
self.assertEqual(self._get_document_from_manifest(manifest, self.d1.id)['fields']['title'], "wow1")
self.assertEqual(self._get_document_from_manifest(manifest, self.d2.id)['fields']['title'], "wow2")
self.assertEqual(self._get_document_from_manifest(manifest, self.d3.id)['fields']['title'], "wow2")
self.assertEqual(self._get_document_from_manifest(manifest, self.d4.id)['fields']['title'], "wow_dec")
for element in manifest:
if element['model'] == 'documents.document':
fname = os.path.join(target, element[document_exporter.EXPORTER_FILE_NAME])
fname = os.path.join(self.target, element[document_exporter.EXPORTER_FILE_NAME])
self.assertTrue(os.path.exists(fname))
self.assertTrue(os.path.exists(os.path.join(target, element[document_exporter.EXPORTER_THUMBNAIL_NAME])))
self.assertTrue(os.path.exists(os.path.join(self.target, element[document_exporter.EXPORTER_THUMBNAIL_NAME])))
with open(fname, "rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(checksum, element['fields']['checksum'])
self.assertEqual(element['fields']['storage_type'], Document.STORAGE_TYPE_UNENCRYPTED)
if document_exporter.EXPORTER_ARCHIVE_NAME in element:
fname = os.path.join(target, element[document_exporter.EXPORTER_ARCHIVE_NAME])
fname = os.path.join(self.target, element[document_exporter.EXPORTER_ARCHIVE_NAME])
self.assertTrue(os.path.exists(fname))
with open(fname, "rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(checksum, element['fields']['archive_checksum'])
Document.objects.create(checksum="AAAAAAAAAAAAAAAAA", title="wow", filename="0000004.pdf", id=3, mime_type="application/pdf")
with paperless_environment() as dirs:
self.assertEqual(Document.objects.count(), 4)
Document.objects.all().delete()
Correspondent.objects.all().delete()
DocumentType.objects.all().delete()
Tag.objects.all().delete()
self.assertEqual(Document.objects.count(), 0)
call_command('document_importer', self.target)
self.assertEqual(Document.objects.count(), 4)
self.assertEqual(Tag.objects.count(), 1)
self.assertEqual(Correspondent.objects.count(), 1)
self.assertEqual(DocumentType.objects.count(), 1)
self.assertEqual(Document.objects.get(id=self.d1.id).title, "wow1")
self.assertEqual(Document.objects.get(id=self.d2.id).title, "wow2")
self.assertEqual(Document.objects.get(id=self.d3.id).title, "wow2")
self.assertEqual(Document.objects.get(id=self.d4.id).title, "wow_dec")
messages = check_sanity()
# everything is alright after the test
self.assertEqual(len(messages), 0, str([str(m) for m in messages]))
def test_exporter_with_filename_format(self):
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
with override_settings(PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}"):
self.test_exporter(use_filename_format=True)
def test_update_export_changed_time(self):
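# A second export without any changes should copy no files; touching one source file makes exactly that file eligible again. The manifest itself is rewritten on every run, hence the mtime comparisons.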
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
self._do_export()
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
self._do_export()
m.assert_not_called()
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
st_mtime_2 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
Path(self.d1.source_path).touch()
with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
self._do_export()
self.assertEqual(m.call_count, 1)
st_mtime_3 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
self.assertNotEqual(st_mtime_1, st_mtime_2)
self.assertNotEqual(st_mtime_2, st_mtime_3)
def test_update_export_changed_checksum(self):
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
self._do_export()
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
self._do_export()
m.assert_not_called()
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
self.d2.checksum = "asdfasdgf3"
self.d2.save()
with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
self._do_export(compare_checksums=True)
self.assertEqual(m.call_count, 1)
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
def test_update_export_deleted_document(self):
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
manifest = self._do_export()
self.assertEqual(len(manifest), 7)
doc_from_manifest = self._get_document_from_manifest(manifest, self.d3.id)
self.assertTrue(os.path.isfile(os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])))
self.d3.delete()
manifest = self._do_export()
self.assertRaises(ValueError, self._get_document_from_manifest, manifest, self.d3.id)
self.assertTrue(os.path.isfile(os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])))
manifest = self._do_export(delete=True)
self.assertFalse(os.path.isfile(os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])))
self.assertEqual(len(manifest), 6)
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}/{correspondent}")
def test_update_export_changed_location(self):
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
m = self._do_export(use_filename_format=True)
self.assertTrue(os.path.isfile(os.path.join(self.target, "wow1", "c.pdf")))
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
self.d1.title = "new_title"
self.d1.save()
self._do_export(use_filename_format=True, delete=True)
self.assertFalse(os.path.isfile(os.path.join(self.target, "wow1", "c.pdf")))
self.assertFalse(os.path.isdir(os.path.join(self.target, "wow1")))
self.assertTrue(os.path.isfile(os.path.join(self.target, "new_title", "c.pdf")))
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none.pdf")))
self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf")))
def test_export_missing_files(self):
target = tempfile.mkdtemp()
self.addCleanup(shutil.rmtree, target)
Document.objects.create(checksum="AAAAAAAAAAAAAAAAA", title="wow", filename="0000004.pdf", mime_type="application/pdf")
self.assertRaises(FileNotFoundError, call_command, 'document_exporter', target)
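The manifest layout these tests rely on can be checked outside the test suite as well; a minimal verification sketch using the exporter's own key constant (the export directory path is hypothetical):
import hashlib
import json
import os
from documents.management.commands import document_exporter
export_dir = "/path/to/export"  # hypothetical export target
with open(os.path.join(export_dir, "manifest.json")) as f:
    manifest = json.load(f)
for element in manifest:
    if element['model'] != 'documents.document':
        continue
    fname = os.path.join(export_dir, element[document_exporter.EXPORTER_FILE_NAME])
    with open(fname, "rb") as f:
        # Same MD5 comparison the tests above perform.
        assert hashlib.md5(f.read()).hexdigest() == element['fields']['checksum']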

View File

@@ -14,6 +14,12 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.tag_first = Tag.objects.create(name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY)
self.tag_second = Tag.objects.create(name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY)
self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
self.tag_no_match = Tag.objects.create(name="test2")
self.d3.tags.add(self.tag_inbox)
self.d3.tags.add(self.tag_no_match)
self.correspondent_first = Correspondent.objects.create(
name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY)
@@ -38,7 +44,7 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.assertEqual(d_first.tags.count(), 1)
self.assertEqual(d_second.tags.count(), 1)
self.assertEqual(d_unrelated.tags.count(), 0)
self.assertEqual(d_unrelated.tags.count(), 2)
self.assertEqual(d_first.tags.first(), self.tag_first)
self.assertEqual(d_second.tags.first(), self.tag_second)
@@ -56,3 +62,17 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.assertEqual(d_first.correspondent, self.correspondent_first)
self.assertEqual(d_second.correspondent, self.correspondent_second)
def test_overwrite_preserve_inbox(self):
self.d1.tags.add(self.tag_second)
call_command('document_retagger', '--tags', '--overwrite')
d_first, d_second, d_unrelated = self.get_updated_docs()
self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))
self.assertCountEqual([tag.id for tag in d_first.tags.all()], [self.tag_first.id])
self.assertCountEqual([tag.id for tag in d_second.tags.all()], [self.tag_second.id])
self.assertCountEqual([tag.id for tag in d_unrelated.tags.all()], [self.tag_inbox.id, self.tag_no_match.id])

View File

@@ -0,0 +1,52 @@
import os
import shutil
from unittest import mock
from django.core.management import call_command
from django.test import TestCase
from documents.management.commands.document_thumbnails import _process_document
from documents.models import Document, Tag, Correspondent, DocumentType
from documents.tests.utils import DirectoriesMixin
class TestMakeThumbnails(DirectoriesMixin, TestCase):
def make_models(self):
self.d1 = Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf", filename="test.pdf")
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), self.d1.source_path)
self.d2 = Document.objects.create(checksum="Ass", title="A", content="first document", mime_type="application/pdf", filename="test2.pdf")
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), self.d2.source_path)
def setUp(self) -> None:
super(TestMakeThumbnails, self).setUp()
self.make_models()
def test_process_document(self):
self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
_process_document(self.d1.id)
self.assertTrue(os.path.isfile(self.d1.thumbnail_path))
@mock.patch("documents.management.commands.document_thumbnails.shutil.move")
def test_process_document_invalid_mime_type(self, m):
self.d1.mime_type = "asdasdasd"
self.d1.save()
_process_document(self.d1.id)
m.assert_not_called()
def test_command(self):
self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
call_command('document_thumbnails')
self.assertTrue(os.path.isfile(self.d1.thumbnail_path))
self.assertTrue(os.path.isfile(self.d2.thumbnail_path))
def test_command_documentid(self):
self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
call_command('document_thumbnails', '-d', f"{self.d1.id}")
self.assertTrue(os.path.isfile(self.d1.thumbnail_path))
self.assertFalse(os.path.isfile(self.d2.thumbnail_path))

View File

@@ -21,13 +21,15 @@ class TestMatching(TestCase):
matching_algorithm=getattr(klass, algorithm)
)
for string in true:
doc = Document(content=string)
self.assertTrue(
matching.matches(instance, string),
matching.matches(instance, doc),
'"%s" should match "%s" but it does not' % (text, string)
)
for string in false:
doc = Document(content=string)
self.assertFalse(
matching.matches(instance, string),
matching.matches(instance, doc),
'"%s" should not match "%s" but it does' % (text, string)
)
@@ -169,7 +171,7 @@ class TestMatching(TestCase):
def test_match_regex(self):
self._test_matching(
r"alpha\w+gamma",
"alpha\w+gamma",
"MATCH_REGEX",
(
"I have alpha_and_gamma in me",
@@ -187,6 +189,16 @@ class TestMatching(TestCase):
)
)
def test_match_invalid_regex(self):
self._test_matching(
"[[",
"MATCH_REGEX",
[],
[
"Don't match this"
]
)
def test_match_fuzzy(self):
self._test_matching(

View File

@@ -0,0 +1,325 @@
import hashlib
import os
import shutil
from pathlib import Path
from unittest import mock
from django.conf import settings
from django.test import override_settings
from documents.parsers import ParseError
from documents.tests.utils import DirectoriesMixin, TestMigrations
STORAGE_TYPE_GPG = "gpg"
def archive_name_from_filename(filename):
return os.path.splitext(filename)[0] + ".pdf"
def archive_path_old(self):
if self.filename:
fname = archive_name_from_filename(self.filename)
else:
fname = "{:07}.pdf".format(self.pk)
return os.path.join(
settings.ARCHIVE_DIR,
fname
)
def archive_path_new(doc):
if doc.archive_filename is not None:
return os.path.join(
settings.ARCHIVE_DIR,
str(doc.archive_filename)
)
else:
return None
def source_path(doc):
if doc.filename:
fname = str(doc.filename)
else:
fname = "{:07}{}".format(doc.pk, doc.file_type)
if doc.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg" # pragma: no cover
return os.path.join(
settings.ORIGINALS_DIR,
fname
)
def thumbnail_path(doc):
file_name = "{:07}.png".format(doc.pk)
if doc.storage_type == STORAGE_TYPE_GPG:
file_name += ".gpg"
return os.path.join(
settings.THUMBNAIL_DIR,
file_name
)
def make_test_document(document_class, title: str, mime_type: str, original: str, original_filename: str, archive: str = None, archive_filename: str = None):
doc = document_class()
doc.filename = original_filename
doc.title = title
doc.mime_type = mime_type
doc.content = "the content, does not matter for this test"
doc.save()
shutil.copy2(original, source_path(doc))
with open(original, "rb") as f:
doc.checksum = hashlib.md5(f.read()).hexdigest()
if archive:
if archive_filename:
doc.archive_filename = archive_filename
shutil.copy2(archive, archive_path_new(doc))
else:
shutil.copy2(archive, archive_path_old(doc))
with open(archive, "rb") as f:
doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
doc.save()
Path(thumbnail_path(doc)).touch()
return doc
simple_jpg = os.path.join(os.path.dirname(__file__), "samples", "simple.jpg")
simple_pdf = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
simple_pdf2 = os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf")
simple_pdf3 = os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000003.pdf")
simple_txt = os.path.join(os.path.dirname(__file__), "samples", "simple.txt")
simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png")
simple_png2 = os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
@override_settings(PAPERLESS_FILENAME_FORMAT="")
class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
migrate_from = '1011_auto_20210101_2340'
migrate_to = '1012_fix_archive_files'
def setUpBeforeMigration(self, apps):
Document = apps.get_model("documents", "Document")
self.unrelated = make_test_document(Document, "unrelated", "application/pdf", simple_pdf3, "unrelated.pdf", simple_pdf)
self.no_text = make_test_document(Document, "no-text", "image/png", simple_png2, "no-text.png", simple_pdf)
self.doc_no_archive = make_test_document(Document, "no_archive", "text/plain", simple_txt, "no_archive.txt")
self.clash1 = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf)
self.clash2 = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf)
self.clash3 = make_test_document(Document, "clash", "image/png", simple_png, "clash.png", simple_pdf)
self.clash4 = make_test_document(Document, "clash.png", "application/pdf", simple_pdf2, "clash.png.pdf", simple_pdf2)
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash2))
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash3))
self.assertNotEqual(archive_path_old(self.clash1), archive_path_old(self.clash4))
def testArchiveFilesMigrated(self):
Document = self.apps.get_model('documents', 'Document')
for doc in Document.objects.all():
if doc.archive_checksum:
self.assertIsNotNone(doc.archive_filename)
self.assertTrue(os.path.isfile(archive_path_new(doc)))
else:
self.assertIsNone(doc.archive_filename)
with open(source_path(doc), "rb") as f:
original_checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(original_checksum, doc.checksum)
if doc.archive_checksum:
self.assertTrue(os.path.isfile(archive_path_new(doc)))
with open(archive_path_new(doc), "rb") as f:
archive_checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(archive_checksum, doc.archive_checksum)
self.assertEqual(Document.objects.filter(archive_checksum__isnull=False).count(), 6)
def test_filenames(self):
Document = self.apps.get_model('documents', 'Document')
self.assertEqual(Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf")
self.assertEqual(Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf")
self.assertEqual(Document.objects.get(id=self.doc_no_archive.id).archive_filename, None)
self.assertEqual(Document.objects.get(id=self.clash1.id).archive_filename, f"{self.clash1.id:07}.pdf")
self.assertEqual(Document.objects.get(id=self.clash2.id).archive_filename, f"{self.clash2.id:07}.pdf")
self.assertEqual(Document.objects.get(id=self.clash3.id).archive_filename, f"{self.clash3.id:07}.pdf")
self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
def test_filenames(self):
Document = self.apps.get_model('documents', 'Document')
self.assertEqual(Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf")
self.assertEqual(Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf")
self.assertEqual(Document.objects.get(id=self.doc_no_archive.id).archive_filename, None)
self.assertEqual(Document.objects.get(id=self.clash1.id).archive_filename, "none/clash.pdf")
self.assertEqual(Document.objects.get(id=self.clash2.id).archive_filename, "none/clash_01.pdf")
self.assertEqual(Document.objects.get(id=self.clash3.id).archive_filename, "none/clash_02.pdf")
self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf")
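For reference, the clash these migration tests exercise follows directly from the archive_name_from_filename() helper at the top of this file; a quick illustration:
# Originals that differ only by extension used to map to the same archive
# file, which is what migration 1012 resolves by renaming:
assert archive_name_from_filename("clash.pdf") == "clash.pdf"
assert archive_name_from_filename("clash.jpg") == "clash.pdf"
assert archive_name_from_filename("clash.png") == "clash.pdf"
# clash4's original already ends in ".pdf", so its archive name stays unique:
assert archive_name_from_filename("clash.png.pdf") == "clash.png.pdf"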
def fake_parse_wrapper(parser, path, mime_type, file_name):
parser.archive_path = None
parser.text = "the text"
@override_settings(PAPERLESS_FILENAME_FORMAT="")
class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
migrate_from = '1011_auto_20210101_2340'
migrate_to = '1012_fix_archive_files'
auto_migrate = False
def test_archive_missing(self):
Document = self.apps.get_model("documents", "Document")
doc = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf)
os.unlink(archive_path_old(doc))
self.assertRaisesMessage(ValueError, "does not exist at: ", self.performMigration)
def test_parser_missing(self):
Document = self.apps.get_model("documents", "Document")
doc1 = make_test_document(Document, "document", "invalid/typesss768", simple_png, "document.png", simple_pdf)
doc2 = make_test_document(Document, "document", "invalid/typesss768", simple_jpg, "document.jpg", simple_pdf)
self.assertRaisesMessage(ValueError, "no parsers are available", self.performMigration)
@mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
def test_parser_error(self, m):
m.side_effect = ParseError()
Document = self.apps.get_model("documents", "Document")
doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
self.assertIsNotNone(doc1.archive_checksum)
self.assertIsNotNone(doc2.archive_checksum)
with self.assertLogs() as capture:
self.performMigration()
self.assertEqual(m.call_count, 6)
self.assertEqual(
len(list(filter(lambda log: "Parse error, will try again in 5 seconds" in log, capture.output))),
4)
self.assertEqual(
len(list(filter(lambda log: "Unable to regenerate archive document for ID:" in log, capture.output))),
2)
Document = self.apps.get_model("documents", "Document")
doc1 = Document.objects.get(id=doc1.id)
doc2 = Document.objects.get(id=doc2.id)
self.assertIsNone(doc1.archive_checksum)
self.assertIsNone(doc2.archive_checksum)
self.assertIsNone(doc1.archive_filename)
self.assertIsNone(doc2.archive_filename)
@mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
def test_parser_no_archive(self, m):
m.side_effect = fake_parse_wrapper
Document = self.apps.get_model("documents", "Document")
doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
with self.assertLogs() as capture:
self.performMigration()
self.assertEqual(
len(list(filter(lambda log: "Parser did not return an archive document for document" in log, capture.output))),
2)
Document = self.apps.get_model("documents", "Document")
doc1 = Document.objects.get(id=doc1.id)
doc2 = Document.objects.get(id=doc2.id)
self.assertIsNone(doc1.archive_checksum)
self.assertIsNone(doc2.archive_checksum)
self.assertIsNone(doc1.archive_filename)
self.assertIsNone(doc2.archive_filename)
@override_settings(PAPERLESS_FILENAME_FORMAT="")
class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
migrate_from = '1012_fix_archive_files'
migrate_to = '1011_auto_20210101_2340'
def setUpBeforeMigration(self, apps):
Document = apps.get_model("documents", "Document")
doc_unrelated = make_test_document(Document, "unrelated", "application/pdf", simple_pdf2, "unrelated.txt", simple_pdf2, "unrelated.pdf")
doc_no_archive = make_test_document(Document, "no_archive", "text/plain", simple_txt, "no_archive.txt")
clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_02.pdf")
def testArchiveFilesReverted(self):
Document = self.apps.get_model('documents', 'Document')
for doc in Document.objects.all():
if doc.archive_checksum:
self.assertTrue(os.path.isfile(archive_path_old(doc)))
with open(source_path(doc), "rb") as f:
original_checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(original_checksum, doc.checksum)
if doc.archive_checksum:
self.assertTrue(os.path.isfile(archive_path_old(doc)))
with open(archive_path_old(doc), "rb") as f:
archive_checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(archive_checksum, doc.archive_checksum)
self.assertEqual(Document.objects.filter(archive_checksum__isnull=False).count(), 2)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
class TestMigrateArchiveFilesBackwardsWithFilenameFormat(TestMigrateArchiveFilesBackwards):
pass
@override_settings(PAPERLESS_FILENAME_FORMAT="")
class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations):
migrate_from = '1012_fix_archive_files'
migrate_to = '1011_auto_20210101_2340'
auto_migrate = False
def test_filename_clash(self):
Document = self.apps.get_model("documents", "Document")
self.clashA = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf, "clash_02.pdf")
self.clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_01.pdf")
self.assertRaisesMessage(ValueError, "would clash with another archive filename", self.performMigration)
def test_filename_exists(self):
Document = self.apps.get_model("documents", "Document")
self.clashA = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf, "clash.pdf")
self.clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_01.pdf")
self.assertRaisesMessage(ValueError, "file already exists.", self.performMigration)

View File

@@ -0,0 +1,88 @@
import os
import shutil
from django.conf import settings
from django.test import override_settings
from documents.parsers import get_default_file_extension
from documents.tests.utils import DirectoriesMixin, TestMigrations
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
STORAGE_TYPE_GPG = "gpg"
def source_path_before(self):
if self.filename:
fname = str(self.filename)
else:
fname = "{:07}.{}".format(self.pk, self.file_type)
if self.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg"
return os.path.join(
settings.ORIGINALS_DIR,
fname
)
def file_type_after(self):
return get_default_file_extension(self.mime_type)
def source_path_after(doc):
if doc.filename:
fname = str(doc.filename)
else:
fname = "{:07}{}".format(doc.pk, file_type_after(doc))
if doc.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg" # pragma: no cover
return os.path.join(
settings.ORIGINALS_DIR,
fname
)
@override_settings(PASSPHRASE="test")
class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
migrate_from = '1002_auto_20201111_1105'
migrate_to = '1003_mime_types'
def setUpBeforeMigration(self, apps):
Document = apps.get_model("documents", "Document")
doc = Document.objects.create(title="test", file_type="pdf", filename="file1.pdf")
self.doc_id = doc.id
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), source_path_before(doc))
doc2 = Document.objects.create(checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG)
self.doc2_id = doc2.id
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000004.pdf.gpg"), source_path_before(doc2))
def testMimeTypesMigrated(self):
Document = self.apps.get_model('documents', 'Document')
doc = Document.objects.get(id=self.doc_id)
self.assertEqual(doc.mime_type, "application/pdf")
doc2 = Document.objects.get(id=self.doc2_id)
self.assertEqual(doc2.mime_type, "application/pdf")
@override_settings(PASSPHRASE="test")
class TestMigrateMimeTypeBackwards(DirectoriesMixin, TestMigrations):
migrate_from = '1003_mime_types'
migrate_to = '1002_auto_20201111_1105'
def setUpBeforeMigration(self, apps):
Document = apps.get_model("documents", "Document")
doc = Document.objects.create(title="test", mime_type="application/pdf", filename="file1.pdf")
self.doc_id = doc.id
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), source_path_after(doc))
def testMimeTypesReverted(self):
Document = self.apps.get_model('documents', 'Document')
doc = Document.objects.get(id=self.doc_id)
self.assertEqual(doc.file_type, "pdf")

View File

@@ -68,7 +68,7 @@ class TestParserDiscovery(TestCase):
)
def fake_get_thumbnail(self, path, mimetype):
def fake_get_thumbnail(self, path, mimetype, file_name):
return os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
@@ -89,15 +89,15 @@ class TestBaseParser(TestCase):
def test_get_optimised_thumbnail(self):
parser = DocumentParser(None)
parser.get_optimised_thumbnail("any", "not important")
parser.get_optimised_thumbnail("any", "not important", "document.pdf")
@mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail)
@override_settings(OPTIMIZE_THUMBNAILS=False)
def test_get_optimised_thumb_disabled(self):
parser = DocumentParser(None)
path = parser.get_optimised_thumbnail("any", "not important")
self.assertEqual(path, fake_get_thumbnail(None, None, None))
path = parser.get_optimised_thumbnail("any", "not important", "document.pdf")
self.assertEqual(path, fake_get_thumbnail(None, None, None, None))
class TestParserAvailability(TestCase):
@@ -114,9 +114,10 @@ class TestParserAvailability(TestCase):
self.assertEqual(get_default_file_extension('application/zip'), ".zip")
self.assertEqual(get_default_file_extension('aasdasd/dgfgf'), "")
self.assertEqual(get_parser_class_for_mime_type('application/pdf'), RasterisedDocumentParser)
self.assertEqual(get_parser_class_for_mime_type('text/plain'), TextDocumentParser)
self.assertIsInstance(get_parser_class_for_mime_type('application/pdf')(logging_group=None), RasterisedDocumentParser)
self.assertIsInstance(get_parser_class_for_mime_type('text/plain')(logging_group=None), TextDocumentParser)
self.assertEqual(get_parser_class_for_mime_type('text/sdgsdf'), None)
self.assertTrue(is_file_ext_supported('.pdf'))
self.assertFalse(is_file_ext_supported('.hsdfh'))
self.assertFalse(is_file_ext_supported(''))

View File

@@ -1,57 +0,0 @@
from unittest import mock
from django.test import TestCase, override_settings
from documents.models import Document, Tag, Correspondent
from documents.signals.handlers import run_post_consume_script
class PostConsumeTestCase(TestCase):
@mock.patch("documents.signals.handlers.Popen")
@override_settings(POST_CONSUME_SCRIPT=None)
def test_no_post_consume_script(self, m):
doc = Document.objects.create(title="Test", mime_type="application/pdf")
tag1 = Tag.objects.create(name="a")
tag2 = Tag.objects.create(name="b")
doc.tags.add(tag1)
doc.tags.add(tag2)
run_post_consume_script(None, doc)
m.assert_not_called()
@mock.patch("documents.signals.handlers.Popen")
@override_settings(POST_CONSUME_SCRIPT="script")
def test_post_consume_script_simple(self, m):
doc = Document.objects.create(title="Test", mime_type="application/pdf")
run_post_consume_script(None, doc)
m.assert_called_once()
@mock.patch("documents.signals.handlers.Popen")
@override_settings(POST_CONSUME_SCRIPT="script")
def test_post_consume_script_with_correspondent(self, m):
c = Correspondent.objects.create(name="my_bank")
doc = Document.objects.create(title="Test", mime_type="application/pdf", correspondent=c)
tag1 = Tag.objects.create(name="a")
tag2 = Tag.objects.create(name="b")
doc.tags.add(tag1)
doc.tags.add(tag2)
run_post_consume_script(None, doc)
m.assert_called_once()
args, kwargs = m.call_args
command = args[0]
self.assertEqual(command[0], "script")
self.assertEqual(command[1], str(doc.pk))
self.assertEqual(command[5], f"/api/documents/{doc.pk}/download/")
self.assertEqual(command[6], f"/api/documents/{doc.pk}/thumb/")
self.assertEqual(command[7], "my_bank")
# TODO: tags are unordered by default.
self.assertEqual(command[8], "a,b")
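As the assertions above document, the post-consume script receives the document's metadata as positional arguments. A minimal script consuming them might look like this (positions inferred from this test; positions 2-4 are not asserted here and are treated as unknown):
#!/usr/bin/env python3
import sys
doc_pk = sys.argv[1]            # document primary key
download_url = sys.argv[5]      # e.g. /api/documents/<pk>/download/
thumbnail_url = sys.argv[6]     # e.g. /api/documents/<pk>/thumb/
correspondent = sys.argv[7]     # correspondent name, e.g. "my_bank"
tags = sys.argv[8].split(",")   # comma-separated tag names (order not guaranteed)
print(f"Consumed document {doc_pk} from {correspondent} with tags {tags}")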

View File

@@ -1,23 +1,82 @@
import logging
import os
import shutil
from pathlib import Path
import filelock
from django.conf import settings
from django.test import TestCase
from documents.models import Document
from documents.sanity_checker import check_sanity, SanityFailedError
from documents.sanity_checker import check_sanity, SanityCheckMessages
from documents.tests.utils import DirectoriesMixin
class TestSanityCheckMessages(TestCase):
def test_no_messages(self):
messages = SanityCheckMessages()
self.assertEqual(len(messages), 0)
self.assertFalse(messages.has_error())
self.assertFalse(messages.has_warning())
with self.assertLogs() as capture:
messages.log_messages()
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.INFO)
self.assertEqual(capture.records[0].message, "Sanity checker detected no issues.")
def test_info(self):
messages = SanityCheckMessages()
messages.info("Something might be wrong")
self.assertEqual(len(messages), 1)
self.assertFalse(messages.has_error())
self.assertFalse(messages.has_warning())
with self.assertLogs() as capture:
messages.log_messages()
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.INFO)
self.assertEqual(capture.records[0].message, "Something might be wrong")
def test_warning(self):
messages = SanityCheckMessages()
messages.warning("Something is wrong")
self.assertEqual(len(messages), 1)
self.assertFalse(messages.has_error())
self.assertTrue(messages.has_warning())
with self.assertLogs() as capture:
messages.log_messages()
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.WARNING)
self.assertEqual(capture.records[0].message, "Something is wrong")
def test_error(self):
messages = SanityCheckMessages()
messages.error("Something is seriously wrong")
self.assertEqual(len(messages), 1)
self.assertTrue(messages.has_error())
self.assertFalse(messages.has_warning())
with self.assertLogs() as capture:
messages.log_messages()
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.ERROR)
self.assertEqual(capture.records[0].message, "Something is seriously wrong")
class TestSanityCheck(DirectoriesMixin, TestCase):
def make_test_data(self):
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000001.pdf"), os.path.join(self.dirs.originals_dir, "0000001.pdf"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf"), os.path.join(self.dirs.archive_dir, "0000001.pdf"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png"), os.path.join(self.dirs.thumbnail_dir, "0000001.png"))
with filelock.FileLock(settings.MEDIA_LOCK):
# just make sure that the lockfile is present.
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000001.pdf"), os.path.join(self.dirs.originals_dir, "0000001.pdf"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf"), os.path.join(self.dirs.archive_dir, "0000001.pdf"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png"), os.path.join(self.dirs.thumbnail_dir, "0000001.png"))
return Document.objects.create(title="test", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", content="test", pk=1, filename="0000001.pdf", mime_type="application/pdf")
return Document.objects.create(title="test", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", content="test", pk=1, filename="0000001.pdf", mime_type="application/pdf", archive_filename="0000001.pdf")
def assertSanityError(self, messageRegex):
messages = check_sanity()
self.assertTrue(messages.has_error())
self.assertRegex(messages[0]['message'], messageRegex)
def test_no_docs(self):
self.assertEqual(len(check_sanity()), 0)
@@ -29,59 +88,75 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
def test_no_thumbnail(self):
doc = self.make_test_data()
os.remove(doc.thumbnail_path)
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Thumbnail of document .* does not exist")
def test_thumbnail_no_access(self):
doc = self.make_test_data()
os.chmod(doc.thumbnail_path, 0o000)
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Cannot read thumbnail file of document")
os.chmod(doc.thumbnail_path, 0o777)
def test_no_original(self):
doc = self.make_test_data()
os.remove(doc.source_path)
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Original of document .* does not exist.")
def test_original_no_access(self):
doc = self.make_test_data()
os.chmod(doc.source_path, 0o000)
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Cannot read original file of document")
os.chmod(doc.source_path, 0o777)
def test_original_checksum_mismatch(self):
doc = self.make_test_data()
doc.checksum = "WOW"
doc.save()
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Checksum mismatch of document")
def test_no_archive(self):
doc = self.make_test_data()
os.remove(doc.archive_path)
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Archived version of document .* does not exist.")
def test_archive_no_access(self):
doc = self.make_test_data()
os.chmod(doc.archive_path, 0o000)
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Cannot read archive file of document")
os.chmod(doc.archive_path, 0o777)
def test_archive_checksum_mismatch(self):
doc = self.make_test_data()
doc.archive_checksum = "WOW"
doc.save()
self.assertEqual(len(check_sanity()), 1)
self.assertSanityError("Checksum mismatch of archived document")
def test_empty_content(self):
doc = self.make_test_data()
doc.content = ""
doc.save()
self.assertEqual(len(check_sanity()), 1)
messages = check_sanity()
self.assertFalse(messages.has_error())
self.assertFalse(messages.has_warning())
self.assertEqual(len(messages), 1)
self.assertRegex(messages[0]['message'], "Document .* has no content.")
def test_orphaned_file(self):
doc = self.make_test_data()
Path(self.dirs.originals_dir, "orphaned").touch()
self.assertEqual(len(check_sanity()), 1)
messages = check_sanity()
self.assertFalse(messages.has_error())
self.assertTrue(messages.has_warning())
self.assertEqual(len(messages), 1)
self.assertRegex(messages[0]['message'], "Orphaned file in media dir")
def test_all(self):
Document.objects.create(title="test", checksum="dgfhj", archive_checksum="dfhg", content="", pk=1, filename="0000001.pdf")
string = str(SanityFailedError(check_sanity()))
def test_archive_filename_no_checksum(self):
doc = self.make_test_data()
doc.archive_checksum = None
doc.save()
self.assertSanityError("has an archive file, but its checksum is missing.")
def test_archive_checksum_no_filename(self):
doc = self.make_test_data()
doc.archive_filename = None
doc.save()
self.assertSanityError("has an archive file checksum, but no archive filename.")

View File

@@ -0,0 +1,34 @@
import logging
from unittest import mock
from django.test import TestCase
from paperless.settings import default_task_workers, default_threads_per_worker
class TestSettings(TestCase):
@mock.patch("paperless.settings.multiprocessing.cpu_count")
def test_single_core(self, cpu_count):
cpu_count.return_value = 1
default_workers = default_task_workers()
default_threads = default_threads_per_worker(default_workers)
self.assertEqual(default_workers, 1)
self.assertEqual(default_threads, 1)
def test_workers_threads(self):
for i in range(1, 64):
with mock.patch("paperless.settings.multiprocessing.cpu_count") as cpu_count:
cpu_count.return_value = i
default_workers = default_task_workers()
default_threads = default_threads_per_worker(default_workers)
self.assertTrue(default_workers >= 1)
self.assertTrue(default_threads >= 1)
self.assertTrue(default_workers * default_threads <= i, f"{i}")

View File

@@ -1,10 +1,13 @@
from datetime import datetime
import os
from unittest import mock
from django.conf import settings
from django.test import TestCase
from django.utils import timezone
from documents import tasks
from documents.models import Document
from documents.models import Document, Tag, Correspondent, DocumentType
from documents.sanity_checker import SanityCheckMessages, SanityCheckFailedException
from documents.tests.utils import DirectoriesMixin
@@ -20,5 +23,88 @@ class TestTasks(DirectoriesMixin, TestCase):
tasks.index_optimize()
def test_train_classifier(self):
@mock.patch("documents.tasks.load_classifier")
def test_train_classifier_no_auto_matching(self, load_classifier):
tasks.train_classifier()
load_classifier.assert_not_called()
@mock.patch("documents.tasks.load_classifier")
def test_train_classifier_with_auto_tag(self, load_classifier):
load_classifier.return_value = None
Tag.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
tasks.train_classifier()
load_classifier.assert_called_once()
self.assertFalse(os.path.isfile(settings.MODEL_FILE))
@mock.patch("documents.tasks.load_classifier")
def test_train_classifier_with_auto_type(self, load_classifier):
load_classifier.return_value = None
DocumentType.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
tasks.train_classifier()
load_classifier.assert_called_once()
self.assertFalse(os.path.isfile(settings.MODEL_FILE))
@mock.patch("documents.tasks.load_classifier")
def test_train_classifier_with_auto_correspondent(self, load_classifier):
load_classifier.return_value = None
Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
tasks.train_classifier()
load_classifier.assert_called_once()
self.assertFalse(os.path.isfile(settings.MODEL_FILE))
def test_train_classifier(self):
c = Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
doc = Document.objects.create(correspondent=c, content="test", title="test")
self.assertFalse(os.path.isfile(settings.MODEL_FILE))
tasks.train_classifier()
self.assertTrue(os.path.isfile(settings.MODEL_FILE))
mtime = os.stat(settings.MODEL_FILE).st_mtime
tasks.train_classifier()
self.assertTrue(os.path.isfile(settings.MODEL_FILE))
mtime2 = os.stat(settings.MODEL_FILE).st_mtime
self.assertEqual(mtime, mtime2)
doc.content = "test2"
doc.save()
tasks.train_classifier()
self.assertTrue(os.path.isfile(settings.MODEL_FILE))
mtime3 = os.stat(settings.MODEL_FILE).st_mtime
self.assertNotEqual(mtime2, mtime3)
@mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check_success(self, m):
m.return_value = SanityCheckMessages()
self.assertEqual(tasks.sanity_check(), "No issues detected.")
m.assert_called_once()
@mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check_error(self, m):
messages = SanityCheckMessages()
messages.error("Some error")
m.return_value = messages
self.assertRaises(SanityCheckFailedException, tasks.sanity_check)
m.assert_called_once()
@mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check_warning(self, m):
messages = SanityCheckMessages()
messages.warning("Some warning")
m.return_value = messages
self.assertEqual(tasks.sanity_check(), "Sanity check exited with warnings. See log.")
m.assert_called_once()
@mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check_info(self, m):
messages = SanityCheckMessages()
messages.info("Some info")
m.return_value = messages
self.assertEqual(tasks.sanity_check(), "Sanity check exited with infos. See log.")
m.assert_called_once()
def test_bulk_update_documents(self):
doc1 = Document.objects.create(title="test", content="my document", checksum="wow", added=timezone.now(),
created=timezone.now(), modified=timezone.now())
tasks.bulk_update_documents([doc1.pk])

View File

@@ -0,0 +1,30 @@
from django.conf import settings
from django.contrib.auth.models import User
from django.test import TestCase
class TestViews(TestCase):
def setUp(self) -> None:
self.user = User.objects.create_user("testuser")
def test_login_redirect(self):
response = self.client.get('/')
self.assertEqual(response.status_code, 302)
self.assertEqual(response.url, "/accounts/login/?next=/")
def test_index(self):
self.client.force_login(self.user)
for (language_given, language_actual) in [("", "en-US"), ("en-US", "en-US"), ("de", "de"), ("en", "en-US"), ("en-us", "en-US"), ("fr", "fr"), ("jp", "en-US")]:
if language_given:
self.client.cookies.load({settings.LANGUAGE_COOKIE_NAME: language_given})
elif settings.LANGUAGE_COOKIE_NAME in self.client.cookies.keys():
self.client.cookies.pop(settings.LANGUAGE_COOKIE_NAME)
response = self.client.get('/', )
self.assertEqual(response.status_code, 200)
self.assertEqual(response.context_data['webmanifest'], f"frontend/{language_actual}/manifest.webmanifest")
self.assertEqual(response.context_data['styles_css'], f"frontend/{language_actual}/styles.css")
self.assertEqual(response.context_data['runtime_js'], f"frontend/{language_actual}/runtime.js")
self.assertEqual(response.context_data['polyfills_js'], f"frontend/{language_actual}/polyfills.js")
self.assertEqual(response.context_data['main_js'], f"frontend/{language_actual}/main.js")

View File

@@ -2,8 +2,12 @@ import os
import shutil
import tempfile
from collections import namedtuple
from contextlib import contextmanager
from django.test import override_settings
from django.apps import apps
from django.db import connection
from django.db.migrations.executor import MigrationExecutor
from django.test import override_settings, TransactionTestCase
def setup_directories():
@@ -18,13 +22,16 @@ def setup_directories():
dirs.originals_dir = os.path.join(dirs.media_dir, "documents", "originals")
dirs.thumbnail_dir = os.path.join(dirs.media_dir, "documents", "thumbnails")
dirs.archive_dir = os.path.join(dirs.media_dir, "documents", "archive")
dirs.logging_dir = os.path.join(dirs.data_dir, "log")
os.makedirs(dirs.index_dir, exist_ok=True)
os.makedirs(dirs.originals_dir, exist_ok=True)
os.makedirs(dirs.thumbnail_dir, exist_ok=True)
os.makedirs(dirs.archive_dir, exist_ok=True)
override_settings(
os.makedirs(dirs.logging_dir, exist_ok=True)
dirs.settings_override = override_settings(
DATA_DIR=dirs.data_dir,
SCRATCH_DIR=dirs.scratch_dir,
MEDIA_ROOT=dirs.media_dir,
@@ -32,10 +39,13 @@ def setup_directories():
THUMBNAIL_DIR=dirs.thumbnail_dir,
ARCHIVE_DIR=dirs.archive_dir,
CONSUMPTION_DIR=dirs.consumption_dir,
LOGGING_DIR=dirs.logging_dir,
INDEX_DIR=dirs.index_dir,
MODEL_FILE=os.path.join(dirs.data_dir, "classification_model.pickle")
MODEL_FILE=os.path.join(dirs.data_dir, "classification_model.pickle"),
MEDIA_LOCK=os.path.join(dirs.media_dir, "media.lock")
).enable()
)
dirs.settings_override.enable()
return dirs
@@ -45,6 +55,18 @@ def remove_dirs(dirs):
shutil.rmtree(dirs.data_dir, ignore_errors=True)
shutil.rmtree(dirs.scratch_dir, ignore_errors=True)
shutil.rmtree(dirs.consumption_dir, ignore_errors=True)
dirs.settings_override.disable()
@contextmanager
def paperless_environment():
dirs = None
try:
dirs = setup_directories()
yield dirs
finally:
if dirs:
remove_dirs(dirs)
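The context manager above is what the exporter tests use for a throwaway instance; a minimal usage sketch:
# All overridden settings (MEDIA_ROOT, ORIGINALS_DIR, ARCHIVE_DIR, ...) point
# at temp directories inside the with-block; everything is removed afterwards,
# even if the body raises.
with paperless_environment() as dirs:
    print(dirs.media_dir, dirs.originals_dir, dirs.archive_dir)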
class DirectoriesMixin:
@@ -60,3 +82,45 @@ class DirectoriesMixin:
def tearDown(self) -> None:
super(DirectoriesMixin, self).tearDown()
remove_dirs(self.dirs)
class TestMigrations(TransactionTestCase):
@property
def app(self):
return apps.get_containing_app_config(type(self).__module__).name
migrate_from = None
migrate_to = None
auto_migrate = True
def setUp(self):
super(TestMigrations, self).setUp()
assert self.migrate_from and self.migrate_to, \
"TestCase '{}' must define migrate_from and migrate_to properties".format(type(self).__name__)
self.migrate_from = [(self.app, self.migrate_from)]
self.migrate_to = [(self.app, self.migrate_to)]
executor = MigrationExecutor(connection)
old_apps = executor.loader.project_state(self.migrate_from).apps
# Reverse to the original migration
executor.migrate(self.migrate_from)
self.setUpBeforeMigration(old_apps)
self.apps = old_apps
if self.auto_migrate:
self.performMigration()
def performMigration(self):
# Run the migration to test
executor = MigrationExecutor(connection)
executor.loader.build_graph() # reload.
executor.migrate(self.migrate_to)
self.apps = executor.loader.project_state(self.migrate_to).apps
def setUpBeforeMigration(self, apps):
pass
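A minimal subclass, for illustration (the migration names and the fixture field are hypothetical; the real tests above use values such as '1011_auto_20210101_2340' and '1012_fix_archive_files'):
class TestMyFieldMigration(DirectoriesMixin, TestMigrations):
    migrate_from = '0001_initial'   # hypothetical
    migrate_to = '0002_add_field'   # hypothetical
    def setUpBeforeMigration(self, apps):
        # Runs against the pre-migration project state: create fixtures here.
        Document = apps.get_model("documents", "Document")
        self.doc_id = Document.objects.create(title="test", checksum="A").id
    def test_field_migrated(self):
        # With auto_migrate (the default), self.apps is the post-migration state.
        Document = self.apps.get_model("documents", "Document")
        self.assertTrue(Document.objects.filter(id=self.doc_id).exists())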

View File

@@ -1,10 +1,21 @@
import logging
import os
import tempfile
import uuid
import zipfile
from datetime import datetime
from time import mktime
from django.db.models import Count, Max
from django.conf import settings
from django.db.models import Count, Max, Case, When, IntegerField
from django.db.models.functions import Lower
from django.http import HttpResponse, HttpResponseBadRequest, Http404
from django.utils.translation import get_language
from django.views.decorators.cache import cache_control
from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend
from django_q.tasks import async_task
from rest_framework import parsers
from rest_framework.decorators import action
from rest_framework.filters import OrderingFilter, SearchFilter
from rest_framework.mixins import (
@@ -19,40 +30,74 @@ from rest_framework.views import APIView
from rest_framework.viewsets import (
GenericViewSet,
ModelViewSet,
ReadOnlyModelViewSet
ViewSet
)
import documents.index as index
from paperless.db import GnuPG
from paperless.views import StandardPagination
from .bulk_download import OriginalAndArchiveStrategy, OriginalsOnlyStrategy, \
ArchiveOnlyStrategy
from .classifier import load_classifier
from .filters import (
CorrespondentFilterSet,
DocumentFilterSet,
TagFilterSet,
DocumentTypeFilterSet,
LogFilterSet
DocumentTypeFilterSet
)
from .forms import UploadForm
from .models import Correspondent, Document, Log, Tag, DocumentType
from .matching import match_correspondents, match_tags, match_document_types
from .models import Correspondent, Document, Tag, DocumentType, SavedView
from .parsers import get_parser_class_for_mime_type
from .serialisers import (
CorrespondentSerializer,
DocumentSerializer,
LogSerializer,
TagSerializer,
DocumentTypeSerializer
DocumentTypeSerializer,
PostDocumentSerializer,
SavedViewSerializer,
BulkEditSerializer,
DocumentListSerializer,
BulkDownloadSerializer
)
logger = logging.getLogger("paperless.api")
class IndexView(TemplateView):
template_name = "index.html"
def get_language(self):
# This is here for the following reason:
# Django identifies languages in the form "en-us",
# whereas Angular generates locales as "en-US".
# This method translates between the two forms.
lang = get_language()
if "-" in lang:
first = lang[:lang.index("-")]
second = lang[lang.index("-")+1:]
return f"{first}-{second.upper()}"
else:
return lang
def get_context_data(self, **kwargs):
context = super().get_context_data(**kwargs)
context['cookie_prefix'] = settings.COOKIE_PREFIX
context['username'] = self.request.user.username
context['full_name'] = self.request.user.get_full_name()
context['styles_css'] = f"frontend/{self.get_language()}/styles.css"
context['runtime_js'] = f"frontend/{self.get_language()}/runtime.js"
context['polyfills_js'] = f"frontend/{self.get_language()}/polyfills.js" # NOQA: E501
context['main_js'] = f"frontend/{self.get_language()}/main.js"
context['webmanifest'] = f"frontend/{self.get_language()}/manifest.webmanifest" # NOQA: E501
return context
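Spelled out, the locale translation in get_language() above does the following (a standalone sketch mirroring the method):
def to_angular_locale(lang: str) -> str:
    # Upper-case the region part so Django's "en-us" matches the
    # "en-US" directory names of the Angular frontend build.
    if "-" in lang:
        first, second = lang.split("-", 1)
        return f"{first}-{second.upper()}"
    return lang
assert to_angular_locale("en-us") == "en-US"
assert to_angular_locale("de") == "de"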
class CorrespondentViewSet(ModelViewSet):
model = Correspondent
queryset = Correspondent.objects.annotate(
document_count=Count('documents'),
last_correspondence=Max('documents__created')).order_by('name')
last_correspondence=Max('documents__created')).order_by(Lower('name'))
serializer_class = CorrespondentSerializer
pagination_class = StandardPagination
@@ -71,7 +116,7 @@ class TagViewSet(ModelViewSet):
model = Tag
queryset = Tag.objects.annotate(
document_count=Count('documents')).order_by('name')
document_count=Count('documents')).order_by(Lower('name'))
serializer_class = TagSerializer
pagination_class = StandardPagination
@@ -85,7 +130,7 @@ class DocumentTypeViewSet(ModelViewSet):
model = DocumentType
queryset = DocumentType.objects.annotate(
document_count=Count('documents')).order_by('name')
document_count=Count('documents')).order_by(Lower('name'))
serializer_class = DocumentTypeSerializer
pagination_class = StandardPagination
@@ -118,13 +163,29 @@ class DocumentViewSet(RetrieveModelMixin,
"added",
"archive_serial_number")
def get_queryset(self):
return Document.objects.distinct()
def get_serializer(self, *args, **kwargs):
fields_param = self.request.query_params.get('fields', None)
if fields_param:
fields = fields_param.split(",")
else:
fields = None
serializer_class = self.get_serializer_class()
kwargs.setdefault('context', self.get_serializer_context())
kwargs.setdefault('fields', fields)
return serializer_class(*args, **kwargs)
def update(self, request, *args, **kwargs):
response = super(DocumentViewSet, self).update(
request, *args, **kwargs)
from documents import index
index.add_or_update_document(self.get_object())
return response
def destroy(self, request, *args, **kwargs):
from documents import index
index.remove_document_from_index(self.get_object())
return super(DocumentViewSet, self).destroy(request, *args, **kwargs)
@@ -137,13 +198,13 @@ class DocumentViewSet(RetrieveModelMixin,
def file_response(self, pk, request, disposition):
doc = Document.objects.get(id=pk)
if not self.original_requested(request) and os.path.isfile(doc.archive_path): # NOQA: E501
if not self.original_requested(request) and doc.has_archive_version: # NOQA: E501
file_handle = doc.archive_file
filename = doc.archive_file_name
filename = doc.get_public_filename(archive=True)
mime_type = 'application/pdf'
else:
file_handle = doc.source_file
filename = doc.file_name
filename = doc.get_public_filename()
mime_type = doc.mime_type
if doc.storage_type == Document.STORAGE_TYPE_GPG:
@@ -154,30 +215,76 @@ class DocumentViewSet(RetrieveModelMixin,
disposition, filename)
return response
@action(methods=['post'], detail=False)
def post_document(self, request, pk=None):
# TODO: is this a good implementation?
form = UploadForm(data=request.POST, files=request.FILES)
if form.is_valid():
form.save()
return Response("OK")
def get_metadata(self, file, mime_type):
if not os.path.isfile(file):
return None
parser_class = get_parser_class_for_mime_type(mime_type)
if parser_class:
parser = parser_class(progress_callback=None, logging_group=None)
try:
return parser.extract_metadata(file, mime_type)
except Exception as e:
# TODO: cover GPG errors, remove later.
return []
else:
return HttpResponseBadRequest(str(form.errors))
return []
def get_filesize(self, filename):
if os.path.isfile(filename):
return os.stat(filename).st_size
else:
return None
@action(methods=['get'], detail=True)
def metadata(self, request, pk=None):
try:
doc = Document.objects.get(pk=pk)
return Response({
"paperless__checksum": doc.checksum,
"paperless__mime_type": doc.mime_type,
"paperless__filename": doc.filename,
"paperless__has_archive_version":
os.path.isfile(doc.archive_path)
})
except Document.DoesNotExist:
raise Http404()
meta = {
"original_checksum": doc.checksum,
"original_size": self.get_filesize(doc.source_path),
"original_mime_type": doc.mime_type,
"media_filename": doc.filename,
"has_archive_version": doc.has_archive_version,
"original_metadata": self.get_metadata(
doc.source_path, doc.mime_type),
"archive_checksum": doc.archive_checksum,
"archive_media_filename": doc.archive_filename
}
if doc.has_archive_version:
meta['archive_size'] = self.get_filesize(doc.archive_path)
meta['archive_metadata'] = self.get_metadata(
doc.archive_path, "application/pdf")
else:
meta['archive_size'] = None
meta['archive_metadata'] = None
return Response(meta)
@action(methods=['get'], detail=True)
def suggestions(self, request, pk=None):
try:
doc = Document.objects.get(pk=pk)
except Document.DoesNotExist:
raise Http404()
classifier = load_classifier()
return Response({
"correspondents": [
c.id for c in match_correspondents(doc, classifier)
],
"tags": [t.id for t in match_tags(doc, classifier)],
"document_types": [
dt.id for dt in match_document_types(doc, classifier)
]
})
@action(methods=['get'], detail=True)
def preview(self, request, pk=None):
try:
@@ -191,7 +298,14 @@ class DocumentViewSet(RetrieveModelMixin,
@cache_control(public=False, max_age=315360000)
def thumb(self, request, pk=None):
try:
return HttpResponse(Document.objects.get(id=pk).thumbnail_file,
doc = Document.objects.get(id=pk)
if doc.storage_type == Document.STORAGE_TYPE_GPG:
handle = GnuPG.decrypted(doc.thumbnail_file)
else:
handle = doc.thumbnail_file
# TODO: Send ETag information and use that to send new thumbnails
# if available
return HttpResponse(handle,
content_type='image/png')
except (FileNotFoundError, Document.DoesNotExist):
raise Http404()
@@ -205,45 +319,236 @@ class DocumentViewSet(RetrieveModelMixin,
raise Http404()
class LogViewSet(ReadOnlyModelViewSet):
model = Log
class LogViewSet(ViewSet):
queryset = Log.objects.all()
serializer_class = LogSerializer
permission_classes = (IsAuthenticated,)
log_files = ["paperless", "mail"]
def retrieve(self, request, pk=None, *args, **kwargs):
if pk not in self.log_files:
raise Http404()
filename = os.path.join(settings.LOGGING_DIR, f"{pk}.log")
if not os.path.isfile(filename):
raise Http404()
with open(filename, "r") as f:
lines = [line.rstrip() for line in f.readlines()]
return Response(lines)
def list(self, request, *args, **kwargs):
return Response(self.log_files)
class SavedViewViewSet(ModelViewSet):
model = SavedView
queryset = SavedView.objects.all()
serializer_class = SavedViewSerializer
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, OrderingFilter)
filterset_class = LogFilterSet
ordering_fields = ("created",)
def get_queryset(self):
user = self.request.user
return SavedView.objects.filter(user=user)
def perform_create(self, serializer):
serializer.save(user=self.request.user)
class BulkEditView(APIView):
permission_classes = (IsAuthenticated,)
serializer_class = BulkEditSerializer
parser_classes = (parsers.JSONParser,)
def get_serializer_context(self):
return {
'request': self.request,
'format': self.format_kwarg,
'view': self
}
def get_serializer(self, *args, **kwargs):
kwargs['context'] = self.get_serializer_context()
return self.serializer_class(*args, **kwargs)
def post(self, request, *args, **kwargs):
serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=True)
method = serializer.validated_data.get("method")
parameters = serializer.validated_data.get("parameters")
documents = serializer.validated_data.get("documents")
try:
# TODO: parameter validation
result = method(documents, **parameters)
return Response({"result": result})
except Exception as e:
return HttpResponseBadRequest(str(e))
class PostDocumentView(APIView):
permission_classes = (IsAuthenticated,)
serializer_class = PostDocumentSerializer
parser_classes = (parsers.MultiPartParser,)
def get_serializer_context(self):
return {
'request': self.request,
'format': self.format_kwarg,
'view': self
}
def get_serializer(self, *args, **kwargs):
kwargs['context'] = self.get_serializer_context()
return self.serializer_class(*args, **kwargs)
def post(self, request, *args, **kwargs):
serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=True)
doc_name, doc_data = serializer.validated_data.get('document')
correspondent_id = serializer.validated_data.get('correspondent')
document_type_id = serializer.validated_data.get('document_type')
tag_ids = serializer.validated_data.get('tags')
title = serializer.validated_data.get('title')
t = int(mktime(datetime.now().timetuple()))
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
dir=settings.SCRATCH_DIR,
delete=False) as f:
f.write(doc_data)
os.utime(f.name, times=(t, t))
temp_filename = f.name
task_id = str(uuid.uuid4())
async_task("documents.tasks.consume_file",
temp_filename,
override_filename=doc_name,
override_title=title,
override_correspondent_id=correspondent_id,
override_document_type_id=document_type_id,
override_tag_ids=tag_ids,
task_id=task_id,
task_name=os.path.basename(doc_name)[:100])
return Response("OK")
class SelectionDataView(APIView):
permission_classes = (IsAuthenticated,)
serializer_class = DocumentListSerializer
parser_classes = (parsers.MultiPartParser, parsers.JSONParser)
def get_serializer_context(self):
return {
'request': self.request,
'format': self.format_kwarg,
'view': self
}
def get_serializer(self, *args, **kwargs):
kwargs['context'] = self.get_serializer_context()
return self.serializer_class(*args, **kwargs)
def post(self, request, format=None):
serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=True)
ids = serializer.validated_data.get('documents')
correspondents = Correspondent.objects.annotate(
document_count=Count(Case(
When(documents__id__in=ids, then=1),
output_field=IntegerField()
)))
tags = Tag.objects.annotate(document_count=Count(Case(
When(documents__id__in=ids, then=1),
output_field=IntegerField()
)))
types = DocumentType.objects.annotate(document_count=Count(Case(
When(documents__id__in=ids, then=1),
output_field=IntegerField()
)))
r = Response({
"selected_correspondents": [{
"id": t.id,
"document_count": t.document_count
} for t in correspondents],
"selected_tags": [{
"id": t.id,
"document_count": t.document_count
} for t in tags],
"selected_document_types": [{
"id": t.id,
"document_count": t.document_count
} for t in types]
})
return r
class SearchView(APIView):
permission_classes = (IsAuthenticated,)
def __init__(self, *args, **kwargs):
super(SearchView, self).__init__(*args, **kwargs)
self.ix = index.open_index()
def add_infos_to_hit(self, r):
doc = Document.objects.get(id=r['id'])
try:
doc = Document.objects.get(id=r['id'])
except Document.DoesNotExist:
logger.warning(
f"Search index returned a non-existing document: "
f"id: {r['id']}, title: {r['title']}. "
f"Search index needs reindex."
)
doc = None
return {'id': r['id'],
'highlights': r.highlights("content", text=doc.content),
'highlights': r.highlights("content", text=doc.content) if doc else None, # NOQA: E501
'score': r.score,
'rank': r.rank,
'document': DocumentSerializer(doc).data,
'document': DocumentSerializer(doc).data if doc else None,
'title': r['title']
}
    def get(self, request, format=None):
        from documents import index

        if 'query' in request.query_params:
            query = request.query_params['query']
        else:
            query = None

        if 'more_like' in request.query_params:
            more_like_id = request.query_params['more_like']
            more_like_content = Document.objects.get(id=more_like_id).content
        else:
            more_like_id = None
            more_like_content = None

        if not query and not more_like_id:
            return Response({
                'count': 0,
                'page': 0,
                'page_count': 0,
                'corrected_query': None,
                'results': []})

        try:
            page = int(request.query_params.get('page', 1))
        except (ValueError, TypeError):
@@ -252,9 +557,10 @@ class SearchView(APIView):
        if page < 1:
            page = 1

        ix = index.open_index()

        try:
            with index.query_page(ix, page, query, more_like_id,
                                  more_like_content) as (result_page,
                                                         corrected_query):
                return Response(
                    {'count': len(result_page),
                     'page': result_page.pagenum,
@@ -269,10 +575,6 @@ class SearchAutoCompleteView(APIView):
    permission_classes = (IsAuthenticated,)

    def get(self, request, format=None):
        if 'term' in request.query_params:
            term = request.query_params['term']
@@ -286,7 +588,11 @@ class SearchAutoCompleteView(APIView):
        else:
            limit = 10

        from documents import index

        ix = index.open_index()

        return Response(index.autocomplete(ix, term, limit))
class StatisticsView(APIView):
@@ -294,8 +600,66 @@ class StatisticsView(APIView):
    permission_classes = (IsAuthenticated,)

    def get(self, request, format=None):
        documents_total = Document.objects.all().count()

        if Tag.objects.filter(is_inbox_tag=True).exists():
            documents_inbox = Document.objects.filter(
                tags__is_inbox_tag=True).distinct().count()
        else:
            documents_inbox = None

        return Response({
            'documents_total': documents_total,
            'documents_inbox': documents_inbox,
        })
class BulkDownloadView(APIView):

    permission_classes = (IsAuthenticated,)

    serializer_class = BulkDownloadSerializer

    parser_classes = (parsers.JSONParser,)

    def get_serializer_context(self):
        return {
            'request': self.request,
            'format': self.format_kwarg,
            'view': self
        }

    def get_serializer(self, *args, **kwargs):
        kwargs['context'] = self.get_serializer_context()
        return self.serializer_class(*args, **kwargs)
    def post(self, request, format=None):
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)

        ids = serializer.validated_data.get('documents')
        compression = serializer.validated_data.get('compression')
        content = serializer.validated_data.get('content')

        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

        temp = tempfile.NamedTemporaryFile(
            dir=settings.SCRATCH_DIR,
            suffix="-compressed-archive",
            delete=False)
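
        # delete=False keeps the scratch file on disk after the handle is
        # closed, so it can be reopened by name once the zip archive has
        # been written into it below.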
        if content == 'both':
            strategy_class = OriginalAndArchiveStrategy
        elif content == 'originals':
            strategy_class = OriginalsOnlyStrategy
        else:
            strategy_class = ArchiveOnlyStrategy
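
        # The chosen strategy decides which variant of each document
        # (original file, archived PDF, or both) is written into the zip.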
        with zipfile.ZipFile(temp.name, "w", compression) as zipf:
            strategy = strategy_class(zipf)
            for id in ids:
                doc = Document.objects.get(id=id)
                strategy.add_document(doc)

        with open(temp.name, "rb") as f:
            response = HttpResponse(f, content_type="application/zip")
            response["Content-Disposition"] = '{}; filename="{}"'.format(
                "attachment", "documents.zip")

            return response

View File

@@ -0,0 +1,650 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
# Translators:
# Štěpán Šebestian <mys.orangeorange0123@gmail.com>, 2021
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2021-01-28 22:02+0100\n"
"PO-Revision-Date: 2020-12-30 19:27+0000\n"
"Last-Translator: Štěpán Šebestian <mys.orangeorange0123@gmail.com>, 2021\n"
"Language-Team: Czech (https://www.transifex.com/paperless/teams/115905/cs/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Language: cs\n"
"Plural-Forms: nplurals=4; plural=(n == 1 && n % 1 == 0) ? 0 : (n >= 2 && n <= 4 && n % 1 == 0) ? 1: (n % 1 != 0 ) ? 2 : 3;\n"
#: documents/apps.py:10
msgid "Documents"
msgstr "Dokumenty"
#: documents/models.py:33
msgid "Any word"
msgstr "Jakékoliv slovo"
#: documents/models.py:34
msgid "All words"
msgstr "Všechna slova"
#: documents/models.py:35
msgid "Exact match"
msgstr "Přesná shoda"
#: documents/models.py:36
msgid "Regular expression"
msgstr "Regulární výraz"
#: documents/models.py:37
msgid "Fuzzy word"
msgstr "Fuzzy slovo"
#: documents/models.py:38
msgid "Automatic"
msgstr "Automatický"
#: documents/models.py:42 documents/models.py:352 paperless_mail/models.py:25
#: paperless_mail/models.py:109
msgid "name"
msgstr "název"
#: documents/models.py:46
msgid "match"
msgstr "shoda"
#: documents/models.py:50
msgid "matching algorithm"
msgstr "algoritmus pro shodu"
#: documents/models.py:56
msgid "is insensitive"
msgstr "je ignorováno"
#: documents/models.py:75 documents/models.py:135
msgid "correspondent"
msgstr "korespondent"
#: documents/models.py:76
msgid "correspondents"
msgstr "korespondenti"
#: documents/models.py:98
msgid "color"
msgstr "barva"
#: documents/models.py:102
msgid "is inbox tag"
msgstr "tag přichozí"
#: documents/models.py:104
msgid ""
"Marks this tag as an inbox tag: All newly consumed documents will be tagged "
"with inbox tags."
msgstr ""
"Označí tento tag jako tag pro příchozí: Všechny nově zkonzumované dokumenty "
"budou označeny tagem pro přichozí"
#: documents/models.py:109
msgid "tag"
msgstr "tag"
#: documents/models.py:110 documents/models.py:166
msgid "tags"
msgstr "tagy"
#: documents/models.py:116 documents/models.py:148
msgid "document type"
msgstr "typ dokumentu"
#: documents/models.py:117
msgid "document types"
msgstr "typy dokumentu"
#: documents/models.py:125
msgid "Unencrypted"
msgstr "Nešifrované"
#: documents/models.py:126
msgid "Encrypted with GNU Privacy Guard"
msgstr "Šifrované pomocí GNU Privacy Guard"
#: documents/models.py:139
msgid "title"
msgstr "titulek"
#: documents/models.py:152
msgid "content"
msgstr "obsah"
#: documents/models.py:154
msgid ""
"The raw, text-only data of the document. This field is primarily used for "
"searching."
msgstr ""
"Nezpracovaná, pouze textová data dokumentu. Toto pole je používáno především"
" pro vyhledávání."
#: documents/models.py:159
msgid "mime type"
msgstr "mime typ"
#: documents/models.py:170
msgid "checksum"
msgstr "kontrolní součet"
#: documents/models.py:174
msgid "The checksum of the original document."
msgstr "Kontrolní součet původního dokumentu"
#: documents/models.py:178
msgid "archive checksum"
msgstr "kontrolní součet archivu"
#: documents/models.py:183
msgid "The checksum of the archived document."
msgstr "Kontrolní součet archivovaného dokumentu."
#: documents/models.py:187 documents/models.py:330
msgid "created"
msgstr "vytvořeno"
#: documents/models.py:191
msgid "modified"
msgstr "upraveno"
#: documents/models.py:195
msgid "storage type"
msgstr "typ úložiště"
#: documents/models.py:203
msgid "added"
msgstr "přidáno"
#: documents/models.py:207
msgid "filename"
msgstr "název souboru"
#: documents/models.py:212
msgid "Current filename in storage"
msgstr "Aktuální název souboru v úložišti"
#: documents/models.py:216
msgid "archive serial number"
msgstr "sériové číslo archivu"
#: documents/models.py:221
msgid "The position of this document in your physical document archive."
msgstr "Pozice dokumentu ve vašem archivu fyzických dokumentů"
#: documents/models.py:227
msgid "document"
msgstr "dokument"
#: documents/models.py:228
msgid "documents"
msgstr "dokumenty"
#: documents/models.py:313
msgid "debug"
msgstr "debug"
#: documents/models.py:314
msgid "information"
msgstr "informace"
#: documents/models.py:315
msgid "warning"
msgstr "varování"
#: documents/models.py:316
msgid "error"
msgstr "chyba"
#: documents/models.py:317
msgid "critical"
msgstr "kritická"
#: documents/models.py:321
msgid "group"
msgstr "skupina"
#: documents/models.py:324
msgid "message"
msgstr "zpráva"
#: documents/models.py:327
msgid "level"
msgstr "úroveň"
#: documents/models.py:334
msgid "log"
msgstr "záznam"
#: documents/models.py:335
msgid "logs"
msgstr "záznamy"
#: documents/models.py:346 documents/models.py:396
msgid "saved view"
msgstr "uložený pohled"
#: documents/models.py:347
msgid "saved views"
msgstr "uložené pohledy"
#: documents/models.py:350
msgid "user"
msgstr "uživatel"
#: documents/models.py:356
msgid "show on dashboard"
msgstr "zobrazit v dashboardu"
#: documents/models.py:359
msgid "show in sidebar"
msgstr "zobrazit v postranním menu"
#: documents/models.py:363
msgid "sort field"
msgstr "pole na řazení"
#: documents/models.py:366
msgid "sort reverse"
msgstr "třídit opačně"
#: documents/models.py:372
msgid "title contains"
msgstr "titulek obsahuje"
#: documents/models.py:373
msgid "content contains"
msgstr "obsah obsahuje"
#: documents/models.py:374
msgid "ASN is"
msgstr "ASN je"
#: documents/models.py:375
msgid "correspondent is"
msgstr "korespondent je"
#: documents/models.py:376
msgid "document type is"
msgstr "typ dokumentu je"
#: documents/models.py:377
msgid "is in inbox"
msgstr "je v příchozích"
#: documents/models.py:378
msgid "has tag"
msgstr "má tag"
#: documents/models.py:379
msgid "has any tag"
msgstr "má jakýkoliv tag"
#: documents/models.py:380
msgid "created before"
msgstr "vytvořeno před"
#: documents/models.py:381
msgid "created after"
msgstr "vytvořeno po"
#: documents/models.py:382
msgid "created year is"
msgstr "rok vytvoření je"
#: documents/models.py:383
msgid "created month is"
msgstr "měsíc vytvoření je"
#: documents/models.py:384
msgid "created day is"
msgstr "den vytvoření je"
#: documents/models.py:385
msgid "added before"
msgstr "přidáno před"
#: documents/models.py:386
msgid "added after"
msgstr "přidáno po"
#: documents/models.py:387
msgid "modified before"
msgstr "upraveno před"
#: documents/models.py:388
msgid "modified after"
msgstr "upraveno po"
#: documents/models.py:389
msgid "does not have tag"
msgstr "nemá tag"
#: documents/models.py:400
msgid "rule type"
msgstr "typ pravidla"
#: documents/models.py:404
msgid "value"
msgstr "hodnota"
#: documents/models.py:410
msgid "filter rule"
msgstr "filtrovací pravidlo"
#: documents/models.py:411
msgid "filter rules"
msgstr "filtrovací pravidla"
#: documents/serialisers.py:383
#, python-format
msgid "File type %(type)s not supported"
msgstr "Typ souboru %(type)s není podporován"
#: documents/templates/index.html:20
msgid "Paperless-ng is loading..."
msgstr "Paperless-ng se načítá..."
#: documents/templates/registration/logged_out.html:13
msgid "Paperless-ng signed out"
msgstr "Odhlášeno od Paperless-ng"
#: documents/templates/registration/logged_out.html:41
msgid "You have been successfully logged out. Bye!"
msgstr "Byli jste úspěšně odhlášeni. Nashledanou!"
#: documents/templates/registration/logged_out.html:42
msgid "Sign in again"
msgstr "Přihlašte se znovu"
#: documents/templates/registration/login.html:13
msgid "Paperless-ng sign in"
msgstr "Paperless-ng přihlášení"
#: documents/templates/registration/login.html:42
msgid "Please sign in."
msgstr "Prosím přihlaste se."
#: documents/templates/registration/login.html:45
msgid "Your username and password didn't match. Please try again."
msgstr "Vaše uživatelské jméno a heslo se neshodují. Prosím, zkuste to znovu."
#: documents/templates/registration/login.html:48
msgid "Username"
msgstr "Uživatelské jméno"
#: documents/templates/registration/login.html:49
msgid "Password"
msgstr "Heslo"
#: documents/templates/registration/login.html:54
msgid "Sign in"
msgstr "Přihlásit se"
#: paperless/settings.py:286
msgid "English"
msgstr "Angličtina"
#: paperless/settings.py:287
msgid "German"
msgstr "Němčina"
#: paperless/settings.py:288
msgid "Dutch"
msgstr "Holandština"
#: paperless/settings.py:289
msgid "French"
msgstr "Francouzština"
#: paperless/urls.py:114
msgid "Paperless-ng administration"
msgstr "Správa Paperless-ng"
#: paperless_mail/admin.py:25
msgid "Filter"
msgstr "Filtr"
#: paperless_mail/admin.py:27
msgid ""
"Paperless will only process mails that match ALL of the filters given below."
msgstr ""
"Paperless zpracuje pouze emaily které odpovídají VŠEM níže zadaným filtrům."
#: paperless_mail/admin.py:37
msgid "Actions"
msgstr "Akce"
#: paperless_mail/admin.py:39
msgid ""
"The action applied to the mail. This action is only performed when documents"
" were consumed from the mail. Mails without attachments will remain entirely"
" untouched."
msgstr ""
"Akce provedena na emailu. Tato akce je provedena jen pokud byly dokumenty "
"zkonzumovány z emailu. Emaily bez příloh zůstanou nedotčeny."
#: paperless_mail/admin.py:46
msgid "Metadata"
msgstr "Metadata"
#: paperless_mail/admin.py:48
msgid ""
"Assign metadata to documents consumed from this rule automatically. If you "
"do not assign tags, types or correspondents here, paperless will still "
"process all matching rules that you have defined."
msgstr ""
"Automaticky přiřadit metadata dokumentům zkonzumovaných z tohoto pravidla. "
"Pokud zde nepřiřadíte tagy, typy nebo korespondenty, paperless stále "
"zpracuje všechna shodující-se pravidla které jste definovali."
#: paperless_mail/apps.py:9
msgid "Paperless mail"
msgstr "Paperless pošta"
#: paperless_mail/models.py:11
msgid "mail account"
msgstr "emailový účet"
#: paperless_mail/models.py:12
msgid "mail accounts"
msgstr "emailové účty"
#: paperless_mail/models.py:19
msgid "No encryption"
msgstr "Žádné šifrování"
#: paperless_mail/models.py:20
msgid "Use SSL"
msgstr "Používat SSL"
#: paperless_mail/models.py:21
msgid "Use STARTTLS"
msgstr "Používat STARTTLS"
#: paperless_mail/models.py:29
msgid "IMAP server"
msgstr "IMAP server"
#: paperless_mail/models.py:33
msgid "IMAP port"
msgstr "IMAP port"
#: paperless_mail/models.py:36
msgid ""
"This is usually 143 for unencrypted and STARTTLS connections, and 993 for "
"SSL connections."
msgstr ""
"Toto je většinou 143 pro nešifrovaná připojení/připojení používající "
"STARTTLS a 993 pro SSL připojení."
#: paperless_mail/models.py:40
msgid "IMAP security"
msgstr "IMAP bezpečnost"
#: paperless_mail/models.py:46
msgid "username"
msgstr "uživatelské jméno"
#: paperless_mail/models.py:50
msgid "password"
msgstr "heslo"
#: paperless_mail/models.py:60
msgid "mail rule"
msgstr "mailové pravidlo"
#: paperless_mail/models.py:61
msgid "mail rules"
msgstr "mailová pravidla"
#: paperless_mail/models.py:67
msgid "Only process attachments."
msgstr "Zpracovávat jen přílohy"
#: paperless_mail/models.py:68
msgid "Process all files, including 'inline' attachments."
msgstr "Zpracovat všechny soubory, včetně vložených příloh"
#: paperless_mail/models.py:78
msgid "Mark as read, don't process read mails"
msgstr "Označit jako přečtené, nezpracovávat přečtené emaily"
#: paperless_mail/models.py:79
msgid "Flag the mail, don't process flagged mails"
msgstr "Označit email, nezpracovávat označené emaily"
#: paperless_mail/models.py:80
msgid "Move to specified folder"
msgstr "Přesunout do specifikované složky"
#: paperless_mail/models.py:81
msgid "Delete"
msgstr "Odstranit"
#: paperless_mail/models.py:88
msgid "Use subject as title"
msgstr "Použít předmět jako titulek"
#: paperless_mail/models.py:89
msgid "Use attachment filename as title"
msgstr "Použít název souboru u přílohy jako titulek"
#: paperless_mail/models.py:99
msgid "Do not assign a correspondent"
msgstr "Nepřiřazovat korespondenta"
#: paperless_mail/models.py:101
msgid "Use mail address"
msgstr "Použít emailovou adresu"
#: paperless_mail/models.py:103
msgid "Use name (or mail address if not available)"
msgstr "Použít jméno (nebo emailovou adresu pokud jméno není dostupné)"
#: paperless_mail/models.py:105
msgid "Use correspondent selected below"
msgstr "Použít korespondenta vybraného níže"
#: paperless_mail/models.py:113
msgid "order"
msgstr "pořadí"
#: paperless_mail/models.py:120
msgid "account"
msgstr "účet"
#: paperless_mail/models.py:124
msgid "folder"
msgstr "složka"
#: paperless_mail/models.py:128
msgid "filter from"
msgstr "filtrovat z"
#: paperless_mail/models.py:131
msgid "filter subject"
msgstr "název filtru"
#: paperless_mail/models.py:134
msgid "filter body"
msgstr "tělo filtru"
#: paperless_mail/models.py:138
msgid "filter attachment filename"
msgstr "název souboru u přílohy filtru"
#: paperless_mail/models.py:140
msgid ""
"Only consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
"Konzumovat jen dokumenty které přesně odpovídají tomuto názvu souboru pokud "
"specifikováno. Zástupné znaky jako *.pdf nebo *invoice* jsou povoleny. "
"Nezáleží na velikosti písmen."
#: paperless_mail/models.py:146
msgid "maximum age"
msgstr "maximální stáří"
#: paperless_mail/models.py:148
msgid "Specified in days."
msgstr "Specifikováno ve dnech."
#: paperless_mail/models.py:151
msgid "attachment type"
msgstr "typ přílohy"
#: paperless_mail/models.py:154
msgid ""
"Inline attachments include embedded images, so it's best to combine this "
"option with a filename filter."
msgstr ""
"Vložené přílohy zahrnují vložené obrázky, takže je nejlepší tuto možnost "
"kombinovat s filtrem na název souboru"
#: paperless_mail/models.py:159
msgid "action"
msgstr "akce"
#: paperless_mail/models.py:165
msgid "action parameter"
msgstr "parametr akce"
#: paperless_mail/models.py:167
msgid ""
"Additional parameter for the action selected above, i.e., the target folder "
"of the move to folder action."
msgstr ""
"Další parametr pro výše vybranou akci, napříkad cílová složka akce přesunutí"
" do složky."
#: paperless_mail/models.py:173
msgid "assign title from"
msgstr "nastavit titulek z"
#: paperless_mail/models.py:183
msgid "assign this tag"
msgstr "přiřadit tento tag"
#: paperless_mail/models.py:191
msgid "assign this document type"
msgstr "přiřadit tento typ dokumentu"
#: paperless_mail/models.py:195
msgid "assign correspondent from"
msgstr "přiřadit korespondenta z"
#: paperless_mail/models.py:205
msgid "assign this correspondent"
msgstr "přiřadit tohoto korespondenta"

View File

@@ -0,0 +1,676 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
# Translators:
# Jonas Winkler, 2021
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2021-02-24 16:49+0100\n"
"PO-Revision-Date: 2021-02-16 18:37+0000\n"
"Last-Translator: Jonas Winkler, 2021\n"
"Language-Team: German (https://www.transifex.com/paperless/teams/115905/de/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Language: de\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
#: documents/apps.py:10
msgid "Documents"
msgstr "Dokumente"
#: documents/models.py:32
msgid "Any word"
msgstr "Irgendein Wort"
#: documents/models.py:33
msgid "All words"
msgstr "Alle Wörter"
#: documents/models.py:34
msgid "Exact match"
msgstr "Exakte Übereinstimmung"
#: documents/models.py:35
msgid "Regular expression"
msgstr "Regulärer Ausdruck"
#: documents/models.py:36
msgid "Fuzzy word"
msgstr "Ungenaues Wort"
#: documents/models.py:37
msgid "Automatic"
msgstr "Automatisch"
#: documents/models.py:41 documents/models.py:364 paperless_mail/models.py:25
#: paperless_mail/models.py:109
msgid "name"
msgstr "Name"
#: documents/models.py:45
msgid "match"
msgstr "Zuweisungsmuster"
#: documents/models.py:49
msgid "matching algorithm"
msgstr "Zuweisungsalgorithmus"
#: documents/models.py:55
msgid "is insensitive"
msgstr "Groß-/Kleinschreibung irrelevant"
#: documents/models.py:74 documents/models.py:134
msgid "correspondent"
msgstr "Korrespondent"
#: documents/models.py:75
msgid "correspondents"
msgstr "Korrespondenten"
#: documents/models.py:97
msgid "color"
msgstr "Farbe"
#: documents/models.py:101
msgid "is inbox tag"
msgstr "Posteingangs-Tag"
#: documents/models.py:103
msgid ""
"Marks this tag as an inbox tag: All newly consumed documents will be tagged "
"with inbox tags."
msgstr ""
"Markiert das Tag als Posteingangs-Tag. Neue Dokumente werden immer mit "
"diesem Tag versehen."
#: documents/models.py:108
msgid "tag"
msgstr "Tag"
#: documents/models.py:109 documents/models.py:165
msgid "tags"
msgstr "Tags"
#: documents/models.py:115 documents/models.py:147
msgid "document type"
msgstr "Dokumenttyp"
#: documents/models.py:116
msgid "document types"
msgstr "Dokumenttypen"
#: documents/models.py:124
msgid "Unencrypted"
msgstr "Nicht verschlüsselt"
#: documents/models.py:125
msgid "Encrypted with GNU Privacy Guard"
msgstr "Verschlüsselt mit GNU Privacy Guard"
#: documents/models.py:138
msgid "title"
msgstr "Titel"
#: documents/models.py:151
msgid "content"
msgstr "Inhalt"
#: documents/models.py:153
msgid ""
"The raw, text-only data of the document. This field is primarily used for "
"searching."
msgstr ""
"Der Inhalt des Dokuments in Textform. Dieses Feld wird primär für die Suche "
"verwendet."
#: documents/models.py:158
msgid "mime type"
msgstr "MIME-Typ"
#: documents/models.py:169
msgid "checksum"
msgstr "Prüfsumme"
#: documents/models.py:173
msgid "The checksum of the original document."
msgstr "Die Prüfsumme des originalen Dokuments."
#: documents/models.py:177
msgid "archive checksum"
msgstr "Archiv-Prüfsumme"
#: documents/models.py:182
msgid "The checksum of the archived document."
msgstr "Die Prüfsumme des archivierten Dokuments."
#: documents/models.py:186 documents/models.py:342
msgid "created"
msgstr "Ausgestellt"
#: documents/models.py:190
msgid "modified"
msgstr "Geändert"
#: documents/models.py:194
msgid "storage type"
msgstr "Speichertyp"
#: documents/models.py:202
msgid "added"
msgstr "Hinzugefügt"
#: documents/models.py:206
msgid "filename"
msgstr "Dateiname"
#: documents/models.py:212
msgid "Current filename in storage"
msgstr "Aktueller Dateiname im Datenspeicher"
#: documents/models.py:216
msgid "archive filename"
msgstr "Archiv-Dateiname"
#: documents/models.py:222
msgid "Current archive filename in storage"
msgstr "Aktueller Dateiname im Archiv"
#: documents/models.py:226
msgid "archive serial number"
msgstr "Archiv-Seriennummer"
#: documents/models.py:231
msgid "The position of this document in your physical document archive."
msgstr "Die Position dieses Dokuments in Ihrem physischen Dokumentenarchiv."
#: documents/models.py:237
msgid "document"
msgstr "Dokument"
#: documents/models.py:238
msgid "documents"
msgstr "Dokumente"
#: documents/models.py:325
msgid "debug"
msgstr "Debug"
#: documents/models.py:326
msgid "information"
msgstr "Information"
#: documents/models.py:327
msgid "warning"
msgstr "Warnung"
#: documents/models.py:328
msgid "error"
msgstr "Fehler"
#: documents/models.py:329
msgid "critical"
msgstr "Kritisch"
#: documents/models.py:333
msgid "group"
msgstr "Gruppe"
#: documents/models.py:336
msgid "message"
msgstr "Nachricht"
#: documents/models.py:339
msgid "level"
msgstr "Level"
#: documents/models.py:346
msgid "log"
msgstr "Protokoll"
#: documents/models.py:347
msgid "logs"
msgstr "Protokoll"
#: documents/models.py:358 documents/models.py:408
msgid "saved view"
msgstr "Gespeicherte Ansicht"
#: documents/models.py:359
msgid "saved views"
msgstr "Gespeicherte Ansichten"
#: documents/models.py:362
msgid "user"
msgstr "Benutzer"
#: documents/models.py:368
msgid "show on dashboard"
msgstr "Auf Startseite zeigen"
#: documents/models.py:371
msgid "show in sidebar"
msgstr "In Seitenleiste zeigen"
#: documents/models.py:375
msgid "sort field"
msgstr "Sortierfeld"
#: documents/models.py:378
msgid "sort reverse"
msgstr "Umgekehrte Sortierung"
#: documents/models.py:384
msgid "title contains"
msgstr "Titel enthält"
#: documents/models.py:385
msgid "content contains"
msgstr "Inhalt enthält"
#: documents/models.py:386
msgid "ASN is"
msgstr "ASN ist"
#: documents/models.py:387
msgid "correspondent is"
msgstr "Korrespondent ist"
#: documents/models.py:388
msgid "document type is"
msgstr "Dokumenttyp ist"
#: documents/models.py:389
msgid "is in inbox"
msgstr "Ist im Posteingang"
#: documents/models.py:390
msgid "has tag"
msgstr "Hat Tag"
#: documents/models.py:391
msgid "has any tag"
msgstr "Hat irgendein Tag"
#: documents/models.py:392
msgid "created before"
msgstr "Ausgestellt vor"
#: documents/models.py:393
msgid "created after"
msgstr "Ausgestellt nach"
#: documents/models.py:394
msgid "created year is"
msgstr "Ausgestellt im Jahr"
#: documents/models.py:395
msgid "created month is"
msgstr "Ausgestellt im Monat"
#: documents/models.py:396
msgid "created day is"
msgstr "Ausgestellt am Tag"
#: documents/models.py:397
msgid "added before"
msgstr "Hinzugefügt vor"
#: documents/models.py:398
msgid "added after"
msgstr "Hinzugefügt nach"
#: documents/models.py:399
msgid "modified before"
msgstr "Geändert vor"
#: documents/models.py:400
msgid "modified after"
msgstr "Geändert nach"
#: documents/models.py:401
msgid "does not have tag"
msgstr "Hat nicht folgendes Tag"
#: documents/models.py:412
msgid "rule type"
msgstr "Regeltyp"
#: documents/models.py:416
msgid "value"
msgstr "Wert"
#: documents/models.py:422
msgid "filter rule"
msgstr "Filterregel"
#: documents/models.py:423
msgid "filter rules"
msgstr "Filterregeln"
#: documents/serialisers.py:52
#, python-format
msgid "Invalid regular expresssion: %(error)s"
msgstr "Ungültiger regulärer Ausdruck: %(error)s"
#: documents/serialisers.py:378
#, python-format
msgid "File type %(type)s not supported"
msgstr "Dateityp %(type)s nicht unterstützt"
#: documents/templates/index.html:20
msgid "Paperless-ng is loading..."
msgstr "Paperless-ng wird geladen..."
#: documents/templates/registration/logged_out.html:13
msgid "Paperless-ng signed out"
msgstr "Paperless-ng abgemeldet"
#: documents/templates/registration/logged_out.html:41
msgid "You have been successfully logged out. Bye!"
msgstr "Sie wurden erfolgreich abgemeldet. Auf Wiedersehen!"
#: documents/templates/registration/logged_out.html:42
msgid "Sign in again"
msgstr "Erneut anmelden"
#: documents/templates/registration/login.html:13
msgid "Paperless-ng sign in"
msgstr "Paperless-ng Anmeldung"
#: documents/templates/registration/login.html:42
msgid "Please sign in."
msgstr "Bitte melden Sie sich an."
#: documents/templates/registration/login.html:45
msgid "Your username and password didn't match. Please try again."
msgstr ""
"Ihr Benutzername und Passwort stimmen nicht überein. Bitte versuchen Sie es "
"erneut."
#: documents/templates/registration/login.html:48
msgid "Username"
msgstr "Benutzername"
#: documents/templates/registration/login.html:49
msgid "Password"
msgstr "Passwort"
#: documents/templates/registration/login.html:54
msgid "Sign in"
msgstr "Anmelden"
#: paperless/settings.py:291
msgid "English (US)"
msgstr "Englisch (US)"
#: paperless/settings.py:292
msgid "English (GB)"
msgstr "Englisch (UK)"
#: paperless/settings.py:293
msgid "German"
msgstr "Deutsch"
#: paperless/settings.py:294
msgid "Dutch"
msgstr "Niederländisch"
#: paperless/settings.py:295
msgid "French"
msgstr "Französisch"
#: paperless/settings.py:296
msgid "Portuguese (Brazil)"
msgstr "Portugiesisch (Brasilien)"
#: paperless/urls.py:118
msgid "Paperless-ng administration"
msgstr "Paperless-ng Administration"
#: paperless_mail/admin.py:25
msgid "Filter"
msgstr "Filter"
#: paperless_mail/admin.py:27
msgid ""
"Paperless will only process mails that match ALL of the filters given below."
msgstr ""
"Paperless wird nur E-Mails verarbeiten, für die alle der hier angegebenen "
"Filter zutreffen."
#: paperless_mail/admin.py:37
msgid "Actions"
msgstr "Aktionen"
#: paperless_mail/admin.py:39
msgid ""
"The action applied to the mail. This action is only performed when documents"
" were consumed from the mail. Mails without attachments will remain entirely"
" untouched."
msgstr ""
"Die Aktion, die auf E-Mails angewendet werden soll. Diese Aktion wird nur "
"auf E-Mails angewendet, aus denen Anhänge verarbeitet wurden. E-Mails ohne "
"Anhänge werden vollständig ignoriert."
#: paperless_mail/admin.py:46
msgid "Metadata"
msgstr "Metadaten"
#: paperless_mail/admin.py:48
msgid ""
"Assign metadata to documents consumed from this rule automatically. If you "
"do not assign tags, types or correspondents here, paperless will still "
"process all matching rules that you have defined."
msgstr ""
"Folgende Metadaten werden Dokumenten dieser Regel automatisch zugewiesen. "
"Wenn Sie hier nichts auswählen wird Paperless weiterhin alle "
"Zuweisungsalgorithmen ausführen und Metadaten auf Basis des Dokumentinhalts "
"zuweisen."
#: paperless_mail/apps.py:9
msgid "Paperless mail"
msgstr "Paperless E-Mail"
#: paperless_mail/models.py:11
msgid "mail account"
msgstr "E-Mail-Konto"
#: paperless_mail/models.py:12
msgid "mail accounts"
msgstr "E-Mail-Konten"
#: paperless_mail/models.py:19
msgid "No encryption"
msgstr "Keine Verschlüsselung"
#: paperless_mail/models.py:20
msgid "Use SSL"
msgstr "SSL benutzen"
#: paperless_mail/models.py:21
msgid "Use STARTTLS"
msgstr "STARTTLS benutzen"
#: paperless_mail/models.py:29
msgid "IMAP server"
msgstr "IMAP-Server"
#: paperless_mail/models.py:33
msgid "IMAP port"
msgstr "IMAP-Port"
#: paperless_mail/models.py:36
msgid ""
"This is usually 143 for unencrypted and STARTTLS connections, and 993 for "
"SSL connections."
msgstr ""
"Dies ist in der Regel 143 für unverschlüsselte und STARTTLS-Verbindungen und"
" 993 für SSL-Verbindungen."
#: paperless_mail/models.py:40
msgid "IMAP security"
msgstr "IMAP-Sicherheit"
#: paperless_mail/models.py:46
msgid "username"
msgstr "Benutzername"
#: paperless_mail/models.py:50
msgid "password"
msgstr "Passwort"
#: paperless_mail/models.py:60
msgid "mail rule"
msgstr "E-Mail-Regel"
#: paperless_mail/models.py:61
msgid "mail rules"
msgstr "E-Mail-Regeln"
#: paperless_mail/models.py:67
msgid "Only process attachments."
msgstr "Nur Anhänge verarbeiten."
#: paperless_mail/models.py:68
msgid "Process all files, including 'inline' attachments."
msgstr "Alle Dateien verarbeiten, auch 'inline'-Anhänge."
#: paperless_mail/models.py:78
msgid "Mark as read, don't process read mails"
msgstr "Als gelesen markieren, gelesene E-Mails nicht verarbeiten"
#: paperless_mail/models.py:79
msgid "Flag the mail, don't process flagged mails"
msgstr "Als wichtig markieren, markierte E-Mails nicht verarbeiten"
#: paperless_mail/models.py:80
msgid "Move to specified folder"
msgstr "In angegebenen Ordner verschieben"
#: paperless_mail/models.py:81
msgid "Delete"
msgstr "Löschen"
#: paperless_mail/models.py:88
msgid "Use subject as title"
msgstr "Betreff als Titel verwenden"
#: paperless_mail/models.py:89
msgid "Use attachment filename as title"
msgstr "Dateiname des Anhangs als Titel verwenden"
#: paperless_mail/models.py:99
msgid "Do not assign a correspondent"
msgstr "Keinen Korrespondenten zuweisen"
#: paperless_mail/models.py:101
msgid "Use mail address"
msgstr "E-Mail-Adresse benutzen"
#: paperless_mail/models.py:103
msgid "Use name (or mail address if not available)"
msgstr "Absendername benutzen (oder E-Mail-Adressen, wenn nicht verfügbar)"
#: paperless_mail/models.py:105
msgid "Use correspondent selected below"
msgstr "Nachfolgend ausgewählten Korrespondent verwenden"
#: paperless_mail/models.py:113
msgid "order"
msgstr "Reihenfolge"
#: paperless_mail/models.py:120
msgid "account"
msgstr "Konto"
#: paperless_mail/models.py:124
msgid "folder"
msgstr "Ordner"
#: paperless_mail/models.py:128
msgid "filter from"
msgstr "Absender filtern"
#: paperless_mail/models.py:131
msgid "filter subject"
msgstr "Betreff filtern"
#: paperless_mail/models.py:134
msgid "filter body"
msgstr "Nachrichteninhalt filtern"
#: paperless_mail/models.py:138
msgid "filter attachment filename"
msgstr "Anhang-Dateiname filtern"
#: paperless_mail/models.py:140
msgid ""
"Only consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
"Wenn angegeben werden nur Dateien verarbeitet, die diesem Dateinamen exakt "
"entsprechen. Platzhalter wie *.pdf oder *rechnung* sind erlaubt. Groß- und "
"Kleinschreibung ist irrelevant."
#: paperless_mail/models.py:146
msgid "maximum age"
msgstr "Maximales Alter"
#: paperless_mail/models.py:148
msgid "Specified in days."
msgstr "Angegeben in Tagen."
#: paperless_mail/models.py:151
msgid "attachment type"
msgstr "Dateianhangstyp"
#: paperless_mail/models.py:154
msgid ""
"Inline attachments include embedded images, so it's best to combine this "
"option with a filename filter."
msgstr ""
"'Inline'-Anhänge schließen eingebettete Bilder mit ein, daher sollte diese "
"Einstellung mit einem Dateinamenfilter kombiniert werden."
#: paperless_mail/models.py:159
msgid "action"
msgstr "Aktion"
#: paperless_mail/models.py:165
msgid "action parameter"
msgstr "Parameter für Aktion"
#: paperless_mail/models.py:167
msgid ""
"Additional parameter for the action selected above, i.e., the target folder "
"of the move to folder action."
msgstr ""
"Zusätzlicher Parameter für die oben ausgewählte Aktion, zum Beispiel der "
"Zielordner für die Aktion \"In angegebenen Ordner verschieben\""
#: paperless_mail/models.py:173
msgid "assign title from"
msgstr "Titel zuweisen von"
#: paperless_mail/models.py:183
msgid "assign this tag"
msgstr "Dieses Tag zuweisen"
#: paperless_mail/models.py:191
msgid "assign this document type"
msgstr "Diesen Dokumenttyp zuweisen"
#: paperless_mail/models.py:195
msgid "assign correspondent from"
msgstr "Korrespondent zuweisen von"
#: paperless_mail/models.py:205
msgid "assign this correspondent"
msgstr "Diesen Korrespondent zuweisen"

View File

@@ -0,0 +1,672 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
# Translators:
# Ali Bates, 2021
# Jonas Winkler, 2021
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2021-02-24 16:49+0100\n"
"PO-Revision-Date: 2021-02-16 18:37+0000\n"
"Last-Translator: Jonas Winkler, 2021\n"
"Language-Team: English (United Kingdom) (https://www.transifex.com/paperless/teams/115905/en_GB/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Language: en_GB\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
#: documents/apps.py:10
msgid "Documents"
msgstr "Documents"
#: documents/models.py:32
msgid "Any word"
msgstr "Any word"
#: documents/models.py:33
msgid "All words"
msgstr "All words"
#: documents/models.py:34
msgid "Exact match"
msgstr "Exact match"
#: documents/models.py:35
msgid "Regular expression"
msgstr "Regular expression"
#: documents/models.py:36
msgid "Fuzzy word"
msgstr "Fuzzy word"
#: documents/models.py:37
msgid "Automatic"
msgstr "Automatic"
#: documents/models.py:41 documents/models.py:364 paperless_mail/models.py:25
#: paperless_mail/models.py:109
msgid "name"
msgstr "name"
#: documents/models.py:45
msgid "match"
msgstr "match"
#: documents/models.py:49
msgid "matching algorithm"
msgstr "matching algorithm"
#: documents/models.py:55
msgid "is insensitive"
msgstr "is insensitive"
#: documents/models.py:74 documents/models.py:134
msgid "correspondent"
msgstr "correspondent"
#: documents/models.py:75
msgid "correspondents"
msgstr "correspondents"
#: documents/models.py:97
msgid "color"
msgstr "colour"
#: documents/models.py:101
msgid "is inbox tag"
msgstr "is inbox tag"
#: documents/models.py:103
msgid ""
"Marks this tag as an inbox tag: All newly consumed documents will be tagged "
"with inbox tags."
msgstr ""
"Marks this tag as an inbox tag: All newly consumed documents will be tagged "
"with inbox tags."
#: documents/models.py:108
msgid "tag"
msgstr "tag"
#: documents/models.py:109 documents/models.py:165
msgid "tags"
msgstr "tags"
#: documents/models.py:115 documents/models.py:147
msgid "document type"
msgstr "document type"
#: documents/models.py:116
msgid "document types"
msgstr "document types"
#: documents/models.py:124
msgid "Unencrypted"
msgstr "Unencrypted"
#: documents/models.py:125
msgid "Encrypted with GNU Privacy Guard"
msgstr "Encrypted with GNU Privacy Guard"
#: documents/models.py:138
msgid "title"
msgstr "title"
#: documents/models.py:151
msgid "content"
msgstr "content"
#: documents/models.py:153
msgid ""
"The raw, text-only data of the document. This field is primarily used for "
"searching."
msgstr ""
"The raw, text-only data of the document. This field is primarily used for "
"searching."
#: documents/models.py:158
msgid "mime type"
msgstr "mime type"
#: documents/models.py:169
msgid "checksum"
msgstr "checksum"
#: documents/models.py:173
msgid "The checksum of the original document."
msgstr "The checksum of the original document."
#: documents/models.py:177
msgid "archive checksum"
msgstr "archive checksum"
#: documents/models.py:182
msgid "The checksum of the archived document."
msgstr "The checksum of the archived document."
#: documents/models.py:186 documents/models.py:342
msgid "created"
msgstr "created"
#: documents/models.py:190
msgid "modified"
msgstr "modified"
#: documents/models.py:194
msgid "storage type"
msgstr "storage type"
#: documents/models.py:202
msgid "added"
msgstr "added"
#: documents/models.py:206
msgid "filename"
msgstr "filename"
#: documents/models.py:212
msgid "Current filename in storage"
msgstr "Current filename in storage"
#: documents/models.py:216
msgid "archive filename"
msgstr "archive filename"
#: documents/models.py:222
msgid "Current archive filename in storage"
msgstr "Current archive filename in storage"
#: documents/models.py:226
msgid "archive serial number"
msgstr "archive serial number"
#: documents/models.py:231
msgid "The position of this document in your physical document archive."
msgstr "The position of this document in your physical document archive."
#: documents/models.py:237
msgid "document"
msgstr "document"
#: documents/models.py:238
msgid "documents"
msgstr "documents"
#: documents/models.py:325
msgid "debug"
msgstr "debug"
#: documents/models.py:326
msgid "information"
msgstr "information"
#: documents/models.py:327
msgid "warning"
msgstr "warning"
#: documents/models.py:328
msgid "error"
msgstr "error"
#: documents/models.py:329
msgid "critical"
msgstr "critical"
#: documents/models.py:333
msgid "group"
msgstr "group"
#: documents/models.py:336
msgid "message"
msgstr "message"
#: documents/models.py:339
msgid "level"
msgstr "level"
#: documents/models.py:346
msgid "log"
msgstr "log"
#: documents/models.py:347
msgid "logs"
msgstr "logs"
#: documents/models.py:358 documents/models.py:408
msgid "saved view"
msgstr "saved view"
#: documents/models.py:359
msgid "saved views"
msgstr "saved views"
#: documents/models.py:362
msgid "user"
msgstr "user"
#: documents/models.py:368
msgid "show on dashboard"
msgstr "show on dashboard"
#: documents/models.py:371
msgid "show in sidebar"
msgstr "show in sidebar"
#: documents/models.py:375
msgid "sort field"
msgstr "sort field"
#: documents/models.py:378
msgid "sort reverse"
msgstr "sort reverse"
#: documents/models.py:384
msgid "title contains"
msgstr "title contains"
#: documents/models.py:385
msgid "content contains"
msgstr "content contains"
#: documents/models.py:386
msgid "ASN is"
msgstr "ASN is"
#: documents/models.py:387
msgid "correspondent is"
msgstr "correspondent is"
#: documents/models.py:388
msgid "document type is"
msgstr "document type is"
#: documents/models.py:389
msgid "is in inbox"
msgstr "is in inbox"
#: documents/models.py:390
msgid "has tag"
msgstr "has tag"
#: documents/models.py:391
msgid "has any tag"
msgstr "has any tag"
#: documents/models.py:392
msgid "created before"
msgstr "created before"
#: documents/models.py:393
msgid "created after"
msgstr "created after"
#: documents/models.py:394
msgid "created year is"
msgstr "created year is"
#: documents/models.py:395
msgid "created month is"
msgstr "created month is"
#: documents/models.py:396
msgid "created day is"
msgstr "created day is"
#: documents/models.py:397
msgid "added before"
msgstr "added before"
#: documents/models.py:398
msgid "added after"
msgstr "added after"
#: documents/models.py:399
msgid "modified before"
msgstr "modified before"
#: documents/models.py:400
msgid "modified after"
msgstr "modified after"
#: documents/models.py:401
msgid "does not have tag"
msgstr "does not have tag"
#: documents/models.py:412
msgid "rule type"
msgstr "rule type"
#: documents/models.py:416
msgid "value"
msgstr "value"
#: documents/models.py:422
msgid "filter rule"
msgstr "filter rule"
#: documents/models.py:423
msgid "filter rules"
msgstr "filter rules"
#: documents/serialisers.py:52
#, python-format
msgid "Invalid regular expresssion: %(error)s"
msgstr "Invalid regular expresssion: %(error)s"
#: documents/serialisers.py:378
#, python-format
msgid "File type %(type)s not supported"
msgstr "File type %(type)s not supported"
#: documents/templates/index.html:20
msgid "Paperless-ng is loading..."
msgstr "Paperless-ng is loading..."
#: documents/templates/registration/logged_out.html:13
msgid "Paperless-ng signed out"
msgstr "Paperless-ng signed out"
#: documents/templates/registration/logged_out.html:41
msgid "You have been successfully logged out. Bye!"
msgstr "You have been successfully logged out. Bye!"
#: documents/templates/registration/logged_out.html:42
msgid "Sign in again"
msgstr "Sign in again"
#: documents/templates/registration/login.html:13
msgid "Paperless-ng sign in"
msgstr "Paperless-ng sign in"
#: documents/templates/registration/login.html:42
msgid "Please sign in."
msgstr "Please sign in."
#: documents/templates/registration/login.html:45
msgid "Your username and password didn't match. Please try again."
msgstr "Your username and password didn't match. Please try again."
#: documents/templates/registration/login.html:48
msgid "Username"
msgstr "Username"
#: documents/templates/registration/login.html:49
msgid "Password"
msgstr "Password"
#: documents/templates/registration/login.html:54
msgid "Sign in"
msgstr "Sign in"
#: paperless/settings.py:291
msgid "English (US)"
msgstr "English (US)"
#: paperless/settings.py:292
msgid "English (GB)"
msgstr "English (GB)"
#: paperless/settings.py:293
msgid "German"
msgstr "German"
#: paperless/settings.py:294
msgid "Dutch"
msgstr "Dutch"
#: paperless/settings.py:295
msgid "French"
msgstr "French"
#: paperless/settings.py:296
msgid "Portuguese (Brazil)"
msgstr "Portuguese (Brazil)"
#: paperless/urls.py:118
msgid "Paperless-ng administration"
msgstr "Paperless-ng administration"
#: paperless_mail/admin.py:25
msgid "Filter"
msgstr "Filter"
#: paperless_mail/admin.py:27
msgid ""
"Paperless will only process mails that match ALL of the filters given below."
msgstr ""
"Paperless will only process mails that match ALL of the filters given below."
#: paperless_mail/admin.py:37
msgid "Actions"
msgstr "Actions"
#: paperless_mail/admin.py:39
msgid ""
"The action applied to the mail. This action is only performed when documents"
" were consumed from the mail. Mails without attachments will remain entirely"
" untouched."
msgstr ""
"The action applied to the mail. This action is only performed when documents"
" were consumed from the mail. Mails without attachments will remain entirely"
" untouched."
#: paperless_mail/admin.py:46
msgid "Metadata"
msgstr "Metadata"
#: paperless_mail/admin.py:48
msgid ""
"Assign metadata to documents consumed from this rule automatically. If you "
"do not assign tags, types or correspondents here, paperless will still "
"process all matching rules that you have defined."
msgstr ""
"Assign metadata to documents consumed from this rule automatically. If you "
"do not assign tags, types or correspondents here, paperless will still "
"process all matching rules that you have defined."
#: paperless_mail/apps.py:9
msgid "Paperless mail"
msgstr "Paperless mail"
#: paperless_mail/models.py:11
msgid "mail account"
msgstr "mail account"
#: paperless_mail/models.py:12
msgid "mail accounts"
msgstr "mail accounts"
#: paperless_mail/models.py:19
msgid "No encryption"
msgstr "No encryption"
#: paperless_mail/models.py:20
msgid "Use SSL"
msgstr "Use SSL"
#: paperless_mail/models.py:21
msgid "Use STARTTLS"
msgstr "Use STARTTLS"
#: paperless_mail/models.py:29
msgid "IMAP server"
msgstr "IMAP server"
#: paperless_mail/models.py:33
msgid "IMAP port"
msgstr "IMAP port"
#: paperless_mail/models.py:36
msgid ""
"This is usually 143 for unencrypted and STARTTLS connections, and 993 for "
"SSL connections."
msgstr ""
"This is usually 143 for unencrypted and STARTTLS connections, and 993 for "
"SSL connections."
#: paperless_mail/models.py:40
msgid "IMAP security"
msgstr "IMAP security"
#: paperless_mail/models.py:46
msgid "username"
msgstr "username"
#: paperless_mail/models.py:50
msgid "password"
msgstr "password"
#: paperless_mail/models.py:60
msgid "mail rule"
msgstr "mail rule"
#: paperless_mail/models.py:61
msgid "mail rules"
msgstr "mail rules"
#: paperless_mail/models.py:67
msgid "Only process attachments."
msgstr "Only process attachments."
#: paperless_mail/models.py:68
msgid "Process all files, including 'inline' attachments."
msgstr "Process all files, including 'inline' attachments."
#: paperless_mail/models.py:78
msgid "Mark as read, don't process read mails"
msgstr "Mark as read, don't process read mails"
#: paperless_mail/models.py:79
msgid "Flag the mail, don't process flagged mails"
msgstr "Flag the mail, don't process flagged mails"
#: paperless_mail/models.py:80
msgid "Move to specified folder"
msgstr "Move to specified folder"
#: paperless_mail/models.py:81
msgid "Delete"
msgstr "Delete"
#: paperless_mail/models.py:88
msgid "Use subject as title"
msgstr "Use subject as title"
#: paperless_mail/models.py:89
msgid "Use attachment filename as title"
msgstr "Use attachment filename as title"
#: paperless_mail/models.py:99
msgid "Do not assign a correspondent"
msgstr "Do not assign a correspondent"
#: paperless_mail/models.py:101
msgid "Use mail address"
msgstr "Use mail address"
#: paperless_mail/models.py:103
msgid "Use name (or mail address if not available)"
msgstr "Use name (or mail address if not available)"
#: paperless_mail/models.py:105
msgid "Use correspondent selected below"
msgstr "Use correspondent selected below"
#: paperless_mail/models.py:113
msgid "order"
msgstr "order"
#: paperless_mail/models.py:120
msgid "account"
msgstr "account"
#: paperless_mail/models.py:124
msgid "folder"
msgstr "folder"
#: paperless_mail/models.py:128
msgid "filter from"
msgstr "filter from"
#: paperless_mail/models.py:131
msgid "filter subject"
msgstr "filter subject"
#: paperless_mail/models.py:134
msgid "filter body"
msgstr "filter body"
#: paperless_mail/models.py:138
msgid "filter attachment filename"
msgstr "filter attachment filename"
#: paperless_mail/models.py:140
msgid ""
"Only consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
"Only consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
#: paperless_mail/models.py:146
msgid "maximum age"
msgstr "maximum age"
#: paperless_mail/models.py:148
msgid "Specified in days."
msgstr "Specified in days."
#: paperless_mail/models.py:151
msgid "attachment type"
msgstr "attachment type"
#: paperless_mail/models.py:154
msgid ""
"Inline attachments include embedded images, so it's best to combine this "
"option with a filename filter."
msgstr ""
"Inline attachments include embedded images, so it's best to combine this "
"option with a filename filter."
#: paperless_mail/models.py:159
msgid "action"
msgstr "action"
#: paperless_mail/models.py:165
msgid "action parameter"
msgstr "action parameter"
#: paperless_mail/models.py:167
msgid ""
"Additional parameter for the action selected above, i.e., the target folder "
"of the move to folder action."
msgstr ""
"Additional parameter for the action selected above, i.e., the target folder "
"of the move to folder action."
#: paperless_mail/models.py:173
msgid "assign title from"
msgstr "assign title from"
#: paperless_mail/models.py:183
msgid "assign this tag"
msgstr "assign this tag"
#: paperless_mail/models.py:191
msgid "assign this document type"
msgstr "assign this document type"
#: paperless_mail/models.py:195
msgid "assign correspondent from"
msgstr "assign correspondent from"
#: paperless_mail/models.py:205
msgid "assign this correspondent"
msgstr "assign this correspondent"

View File

@@ -0,0 +1,648 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2021-02-24 16:49+0100\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"Language: \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
#: documents/apps.py:10
msgid "Documents"
msgstr ""
#: documents/models.py:32
msgid "Any word"
msgstr ""
#: documents/models.py:33
msgid "All words"
msgstr ""
#: documents/models.py:34
msgid "Exact match"
msgstr ""
#: documents/models.py:35
msgid "Regular expression"
msgstr ""
#: documents/models.py:36
msgid "Fuzzy word"
msgstr ""
#: documents/models.py:37
msgid "Automatic"
msgstr ""
#: documents/models.py:41 documents/models.py:364 paperless_mail/models.py:25
#: paperless_mail/models.py:109
msgid "name"
msgstr ""
#: documents/models.py:45
msgid "match"
msgstr ""
#: documents/models.py:49
msgid "matching algorithm"
msgstr ""
#: documents/models.py:55
msgid "is insensitive"
msgstr ""
#: documents/models.py:74 documents/models.py:134
msgid "correspondent"
msgstr ""
#: documents/models.py:75
msgid "correspondents"
msgstr ""
#: documents/models.py:97
msgid "color"
msgstr ""
#: documents/models.py:101
msgid "is inbox tag"
msgstr ""
#: documents/models.py:103
msgid ""
"Marks this tag as an inbox tag: All newly consumed documents will be tagged "
"with inbox tags."
msgstr ""
#: documents/models.py:108
msgid "tag"
msgstr ""
#: documents/models.py:109 documents/models.py:165
msgid "tags"
msgstr ""
#: documents/models.py:115 documents/models.py:147
msgid "document type"
msgstr ""
#: documents/models.py:116
msgid "document types"
msgstr ""
#: documents/models.py:124
msgid "Unencrypted"
msgstr ""
#: documents/models.py:125
msgid "Encrypted with GNU Privacy Guard"
msgstr ""
#: documents/models.py:138
msgid "title"
msgstr ""
#: documents/models.py:151
msgid "content"
msgstr ""
#: documents/models.py:153
msgid ""
"The raw, text-only data of the document. This field is primarily used for "
"searching."
msgstr ""
#: documents/models.py:158
msgid "mime type"
msgstr ""
#: documents/models.py:169
msgid "checksum"
msgstr ""
#: documents/models.py:173
msgid "The checksum of the original document."
msgstr ""
#: documents/models.py:177
msgid "archive checksum"
msgstr ""
#: documents/models.py:182
msgid "The checksum of the archived document."
msgstr ""
#: documents/models.py:186 documents/models.py:342
msgid "created"
msgstr ""
#: documents/models.py:190
msgid "modified"
msgstr ""
#: documents/models.py:194
msgid "storage type"
msgstr ""
#: documents/models.py:202
msgid "added"
msgstr ""
#: documents/models.py:206
msgid "filename"
msgstr ""
#: documents/models.py:212
msgid "Current filename in storage"
msgstr ""
#: documents/models.py:216
msgid "archive filename"
msgstr ""
#: documents/models.py:222
msgid "Current archive filename in storage"
msgstr ""
#: documents/models.py:226
msgid "archive serial number"
msgstr ""
#: documents/models.py:231
msgid "The position of this document in your physical document archive."
msgstr ""
#: documents/models.py:237
msgid "document"
msgstr ""
#: documents/models.py:238
msgid "documents"
msgstr ""
#: documents/models.py:325
msgid "debug"
msgstr ""
#: documents/models.py:326
msgid "information"
msgstr ""
#: documents/models.py:327
msgid "warning"
msgstr ""
#: documents/models.py:328
msgid "error"
msgstr ""
#: documents/models.py:329
msgid "critical"
msgstr ""
#: documents/models.py:333
msgid "group"
msgstr ""
#: documents/models.py:336
msgid "message"
msgstr ""
#: documents/models.py:339
msgid "level"
msgstr ""
#: documents/models.py:346
msgid "log"
msgstr ""
#: documents/models.py:347
msgid "logs"
msgstr ""
#: documents/models.py:358 documents/models.py:408
msgid "saved view"
msgstr ""
#: documents/models.py:359
msgid "saved views"
msgstr ""
#: documents/models.py:362
msgid "user"
msgstr ""
#: documents/models.py:368
msgid "show on dashboard"
msgstr ""
#: documents/models.py:371
msgid "show in sidebar"
msgstr ""
#: documents/models.py:375
msgid "sort field"
msgstr ""
#: documents/models.py:378
msgid "sort reverse"
msgstr ""
#: documents/models.py:384
msgid "title contains"
msgstr ""
#: documents/models.py:385
msgid "content contains"
msgstr ""
#: documents/models.py:386
msgid "ASN is"
msgstr ""
#: documents/models.py:387
msgid "correspondent is"
msgstr ""
#: documents/models.py:388
msgid "document type is"
msgstr ""
#: documents/models.py:389
msgid "is in inbox"
msgstr ""
#: documents/models.py:390
msgid "has tag"
msgstr ""
#: documents/models.py:391
msgid "has any tag"
msgstr ""
#: documents/models.py:392
msgid "created before"
msgstr ""
#: documents/models.py:393
msgid "created after"
msgstr ""
#: documents/models.py:394
msgid "created year is"
msgstr ""
#: documents/models.py:395
msgid "created month is"
msgstr ""
#: documents/models.py:396
msgid "created day is"
msgstr ""
#: documents/models.py:397
msgid "added before"
msgstr ""
#: documents/models.py:398
msgid "added after"
msgstr ""
#: documents/models.py:399
msgid "modified before"
msgstr ""
#: documents/models.py:400
msgid "modified after"
msgstr ""
#: documents/models.py:401
msgid "does not have tag"
msgstr ""
#: documents/models.py:412
msgid "rule type"
msgstr ""
#: documents/models.py:416
msgid "value"
msgstr ""
#: documents/models.py:422
msgid "filter rule"
msgstr ""
#: documents/models.py:423
msgid "filter rules"
msgstr ""
#: documents/serialisers.py:52
#, python-format
msgid "Invalid regular expresssion: %(error)s"
msgstr ""
#: documents/serialisers.py:378
#, python-format
msgid "File type %(type)s not supported"
msgstr ""
#: documents/templates/index.html:20
msgid "Paperless-ng is loading..."
msgstr ""
#: documents/templates/registration/logged_out.html:13
msgid "Paperless-ng signed out"
msgstr ""
#: documents/templates/registration/logged_out.html:41
msgid "You have been successfully logged out. Bye!"
msgstr ""
#: documents/templates/registration/logged_out.html:42
msgid "Sign in again"
msgstr ""
#: documents/templates/registration/login.html:13
msgid "Paperless-ng sign in"
msgstr ""
#: documents/templates/registration/login.html:42
msgid "Please sign in."
msgstr ""
#: documents/templates/registration/login.html:45
msgid "Your username and password didn't match. Please try again."
msgstr ""
#: documents/templates/registration/login.html:48
msgid "Username"
msgstr ""
#: documents/templates/registration/login.html:49
msgid "Password"
msgstr ""
#: documents/templates/registration/login.html:54
msgid "Sign in"
msgstr ""
#: paperless/settings.py:291
msgid "English (US)"
msgstr ""
#: paperless/settings.py:292
msgid "English (GB)"
msgstr ""
#: paperless/settings.py:293
msgid "German"
msgstr ""
#: paperless/settings.py:294
msgid "Dutch"
msgstr ""
#: paperless/settings.py:295
msgid "French"
msgstr ""
#: paperless/settings.py:296
msgid "Portuguese (Brazil)"
msgstr ""
#: paperless/urls.py:118
msgid "Paperless-ng administration"
msgstr ""
#: paperless_mail/admin.py:25
msgid "Filter"
msgstr ""
#: paperless_mail/admin.py:27
msgid ""
"Paperless will only process mails that match ALL of the filters given below."
msgstr ""
#: paperless_mail/admin.py:37
msgid "Actions"
msgstr ""
#: paperless_mail/admin.py:39
msgid ""
"The action applied to the mail. This action is only performed when documents "
"were consumed from the mail. Mails without attachments will remain entirely "
"untouched."
msgstr ""
#: paperless_mail/admin.py:46
msgid "Metadata"
msgstr ""
#: paperless_mail/admin.py:48
msgid ""
"Assign metadata to documents consumed from this rule automatically. If you "
"do not assign tags, types or correspondents here, paperless will still "
"process all matching rules that you have defined."
msgstr ""
#: paperless_mail/apps.py:9
msgid "Paperless mail"
msgstr ""
#: paperless_mail/models.py:11
msgid "mail account"
msgstr ""
#: paperless_mail/models.py:12
msgid "mail accounts"
msgstr ""
#: paperless_mail/models.py:19
msgid "No encryption"
msgstr ""
#: paperless_mail/models.py:20
msgid "Use SSL"
msgstr ""
#: paperless_mail/models.py:21
msgid "Use STARTTLS"
msgstr ""
#: paperless_mail/models.py:29
msgid "IMAP server"
msgstr ""
#: paperless_mail/models.py:33
msgid "IMAP port"
msgstr ""
#: paperless_mail/models.py:36
msgid ""
"This is usually 143 for unencrypted and STARTTLS connections, and 993 for "
"SSL connections."
msgstr ""
#: paperless_mail/models.py:40
msgid "IMAP security"
msgstr ""
#: paperless_mail/models.py:46
msgid "username"
msgstr ""
#: paperless_mail/models.py:50
msgid "password"
msgstr ""
#: paperless_mail/models.py:60
msgid "mail rule"
msgstr ""
#: paperless_mail/models.py:61
msgid "mail rules"
msgstr ""
#: paperless_mail/models.py:67
msgid "Only process attachments."
msgstr ""
#: paperless_mail/models.py:68
msgid "Process all files, including 'inline' attachments."
msgstr ""
#: paperless_mail/models.py:78
msgid "Mark as read, don't process read mails"
msgstr ""
#: paperless_mail/models.py:79
msgid "Flag the mail, don't process flagged mails"
msgstr ""
#: paperless_mail/models.py:80
msgid "Move to specified folder"
msgstr ""
#: paperless_mail/models.py:81
msgid "Delete"
msgstr ""
#: paperless_mail/models.py:88
msgid "Use subject as title"
msgstr ""
#: paperless_mail/models.py:89
msgid "Use attachment filename as title"
msgstr ""
#: paperless_mail/models.py:99
msgid "Do not assign a correspondent"
msgstr ""
#: paperless_mail/models.py:101
msgid "Use mail address"
msgstr ""
#: paperless_mail/models.py:103
msgid "Use name (or mail address if not available)"
msgstr ""
#: paperless_mail/models.py:105
msgid "Use correspondent selected below"
msgstr ""
#: paperless_mail/models.py:113
msgid "order"
msgstr ""
#: paperless_mail/models.py:120
msgid "account"
msgstr ""
#: paperless_mail/models.py:124
msgid "folder"
msgstr ""
#: paperless_mail/models.py:128
msgid "filter from"
msgstr ""
#: paperless_mail/models.py:131
msgid "filter subject"
msgstr ""
#: paperless_mail/models.py:134
msgid "filter body"
msgstr ""
#: paperless_mail/models.py:138
msgid "filter attachment filename"
msgstr ""
#: paperless_mail/models.py:140
msgid ""
"Only consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
#: paperless_mail/models.py:146
msgid "maximum age"
msgstr ""
#: paperless_mail/models.py:148
msgid "Specified in days."
msgstr ""
#: paperless_mail/models.py:151
msgid "attachment type"
msgstr ""
#: paperless_mail/models.py:154
msgid ""
"Inline attachments include embedded images, so it's best to combine this "
"option with a filename filter."
msgstr ""
#: paperless_mail/models.py:159
msgid "action"
msgstr ""
#: paperless_mail/models.py:165
msgid "action parameter"
msgstr ""
#: paperless_mail/models.py:167
msgid ""
"Additional parameter for the action selected above, i.e., the target folder "
"of the move to folder action."
msgstr ""
#: paperless_mail/models.py:173
msgid "assign title from"
msgstr ""
#: paperless_mail/models.py:183
msgid "assign this tag"
msgstr ""
#: paperless_mail/models.py:191
msgid "assign this document type"
msgstr ""
#: paperless_mail/models.py:195
msgid "assign correspondent from"
msgstr ""
#: paperless_mail/models.py:205
msgid "assign this correspondent"
msgstr ""

View File

@@ -0,0 +1,678 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
# Translators:
# Jonas Winkler, 2021
# Philmo67, 2021
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2021-02-24 16:49+0100\n"
"PO-Revision-Date: 2021-02-16 18:37+0000\n"
"Last-Translator: Philmo67, 2021\n"
"Language-Team: French (https://www.transifex.com/paperless/teams/115905/fr/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Language: fr\n"
"Plural-Forms: nplurals=2; plural=(n > 1);\n"
#: documents/apps.py:10
msgid "Documents"
msgstr "Documents"
#: documents/models.py:32
msgid "Any word"
msgstr "Un des mots"
#: documents/models.py:33
msgid "All words"
msgstr "Tous les mots"
#: documents/models.py:34
msgid "Exact match"
msgstr "Concordance exacte"
#: documents/models.py:35
msgid "Regular expression"
msgstr "Expression régulière"
#: documents/models.py:36
msgid "Fuzzy word"
msgstr "Mot approximatif"
#: documents/models.py:37
msgid "Automatic"
msgstr "Automatique"
#: documents/models.py:41 documents/models.py:364 paperless_mail/models.py:25
#: paperless_mail/models.py:109
msgid "name"
msgstr "nom"
#: documents/models.py:45
msgid "match"
msgstr "rapprochement"
#: documents/models.py:49
msgid "matching algorithm"
msgstr "algorithme de rapprochement"
#: documents/models.py:55
msgid "is insensitive"
msgstr "est insensible à la casse"
#: documents/models.py:74 documents/models.py:134
msgid "correspondent"
msgstr "correspondant"
#: documents/models.py:75
msgid "correspondents"
msgstr "correspondants"
#: documents/models.py:97
msgid "color"
msgstr "couleur"
#: documents/models.py:101
msgid "is inbox tag"
msgstr "est une étiquette de boîte de réception"
#: documents/models.py:103
msgid ""
"Marks this tag as an inbox tag: All newly consumed documents will be tagged "
"with inbox tags."
msgstr ""
"Marque cette étiquette comme étiquette de boîte de réception : ces "
"étiquettes sont affectées à tous les documents nouvellement traités."
#: documents/models.py:108
msgid "tag"
msgstr "étiquette"
#: documents/models.py:109 documents/models.py:165
msgid "tags"
msgstr "étiquettes"
#: documents/models.py:115 documents/models.py:147
msgid "document type"
msgstr "type de document"
#: documents/models.py:116
msgid "document types"
msgstr "types de document"
#: documents/models.py:124
msgid "Unencrypted"
msgstr "Non chiffré"
#: documents/models.py:125
msgid "Encrypted with GNU Privacy Guard"
msgstr "Chiffré avec GNU Privacy Guard"
#: documents/models.py:138
msgid "title"
msgstr "titre"
#: documents/models.py:151
msgid "content"
msgstr "contenu"
#: documents/models.py:153
msgid ""
"The raw, text-only data of the document. This field is primarily used for "
"searching."
msgstr ""
"Les données brutes du document, en format texte uniquement. Ce champ est "
"principalement utilisé pour la recherche."
#: documents/models.py:158
msgid "mime type"
msgstr "type mime"
#: documents/models.py:169
msgid "checksum"
msgstr "somme de contrôle"
#: documents/models.py:173
msgid "The checksum of the original document."
msgstr "La somme de contrôle du document original."
#: documents/models.py:177
msgid "archive checksum"
msgstr "somme de contrôle de l'archive"
#: documents/models.py:182
msgid "The checksum of the archived document."
msgstr "La somme de contrôle du document archivé."
#: documents/models.py:186 documents/models.py:342
msgid "created"
msgstr "créé le"
#: documents/models.py:190
msgid "modified"
msgstr "modifié"
#: documents/models.py:194
msgid "storage type"
msgstr "forme d'enregistrement :"
#: documents/models.py:202
msgid "added"
msgstr "date d'ajout"
#: documents/models.py:206
msgid "filename"
msgstr "nom du fichier"
#: documents/models.py:212
msgid "Current filename in storage"
msgstr "Nom du fichier courant en base de données"
#: documents/models.py:216
msgid "archive filename"
msgstr "nom de fichier de l'archive"
#: documents/models.py:222
msgid "Current archive filename in storage"
msgstr "Nom du fichier d'archive courant en base de données"
#: documents/models.py:226
msgid "archive serial number"
msgstr "numéro de série de l'archive"
#: documents/models.py:231
msgid "The position of this document in your physical document archive."
msgstr ""
"Le classement de ce document dans votre archive de documents physiques."
#: documents/models.py:237
msgid "document"
msgstr "document"
#: documents/models.py:238
msgid "documents"
msgstr "documents"
#: documents/models.py:325
msgid "debug"
msgstr "débogage"
#: documents/models.py:326
msgid "information"
msgstr "information"
#: documents/models.py:327
msgid "warning"
msgstr "avertissement"
#: documents/models.py:328
msgid "error"
msgstr "erreur"
#: documents/models.py:329
msgid "critical"
msgstr "critique"
#: documents/models.py:333
msgid "group"
msgstr "groupe"
#: documents/models.py:336
msgid "message"
msgstr "message"
#: documents/models.py:339
msgid "level"
msgstr "niveau"
#: documents/models.py:346
msgid "log"
msgstr "rapport"
#: documents/models.py:347
msgid "logs"
msgstr "rapports"
#: documents/models.py:358 documents/models.py:408
msgid "saved view"
msgstr "vue enregistrée"
#: documents/models.py:359
msgid "saved views"
msgstr "vues enregistrées"
#: documents/models.py:362
msgid "user"
msgstr "utilisateur"
#: documents/models.py:368
msgid "show on dashboard"
msgstr "montrer sur le tableau de bord"
#: documents/models.py:371
msgid "show in sidebar"
msgstr "montrer dans la barre latérale"
#: documents/models.py:375
msgid "sort field"
msgstr "champ de tri"
#: documents/models.py:378
msgid "sort reverse"
msgstr "tri inverse"
#: documents/models.py:384
msgid "title contains"
msgstr "le titre contient"
#: documents/models.py:385
msgid "content contains"
msgstr "le contenu contient"
#: documents/models.py:386
msgid "ASN is"
msgstr "le NSA est"
#: documents/models.py:387
msgid "correspondent is"
msgstr "le correspondant est"
#: documents/models.py:388
msgid "document type is"
msgstr "le type de document est"
#: documents/models.py:389
msgid "is in inbox"
msgstr "est dans la boîte de réception"
#: documents/models.py:390
msgid "has tag"
msgstr "porte l'étiquette"
#: documents/models.py:391
msgid "has any tag"
msgstr "porte l'une des étiquettes"
#: documents/models.py:392
msgid "created before"
msgstr "créé avant"
#: documents/models.py:393
msgid "created after"
msgstr "créé après"
#: documents/models.py:394
msgid "created year is"
msgstr "l'année de création est"
#: documents/models.py:395
msgid "created month is"
msgstr "le mois de création est"
#: documents/models.py:396
msgid "created day is"
msgstr "le jour de création est"
#: documents/models.py:397
msgid "added before"
msgstr "ajouté avant"
#: documents/models.py:398
msgid "added after"
msgstr "ajouté après"
#: documents/models.py:399
msgid "modified before"
msgstr "modifié avant"
#: documents/models.py:400
msgid "modified after"
msgstr "modifié après"
#: documents/models.py:401
msgid "does not have tag"
msgstr "ne porte pas d'étiquette"
#: documents/models.py:412
msgid "rule type"
msgstr "type de règle"
#: documents/models.py:416
msgid "value"
msgstr "valeur"
#: documents/models.py:422
msgid "filter rule"
msgstr "règle de filtrage"
#: documents/models.py:423
msgid "filter rules"
msgstr "règles de filtrage"
#: documents/serialisers.py:52
#, python-format
msgid "Invalid regular expresssion: %(error)s"
msgstr "Expression régulière incorrecte : %(error)s"
#: documents/serialisers.py:378
#, python-format
msgid "File type %(type)s not supported"
msgstr "Type de fichier %(type)s non pris en charge"
#: documents/templates/index.html:20
msgid "Paperless-ng is loading..."
msgstr "Paperless-ng est en cours de chargement..."
#: documents/templates/registration/logged_out.html:13
msgid "Paperless-ng signed out"
msgstr "Déconnecté de Paperless-ng"
#: documents/templates/registration/logged_out.html:41
msgid "You have been successfully logged out. Bye!"
msgstr "Vous avez été déconnecté avec succès. Au revoir !"
#: documents/templates/registration/logged_out.html:42
msgid "Sign in again"
msgstr "Se reconnecter"
#: documents/templates/registration/login.html:13
msgid "Paperless-ng sign in"
msgstr "Connexion à Paperless-ng"
#: documents/templates/registration/login.html:42
msgid "Please sign in."
msgstr "Veuillez vous connecter."
#: documents/templates/registration/login.html:45
msgid "Your username and password didn't match. Please try again."
msgstr ""
"Votre nom d'utilisateur et votre mot de passe ne correspondent pas. Veuillez"
" réessayer."
#: documents/templates/registration/login.html:48
msgid "Username"
msgstr "Nom d'utilisateur"
#: documents/templates/registration/login.html:49
msgid "Password"
msgstr "Mot de passe"
#: documents/templates/registration/login.html:54
msgid "Sign in"
msgstr "S'identifier"
#: paperless/settings.py:291
msgid "English (US)"
msgstr "Anglais (US)"
#: paperless/settings.py:292
msgid "English (GB)"
msgstr "Anglais (GB)"
#: paperless/settings.py:293
msgid "German"
msgstr "Allemand"
#: paperless/settings.py:294
msgid "Dutch"
msgstr "Néerlandais"
#: paperless/settings.py:295
msgid "French"
msgstr "Français"
#: paperless/settings.py:296
msgid "Portuguese (Brazil)"
msgstr "Portugais (Brésil)"
#: paperless/urls.py:118
msgid "Paperless-ng administration"
msgstr "Administration de Paperless-ng"
#: paperless_mail/admin.py:25
msgid "Filter"
msgstr "Filtrage"
#: paperless_mail/admin.py:27
msgid ""
"Paperless will only process mails that match ALL of the filters given below."
msgstr ""
"Paperless-ng ne traitera que les courriers qui correspondent à TOUS les "
"filtres ci-dessous."
#: paperless_mail/admin.py:37
msgid "Actions"
msgstr "Actions"
#: paperless_mail/admin.py:39
msgid ""
"The action applied to the mail. This action is only performed when documents"
" were consumed from the mail. Mails without attachments will remain entirely"
" untouched."
msgstr ""
"Action appliquée au courriel. Cette action n'est exécutée que lorsque les "
"documents ont été traités depuis des courriels. Les courriels sans pièces "
"jointes demeurent totalement inchangés."
#: paperless_mail/admin.py:46
msgid "Metadata"
msgstr "Métadonnées"
#: paperless_mail/admin.py:48
msgid ""
"Assign metadata to documents consumed from this rule automatically. If you "
"do not assign tags, types or correspondents here, paperless will still "
"process all matching rules that you have defined."
msgstr ""
"Affecter automatiquement des métadonnées aux documents traités à partir de "
"cette règle. Si vous n'affectez pas d'étiquette, de type ou de correspondant"
" ici, Paperless-ng appliquera toutes les autres règles de rapprochement que "
"vous avez définies."
#: paperless_mail/apps.py:9
msgid "Paperless mail"
msgstr "Paperless-ng pour le courriel"
#: paperless_mail/models.py:11
msgid "mail account"
msgstr "compte de messagerie"
#: paperless_mail/models.py:12
msgid "mail accounts"
msgstr "comptes de messagerie"
#: paperless_mail/models.py:19
msgid "No encryption"
msgstr "Pas de chiffrement"
#: paperless_mail/models.py:20
msgid "Use SSL"
msgstr "Utiliser SSL"
#: paperless_mail/models.py:21
msgid "Use STARTTLS"
msgstr "Utiliser STARTTLS"
#: paperless_mail/models.py:29
msgid "IMAP server"
msgstr "Serveur IMAP"
#: paperless_mail/models.py:33
msgid "IMAP port"
msgstr "Port IMAP"
#: paperless_mail/models.py:36
msgid ""
"This is usually 143 for unencrypted and STARTTLS connections, and 993 for "
"SSL connections."
msgstr ""
"Généralement 143 pour les connexions non chiffrées et STARTTLS, et 993 pour "
"les connexions SSL."
#: paperless_mail/models.py:40
msgid "IMAP security"
msgstr "Sécurité IMAP"
#: paperless_mail/models.py:46
msgid "username"
msgstr "nom d'utilisateur"
#: paperless_mail/models.py:50
msgid "password"
msgstr "mot de passe"
#: paperless_mail/models.py:60
msgid "mail rule"
msgstr "règle de courriel"
#: paperless_mail/models.py:61
msgid "mail rules"
msgstr "règles de courriel"
#: paperless_mail/models.py:67
msgid "Only process attachments."
msgstr "Ne traiter que les pièces jointes."
#: paperless_mail/models.py:68
msgid "Process all files, including 'inline' attachments."
msgstr "Traiter tous les fichiers, y compris les pièces jointes \"en ligne\"."
#: paperless_mail/models.py:78
msgid "Mark as read, don't process read mails"
msgstr "Marquer comme lu, ne pas traiter les courriels lus"
#: paperless_mail/models.py:79
msgid "Flag the mail, don't process flagged mails"
msgstr "Marquer le courriel, ne pas traiter les courriels marqués"
#: paperless_mail/models.py:80
msgid "Move to specified folder"
msgstr "Déplacer vers le dossier spécifié"
#: paperless_mail/models.py:81
msgid "Delete"
msgstr "Supprimer"
#: paperless_mail/models.py:88
msgid "Use subject as title"
msgstr "Utiliser le sujet en tant que titre"
#: paperless_mail/models.py:89
msgid "Use attachment filename as title"
msgstr "Utiliser le nom de la pièce jointe en tant que titre"
#: paperless_mail/models.py:99
msgid "Do not assign a correspondent"
msgstr "Ne pas affecter de correspondant"
#: paperless_mail/models.py:101
msgid "Use mail address"
msgstr "Utiliser l'adresse électronique"
#: paperless_mail/models.py:103
msgid "Use name (or mail address if not available)"
msgstr "Utiliser le nom (ou l'adresse électronique s'il n'est pas disponible)"
#: paperless_mail/models.py:105
msgid "Use correspondent selected below"
msgstr "Utiliser le correspondant sélectionné ci-dessous"
#: paperless_mail/models.py:113
msgid "order"
msgstr "ordre"
#: paperless_mail/models.py:120
msgid "account"
msgstr "compte"
#: paperless_mail/models.py:124
msgid "folder"
msgstr "répertoire"
#: paperless_mail/models.py:128
msgid "filter from"
msgstr "filtrer l'expéditeur"
#: paperless_mail/models.py:131
msgid "filter subject"
msgstr "filtrer le sujet"
#: paperless_mail/models.py:134
msgid "filter body"
msgstr "filtrer le corps du message"
#: paperless_mail/models.py:138
msgid "filter attachment filename"
msgstr "filtrer le nom de fichier de la pièce jointe"
#: paperless_mail/models.py:140
msgid ""
"Only consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
"Ne traiter que les documents correspondant intégralement à ce nom de fichier"
" s'il est spécifié. Les jokers tels que *.pdf ou *facture* sont autorisés. "
"La casse n'est pas prise en compte."
#: paperless_mail/models.py:146
msgid "maximum age"
msgstr "âge maximum"
#: paperless_mail/models.py:148
msgid "Specified in days."
msgstr "En jours."
#: paperless_mail/models.py:151
msgid "attachment type"
msgstr "type de pièce jointe"
#: paperless_mail/models.py:154
msgid ""
"Inline attachments include embedded images, so it's best to combine this "
"option with a filename filter."
msgstr ""
"Les pièces jointes en ligne comprennent les images intégrées, il est donc "
"préférable de combiner cette option avec un filtre de nom de fichier."
#: paperless_mail/models.py:159
msgid "action"
msgstr "action"
#: paperless_mail/models.py:165
msgid "action parameter"
msgstr "paramètre d'action"
#: paperless_mail/models.py:167
msgid ""
"Additional parameter for the action selected above, i.e., the target folder "
"of the move to folder action."
msgstr ""
"Paramètre supplémentaire pour l'action sélectionnée ci-dessus, par exemple "
"le dossier cible de l'action de déplacement vers un dossier."
#: paperless_mail/models.py:173
msgid "assign title from"
msgstr "affecter le titre depuis"
#: paperless_mail/models.py:183
msgid "assign this tag"
msgstr "affecter cette étiquette"
#: paperless_mail/models.py:191
msgid "assign this document type"
msgstr "affecter ce type de document"
#: paperless_mail/models.py:195
msgid "assign correspondent from"
msgstr "affecter le correspondant depuis"
#: paperless_mail/models.py:205
msgid "assign this correspondent"
msgstr "affecter ce correspondant"

View File

@@ -0,0 +1,664 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
# Translators:
# Jonas Winkler, 2021
# Jo Vandeginste <jo.vandeginste@gmail.com>, 2021
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2021-02-16 14:52+0100\n"
"PO-Revision-Date: 2021-02-16 18:37+0000\n"
"Last-Translator: Jo Vandeginste <jo.vandeginste@gmail.com>, 2021\n"
"Language-Team: Dutch (Netherlands) (https://www.transifex.com/paperless/teams/115905/nl_NL/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Language: nl_NL\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
#: documents/apps.py:10
msgid "Documents"
msgstr "Documenten"
#: documents/models.py:32
msgid "Any word"
msgstr "Eender welk woord"
#: documents/models.py:33
msgid "All words"
msgstr "Alle woorden"
#: documents/models.py:34
msgid "Exact match"
msgstr "Exacte overeenkomst"
#: documents/models.py:35
msgid "Regular expression"
msgstr "Reguliere expressie"
#: documents/models.py:36
msgid "Fuzzy word"
msgstr "Gelijkaardig woord"
#: documents/models.py:37
msgid "Automatic"
msgstr "Automatisch"
#: documents/models.py:41 documents/models.py:364 paperless_mail/models.py:25
#: paperless_mail/models.py:109
msgid "name"
msgstr "naam"
#: documents/models.py:45
msgid "match"
msgstr "Overeenkomst"
#: documents/models.py:49
msgid "matching algorithm"
msgstr "Algoritme voor het bepalen van de overeenkomst"
#: documents/models.py:55
msgid "is insensitive"
msgstr "is niet hoofdlettergevoelig"
#: documents/models.py:74 documents/models.py:134
msgid "correspondent"
msgstr "correspondent"
#: documents/models.py:75
msgid "correspondents"
msgstr "correspondenten"
#: documents/models.py:97
msgid "color"
msgstr "Kleur"
#: documents/models.py:101
msgid "is inbox tag"
msgstr "is \"Postvak in\"-etiket"
#: documents/models.py:103
msgid ""
"Marks this tag as an inbox tag: All newly consumed documents will be tagged "
"with inbox tags."
msgstr ""
"Markeer dit etiket als een \"Postvak in\"-etiket: alle nieuw verwerkte "
"documenten krijgen de \"Postvak in\"-etiketten."
#: documents/models.py:108
msgid "tag"
msgstr "etiket"
#: documents/models.py:109 documents/models.py:165
msgid "tags"
msgstr "etiketten"
#: documents/models.py:115 documents/models.py:147
msgid "document type"
msgstr "documenttype"
#: documents/models.py:116
msgid "document types"
msgstr "documenttypen"
#: documents/models.py:124
msgid "Unencrypted"
msgstr "Niet versleuteld"
#: documents/models.py:125
msgid "Encrypted with GNU Privacy Guard"
msgstr "Versleuteld met GNU Privacy Guard"
#: documents/models.py:138
msgid "title"
msgstr "titel"
#: documents/models.py:151
msgid "content"
msgstr "inhoud"
#: documents/models.py:153
msgid ""
"The raw, text-only data of the document. This field is primarily used for "
"searching."
msgstr ""
"De onbewerkte gegevens van het document. Dit veld wordt voornamelijk "
"gebruikt om te zoeken."
#: documents/models.py:158
msgid "mime type"
msgstr "mimetype"
#: documents/models.py:169
msgid "checksum"
msgstr "checksum"
#: documents/models.py:173
msgid "The checksum of the original document."
msgstr "Het controlecijfer van het originele document."
#: documents/models.py:177
msgid "archive checksum"
msgstr "archief checksum"
#: documents/models.py:182
msgid "The checksum of the archived document."
msgstr "De checksum van het gearchiveerde document."
#: documents/models.py:186 documents/models.py:342
msgid "created"
msgstr "aangemaakt"
#: documents/models.py:190
msgid "modified"
msgstr "gewijzigd"
#: documents/models.py:194
msgid "storage type"
msgstr "type opslag"
#: documents/models.py:202
msgid "added"
msgstr "toegevoegd"
#: documents/models.py:206
msgid "filename"
msgstr "bestandsnaam"
#: documents/models.py:212
msgid "Current filename in storage"
msgstr "Huidige bestandsnaam in opslag"
#: documents/models.py:216
msgid "archive filename"
msgstr "Bestandsnaam in archief"
#: documents/models.py:222
msgid "Current archive filename in storage"
msgstr "Huidige bestandsnaam in archief"
#: documents/models.py:226
msgid "archive serial number"
msgstr "serienummer in archief"
#: documents/models.py:231
msgid "The position of this document in your physical document archive."
msgstr "De positie van dit document in je fysieke documentenarchief."
#: documents/models.py:237
msgid "document"
msgstr "document"
#: documents/models.py:238
msgid "documents"
msgstr "documenten"
#: documents/models.py:325
msgid "debug"
msgstr "debug"
#: documents/models.py:326
msgid "information"
msgstr "informatie"
#: documents/models.py:327
msgid "warning"
msgstr "waarschuwing"
#: documents/models.py:328
msgid "error"
msgstr "fout"
#: documents/models.py:329
msgid "critical"
msgstr "kritisch"
#: documents/models.py:333
msgid "group"
msgstr "groep"
#: documents/models.py:336
msgid "message"
msgstr "bericht"
#: documents/models.py:339
msgid "level"
msgstr "niveau"
#: documents/models.py:346
msgid "log"
msgstr "bericht"
#: documents/models.py:347
msgid "logs"
msgstr "berichten"
#: documents/models.py:358 documents/models.py:408
msgid "saved view"
msgstr "opgeslagen view"
#: documents/models.py:359
msgid "saved views"
msgstr "opgeslagen views"
#: documents/models.py:362
msgid "user"
msgstr "gebruiker"
#: documents/models.py:368
msgid "show on dashboard"
msgstr "weergeven op dashboard"
#: documents/models.py:371
msgid "show in sidebar"
msgstr "weergeven in zijbalk"
#: documents/models.py:375
msgid "sort field"
msgstr "sorteerveld"
#: documents/models.py:378
msgid "sort reverse"
msgstr "omgekeerd sorteren"
#: documents/models.py:384
msgid "title contains"
msgstr "titel bevat"
#: documents/models.py:385
msgid "content contains"
msgstr "inhoud bevat"
#: documents/models.py:386
msgid "ASN is"
msgstr "ASN is"
#: documents/models.py:387
msgid "correspondent is"
msgstr "correspondent is"
#: documents/models.py:388
msgid "document type is"
msgstr "documenttype is"
#: documents/models.py:389
msgid "is in inbox"
msgstr "zit in \"Postvak in\""
#: documents/models.py:390
msgid "has tag"
msgstr "heeft etiket"
#: documents/models.py:391
msgid "has any tag"
msgstr "heeft één van de etiketten"
#: documents/models.py:392
msgid "created before"
msgstr "aangemaakt voor"
#: documents/models.py:393
msgid "created after"
msgstr "aangemaakt na"
#: documents/models.py:394
msgid "created year is"
msgstr "aangemaakt jaar is"
#: documents/models.py:395
msgid "created month is"
msgstr "aangemaakte maand is"
#: documents/models.py:396
msgid "created day is"
msgstr "aangemaakte dag is"
#: documents/models.py:397
msgid "added before"
msgstr "toegevoegd voor"
#: documents/models.py:398
msgid "added after"
msgstr "toegevoegd na"
#: documents/models.py:399
msgid "modified before"
msgstr "gewijzigd voor"
#: documents/models.py:400
msgid "modified after"
msgstr "gewijzigd na"
#: documents/models.py:401
msgid "does not have tag"
msgstr "heeft geen etiket"
#: documents/models.py:412
msgid "rule type"
msgstr "type regel"
#: documents/models.py:416
msgid "value"
msgstr "waarde"
#: documents/models.py:422
msgid "filter rule"
msgstr "filterregel"
#: documents/models.py:423
msgid "filter rules"
msgstr "filterregels"
#: documents/serialisers.py:370
#, python-format
msgid "File type %(type)s not supported"
msgstr "Bestandstype %(type)s niet ondersteund"
#: documents/templates/index.html:20
msgid "Paperless-ng is loading..."
msgstr "Paperless-ng is aan het laden..."
#: documents/templates/registration/logged_out.html:13
msgid "Paperless-ng signed out"
msgstr "Paperless-ng - afmelden"
#: documents/templates/registration/logged_out.html:41
msgid "You have been successfully logged out. Bye!"
msgstr "Je bent nu afgemeld. Tot later!"
#: documents/templates/registration/logged_out.html:42
msgid "Sign in again"
msgstr "Meld je opnieuw aan"
#: documents/templates/registration/login.html:13
msgid "Paperless-ng sign in"
msgstr "Paperless-ng - aanmelden"
#: documents/templates/registration/login.html:42
msgid "Please sign in."
msgstr "Gelieve aan te melden."
#: documents/templates/registration/login.html:45
msgid "Your username and password didn't match. Please try again."
msgstr "Je gebruikersnaam en wachtwoord komen niet overeen. Probeer opnieuw."
#: documents/templates/registration/login.html:48
msgid "Username"
msgstr "Gebruikersnaam"
#: documents/templates/registration/login.html:49
msgid "Password"
msgstr "Wachtwoord"
#: documents/templates/registration/login.html:54
msgid "Sign in"
msgstr "Aanmelden"
#: paperless/settings.py:291
msgid "English (US)"
msgstr "Engels (US)"
#: paperless/settings.py:292
msgid "English (GB)"
msgstr "Engels (Brits)"
#: paperless/settings.py:293
msgid "German"
msgstr "Duits"
#: paperless/settings.py:294
msgid "Dutch"
msgstr "Nederlands"
#: paperless/settings.py:295
msgid "French"
msgstr "Frans"
#: paperless/urls.py:114
msgid "Paperless-ng administration"
msgstr "Paperless-ng administratie"
#: paperless_mail/admin.py:25
msgid "Filter"
msgstr "Filter"
#: paperless_mail/admin.py:27
msgid ""
"Paperless will only process mails that match ALL of the filters given below."
msgstr ""
"Paperless verwerkt alleen e-mails die voldoen aan ALLE onderstaande filters."
#: paperless_mail/admin.py:37
msgid "Actions"
msgstr "Acties"
#: paperless_mail/admin.py:39
msgid ""
"The action applied to the mail. This action is only performed when documents"
" were consumed from the mail. Mails without attachments will remain entirely"
" untouched."
msgstr ""
"De actie die wordt toegepast op de mail. Deze actie wordt alleen uitgevoerd "
"wanneer documenten verwerkt werden uit de mail. Mails zonder bijlage blijven"
" onaangeroerd."
#: paperless_mail/admin.py:46
msgid "Metadata"
msgstr "Metadata"
#: paperless_mail/admin.py:48
msgid ""
"Assign metadata to documents consumed from this rule automatically. If you "
"do not assign tags, types or correspondents here, paperless will still "
"process all matching rules that you have defined."
msgstr ""
"Automatisch metadata toewijzen aan documenten vanuit deze regel. Indien je "
"geen etiketten, documenttypes of correspondenten toewijst, zal Paperless nog"
" steeds alle regels verwerken die je hebt gedefinieerd."
#: paperless_mail/apps.py:9
msgid "Paperless mail"
msgstr "Paperless email"
#: paperless_mail/models.py:11
msgid "mail account"
msgstr "email account"
#: paperless_mail/models.py:12
msgid "mail accounts"
msgstr "email accounts"
#: paperless_mail/models.py:19
msgid "No encryption"
msgstr "Geen versleuteling"
#: paperless_mail/models.py:20
msgid "Use SSL"
msgstr "Gebruik SSL"
#: paperless_mail/models.py:21
msgid "Use STARTTLS"
msgstr "Gebruik STARTTLS"
#: paperless_mail/models.py:29
msgid "IMAP server"
msgstr "IMAP-server"
#: paperless_mail/models.py:33
msgid "IMAP port"
msgstr "IMAP-poort"
#: paperless_mail/models.py:36
msgid ""
"This is usually 143 for unencrypted and STARTTLS connections, and 993 for "
"SSL connections."
msgstr ""
"Dit is gewoonlijk 143 voor onversleutelde of STARTTLS verbindingen, en 993 "
"voor SSL verbindingen."
#: paperless_mail/models.py:40
msgid "IMAP security"
msgstr "IMAP-beveiliging"
#: paperless_mail/models.py:46
msgid "username"
msgstr "gebruikersnaam"
#: paperless_mail/models.py:50
msgid "password"
msgstr "wachtwoord"
#: paperless_mail/models.py:60
msgid "mail rule"
msgstr "email-regel"
#: paperless_mail/models.py:61
msgid "mail rules"
msgstr "email-regels"
#: paperless_mail/models.py:67
msgid "Only process attachments."
msgstr "Alleen bijlagen verwerken"
#: paperless_mail/models.py:68
msgid "Process all files, including 'inline' attachments."
msgstr "Verwerk alle bestanden, inclusief 'inline' bijlagen."
#: paperless_mail/models.py:78
msgid "Mark as read, don't process read mails"
msgstr "Markeer als gelezen, verwerk geen gelezen mails"
#: paperless_mail/models.py:79
msgid "Flag the mail, don't process flagged mails"
msgstr "Markeer de mail, verwerk geen mails met markering"
#: paperless_mail/models.py:80
msgid "Move to specified folder"
msgstr "Verplaats naar gegeven map"
#: paperless_mail/models.py:81
msgid "Delete"
msgstr "Verwijder"
#: paperless_mail/models.py:88
msgid "Use subject as title"
msgstr "Gebruik onderwerp als titel"
#: paperless_mail/models.py:89
msgid "Use attachment filename as title"
msgstr "Gebruik naam van bijlage als titel"
#: paperless_mail/models.py:99
msgid "Do not assign a correspondent"
msgstr "Wijs geen correspondent toe"
#: paperless_mail/models.py:101
msgid "Use mail address"
msgstr "Gebruik het email-adres"
#: paperless_mail/models.py:103
msgid "Use name (or mail address if not available)"
msgstr "Gebruik de naam, en anders het email-adres"
#: paperless_mail/models.py:105
msgid "Use correspondent selected below"
msgstr "Gebruik de hieronder aangeduide correspondent"
#: paperless_mail/models.py:113
msgid "order"
msgstr "volgorde"
#: paperless_mail/models.py:120
msgid "account"
msgstr "account"
#: paperless_mail/models.py:124
msgid "folder"
msgstr "map"
#: paperless_mail/models.py:128
msgid "filter from"
msgstr "filter afzender"
#: paperless_mail/models.py:131
msgid "filter subject"
msgstr "filter onderwerp"
#: paperless_mail/models.py:134
msgid "filter body"
msgstr "filter inhoud"
#: paperless_mail/models.py:138
msgid "filter attachment filename"
msgstr "Filter bestandsnaam van bijlage"
#: paperless_mail/models.py:140
msgid ""
"Only consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
"Alleen documenten verwerken die volledig overeenkomen, indien aangegeven. Je"
" kunt jokertekens gebruiken, zoals *.pdf of *factuur*. Dit is niet "
"hoofdlettergevoelig."
#: paperless_mail/models.py:146
msgid "maximum age"
msgstr "Maximale leeftijd"
#: paperless_mail/models.py:148
msgid "Specified in days."
msgstr "Aangegeven in dagen"
#: paperless_mail/models.py:151
msgid "attachment type"
msgstr "Type bijlage"
#: paperless_mail/models.py:154
msgid ""
"Inline attachments include embedded images, so it's best to combine this "
"option with a filename filter."
msgstr ""
"\"Inline\" bijlagen bevatten vaak ook afbeeldingen. In dit geval valt het "
"aan te raden om ook een filter voor de bestandsnaam op te geven."
#: paperless_mail/models.py:159
msgid "action"
msgstr "actie"
#: paperless_mail/models.py:165
msgid "action parameter"
msgstr "actie parameters"
#: paperless_mail/models.py:167
msgid ""
"Additional parameter for the action selected above, i.e., the target folder "
"of the move to folder action."
msgstr ""
"Extra parameters voor de hierboven gekozen actie, met andere woorden: de "
"bestemmingsmap voor de verplaats-actie."
#: paperless_mail/models.py:173
msgid "assign title from"
msgstr "wijs titel toe van"
#: paperless_mail/models.py:183
msgid "assign this tag"
msgstr "wijs dit etiket toe"
#: paperless_mail/models.py:191
msgid "assign this document type"
msgstr "wijs dit documenttype toe"
#: paperless_mail/models.py:195
msgid "assign correspondent from"
msgstr "wijs correspondent toe van"
#: paperless_mail/models.py:205
msgid "assign this correspondent"
msgstr "wijs deze correspondent toe"

View File

@@ -0,0 +1,672 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
# Translators:
# Jonas Winkler, 2021
# Rodrigo A <rodrigo.avelino@meliuz.com.br>, 2021
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2021-02-24 16:49+0100\n"
"PO-Revision-Date: 2021-02-16 18:37+0000\n"
"Last-Translator: Rodrigo A <rodrigo.avelino@meliuz.com.br>, 2021\n"
"Language-Team: Portuguese (Brazil) (https://www.transifex.com/paperless/teams/115905/pt_BR/)\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Language: pt_BR\n"
"Plural-Forms: nplurals=2; plural=(n > 1);\n"
#: documents/apps.py:10
msgid "Documents"
msgstr "Documentos"
#: documents/models.py:32
msgid "Any word"
msgstr "Qualquer palavra"
#: documents/models.py:33
msgid "All words"
msgstr "Todas as palavras"
#: documents/models.py:34
msgid "Exact match"
msgstr "Detecção exata"
#: documents/models.py:35
msgid "Regular expression"
msgstr "Expressão regular"
#: documents/models.py:36
msgid "Fuzzy word"
msgstr "Palavra difusa (fuzzy)"
#: documents/models.py:37
msgid "Automatic"
msgstr "Automático"
#: documents/models.py:41 documents/models.py:364 paperless_mail/models.py:25
#: paperless_mail/models.py:109
msgid "name"
msgstr "nome"
#: documents/models.py:45
msgid "match"
msgstr "detecção"
#: documents/models.py:49
msgid "matching algorithm"
msgstr "algoritmo de detecção"
#: documents/models.py:55
msgid "is insensitive"
msgstr "diferencia maiúsculas de minúsculas"
#: documents/models.py:74 documents/models.py:134
msgid "correspondent"
msgstr "correspondente"
#: documents/models.py:75
msgid "correspondents"
msgstr "correspondentes"
#: documents/models.py:97
msgid "color"
msgstr "cor"
#: documents/models.py:101
msgid "is inbox tag"
msgstr "é etiqueta caixa de entrada"
#: documents/models.py:103
msgid ""
"Marks this tag as an inbox tag: All newly consumed documents will be tagged "
"with inbox tags."
msgstr ""
"Marca essa etiqueta como caixa de entrada: Todos os novos documentos "
"consumidos terão as etiquetas de caixa de entrada."
#: documents/models.py:108
msgid "tag"
msgstr "etiqueta"
#: documents/models.py:109 documents/models.py:165
msgid "tags"
msgstr "etiquetas"
#: documents/models.py:115 documents/models.py:147
msgid "document type"
msgstr "tipo de documento"
#: documents/models.py:116
msgid "document types"
msgstr "tipos de documento"
#: documents/models.py:124
msgid "Unencrypted"
msgstr "Não encriptado"
#: documents/models.py:125
msgid "Encrypted with GNU Privacy Guard"
msgstr "Encriptado com GNU Privacy Guard"
#: documents/models.py:138
msgid "title"
msgstr "título"
#: documents/models.py:151
msgid "content"
msgstr "conteúdo"
#: documents/models.py:153
msgid ""
"The raw, text-only data of the document. This field is primarily used for "
"searching."
msgstr ""
"O conteúdo de texto bruto do documento. Esse campo é usado principalmente "
"para busca."
#: documents/models.py:158
msgid "mime type"
msgstr "tipo mime"
#: documents/models.py:169
msgid "checksum"
msgstr "some de verificação"
#: documents/models.py:173
msgid "The checksum of the original document."
msgstr "A soma de verificação original do documento."
#: documents/models.py:177
msgid "archive checksum"
msgstr "Soma de verificação de arquivamento."
#: documents/models.py:182
msgid "The checksum of the archived document."
msgstr "A soma de verificação do documento arquivado."
#: documents/models.py:186 documents/models.py:342
msgid "created"
msgstr "criado"
#: documents/models.py:190
msgid "modified"
msgstr "modificado"
#: documents/models.py:194
msgid "storage type"
msgstr "tipo de armazenamento"
#: documents/models.py:202
msgid "added"
msgstr "adicionado"
#: documents/models.py:206
msgid "filename"
msgstr "nome do arquivo"
#: documents/models.py:212
msgid "Current filename in storage"
msgstr "Nome do arquivo atual armazenado"
#: documents/models.py:216
msgid "archive filename"
msgstr "nome do arquivo para arquivamento"
#: documents/models.py:222
msgid "Current archive filename in storage"
msgstr "Nome do arquivo para arquivamento armazenado"
#: documents/models.py:226
msgid "archive serial number"
msgstr "número de sério de arquivamento"
#: documents/models.py:231
msgid "The position of this document in your physical document archive."
msgstr "A posição deste documento no seu arquivamento físico."
#: documents/models.py:237
msgid "document"
msgstr "documento"
#: documents/models.py:238
msgid "documents"
msgstr "documentos"
#: documents/models.py:325
msgid "debug"
msgstr "debug"
#: documents/models.py:326
msgid "information"
msgstr "informação"
#: documents/models.py:327
msgid "warning"
msgstr "aviso"
#: documents/models.py:328
msgid "error"
msgstr "erro"
#: documents/models.py:329
msgid "critical"
msgstr "crítico"
#: documents/models.py:333
msgid "group"
msgstr "grupo"
#: documents/models.py:336
msgid "message"
msgstr "mensagem"
#: documents/models.py:339
msgid "level"
msgstr "nível"
#: documents/models.py:346
msgid "log"
msgstr "log"
#: documents/models.py:347
msgid "logs"
msgstr "logs"
#: documents/models.py:358 documents/models.py:408
msgid "saved view"
msgstr "visualização"
#: documents/models.py:359
msgid "saved views"
msgstr "visualizações"
#: documents/models.py:362
msgid "user"
msgstr "usuário"
#: documents/models.py:368
msgid "show on dashboard"
msgstr "exibir no painel de controle"
#: documents/models.py:371
msgid "show in sidebar"
msgstr "exibir no painel lateral"
#: documents/models.py:375
msgid "sort field"
msgstr "ordenar campo"
#: documents/models.py:378
msgid "sort reverse"
msgstr "odernar reverso"
#: documents/models.py:384
msgid "title contains"
msgstr "título contém"
#: documents/models.py:385
msgid "content contains"
msgstr "conteúdo contém"
#: documents/models.py:386
msgid "ASN is"
msgstr "NSA é"
#: documents/models.py:387
msgid "correspondent is"
msgstr "correspondente é"
#: documents/models.py:388
msgid "document type is"
msgstr "tipo de documento é"
#: documents/models.py:389
msgid "is in inbox"
msgstr "é caixa de entrada"
#: documents/models.py:390
msgid "has tag"
msgstr "contém etiqueta"
#: documents/models.py:391
msgid "has any tag"
msgstr "contém qualquer etiqueta"
#: documents/models.py:392
msgid "created before"
msgstr "criado antes de"
#: documents/models.py:393
msgid "created after"
msgstr "criado depois de"
#: documents/models.py:394
msgid "created year is"
msgstr "ano de criação é"
#: documents/models.py:395
msgid "created month is"
msgstr "mês de criação é"
#: documents/models.py:396
msgid "created day is"
msgstr "dia de criação é"
#: documents/models.py:397
msgid "added before"
msgstr "adicionado antes de"
#: documents/models.py:398
msgid "added after"
msgstr "adicionado depois de"
#: documents/models.py:399
msgid "modified before"
msgstr "modificado antes de"
#: documents/models.py:400
msgid "modified after"
msgstr "modificado depois de"
#: documents/models.py:401
msgid "does not have tag"
msgstr "não tem etiqueta"
#: documents/models.py:412
msgid "rule type"
msgstr "tipo de regra"
#: documents/models.py:416
msgid "value"
msgstr "valor"
#: documents/models.py:422
msgid "filter rule"
msgstr "regra de filtragem"
#: documents/models.py:423
msgid "filter rules"
msgstr "regras de filtragem"
#: documents/serialisers.py:52
#, python-format
msgid "Invalid regular expresssion: %(error)s"
msgstr "Expressão regular inválida: %(error)s"
#: documents/serialisers.py:378
#, python-format
msgid "File type %(type)s not supported"
msgstr "Tipo de arquivo %(type)s não suportado"
#: documents/templates/index.html:20
msgid "Paperless-ng is loading..."
msgstr "Paperless-ng está carregando..."
#: documents/templates/registration/logged_out.html:13
msgid "Paperless-ng signed out"
msgstr "Paperless-ng saiu"
#: documents/templates/registration/logged_out.html:41
msgid "You have been successfully logged out. Bye!"
msgstr "Sua sessão foi encerrada com sucesso. Até mais!"
#: documents/templates/registration/logged_out.html:42
msgid "Sign in again"
msgstr "Entre novamente"
#: documents/templates/registration/login.html:13
msgid "Paperless-ng sign in"
msgstr "Entrar no Paperless-ng"
#: documents/templates/registration/login.html:42
msgid "Please sign in."
msgstr "Por favor, entre na sua conta"
#: documents/templates/registration/login.html:45
msgid "Your username and password didn't match. Please try again."
msgstr "Seu usuário e senha estão incorretos. Por favor, tente novamente."
#: documents/templates/registration/login.html:48
msgid "Username"
msgstr "Usuário"
#: documents/templates/registration/login.html:49
msgid "Password"
msgstr "Senha"
#: documents/templates/registration/login.html:54
msgid "Sign in"
msgstr "Entrar"
#: paperless/settings.py:291
msgid "English (US)"
msgstr "Inglês (EUA)"
#: paperless/settings.py:292
msgid "English (GB)"
msgstr "Inglês (GB)"
#: paperless/settings.py:293
msgid "German"
msgstr "Alemão"
#: paperless/settings.py:294
msgid "Dutch"
msgstr "Holandês"
#: paperless/settings.py:295
msgid "French"
msgstr "Francês"
#: paperless/settings.py:296
msgid "Portuguese (Brazil)"
msgstr "Português (Brasil)"
#: paperless/urls.py:118
msgid "Paperless-ng administration"
msgstr "Administração do Paperless-ng"
#: paperless_mail/admin.py:25
msgid "Filter"
msgstr "Filtro"
#: paperless_mail/admin.py:27
msgid ""
"Paperless will only process mails that match ALL of the filters given below."
msgstr ""
"Paperless processará somente e-mails que se encaixam em TODOS os filtros "
"abaixo."
#: paperless_mail/admin.py:37
msgid "Actions"
msgstr "Ações"
#: paperless_mail/admin.py:39
msgid ""
"The action applied to the mail. This action is only performed when documents"
" were consumed from the mail. Mails without attachments will remain entirely"
" untouched."
msgstr ""
"A ação se aplica ao e-mail. Essa ação só é executada quando documentos foram"
" consumidos do e-mail. E-mails sem anexos permanecerão intactos."
#: paperless_mail/admin.py:46
msgid "Metadata"
msgstr "Metadados"
#: paperless_mail/admin.py:48
msgid ""
"Assign metadata to documents consumed from this rule automatically. If you "
"do not assign tags, types or correspondents here, paperless will still "
"process all matching rules that you have defined."
msgstr ""
"Atribua metadados aos documentos consumidos por esta regra automaticamente. "
"Se você não atribuir etiquetas, tipos ou correspondentes aqui, paperless "
"ainda sim processará todas as regras de detecção que você definiu."
#: paperless_mail/apps.py:9
msgid "Paperless mail"
msgstr "Paperless mail"
#: paperless_mail/models.py:11
msgid "mail account"
msgstr "conta de e-mail"
#: paperless_mail/models.py:12
msgid "mail accounts"
msgstr "contas de e-mail"
#: paperless_mail/models.py:19
msgid "No encryption"
msgstr "Sem encriptação"
#: paperless_mail/models.py:20
msgid "Use SSL"
msgstr "Usar SSL"
#: paperless_mail/models.py:21
msgid "Use STARTTLS"
msgstr "Usar STARTTLS"
#: paperless_mail/models.py:29
msgid "IMAP server"
msgstr "Servidor IMAP"
#: paperless_mail/models.py:33
msgid "IMAP port"
msgstr "Porta IMAP"
#: paperless_mail/models.py:36
msgid ""
"This is usually 143 for unencrypted and STARTTLS connections, and 993 for "
"SSL connections."
msgstr ""
"É geralmente 143 para não encriptado e conexões STARTTLS, e 993 para "
"conexões SSL."
#: paperless_mail/models.py:40
msgid "IMAP security"
msgstr "segurança IMAP"
#: paperless_mail/models.py:46
msgid "username"
msgstr "usuário"
#: paperless_mail/models.py:50
msgid "password"
msgstr "senha"
#: paperless_mail/models.py:60
msgid "mail rule"
msgstr "regra de e-mail"
#: paperless_mail/models.py:61
msgid "mail rules"
msgstr "regras de e-mail"
#: paperless_mail/models.py:67
msgid "Only process attachments."
msgstr "Processar somente anexos."
#: paperless_mail/models.py:68
msgid "Process all files, including 'inline' attachments."
msgstr "Processar todos os arquivos, incluindo anexos 'inline'."
#: paperless_mail/models.py:78
msgid "Mark as read, don't process read mails"
msgstr "Marcar como lido, não processar e-mails lidos"
#: paperless_mail/models.py:79
msgid "Flag the mail, don't process flagged mails"
msgstr "Sinalizar o e-mail, não processar e-mails sinalizados"
#: paperless_mail/models.py:80
msgid "Move to specified folder"
msgstr "Mover para pasta especificada"
#: paperless_mail/models.py:81
msgid "Delete"
msgstr "Excluir"
#: paperless_mail/models.py:88
msgid "Use subject as title"
msgstr "Usar assunto como título"
#: paperless_mail/models.py:89
msgid "Use attachment filename as title"
msgstr "Usar nome do arquivo anexo como título"
#: paperless_mail/models.py:99
msgid "Do not assign a correspondent"
msgstr "Não atribuir um correspondente"
#: paperless_mail/models.py:101
msgid "Use mail address"
msgstr "Usar endereço de e-mail"
#: paperless_mail/models.py:103
msgid "Use name (or mail address if not available)"
msgstr "Usar nome (ou endereço de e-mail se não disponível)"
#: paperless_mail/models.py:105
msgid "Use correspondent selected below"
msgstr "Usar correspondente selecionado abaixo"
#: paperless_mail/models.py:113
msgid "order"
msgstr "ordem"
#: paperless_mail/models.py:120
msgid "account"
msgstr "conta"
#: paperless_mail/models.py:124
msgid "folder"
msgstr "pasta"
#: paperless_mail/models.py:128
msgid "filter from"
msgstr "filtrar de"
#: paperless_mail/models.py:131
msgid "filter subject"
msgstr "filtrar assunto"
#: paperless_mail/models.py:134
msgid "filter body"
msgstr "filtrar corpo"
#: paperless_mail/models.py:138
msgid "filter attachment filename"
msgstr "filtrar nome do arquivo anexo"
#: paperless_mail/models.py:140
msgid ""
"Only consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
"Consumir somente documentos que correspondem a este nome de arquivo se especificado.\n"
"Curingas como *.pdf ou *invoice* são permitidos. Sem diferenciação de maiúsculas e minúsculas."
#: paperless_mail/models.py:146
msgid "maximum age"
msgstr "idade máxima"
#: paperless_mail/models.py:148
msgid "Specified in days."
msgstr "Especificada em dias."
#: paperless_mail/models.py:151
msgid "attachment type"
msgstr "tipo de anexo"
#: paperless_mail/models.py:154
msgid ""
"Inline attachments include embedded images, so it's best to combine this "
"option with a filename filter."
msgstr ""
"Anexos inline incluem imagens inseridas, por isso é melhor combinar essa "
"opção com um filtro de nome de arquivo."
#: paperless_mail/models.py:159
msgid "action"
msgstr "ação"
#: paperless_mail/models.py:165
msgid "action parameter"
msgstr "parâmetro da ação"
#: paperless_mail/models.py:167
msgid ""
"Additional parameter for the action selected above, i.e., the target folder "
"of the move to folder action."
msgstr ""
"Parâmetro adicional para a ação selecionada acima, por exemplo: a pasta de "
"destino da ação de mover pasta."
#: paperless_mail/models.py:173
msgid "assign title from"
msgstr "atribuir título de"
#: paperless_mail/models.py:183
msgid "assign this tag"
msgstr "atribuir esta etiqueta"
#: paperless_mail/models.py:191
msgid "assign this document type"
msgstr "atribuir este tipo de documento"
#: paperless_mail/models.py:195
msgid "assign correspondent from"
msgstr "atribuir correspondente de"
#: paperless_mail/models.py:205
msgid "assign this correspondent"
msgstr "atribuir este correspondente"

23
src/paperless/asgi.py Normal file
View File

@@ -0,0 +1,23 @@
import os
from django.core.asgi import get_asgi_application
# Fetch Django ASGI application early to ensure AppRegistry is populated
# before importing consumers and AuthMiddlewareStack that may import ORM
# models.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
django_asgi_app = get_asgi_application()
from channels.auth import AuthMiddlewareStack # NOQA: E402
from channels.routing import ProtocolTypeRouter, URLRouter # NOQA: E402
from paperless.urls import websocket_urlpatterns # NOQA: E402
application = ProtocolTypeRouter({
    # Reuse the Django application fetched above instead of building it twice.
    "http": django_asgi_app,
    "websocket": AuthMiddlewareStack(
        URLRouter(
            websocket_urlpatterns
        )
    ),
})
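# A minimal sketch of serving this module with an ASGI server (assuming
# uvicorn is installed; daphne or any other ASGI server works the same way):
#
#     uvicorn paperless.asgi:application --port 8000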

View File

@@ -2,6 +2,7 @@ from django.conf import settings
from django.contrib.auth.models import User
from django.utils.deprecation import MiddlewareMixin
from rest_framework import authentication
from django.contrib.auth.middleware import RemoteUserMiddleware
class AutoLoginMiddleware(MiddlewareMixin):
@@ -26,3 +27,11 @@ class AngularApiAuthenticationOverride(authentication.BaseAuthentication):
return (user, None)
else:
return None
class HttpRemoteUserMiddleware(RemoteUserMiddleware):
    """This class allows authentication via HTTP_REMOTE_USER, which is set,
    for example, by certain SSO applications.
    """
    header = 'HTTP_REMOTE_USER'
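# Django rewrites an incoming Remote-User HTTP header to HTTP_REMOTE_USER in
# request.META, which is what the header attribute above matches. As an
# illustration only (a hypothetical reverse-proxy fragment, not part of this
# change), nginx could forward the authenticated user like this:
#
#     proxy_set_header Remote-User $remote_user;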

View File

@@ -13,18 +13,17 @@ writeable_hint = (
)
def path_check(env_var):
def path_check(var, directory):
messages = []
directory = os.getenv(env_var)
if directory:
if not os.path.exists(directory):
messages.append(Error(
exists_message.format(env_var),
exists_message.format(var),
exists_hint.format(directory)
))
elif not os.access(directory, os.W_OK | os.X_OK):
messages.append(Error(
writeable_message.format(env_var),
messages.append(Warning(
writeable_message.format(var),
writeable_hint.format(directory)
))
return messages
@@ -36,12 +35,9 @@ def paths_check(app_configs, **kwargs):
Check the various paths for existence, readability and writeability
"""
check_messages = path_check("PAPERLESS_DATA_DIR") + \
path_check("PAPERLESS_MEDIA_ROOT") + \
path_check("PAPERLESS_CONSUMPTION_DIR") + \
path_check("PAPERLESS_STATICDIR")
return check_messages
return path_check("PAPERLESS_DATA_DIR", settings.DATA_DIR) + \
path_check("PAPERLESS_MEDIA_ROOT", settings.MEDIA_ROOT) + \
path_check("PAPERLESS_CONSUMPTION_DIR", settings.CONSUMPTION_DIR)
@register()
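# Once registered, these checks run through Django's system check framework,
# for example at startup or explicitly with:
#
#     python3 manage.py check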

View File

@@ -0,0 +1,29 @@
import json
from asgiref.sync import async_to_sync
from channels.exceptions import DenyConnection, AcceptConnection
from channels.generic.websocket import WebsocketConsumer
class StatusConsumer(WebsocketConsumer):
    def _authenticated(self):
        return 'user' in self.scope and self.scope['user'].is_authenticated
    def connect(self):
        if not self._authenticated():
            raise DenyConnection()
        else:
            async_to_sync(self.channel_layer.group_add)(
                'status_updates', self.channel_name)
            raise AcceptConnection()
    def disconnect(self, close_code):
        async_to_sync(self.channel_layer.group_discard)(
            'status_updates', self.channel_name)
    def status_update(self, event):
        if not self._authenticated():
            self.close()
        else:
            self.send(json.dumps(event['data']))
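# A minimal sketch of pushing an update to connected clients from synchronous
# code elsewhere in the project (standard Channels group_send; the "type" key
# maps to the status_update handler above):
#
#     from asgiref.sync import async_to_sync
#     from channels.layers import get_channel_layer
#
#     async_to_sync(get_channel_layer().group_send)(
#         "status_updates", {"type": "status_update", "data": {"status": "ok"}})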

View File

@@ -4,8 +4,11 @@ import multiprocessing
import os
import re
from concurrent_log_handler.queue import setup_logging_queues
from dotenv import load_dotenv
from django.utils.translation import gettext_lazy as _
# Tap paperless.conf if it's available
if os.path.exists("../paperless.conf"):
load_dotenv("../paperless.conf")
@@ -53,9 +56,15 @@ ARCHIVE_DIR = os.path.join(MEDIA_ROOT, "documents", "archive")
THUMBNAIL_DIR = os.path.join(MEDIA_ROOT, "documents", "thumbnails")
DATA_DIR = os.getenv('PAPERLESS_DATA_DIR', os.path.join(BASE_DIR, "..", "data"))
# Lock file for synchronizing changes to the MEDIA directory across multiple
# threads.
MEDIA_LOCK = os.path.join(MEDIA_ROOT, "media.lock")
INDEX_DIR = os.path.join(DATA_DIR, "index")
MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")
LOGGING_DIR = os.getenv('PAPERLESS_LOGGING_DIR', os.path.join(DATA_DIR, "log"))
CONSUMPTION_DIR = os.getenv("PAPERLESS_CONSUMPTION_DIR", os.path.join(BASE_DIR, "..", "consume"))
# This will be created if it doesn't exist
@@ -65,6 +74,8 @@ SCRATCH_DIR = os.getenv("PAPERLESS_SCRATCH_DIR", "/tmp/paperless")
# Application Definition #
###############################################################################
env_apps = os.getenv("PAPERLESS_APPS").split(",") if os.getenv("PAPERLESS_APPS") else []
INSTALLED_APPS = [
"whitenoise.runserver_nostatic",
@@ -86,17 +97,25 @@ INSTALLED_APPS = [
"django.contrib.admin",
"rest_framework",
"rest_framework.authtoken",
"django_filters",
"django_q",
]
] + env_apps
if DEBUG:
INSTALLED_APPS.append("channels")
REST_FRAMEWORK = {
'DEFAULT_AUTHENTICATION_CLASSES': [
'rest_framework.authentication.BasicAuthentication',
'rest_framework.authentication.SessionAuthentication'
]
'rest_framework.authentication.SessionAuthentication',
'rest_framework.authentication.TokenAuthentication'
],
'DEFAULT_VERSIONING_CLASS': 'rest_framework.versioning.AcceptHeaderVersioning',
'DEFAULT_VERSION': 'v1',
'ALLOWED_VERSIONS': ['1', '2']
}
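# With AcceptHeaderVersioning, clients pick an API version through the Accept
# header; a hypothetical request against a local instance (token value
# assumed) might look like:
#
#     curl -H "Accept: application/json; version=2" \
#          -H "Authorization: Token <token>" \
#          http://localhost:8000/api/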
if DEBUG:
@@ -109,6 +128,7 @@ MIDDLEWARE = [
'whitenoise.middleware.WhiteNoiseMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'corsheaders.middleware.CorsMiddleware',
'django.middleware.locale.LocaleMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
@@ -121,10 +141,11 @@ ROOT_URLCONF = 'paperless.urls'
FORCE_SCRIPT_NAME = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
WSGI_APPLICATION = 'paperless.wsgi.application'
ASGI_APPLICATION = "paperless.asgi.application"
STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", "/static/")
# what is this used for?
# TODO: what is this used for?
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
@@ -141,6 +162,17 @@ TEMPLATES = [
},
]
CHANNEL_LAYERS = {
    "default": {
        "BACKEND": "channels_redis.core.RedisChannelLayer",
        "CONFIG": {
            "hosts": [os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")],
            "capacity": 2000,  # default 100
            "expiry": 15,  # default 60
        },
    },
}
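# The channel layer reads its Redis endpoint from PAPERLESS_REDIS (see
# "hosts" above); a typical override in the environment might be:
#
#     PAPERLESS_REDIS=redis://broker:6379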
###############################################################################
# Security #
###############################################################################
@@ -153,11 +185,23 @@ if AUTO_LOGIN_USERNAME:
# regular login in case the provided user does not exist.
MIDDLEWARE.insert(_index+1, 'paperless.auth.AutoLoginMiddleware')
ENABLE_HTTP_REMOTE_USER = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
if ENABLE_HTTP_REMOTE_USER:
    MIDDLEWARE.append(
        'paperless.auth.HttpRemoteUserMiddleware'
    )
    AUTHENTICATION_BACKENDS = [
        'django.contrib.auth.backends.RemoteUserBackend',
        'django.contrib.auth.backends.ModelBackend'
    ]
    REST_FRAMEWORK['DEFAULT_AUTHENTICATION_CLASSES'].append(
        'rest_framework.authentication.RemoteUserAuthentication'
    )
# X-Frame options for embedded PDF display:
if DEBUG:
X_FRAME_OPTIONS = ''
# this should really be 'allow-from uri', but that's not supported in
# any major browser.
X_FRAME_OPTIONS = 'ANY'
else:
X_FRAME_OPTIONS = 'SAMEORIGIN'
@@ -204,6 +248,12 @@ AUTH_PASSWORD_VALIDATORS = [
DATA_UPLOAD_MAX_NUMBER_FIELDS = None
COOKIE_PREFIX = os.getenv("PAPERLESS_COOKIE_PREFIX", "")
CSRF_COOKIE_NAME = f"{COOKIE_PREFIX}csrftoken"
SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid"
LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language"
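
The prefix keeps several instances served from one domain from clobbering each other's cookies. Illustrative values:

    # PAPERLESS_COOKIE_PREFIX="ng_" yields:
    #   CSRF_COOKIE_NAME     == "ng_csrftoken"
    #   SESSION_COOKIE_NAME  == "ng_sessionid"
    #   LANGUAGE_COOKIE_NAME == "ng_django_language"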
###############################################################################
# Database #
###############################################################################
@@ -229,6 +279,7 @@ if os.getenv("PAPERLESS_DBHOST"):
"NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
"USER": os.getenv("PAPERLESS_DBUSER", "paperless"),
"PASSWORD": os.getenv("PAPERLESS_DBPASS", "paperless"),
'OPTIONS': {'sslmode': os.getenv("PAPERLESS_DBSSLMODE", "prefer")},
}
if os.getenv("PAPERLESS_DBPORT"):
DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")
@@ -239,6 +290,19 @@ if os.getenv("PAPERLESS_DBHOST"):
LANGUAGE_CODE = 'en-us'
LANGUAGES = [
("en-us", _("English (US)")),
("en-gb", _("English (GB)")),
("de", _("German")),
("nl-nl", _("Dutch")),
("fr", _("French")),
("pt-br", _("Portuguese (Brazil)"))
]
LOCALE_PATHS = [
os.path.join(BASE_DIR, "locale")
]
TIME_ZONE = os.getenv("PAPERLESS_TIME_ZONE", "UTC")
USE_I18N = True
@@ -251,14 +315,19 @@ USE_TZ = True
# Logging #
###############################################################################
DISABLE_DBHANDLER = __get_boolean("PAPERLESS_DISABLE_DBHANDLER")
setup_logging_queues()
os.makedirs(LOGGING_DIR, exist_ok=True)
LOGROTATE_MAX_SIZE = int(os.getenv("PAPERLESS_LOGROTATE_MAX_SIZE", 1024*1024))
LOGROTATE_MAX_BACKUPS = int(os.getenv("PAPERLESS_LOGROTATE_MAX_BACKUPS", 20))
LOGGING = {
"version": 1,
"disable_existing_loggers": False,
'formatters': {
'verbose': {
'format': '{levelname} {asctime} {module} {message}',
'format': '[{asctime}] [{levelname}] [{name}] {message}',
'style': '{',
},
'simple': {
@@ -267,34 +336,39 @@ LOGGING = {
},
},
"handlers": {
"db": {
"level": "DEBUG",
"class": "documents.loggers.PaperlessHandler",
},
"console": {
"level": "WARNING",
"level": "DEBUG" if DEBUG else "INFO",
"class": "logging.StreamHandler",
"formatter": "verbose",
},
"file_paperless": {
"class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
"formatter": "verbose",
"filename": os.path.join(LOGGING_DIR, "paperless.log"),
"maxBytes": LOGROTATE_MAX_SIZE,
"backupCount": LOGROTATE_MAX_BACKUPS
},
"file_mail": {
"class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
"formatter": "verbose",
"filename": os.path.join(LOGGING_DIR, "mail.log"),
"maxBytes": LOGROTATE_MAX_SIZE,
"backupCount": LOGROTATE_MAX_BACKUPS
}
},
"root": {
"handlers": ["console"],
"level": "DEBUG",
"handlers": ["console"]
},
"loggers": {
"documents": {
"handlers": ["db"],
"propagate": True,
"paperless": {
"handlers": ["file_paperless"],
"level": "DEBUG"
},
"paperless_mail": {
"handlers": ["db"],
"propagate": True,
},
"paperless_tesseract": {
"handlers": ["db"],
"propagate": True,
},
},
"handlers": ["file_mail"],
"level": "DEBUG"
}
}
}
###############################################################################
@@ -309,10 +383,15 @@ LOGGING = {
# Favors threads per worker on smaller systems and never exceeds cpu_count()
# in total.
def default_task_workers():
# always leave one core open
available_cores = max(multiprocessing.cpu_count(), 1)
try:
if available_cores < 4:
return available_cores
return max(
math.floor(math.sqrt(multiprocessing.cpu_count())),
math.floor(math.sqrt(available_cores)),
1
)
except NotImplementedError:
@@ -324,22 +403,25 @@ TASK_WORKERS = int(os.getenv("PAPERLESS_TASK_WORKERS", default_task_workers()))
Q_CLUSTER = {
'name': 'paperless',
'catch_up': False,
'recycle': 1,
'workers': TASK_WORKERS,
'redis': os.getenv("PAPERLESS_REDIS", "redis://localhost:6379")
}
def default_threads_per_worker():
def default_threads_per_worker(task_workers):
# always leave one core open
available_cores = max(multiprocessing.cpu_count(), 1)
try:
return max(
math.floor(multiprocessing.cpu_count() / TASK_WORKERS),
math.floor(available_cores / task_workers),
1
)
except NotImplementedError:
return 1
THREADS_PER_WORKER = os.getenv("PAPERLESS_THREADS_PER_WORKER", default_threads_per_worker())
THREADS_PER_WORKER = os.getenv("PAPERLESS_THREADS_PER_WORKER", default_threads_per_worker(TASK_WORKERS))
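
Worked through for a hypothetical 16-core machine, the two defaults compose as follows:

    import math

    cores = 16  # illustrative; the real value comes from multiprocessing.cpu_count()
    workers = cores if cores < 4 else max(math.floor(math.sqrt(cores)), 1)  # -> 4
    threads = max(math.floor(cores / workers), 1)                           # -> 4
    # 4 workers x 4 threads: parallel consumption without oversubscribing the CPU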
###############################################################################
# Paperless Specific Settings #
@@ -347,6 +429,12 @@ THREADS_PER_WORKER = os.getenv("PAPERLESS_THREADS_PER_WORKER", default_threads_p
CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0))
CONSUMER_POLLING_DELAY = int(os.getenv("PAPERLESS_CONSUMER_POLLING_DELAY", 5))
CONSUMER_POLLING_RETRY_COUNT = int(
os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5)
)
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")
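
Setting PAPERLESS_CONSUMER_POLLING to a nonzero number of seconds switches the consumer from inotify to filesystem polling, the usual workaround for network mounts where inotify events never arrive; as I read it, the delay/retry pair governs how long the consumer waits for a file to stop changing before ingesting it. Illustrative values:

    # PAPERLESS_CONSUMER_POLLING=10              poll the consumption dir every 10 s
    # PAPERLESS_CONSUMER_POLLING_DELAY=5         wait 5 s between stability checks
    # PAPERLESS_CONSUMER_POLLING_RETRY_COUNT=5   give up after 5 checks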
@@ -371,6 +459,14 @@ OCR_MODE = os.getenv("PAPERLESS_OCR_MODE", "skip")
OCR_IMAGE_DPI = os.getenv("PAPERLESS_OCR_IMAGE_DPI")
OCR_CLEAN = os.getenv("PAPERLESS_OCR_CLEAN", "clean")
OCR_DESKEW = __get_boolean("PAPERLESS_OCR_DESKEW", "true")
OCR_ROTATE_PAGES = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")
OCR_ROTATE_PAGES_THRESHOLD = float(os.getenv("PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD", 12.0))
OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS", "{}")
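
OCR_USER_ARGS takes a JSON object that is presumably decoded and forwarded to the OCR engine as extra keyword arguments; the default "{}" adds nothing. Illustrative only:

    # PAPERLESS_OCR_USER_ARGS='{"optimize": 1}'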
# GNUPG needs a home directory for some reason
@@ -415,3 +511,26 @@ for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
# TODO: this should not have a prefix.
# Specify the filename format for out files
PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
THUMBNAIL_FONT_NAME = os.getenv("PAPERLESS_THUMBNAIL_FONT_NAME", "/usr/share/fonts/liberation/LiberationSerif-Regular.ttf")
# Tika settings
PAPERLESS_TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
PAPERLESS_TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
PAPERLESS_TIKA_GOTENBERG_ENDPOINT = os.getenv(
"PAPERLESS_TIKA_GOTENBERG_ENDPOINT", "http://localhost:3000"
)
if PAPERLESS_TIKA_ENABLED:
INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
# List dates that should be ignored when trying to parse date from document text
IGNORE_DATES = set()
if os.getenv("PAPERLESS_IGNORE_DATES", ""):
import dateparser
for s in os.getenv("PAPERLESS_IGNORE_DATES", "").split(","):
d = dateparser.parse(s)
if d:
IGNORE_DATES.add(d.date())
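
Each comma-separated entry is run through dateparser, which copes with more than ISO dates. A quick sketch of what ends up in IGNORE_DATES, with an illustrative value:

    import dateparser

    # PAPERLESS_IGNORE_DATES="2020-01-01,01.01.2021"
    for s in "2020-01-01,01.01.2021".split(","):
        d = dateparser.parse(s)
        print(d.date() if d else None)  # 2020-01-01, then 2021-01-01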

View File

@@ -0,0 +1,54 @@
import os
import shutil
from django.test import TestCase, override_settings
from documents.tests.utils import DirectoriesMixin
from paperless import binaries_check, paths_check
from paperless.checks import debug_mode_check
class TestChecks(DirectoriesMixin, TestCase):
def test_binaries(self):
self.assertEqual(binaries_check(None), [])
@override_settings(CONVERT_BINARY="uuuhh", OPTIPNG_BINARY="forgot")
def test_binaries_fail(self):
self.assertEqual(len(binaries_check(None)), 2)
def test_paths_check(self):
self.assertEqual(paths_check(None), [])
@override_settings(MEDIA_ROOT="uuh",
DATA_DIR="whatever",
CONSUMPTION_DIR="idontcare")
def test_paths_check_dont_exist(self):
msgs = paths_check(None)
self.assertEqual(len(msgs), 3, str(msgs))
for msg in msgs:
self.assertTrue(msg.msg.endswith("is set but doesn't exist."))
def test_paths_check_no_access(self):
os.chmod(self.dirs.data_dir, 0o000)
os.chmod(self.dirs.media_dir, 0o000)
os.chmod(self.dirs.consumption_dir, 0o000)
self.addCleanup(os.chmod, self.dirs.data_dir, 0o777)
self.addCleanup(os.chmod, self.dirs.media_dir, 0o777)
self.addCleanup(os.chmod, self.dirs.consumption_dir, 0o777)
msgs = paths_check(None)
self.assertEqual(len(msgs), 3)
for msg in msgs:
self.assertTrue(msg.msg.endswith("is not writeable"))
@override_settings(DEBUG=False)
def test_debug_disabled(self):
self.assertEqual(debug_mode_check(None), [])
@override_settings(DEBUG=True)
def test_debug_enabled(self):
self.assertEqual(len(debug_mode_check(None)), 1)

View File

@@ -0,0 +1,60 @@
from unittest import mock
from channels.layers import get_channel_layer
from channels.testing import WebsocketCommunicator
from django.test import TestCase, override_settings
from paperless.asgi import application
TEST_CHANNEL_LAYERS = {
'default': {
'BACKEND': 'channels.layers.InMemoryChannelLayer',
},
}
class TestWebSockets(TestCase):
@override_settings(CHANNEL_LAYERS=TEST_CHANNEL_LAYERS)
async def test_no_auth(self):
communicator = WebsocketCommunicator(application, "/ws/status/")
connected, subprotocol = await communicator.connect()
self.assertFalse(connected)
await communicator.disconnect()
@override_settings(CHANNEL_LAYERS=TEST_CHANNEL_LAYERS)
@mock.patch("paperless.consumers.StatusConsumer._authenticated")
async def test_auth(self, _authenticated):
_authenticated.return_value = True
communicator = WebsocketCommunicator(application, "/ws/status/")
connected, subprotocol = await communicator.connect()
self.assertTrue(connected)
await communicator.disconnect()
@override_settings(CHANNEL_LAYERS=TEST_CHANNEL_LAYERS)
@mock.patch("paperless.consumers.StatusConsumer._authenticated")
async def test_receive(self, _authenticated):
_authenticated.return_value = True
communicator = WebsocketCommunicator(application, "/ws/status/")
connected, subprotocol = await communicator.connect()
self.assertTrue(connected)
message = {
"task_id": "test"
}
channel_layer = get_channel_layer()
await channel_layer.group_send("status_updates", {
"type": "status_update",
"data": message
})
response = await communicator.receive_json_from()
self.assertEqual(response, message)
await communicator.disconnect()

View File

@@ -4,8 +4,12 @@ from django.contrib.auth.decorators import login_required
from django.urls import path, re_path
from django.views.decorators.csrf import csrf_exempt
from django.views.generic import RedirectView
from rest_framework.authtoken import views
from rest_framework.routers import DefaultRouter
from django.utils.translation import gettext_lazy as _
from paperless.consumers import StatusConsumer
from documents.views import (
CorrespondentViewSet,
DocumentViewSet,
@@ -15,7 +19,12 @@ from documents.views import (
SearchView,
IndexView,
SearchAutoCompleteView,
StatisticsView
StatisticsView,
PostDocumentView,
SavedViewViewSet,
BulkEditView,
SelectionDataView,
BulkDownloadView
)
from paperless.views import FaviconView
@@ -23,8 +32,9 @@ api_router = DefaultRouter()
api_router.register(r"correspondents", CorrespondentViewSet)
api_router.register(r"document_types", DocumentTypeViewSet)
api_router.register(r"documents", DocumentViewSet)
api_router.register(r"logs", LogViewSet)
api_router.register(r"logs", LogViewSet, basename="logs")
api_router.register(r"tags", TagViewSet)
api_router.register(r"saved_views", SavedViewViewSet)
urlpatterns = [
@@ -45,6 +55,20 @@ urlpatterns = [
StatisticsView.as_view(),
name="statistics"),
re_path(r"^documents/post_document/", PostDocumentView.as_view(),
name="post_document"),
re_path(r"^documents/bulk_edit/", BulkEditView.as_view(),
name="bulk_edit"),
re_path(r"^documents/selection_data/", SelectionDataView.as_view(),
name="selection_data"),
re_path(r"^documents/bulk_download/", BulkDownloadView.as_view(),
name="bulk_download"),
path('token/', views.obtain_auth_token)
] + api_router.urls)),
re_path(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),
@@ -71,7 +95,8 @@ urlpatterns = [
# Frontend assets TODO: this is pretty bad, but it works.
path('assets/<path:path>',
RedirectView.as_view(url='/static/frontend/assets/%(path)s')),
RedirectView.as_view(url='/static/frontend/en-US/assets/%(path)s')),
# TODO: with localization, this is even worse! :/
# login, logout
path('accounts/', include('django.contrib.auth.urls')),
@@ -80,9 +105,14 @@ urlpatterns = [
re_path(r".*", login_required(IndexView.as_view())),
]
websocket_urlpatterns = [
re_path(r'ws/status/$', StatusConsumer.as_asgi()),
]
# Text in each page's <h1> (and above login form).
admin.site.site_header = 'Paperless-ng'
# Text at the end of each page's <title>.
admin.site.site_title = 'Paperless-ng'
# Text at the top of the admin index page.
admin.site.index_title = 'Paperless-ng administration'
admin.site.index_title = _('Paperless-ng administration')
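
The new token/ route exposes DRF's obtain_auth_token view, which pairs with the TokenAuthentication class added to REST_FRAMEWORK earlier in this commit. A minimal sketch of the flow, with host and credentials as placeholders:

    import requests

    base = "http://localhost:8000"  # illustrative host
    token = requests.post(
        f"{base}/api/token/",
        data={"username": "admin", "password": "secret"},  # placeholder credentials
    ).json()["token"]
    docs = requests.get(
        f"{base}/api/documents/",
        headers={"Authorization": f"Token {token}"},
    )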

Some files were not shown because too many files have changed in this diff.