Mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2025-07-28 18:24:38 -05:00)

Commit: Merge branch 'dev' into celery-tasks
@@ -1,5 +1,4 @@
from django.contrib import admin
from django.contrib.auth.models import Group, User
from django.utils.html import format_html, format_html_join
from django.utils.safestring import mark_safe
from whoosh.writing import AsyncWriter
@@ -32,7 +31,7 @@ class TagAdmin(admin.ModelAdmin):
    list_filter = ("colour", "matching_algorithm")
    list_editable = ("colour", "match", "matching_algorithm")

    readonly_fields = ("slug",)
    readonly_fields = ("slug", )


class DocumentTypeAdmin(admin.ModelAdmin):
@@ -51,9 +50,17 @@ class DocumentTypeAdmin(admin.ModelAdmin):
class DocumentAdmin(admin.ModelAdmin):

    search_fields = ("correspondent__name", "title", "content", "tags__name")
    readonly_fields = ("added", "file_type", "storage_type",)
    list_display = ("title", "created", "added", "correspondent",
                    "tags_", "archive_serial_number", "document_type")
    readonly_fields = ("added", "file_type", "storage_type", "filename")
    list_display = (
        "title",
        "created",
        "added",
        "correspondent",
        "tags_",
        "archive_serial_number",
        "document_type",
        "filename"
    )
    list_filter = (
        "document_type",
        "tags",
@@ -120,8 +127,3 @@ admin.site.register(Tag, TagAdmin)
admin.site.register(DocumentType, DocumentTypeAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(Log, LogAdmin)


# Unless we implement multi-user, these default registrations don't make sense.
admin.site.unregister(Group)
admin.site.unregister(User)
@@ -1,5 +1,4 @@
from django.apps import AppConfig
from django.db.models.signals import post_delete


class DocumentsConfig(AppConfig):
@@ -14,7 +13,6 @@ class DocumentsConfig(AppConfig):
        add_inbox_tags,
        run_pre_consume_script,
        run_post_consume_script,
        cleanup_document_deletion,
        set_log_entry,
        set_correspondent,
        set_document_type,
@@ -33,6 +31,4 @@ class DocumentsConfig(AppConfig):
        document_consumption_finished.connect(add_to_index)
        document_consumption_finished.connect(run_post_consume_script)

        post_delete.connect(cleanup_document_deletion)

        AppConfig.ready(self)
@@ -4,6 +4,8 @@ from django.conf import settings
from django.core.checks import Error, register
from django.db.utils import OperationalError, ProgrammingError

from documents.signals import document_consumer_declaration


@register()
def changed_password_check(app_configs, **kwargs):
@@ -37,3 +39,17 @@ def changed_password_check(app_configs, **kwargs):
            """))]

    return []


@register()
def parser_check(app_configs, **kwargs):

    parsers = []
    for response in document_consumer_declaration.send(None):
        parsers.append(response[1])

    if len(parsers) == 0:
        return [Error("No parsers found. This is a bug. The consumer won't be "
                      "able to consume any documents without parsers.")]
    else:
        return []
@@ -3,7 +3,6 @@ import logging
import os
import pickle
import re
import time

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier
@@ -64,7 +63,7 @@ class DocumentClassifier(object):

    def save_classifier(self):
        with open(settings.MODEL_FILE, "wb") as f:
            pickle.dump(self.FORMAT_VERSION, f)  # Version
            pickle.dump(self.FORMAT_VERSION, f)
            pickle.dump(self.data_hash, f)
            pickle.dump(self.data_vectorizer, f)

@@ -89,16 +88,14 @@ class DocumentClassifier(object):
            data.append(preprocessed_content)

            y = -1
            if doc.document_type:
                if doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO:
                    y = doc.document_type.pk
            if doc.document_type and doc.document_type.matching_algorithm == MatchingModel.MATCH_AUTO:
                y = doc.document_type.pk
            m.update(y.to_bytes(4, 'little', signed=True))
            labels_document_type.append(y)

            y = -1
            if doc.correspondent:
                if doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO:
                    y = doc.correspondent.pk
            if doc.correspondent and doc.correspondent.matching_algorithm == MatchingModel.MATCH_AUTO:
                y = doc.correspondent.pk
            m.update(y.to_bytes(4, 'little', signed=True))
            labels_correspondent.append(y)

@@ -120,8 +117,8 @@ class DocumentClassifier(object):

        num_tags = len(labels_tags_unique)
        # subtract 1 since -1 (null) is also part of the classes.
        num_correspondents = len(labels_correspondent) - 1
        num_document_types = len(labels_document_type) - 1
        num_correspondents = len(set(labels_correspondent)) - 1
        num_document_types = len(set(labels_document_type)) - 1

        logging.getLogger(__name__).debug(
            "{} documents, {} tag(s), {} correspondent(s), "
@@ -137,7 +134,7 @@ class DocumentClassifier(object):
        logging.getLogger(__name__).debug("Vectorizing data...")
        self.data_vectorizer = CountVectorizer(
            analyzer="word",
            ngram_range=(1,2),
            ngram_range=(1, 2),
            min_df=0.01
        )
        data_vectorized = self.data_vectorizer.fit_transform(data)
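Note: the switch from len(labels) to len(set(labels)) fixes the class count, because the label lists hold one entry per document, not one per class. A standalone check of the arithmetic (values made up for illustration):

    labels_correspondent = [3, 3, -1, 7, 3, -1]
    num_wrong = len(labels_correspondent) - 1       # 5: counts documents, not classes
    num_right = len(set(labels_correspondent)) - 1  # 2: distinct labels minus the -1 (null) class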
@@ -3,7 +3,6 @@ import hashlib
import logging
import os
import re
import uuid

from asgiref.sync import async_to_sync
from channels.layers import get_channel_layer
@@ -13,7 +12,9 @@ from django.utils import timezone

from paperless.db import GnuPG
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
from .models import Document, FileInfo
from .file_handling import generate_filename, create_source_path_directory
from .loggers import LoggingMixin
from .models import Document, FileInfo, Correspondent, DocumentType, Tag
from .parsers import ParseError, get_parser_class
from .signals import (
    document_consumption_finished,
@@ -25,17 +26,10 @@ class ConsumerError(Exception):
    pass


class Consumer:
    """
    Loop over every file found in CONSUMPTION_DIR and:
      1. Convert it to a greyscale pnm
      2. Use tesseract on the pnm
      3. Store the document in the MEDIA_ROOT with optional encryption
      4. Store the OCR'd text in the database
      5. Delete the document and image(s)
    """
class Consumer(LoggingMixin):

    def _send_progress(self, filename, current_progress, max_progress, status, message, document_id=None):
    def _send_progress(self, filename, current_progress, max_progress, status,
                       message, document_id=None):
        payload = {
            'filename': os.path.basename(filename),
            'current_progress': current_progress,
@@ -44,156 +38,226 @@ class Consumer:
            'message': message,
            'document_id': document_id
        }
        async_to_sync(self.channel_layer.group_send)("status_updates", {'type': 'status_update', 'data': payload})
        async_to_sync(self.channel_layer.group_send)("status_updates",
                                                     {'type': 'status_update',
                                                      'data': payload})

    def __init__(self, consume=settings.CONSUMPTION_DIR,
                 scratch=settings.SCRATCH_DIR):

        self.logger = logging.getLogger(__name__)
        self.logging_group = None

        self.consume = consume
        self.scratch = scratch

        self.classifier = DocumentClassifier()
    def __init__(self):
        super().__init__()
        self.path = None
        self.filename = None
        self.override_title = None
        self.override_correspondent_id = None
        self.override_tag_ids = None
        self.override_document_type_id = None

        self.channel_layer = get_channel_layer()

        os.makedirs(self.scratch, exist_ok=True)
    def pre_check_file_exists(self):
        if not os.path.isfile(self.path):
            raise ConsumerError("Cannot consume {}: It is not a file".format(
                self.path))

        self.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        if settings.PASSPHRASE:
            self.storage_type = Document.STORAGE_TYPE_GPG

        if not self.consume:
    def pre_check_consumption_dir(self):
        if not settings.CONSUMPTION_DIR:
            raise ConsumerError(
                "The CONSUMPTION_DIR settings variable does not appear to be "
                "set."
            )
                "set.")

        if not os.path.exists(self.consume):
        if not os.path.isdir(settings.CONSUMPTION_DIR):
            raise ConsumerError(
                "Consumption directory {} does not exist".format(self.consume))
                "Consumption directory {} does not exist".format(
                    settings.CONSUMPTION_DIR))

    def log(self, level, message):
        getattr(self.logger, level)(message, extra={
            "group": self.logging_group
        })
    def pre_check_regex(self):
        if not re.match(FileInfo.REGEXES["title"], self.filename):
            raise ConsumerError(
                "Filename {} does not seem to be safe to "
                "consume".format(self.filename))

    @transaction.atomic
    def try_consume_file(self, file):
        """
        Return True if file was consumed
        """

        self.logging_group = uuid.uuid4()

        if not re.match(FileInfo.REGEXES["title"], file):
            return False

        doc = file

        if self._is_duplicate(doc):
            self.log(
                "warning",
                "Skipping {} as it appears to be a duplicate".format(doc)
    def pre_check_duplicate(self):
        with open(self.path, "rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()
        if Document.objects.filter(checksum=checksum).exists():
            if settings.CONSUMER_DELETE_DUPLICATES:
                os.unlink(self.path)
            raise ConsumerError(
                "Not consuming {}: It is a duplicate.".format(self.filename)
            )
            return False

        self.log("info", "Consuming {}".format(doc))
    def pre_check_directories(self):
        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
        os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True)
        os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)

    def try_consume_file(self,
                         path,
                         override_filename=None,
                         override_title=None,
                         override_correspondent_id=None,
                         override_document_type_id=None,
                         override_tag_ids=None):
        """
        Return the document object if it was successfully created.
        """

        parser_class = get_parser_class(doc)
        self.path = path
        self.filename = override_filename or os.path.basename(path)
        self.override_title = override_title
        self.override_correspondent_id = override_correspondent_id
        self.override_document_type_id = override_document_type_id
        self.override_tag_ids = override_tag_ids

        # this is for grouping logging entries for this particular file
        # together.

        self.renew_logging_group()

        # Make sure that preconditions for consuming the file are met.

        self.pre_check_file_exists()
        self.pre_check_consumption_dir()
        self.pre_check_directories()
        self.pre_check_regex()
        self.pre_check_duplicate()

        self.log("info", "Consuming {}".format(self.filename))

        # Determine the parser class.

        parser_class = get_parser_class(self.filename)
        if not parser_class:
            self.log(
                "error", "No parsers could be found for {}".format(doc))
            return False
            raise ConsumerError("No parsers available for {}".format(self.filename))
        else:
            self.log("info", "Parser: {}".format(parser_class.__name__))
            self.log("debug", "Parser: {}".format(parser_class.__name__))

        self._send_progress(file, 0, 100, 'WORKING', 'Consumption started')
        # Notify all listeners that we're going to do some work.

        self._send_progress(self.filename, 0, 100, 'WORKING', 'Consumption started')

        document_consumption_started.send(
            sender=self.__class__,
            filename=doc,
            filename=self.path,
            logging_group=self.logging_group
        )

        def progress_callback(current_progress, max_progress, message):
            # recalculate progress to be within 20 and 80
            p = int((current_progress / max_progress) * 60 + 20)
            self._send_progress(file, p, 100, "WORKING", message)
            self._send_progress(self.filename, p, 100, "WORKING", message)

        document_parser = parser_class(doc, self.logging_group, progress_callback)
        # This doesn't parse the document yet, but gives us a parser.

        document_parser = parser_class(self.path, self.logging_group, progress_callback)

        # However, this already created working directories which we have to
        # clean up.

        # Parse the document. This may take some time.

        try:
            self.log("info", "Generating thumbnail for {}...".format(doc))
            self._send_progress(file, 10, 100, 'WORKING',
            self.log("debug", "Generating thumbnail for {}...".format(self.filename))
            self._send_progress(self.filename, 10, 100, 'WORKING',
                                'Generating thumbnail...')
            thumbnail = document_parser.get_optimised_thumbnail()
            self._send_progress(file, 20, 100, 'WORKING',
            self.log("debug", "Parsing {}...".format(self.filename))
            self._send_progress(self.filename, 20, 100, 'WORKING',
                                'Getting text from document...')
            text = document_parser.get_text()
            self._send_progress(file, 80, 100, 'WORKING',
            self._send_progress(self.filename, 80, 100, 'WORKING',
                                'Getting date from document...')
            date = document_parser.get_date()
            self._send_progress(file, 85, 100, 'WORKING',
                                'Storing the document...')
            document = self._store(
                text,
                doc,
                thumbnail,
                date
            )
        except ParseError as e:
            self.log("fatal", "PARSE FAILURE for {}: {}".format(doc, e))
            document_parser.cleanup()
            self._send_progress(self.filename, 100, 100, 'FAILED',
                                "Failed: {}".format(e))
            raise ConsumerError(e)

        # Prepare the document classifier.

        # TODO: I don't really like to do this here, but this way we avoid
        # reloading the classifier multiple times, since there are multiple
        # post-consume hooks that all require the classifier.

        try:
            classifier = DocumentClassifier()
            classifier.reload()
        except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
            logging.getLogger(__name__).warning(
                "Cannot classify documents: {}.".format(e))
            classifier = None
        self._send_progress(self.filename, 85, 100, 'WORKING',
                            'Storing the document...')
        # now that everything is done, we can start to store the document
        # in the system. This will be a transaction and reasonably fast.
        try:
            with transaction.atomic():

                # store the document.
                document = self._store(
                    text=text,
                    date=date
                )

                # If we get here, it was successful. Proceed with post-consume
                # hooks. If they fail, nothing will get changed.

                self._send_progress(self.filename, 90, 100, 'WORKING',
                                    'Performing post-consumption tasks...')

                document_consumption_finished.send(
                    sender=self.__class__,
                    document=document,
                    logging_group=self.logging_group,
                    classifier=classifier
                )

                # After everything is in the database, copy the files into
                # place. If this fails, we'll also rollback the transaction.

                create_source_path_directory(document.source_path)
                self._write(document, self.path, document.source_path)
                self._write(document, thumbnail, document.thumbnail_path)

                # Delete the file only if it was successfully consumed
                self.log("debug", "Deleting file {}".format(self.path))
                os.unlink(self.path)
        except Exception as e:
            raise ConsumerError(e)
            self._send_progress(file, 100, 100, 'FAILED',
                                "Failed: {}".format(e))

        finally:
            document_parser.cleanup()
            return False
        else:
            document_parser.cleanup()
            self._cleanup_doc(doc)

            self.log(
                "info",
                "Document {} consumption finished".format(document)
            )
        self.log(
            "info",
            "Document {} consumption finished".format(document)
        )

        classifier = None
        self._send_progress(file, 100, 100, 'SUCCESS',
                            'Finished.', document.id)

        try:
            self.classifier.reload()
            classifier = self.classifier
        except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
            logging.getLogger(__name__).warning("Cannot classify documents: {}.".format(e))
        return document

        self._send_progress(file, 90, 100, 'WORKING',
                            'Performing post-consumption tasks...')
    def _store(self, text, date):

        document_consumption_finished.send(
            sender=self.__class__,
            document=document,
            logging_group=self.logging_group,
            classifier=classifier
        )
        self._send_progress(file, 100, 100, 'SUCCESS',
                            'Finished.', document.id)
        return True
        # If someone gave us the original filename, use it instead of doc.

    def _store(self, text, doc, thumbnail, date):
        file_info = FileInfo.from_path(self.filename)

        file_info = FileInfo.from_path(doc)

        stats = os.stat(doc)
        stats = os.stat(self.path)

        self.log("debug", "Saving record to database")

        created = file_info.created or date or timezone.make_aware(
            datetime.datetime.fromtimestamp(stats.st_mtime))
            datetime.datetime.fromtimestamp(stats.st_mtime))

        with open(doc, "rb") as f:
        if settings.PASSPHRASE:
            storage_type = Document.STORAGE_TYPE_GPG
        else:
            storage_type = Document.STORAGE_TYPE_UNENCRYPTED

        with open(self.path, "rb") as f:
            document = Document.objects.create(
                correspondent=file_info.correspondent,
                title=file_info.title,
@@ -202,7 +266,7 @@ class Consumer:
                checksum=hashlib.md5(f.read()).hexdigest(),
                created=created,
                modified=created,
                storage_type=self.storage_type
                storage_type=storage_type
            )

        relevant_tags = set(file_info.tags)
@@ -211,14 +275,30 @@ class Consumer:
            self.log("debug", "Tagging with {}".format(tag_names))
            document.tags.add(*relevant_tags)

        self._write(document, doc, document.source_path)
        self._write(document, thumbnail, document.thumbnail_path)
        self.apply_overrides(document)

        #TODO: why do we need to save the document again?
        document.filename = generate_filename(document)

        # We need to save the document twice, since we need the PK of the
        # document in order to create its filename above.
        document.save()

        return document

    def apply_overrides(self, document):
        if self.override_title:
            document.title = self.override_title

        if self.override_correspondent_id:
            document.correspondent = Correspondent.objects.get(pk=self.override_correspondent_id)

        if self.override_document_type_id:
            document.document_type = DocumentType.objects.get(pk=self.override_document_type_id)

        if self.override_tag_ids:
            for tag_id in self.override_tag_ids:
                document.tags.add(Tag.objects.get(pk=tag_id))

    def _write(self, document, source, target):
        with open(source, "rb") as read_file:
            with open(target, "wb") as write_file:
@@ -227,13 +307,3 @@ class Consumer:
                    return
                self.log("debug", "Encrypting")
                write_file.write(GnuPG.encrypted(read_file))

    def _cleanup_doc(self, doc):
        self.log("debug", "Deleting document {}".format(doc))
        os.unlink(doc)

    @staticmethod
    def _is_duplicate(doc):
        with open(doc, "rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()
        return Document.objects.filter(checksum=checksum).exists()
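Note: the progress callback maps a parser's own 0..max range into the 20..80 band of the overall progress bar; the consumer appears to reserve 0..20 for setup and thumbnailing and 80..100 for date extraction and storage. A standalone check of the arithmetic:

    def overall_progress(current, maximum):
        # same formula as progress_callback above
        return int((current / maximum) * 60 + 20)

    assert overall_progress(0, 50) == 20    # parser just started
    assert overall_progress(25, 50) == 50   # halfway through parsing
    assert overall_progress(50, 50) == 80   # parsing done, storage is next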
src/documents/file_handling.py (new file, 102 lines)
@@ -0,0 +1,102 @@
import logging
import os
from collections import defaultdict

from django.conf import settings
from django.template.defaultfilters import slugify


def create_source_path_directory(source_path):
    os.makedirs(os.path.dirname(source_path), exist_ok=True)


def delete_empty_directories(directory):
    # Go up in the directory hierarchy and try to delete all directories
    directory = os.path.normpath(directory)
    root = os.path.normpath(settings.ORIGINALS_DIR)

    if not directory.startswith(root + os.path.sep):
        # don't do anything outside our originals folder.

        # append os.path.sep so that we avoid these cases:
        #   directory = /home/originals2/test
        #   root = /home/originals ("/" gets appended and startswith fails)
        return

    while directory != root:
        if not os.listdir(directory):
            # it's empty
            try:
                os.rmdir(directory)
            except OSError:
                # whatever. empty directories aren't that bad anyway.
                return
        else:
            # it's not empty.
            return

        # go one level up
        directory = os.path.normpath(os.path.dirname(directory))


def many_to_dictionary(field):
    # Converts ManyToManyField to dictionary by assuming that field
    # entries contain an _ or - which will be used as a delimiter
    mydictionary = dict()

    for index, t in enumerate(field.all()):
        # Populate tag names by index
        mydictionary[index] = slugify(t.name)

        # Find delimiter
        delimiter = t.name.find('_')

        if delimiter == -1:
            delimiter = t.name.find('-')

        if delimiter == -1:
            continue

        key = t.name[:delimiter]
        value = t.name[delimiter + 1:]

        mydictionary[slugify(key)] = slugify(value)

    return mydictionary


def generate_filename(document):
    # Create filename based on configured format
    path = ""

    try:
        if settings.PAPERLESS_FILENAME_FORMAT is not None:
            tags = defaultdict(lambda: slugify(None),
                               many_to_dictionary(document.tags))
            path = settings.PAPERLESS_FILENAME_FORMAT.format(
                correspondent=slugify(document.correspondent),
                title=slugify(document.title),
                created=slugify(document.created),
                created_year=document.created.year if document.created else "none",
                created_month=document.created.month if document.created else "none",
                created_day=document.created.day if document.created else "none",
                added=slugify(document.added),
                added_year=document.added.year if document.added else "none",
                added_month=document.added.month if document.added else "none",
                added_day=document.added.day if document.added else "none",
                tags=tags,
            )
    except (ValueError, KeyError, IndexError):
        logging.getLogger(__name__).warning("Invalid PAPERLESS_FILENAME_FORMAT: {}, falling back to default.".format(settings.PAPERLESS_FILENAME_FORMAT))

    # Always append the primary key to guarantee uniqueness of filename
    if len(path) > 0:
        filename = "%s-%07i.%s" % (path, document.pk, document.file_type)
    else:
        filename = "%07i.%s" % (document.pk, document.file_type)

    # Append .gpg for encrypted files
    if document.storage_type == document.STORAGE_TYPE_GPG:
        filename += ".gpg"

    return filename
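Note: to see how generate_filename composes, here is a small self-contained sketch with a hypothetical stand-in for the Document model (the rendered path and field values are invented for illustration):

    from collections import namedtuple

    # Hypothetical stand-in; the real object is documents.models.Document.
    Doc = namedtuple("Doc", ["pk", "file_type", "storage_type"])
    doc = Doc(pk=42, file_type="pdf", storage_type="gpg")

    path = "acme/invoice"  # what PAPERLESS_FILENAME_FORMAT might render to
    if path:
        filename = "%s-%07i.%s" % (path, doc.pk, doc.file_type)
    else:
        filename = "%07i.%s" % (doc.pk, doc.file_type)
    if doc.storage_type == "gpg":  # Document.STORAGE_TYPE_GPG in the real code
        filename += ".gpg"

    print(filename)  # acme/invoice-0000042.pdf.gpg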
@@ -1,10 +1,11 @@
import os

import tempfile
from datetime import datetime
from time import mktime

from django import forms
from django.conf import settings
from django_q.tasks import async_task
from pathvalidate import validate_filename, ValidationError


@@ -19,12 +20,6 @@ class UploadForm(forms.Form):
            raise forms.ValidationError("That filename is suspicious.")
        return self.cleaned_data.get("document")

    def get_filename(self, i=None):
        return os.path.join(
            settings.CONSUMPTION_DIR,
            "{}_{}".format(str(i), self.cleaned_data.get("document").name) if i else self.cleaned_data.get("document").name
        )

    def save(self):
        """
        Since the consumer already does a lot of work, it's easier just to save
@@ -33,15 +28,16 @@ class UploadForm(forms.Form):
        """

        document = self.cleaned_data.get("document").read()
        original_filename = self.cleaned_data.get("document").name

        t = int(mktime(datetime.now().timetuple()))

        file_name = self.get_filename()
        i = 0
        while os.path.exists(file_name):
            i += 1
            file_name = self.get_filename(i)
        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

        # TODO: don't just append pdf. This is here for that weird regex check at the start of the consumer.
        with tempfile.NamedTemporaryFile(prefix="paperless-upload-", suffix=".pdf", dir=settings.SCRATCH_DIR, delete=False) as f:

        with open(file_name, "wb") as f:
            f.write(document)
            os.utime(file_name, times=(t, t))
            os.utime(f.name, times=(t, t))

        async_task("documents.tasks.consume_file", f.name, override_filename=original_filename, task_name=os.path.basename(original_filename))
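Note: the upload path now just writes the bytes to scratch space and hands everything else to the task queue. A minimal sketch of that handoff, assuming a consume_file task that accepts an override_filename keyword as in the diff:

    import os
    import tempfile

    from django_q.tasks import async_task

    def enqueue_upload(data, original_filename, scratch_dir):
        # Persist the upload; delete=False so the queue worker can pick it up later.
        with tempfile.NamedTemporaryFile(prefix="paperless-upload-",
                                         suffix=".pdf",
                                         dir=scratch_dir,
                                         delete=False) as f:
            f.write(data)
        async_task("documents.tasks.consume_file", f.name,
                   override_filename=original_filename,
                   task_name=os.path.basename(original_filename))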
@@ -1,7 +1,6 @@
import logging
from contextlib import contextmanager

from django.db import models
from django.dispatch import receiver
from whoosh import highlight
from whoosh.fields import Schema, TEXT, NUMERIC
from whoosh.highlight import Formatter, get_text
@@ -9,10 +8,8 @@ from whoosh.index import create_in, exists_in, open_dir
from whoosh.qparser import MultifieldParser
from whoosh.writing import AsyncWriter

from documents.models import Document
from paperless import settings


logger = logging.getLogger(__name__)


@@ -69,6 +66,9 @@ def open_index(recreate=False):
    if exists_in(settings.INDEX_DIR) and not recreate:
        return open_dir(settings.INDEX_DIR)
    else:
        # TODO: this is not thread safe. If 2 instances try to create the index
        # at the same time, this fails. This currently prevents parallel
        # tests.
        return create_in(settings.INDEX_DIR, get_schema())


@@ -99,15 +99,19 @@ def remove_document_from_index(document):
        remove_document(writer, document)


@contextmanager
def query_page(ix, query, page):
    with ix.searcher() as searcher:
    searcher = ix.searcher()
    try:
        query_parser = MultifieldParser(["content", "title", "correspondent"],
                                        ix.schema).parse(query)
        result_page = searcher.search_page(query_parser, page)
        result_page.results.fragmenter = highlight.ContextFragmenter(
            surround=50)
        result_page.results.formatter = JsonFormatter()
        return result_page
        yield result_page
    finally:
        searcher.close()


def autocomplete(ix, term, limit=10):
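Note: query_page is now a generator-based context manager, so the searcher stays open while the caller works with the results inside the with-block and is closed afterwards, even on errors. The pattern in isolation, with a file standing in for the Whoosh searcher:

    from contextlib import contextmanager

    @contextmanager
    def open_log(path):
        f = open(path, "a")   # acquire, like ix.searcher() above
        try:
            yield f           # the caller uses it for the duration of the with-block
        finally:
            f.close()         # always released, even if the caller raises

    # usage:
    # with open_log("/tmp/example.log") as log:
    #     log.write("hello\n")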
@@ -1,4 +1,5 @@
import logging
import uuid


class PaperlessHandler(logging.Handler):
@@ -13,3 +14,19 @@ class PaperlessHandler(logging.Handler):
            kwargs["group"] = record.group

        Log.objects.create(**kwargs)


class LoggingMixin:

    logging_group = None

    def renew_logging_group(self):
        self.logging_group = uuid.uuid4()

    def log(self, level, message):
        target = ".".join([self.__class__.__module__, self.__class__.__name__])
        logger = logging.getLogger(target)

        getattr(logger, level)(message, extra={
            "group": self.logging_group
        })
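Note: LoggingMixin gives any class a logger named after its module and class, plus a per-batch group id that ties related log records together. A brief usage sketch (the Worker class is invented for illustration):

    from documents.loggers import LoggingMixin

    class Worker(LoggingMixin):  # hypothetical consumer of the mixin
        def run(self, item):
            self.renew_logging_group()  # fresh uuid groups this item's log lines
            self.log("info", "processing {}".format(item))
            self.log("debug", "done with {}".format(item))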
@@ -1,250 +0,0 @@
import datetime
import imaplib
import logging
import os
import re
import time
import uuid

from base64 import b64decode
from email import policy
from email.parser import BytesParser
from dateutil import parser

from django.conf import settings

from .models import Correspondent


class MailFetcherError(Exception):
    pass


class InvalidMessageError(MailFetcherError):
    pass


class Loggable(object):

    def __init__(self, group=None):
        self.logger = logging.getLogger(__name__)
        self.logging_group = group or uuid.uuid4()

    def log(self, level, message):
        getattr(self.logger, level)(message, extra={
            "group": self.logging_group
        })


class Message(Loggable):
    """
    A crude, but simple email message class. We assume that there's a subject
    and n attachments, and that we don't care about the message body.
    """

    SECRET = os.getenv("PAPERLESS_EMAIL_SECRET")

    def __init__(self, data, group=None):
        """
        Cribbed heavily from
        https://www.ianlewis.org/en/parsing-email-attachments-python
        """

        Loggable.__init__(self, group=group)

        self.subject = None
        self.time = None
        self.attachment = None

        message = BytesParser(policy=policy.default).parsebytes(data)
        self.subject = str(message["Subject"]).replace("\r\n", "")
        self.body = str(message.get_body())

        self.check_subject()
        self.check_body()

        self._set_time(message)

        self.log("info", 'Importing email: "{}"'.format(self.subject))

        attachments = []
        for part in message.walk():

            content_disposition = part.get("Content-Disposition")
            if not content_disposition:
                continue

            dispositions = content_disposition.strip().split(";")
            if len(dispositions) < 2:
                continue

            if not dispositions[0].lower() == "attachment" and \
                    "filename" not in dispositions[1].lower():
                continue

            file_data = part.get_payload()

            attachments.append(Attachment(
                b64decode(file_data), content_type=part.get_content_type()))

        if len(attachments) == 0:
            raise InvalidMessageError(
                "There don't appear to be any attachments to this message")

        if len(attachments) > 1:
            raise InvalidMessageError(
                "There's more than one attachment to this message. It cannot "
                "be indexed automatically."
            )

        self.attachment = attachments[0]

    def __bool__(self):
        return bool(self.attachment)

    def check_subject(self):
        if self.subject is None:
            raise InvalidMessageError("Message does not have a subject")
        if not Correspondent.SAFE_REGEX.match(self.subject):
            raise InvalidMessageError("Message subject is unsafe: {}".format(
                self.subject))

    def check_body(self):
        if self.SECRET not in self.body:
            raise InvalidMessageError("The secret wasn't in the body")

    def _set_time(self, message):
        self.time = datetime.datetime.now()
        message_time = message.get("Date")
        if message_time:
            try:
                self.time = parser.parse(message_time)
            except (ValueError, AttributeError):
                pass  # We assume that "now" is ok

    @property
    def file_name(self):
        return "{}.{}".format(self.subject, self.attachment.suffix)


class Attachment(object):

    SAFE_SUFFIX_REGEX = re.compile(
        r"^(application/(pdf))|(image/(png|jpeg|gif|tiff))$")

    def __init__(self, data, content_type):

        self.content_type = content_type
        self.data = data
        self.suffix = None

        m = self.SAFE_SUFFIX_REGEX.match(self.content_type)
        if not m:
            raise MailFetcherError(
                "Not-awesome file type: {}".format(self.content_type))
        self.suffix = m.group(2) or m.group(4)

    def read(self):
        return self.data


class MailFetcher(Loggable):

    def __init__(self, consume=settings.CONSUMPTION_DIR):

        Loggable.__init__(self)

        self._connection = None
        self._host = os.getenv("PAPERLESS_CONSUME_MAIL_HOST")
        self._port = os.getenv("PAPERLESS_CONSUME_MAIL_PORT")
        self._username = os.getenv("PAPERLESS_CONSUME_MAIL_USER")
        self._password = os.getenv("PAPERLESS_CONSUME_MAIL_PASS")
        self._inbox = os.getenv("PAPERLESS_CONSUME_MAIL_INBOX", "INBOX")

        self._enabled = bool(self._host)
        if self._enabled and Message.SECRET is None:
            raise MailFetcherError("No PAPERLESS_EMAIL_SECRET defined")

        self.last_checked = time.time()
        self.consume = consume

    def pull(self):
        """
        Fetch all available mail at the target address and store it locally in
        the consumption directory so that the file consumer can pick it up and
        do its thing.
        """

        if self._enabled:

            # Reset the grouping id for each fetch
            self.logging_group = uuid.uuid4()

            self.log("debug", "Checking mail")

            for message in self._get_messages():

                self.log("info", 'Storing email: "{}"'.format(message.subject))

                t = int(time.mktime(message.time.timetuple()))
                file_name = os.path.join(self.consume, message.file_name)
                with open(file_name, "wb") as f:
                    f.write(message.attachment.data)
                    os.utime(file_name, times=(t, t))

        self.last_checked = time.time()

    def _get_messages(self):

        r = []
        try:

            self._connect()
            self._login()

            for message in self._fetch():
                if message:
                    r.append(message)

            self._connection.expunge()
            self._connection.close()
            self._connection.logout()

        except MailFetcherError as e:
            self.log("error", str(e))

        return r

    def _connect(self):
        try:
            self._connection = imaplib.IMAP4_SSL(self._host, self._port)
        except OSError as e:
            msg = "Problem connecting to {}: {}".format(self._host, e.strerror)
            raise MailFetcherError(msg)

    def _login(self):

        login = self._connection.login(self._username, self._password)
        if not login[0] == "OK":
            raise MailFetcherError("Can't log into mail: {}".format(login[1]))

        inbox = self._connection.select(self._inbox)
        if not inbox[0] == "OK":
            raise MailFetcherError("Can't find the inbox: {}".format(inbox[1]))

    def _fetch(self):

        for num in self._connection.search(None, "ALL")[1][0].split():

            __, data = self._connection.fetch(num, "(RFC822)")

            message = None
            try:
                message = Message(data[0][1], self.logging_group)
            except InvalidMessageError as e:
                self.log("error", str(e))
            else:
                self._connection.store(num, "+FLAGS", "\\Deleted")

            if message:
                yield message
@@ -3,11 +3,10 @@ import os

from django.conf import settings
from django.core.management.base import BaseCommand

from watchdog.observers import Observer
from django_q.tasks import async_task
from watchdog.events import FileSystemEventHandler

from documents.consumer import Consumer
from watchdog.observers import Observer
from watchdog.observers.polling import PollingObserver

try:
    from inotify_simple import INotify, flags
@@ -17,17 +16,25 @@ except ImportError:

class Handler(FileSystemEventHandler):

    def __init__(self, consumer):
        self.consumer = consumer
    def _consume(self, file):
        if os.path.isfile(file):
            try:
                async_task("documents.tasks.consume_file", file, task_name=os.path.basename(file))
            except Exception as e:
                # Catch all so that the consumer won't crash.
                logging.getLogger(__name__).error("Error while consuming document: {}".format(e))

    def on_created(self, event):
        self.consumer.try_consume_file(event.src_path)
        self._consume(event.src_path)

    def on_moved(self, event):
        self._consume(event.src_path)


class Command(BaseCommand):
    """
    On every iteration of an infinite loop, consume what we can from the
    consumption directory, and fetch any mail available.
    consumption directory.
    """

    def __init__(self, *args, **kwargs):
@@ -35,12 +42,6 @@ class Command(BaseCommand):
        self.verbosity = 0
        self.logger = logging.getLogger(__name__)

        self.file_consumer = None
        self.mail_fetcher = None
        self.first_iteration = True

        self.consumer = Consumer()

        BaseCommand.__init__(self, *args, **kwargs)

    def add_arguments(self, parser):
@@ -56,9 +57,6 @@ class Command(BaseCommand):
        self.verbosity = options["verbosity"]
        directory = options["directory"]

        for d in (settings.ORIGINALS_DIR, settings.THUMBNAIL_DIR):
            os.makedirs(d, exist_ok=True)

        logging.getLogger(__name__).info(
            "Starting document consumer at {}".format(
                directory
@@ -68,11 +66,16 @@ class Command(BaseCommand):
        # Consume all files as this is not done initially by the watchdog
        for entry in os.scandir(directory):
            if entry.is_file():
                self.consumer.try_consume_file(entry.path)
                async_task("documents.tasks.consume_file", entry.path, task_name=os.path.basename(entry.path))

        # Start the watchdog. Woof!
        observer = Observer()
        event_handler = Handler(self.consumer)
        if settings.CONSUMER_POLLING > 0:
            logging.getLogger(__name__).info('Using polling instead of file'
                                             'system notifications.')
            observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
        else:
            observer = Observer()
        event_handler = Handler()
        observer.schedule(event_handler, directory, recursive=True)
        observer.start()
        try:
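Note: the command now chooses between native filesystem watching and polling at startup. A condensed sketch of that wiring, with a placeholder handler and an assumed consumption path:

    from watchdog.events import FileSystemEventHandler
    from watchdog.observers import Observer
    from watchdog.observers.polling import PollingObserver

    class PrintHandler(FileSystemEventHandler):  # placeholder for the real Handler
        def on_created(self, event):
            print("new file:", event.src_path)

    polling = 0  # stand-in for settings.CONSUMER_POLLING
    observer = PollingObserver(timeout=polling) if polling > 0 else Observer()
    observer.schedule(PrintHandler(), "/tmp/consume", recursive=True)
    # observer.start(); observer.join()  # run these in a real process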
@@ -1,4 +1,5 @@
from django.core.management.base import BaseCommand

from ...mixins import Renderable
from ...tasks import train_classifier

@@ -1,16 +1,15 @@
import json
import os
import time
import shutil
import time

from django.core.management.base import BaseCommand, CommandError
from django.core import serializers
from django.core.management.base import BaseCommand, CommandError

from documents.models import Document, Correspondent, Tag, DocumentType
from paperless.db import GnuPG

from ...mixins import Renderable
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
from paperless.db import GnuPG
from ...mixins import Renderable


class Command(Renderable, BaseCommand):
@@ -3,15 +3,14 @@ import os
import shutil

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError

from documents.models import Document
from paperless.db import GnuPG

from ...mixins import Renderable

from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME
from paperless.db import GnuPG
from ...file_handling import generate_filename, create_source_path_directory
from ...mixins import Renderable


class Command(Renderable, BaseCommand):
@@ -82,6 +81,10 @@ class Command(Renderable, BaseCommand):

    def _import_files_from_manifest(self):

        storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        if settings.PASSPHRASE:
            storage_type = Document.STORAGE_TYPE_GPG

        for record in self.manifest:

            if not record["model"] == "documents.document":
@@ -94,6 +97,14 @@ class Command(Renderable, BaseCommand):
            document_path = os.path.join(self.source, doc_file)
            thumbnail_path = os.path.join(self.source, thumb_file)

            document.storage_type = storage_type
            document.filename = generate_filename(document)

            if os.path.isfile(document.source_path):
                raise FileExistsError(document.source_path)

            create_source_path_directory(document.source_path)

            if settings.PASSPHRASE:

                with open(document_path, "rb") as unencrypted:
@@ -109,18 +120,8 @@ class Command(Renderable, BaseCommand):
                        encrypted.write(GnuPG.encrypted(unencrypted))

            else:

                print("Moving {} to {}".format(document_path, document.source_path))
                shutil.copy(document_path, document.source_path)
                shutil.copy(thumbnail_path, document.thumbnail_path)

        # Reset the storage type to whatever we've used while importing

        storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        if settings.PASSPHRASE:
            storage_type = Document.STORAGE_TYPE_GPG

        Document.objects.filter(
            pk__in=[r["pk"] for r in self.manifest]
        ).update(
            storage_type=storage_type
        )
            document.save()
@@ -8,5 +8,5 @@ class Command(BaseCommand):
    help = "A quick & dirty way to see what's in the logs"

    def handle(self, *args, **options):
        for l in Log.objects.order_by("pk"):
            print(l)
        for log in Log.objects.order_by("pk"):
            print(log)
@@ -1,7 +1,6 @@
from django.core.management.base import BaseCommand

from documents.models import Document, Tag

from documents.models import Document
from ...mixins import Renderable

@@ -9,16 +9,14 @@ def match_correspondents(document_content, classifier):
    correspondents = Correspondent.objects.all()
    predicted_correspondent_id = classifier.predict_correspondent(document_content) if classifier else None

    matched_correspondents = [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id]
    return matched_correspondents
    return [o for o in correspondents if matches(o, document_content) or o.pk == predicted_correspondent_id]


def match_document_types(document_content, classifier):
    document_types = DocumentType.objects.all()
    predicted_document_type_id = classifier.predict_document_type(document_content) if classifier else None

    matched_document_types = [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id]
    return matched_document_types
    return [o for o in document_types if matches(o, document_content) or o.pk == predicted_document_type_id]


def match_tags(document_content, classifier):
@@ -1,7 +1,4 @@
# Generated by Django 3.1.3 on 2020-11-07 12:35
import os

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


@@ -9,11 +9,11 @@ from django_q.tasks import schedule
def add_schedules(apps, schema_editor):
    schedule('documents.tasks.train_classifier', name="Train the classifier", schedule_type=Schedule.HOURLY)
    schedule('documents.tasks.index_optimize', name="Optimize the index", schedule_type=Schedule.DAILY)
    schedule('documents.tasks.consume_mail', name="Check E-Mail", schedule_type=Schedule.MINUTES, minutes=10)


def remove_schedules(apps, schema_editor):
    Schedule.objects.all().delete()
    Schedule.objects.filter(func='documents.tasks.train_classifier').delete()
    Schedule.objects.filter(func='documents.tasks.index_optimize').delete()


class Migration(migrations.Migration):
src/documents/migrations/1002_auto_20201111_1105.py (new file, 18 lines)
@@ -0,0 +1,18 @@
# Generated by Django 3.1.3 on 2020-11-11 11:05

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('documents', '1001_auto_20201109_1636'),
    ]

    operations = [
        migrations.AlterField(
            model_name='document',
            name='filename',
            field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True),
        ),
    ]
@@ -3,18 +3,15 @@

import logging
import os
import re
from collections import OrderedDict, defaultdict
from collections import OrderedDict

import dateutil.parser
from django.conf import settings
from django.db import models
from django.dispatch import receiver
from django.template.defaultfilters import slugify
from django.utils import timezone
from django.utils.text import slugify


class MatchingModel(models.Model):

    MATCH_ANY = 1
@@ -116,6 +113,7 @@ class DocumentType(MatchingModel):

class Document(models.Model):

    # TODO: why do we need an explicit list
    TYPE_PDF = "pdf"
    TYPE_PNG = "png"
    TYPE_JPG = "jpg"
@@ -192,7 +190,7 @@ class Document(models.Model):
        default=timezone.now, editable=False, db_index=True)

    filename = models.FilePathField(
        max_length=256,
        max_length=1024,
        editable=False,
        default=None,
        null=True,
@@ -220,123 +218,18 @@ class Document(models.Model):
            return "{}: {}".format(created, self.correspondent or self.title)
        return str(created)

    def find_renamed_document(self, subdirectory=""):
        suffix = "%07i.%s" % (self.pk, self.file_type)

        # Append .gpg for encrypted files
        if self.storage_type == self.STORAGE_TYPE_GPG:
            suffix += ".gpg"

        # Go up in the directory hierarchy and try to delete all directories
        root = os.path.normpath(Document.filename_to_path(subdirectory))

        for filename in os.listdir(root):
            if filename.endswith(suffix):
                return os.path.join(subdirectory, filename)

            fullname = os.path.join(subdirectory, filename)
            if os.path.isdir(Document.filename_to_path(fullname)):
                return self.find_renamed_document(fullname)

        return None

    @property
    def source_filename(self):
        # Initial filename generation (for new documents)
        if self.filename is None:
            self.filename = self.generate_source_filename()

        # Check if document is still available under filename
        elif not os.path.isfile(Document.filename_to_path(self.filename)):
            recovered_filename = self.find_renamed_document()

            # If we found the file, update the filename
            if recovered_filename is not None:
                logger = logging.getLogger(__name__)
                logger.warning("Filename of document " + str(self.id) +
                               " has changed and was successfully updated")
                self.filename = recovered_filename

                # Remove all empty subdirectories from MEDIA_ROOT
                Document.delete_all_empty_subdirectories(
                    Document.filename_to_path(""))
            else:
                logger = logging.getLogger(__name__)
                logger.error("File of document " + str(self.id) + " has " +
                             "gone and could not be recovered")

        return self.filename

    @staticmethod
    def many_to_dictionary(field):
        # Converts ManyToManyField to dictionary by assuming that field
        # entries contain an _ or - which will be used as a delimiter
        mydictionary = dict()

        for index, t in enumerate(field.all()):
            # Populate tag names by index
            mydictionary[index] = slugify(t.name)

            # Find delimiter
            delimiter = t.name.find('_')

            if delimiter == -1:
                delimiter = t.name.find('-')

            if delimiter == -1:
                continue

            key = t.name[:delimiter]
            value = t.name[delimiter+1:]

            mydictionary[slugify(key)] = slugify(value)

        return mydictionary

    def generate_source_filename(self):
        # Create filename based on configured format
        if settings.PAPERLESS_FILENAME_FORMAT is not None:
            tags = defaultdict(lambda: slugify(None),
                               self.many_to_dictionary(self.tags))
            path = settings.PAPERLESS_FILENAME_FORMAT.format(
                correspondent=slugify(self.correspondent),
                title=slugify(self.title),
                created=slugify(self.created),
                added=slugify(self.added),
                tags=tags)
        else:
            path = ""

        # Always append the primary key to guarantee uniqueness of filename
        if len(path) > 0:
            filename = "%s-%07i.%s" % (path, self.pk, self.file_type)
        else:
            filename = "%07i.%s" % (self.pk, self.file_type)

        # Append .gpg for encrypted files
        if self.storage_type == self.STORAGE_TYPE_GPG:
            filename += ".gpg"

        return filename

    def create_source_directory(self):
        new_filename = self.generate_source_filename()

        # Determine the full "target" path
        dir_new = Document.filename_to_path(os.path.dirname(new_filename))

        # Create new path
        os.makedirs(dir_new, exist_ok=True)

    @property
    def source_path(self):
        return Document.filename_to_path(self.source_filename)
        if self.filename:
            fname = str(self.filename)
        else:
            fname = "{:07}.{}".format(self.pk, self.file_type)
            if self.storage_type == self.STORAGE_TYPE_GPG:
                fname += ".gpg"

    @staticmethod
    def filename_to_path(filename):
        return os.path.join(
            settings.ORIGINALS_DIR,
            filename
            fname
        )

    @property
@@ -362,125 +255,6 @@ class Document(models.Model):
    def thumbnail_file(self):
        return open(self.thumbnail_path, "rb")

    def set_filename(self, filename):
        if os.path.isfile(Document.filename_to_path(filename)):
            self.filename = filename

    @staticmethod
    def try_delete_empty_directories(directory):
        # Go up in the directory hierarchy and try to delete all directories
        directory = os.path.normpath(directory)
        root = os.path.normpath(Document.filename_to_path(""))

        while directory != root:
            # Try to delete the current directory
            try:
                os.rmdir(directory)
            except os.error:
                # Directory not empty, no need to go further up
                return

            # Cut off actual directory and go one level up
            directory, _ = os.path.split(directory)
            directory = os.path.normpath(directory)

    @staticmethod
    def delete_all_empty_subdirectories(directory):
        # Go through all folders and try to delete all directories
        root = os.path.normpath(Document.filename_to_path(directory))

        for filename in os.listdir(root):
            fullname = os.path.join(directory, filename)

            if not os.path.isdir(Document.filename_to_path(fullname)):
                continue

            # Go into subdirectory to see, if there is more to delete
            Document.delete_all_empty_subdirectories(
                os.path.join(directory, filename))

            # Try to delete the directory
            try:
                os.rmdir(Document.filename_to_path(fullname))
                continue
            except os.error:
                # Directory not empty, no need to go further up
                continue


@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@receiver(models.signals.post_save, sender=Document)
def update_filename(sender, instance, **kwargs):
    # Skip if document has not been saved yet
    if instance.filename is None:
        return

    # Check if the file exists and update the filename otherwise
    if not os.path.isfile(Document.filename_to_path(instance.filename)):
        instance.filename = instance.source_filename

    # Build the new filename
    new_filename = instance.generate_source_filename()

    # If the filename is the same, then nothing needs to be done
    if instance.filename == new_filename:
        return

    # Determine the full "target" path
    path_new = instance.filename_to_path(new_filename)
    dir_new = instance.filename_to_path(os.path.dirname(new_filename))

    # Create new path
    instance.create_source_directory()

    # Determine the full "current" path
    path_current = instance.filename_to_path(instance.source_filename)

    # Move file
    try:
        os.rename(path_current, path_new)
    except PermissionError:
        # Do not update filename in object
        return
    except FileNotFoundError:
        logger = logging.getLogger(__name__)
        logger.error("Renaming of document " + str(instance.id) + " failed " +
                     "as file " + instance.filename + " was no longer present")
        return

    # Delete empty directory
    old_dir = os.path.dirname(instance.filename)
    old_path = instance.filename_to_path(old_dir)
    Document.try_delete_empty_directories(old_path)

    instance.filename = new_filename

    # Save instance
    # This will not cause a cascade of post_save signals, as next time
    # nothing needs to be renamed
    instance.save()


@receiver(models.signals.post_delete, sender=Document)
def delete_files(sender, instance, **kwargs):
    if instance.filename is None:
        return

    # Remove the document
    old_file = instance.filename_to_path(instance.filename)

    try:
        os.remove(old_file)
    except FileNotFoundError:
        logger = logging.getLogger(__name__)
        logger.warning("Deleted document " + str(instance.id) + " but file " +
                       old_file + " was no longer present")

    # And remove the directory (if applicable)
    old_dir = os.path.dirname(instance.filename)
    old_path = instance.filename_to_path(old_dir)
    Document.try_delete_empty_directories(old_path)


class Log(models.Model):

@@ -518,7 +292,7 @@ class FileInfo:
            non_separated_word=r"([\w,. ]|([^\s]-))"
        )
    )

    # TODO: what is this used for
    formats = "pdf|jpe?g|png|gif|tiff?|te?xt|md|csv"
    REGEXES = OrderedDict([
        ("created-correspondent-title-tags", re.compile(
@@ -20,13 +20,16 @@ from django.utils import timezone
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
# - MONTH ZZZZ, with ZZZZ being 4 digits
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
from documents.loggers import LoggingMixin
from documents.signals import document_consumer_declaration

# TODO: isn't there a date parsing library for this?

DATE_REGEX = re.compile(
    r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' +  # NOQA: E501
    r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' +  # NOQA: E501
    r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' +  # NOQA: E501
    r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
    r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|'  # NOQA: E501
    r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|'  # NOQA: E501
    r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|'  # NOQA: E501
    r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|'
    r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
)

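# Illustrative sanity check (not part of the diff; DATE_REGEX is assumed to
# be applied with re.search/re.finditer over filenames and document text):
#
#   >>> bool(re.search(DATE_REGEX, "scan_2018-02-13.pdf"))
#   True
#   >>> bool(re.search(DATE_REGEX, "invoice 13. February 2018.pdf"))
#   True
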
@@ -39,17 +42,16 @@ def get_parser_class(doc):
    Determine the appropriate parser class based on the file
    """

    parsers = []
    for response in document_consumer_declaration.send(None):
        parsers.append(response[1])

    # TODO: add a check that checks parser availability.

    options = []
    for parser in parsers:
        result = parser(doc)
        if result:
            options.append(result)

    # His last command was: COME! And they came. All of them. Even the parsers.

    for response in document_consumer_declaration.send(None):
        parser_declaration = response[1]
        parser_test = parser_declaration["test"]

        if parser_test(doc):
            options.append(parser_declaration)

    if not options:
        return None
@@ -59,7 +61,7 @@ def get_parser_class(doc):
        options, key=lambda _: _["weight"], reverse=True)[0]["parser"]

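# Selection note (inferred from the sort above, not new behaviour): every
# declaration carries a "weight", and the highest weight wins, so a
# specialised parser can out-rank a generic one by declaring a larger weight.
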
def run_convert(input, output, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None):
def run_convert(input_file, output_file, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None):
    environment = os.environ.copy()
    if settings.CONVERT_MEMORY_LIMIT:
        environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
@@ -74,7 +76,7 @@ def run_convert(input, output, density=None, scale=None, alpha=None, strip=False
    args += ['-trim'] if trim else []
    args += ['-type', str(type)] if type else []
    args += ['-depth', str(depth)] if depth else []
    args += [input, output]
    args += [input_file, output_file]

    logger.debug("Execute: " + " ".join(args), extra={'group': logging_group})

@@ -100,17 +102,17 @@ class ParseError(Exception):
    pass


class DocumentParser:
class DocumentParser(LoggingMixin):
    """
    Subclass this to make your own parser. Have a look at
    `paperless_tesseract.parsers` for inspiration.
    """

    def __init__(self, path, logging_group, progress_callback):
        super().__init__()
        self.logging_group = logging_group
        self.document_path = path
        self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
        self.logger = logging.getLogger(__name__)
        self.logging_group = logging_group
        self.progress_callback = progress_callback

    def get_thumbnail(self):
@@ -121,16 +123,19 @@ class DocumentParser:

    def optimise_thumbnail(self, in_path):

        out_path = os.path.join(self.tempdir, "optipng.png")
        if settings.OPTIMIZE_THUMBNAILS:
            out_path = os.path.join(self.tempdir, "optipng.png")

        args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path)
            args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path)

        self.log('debug', 'Execute: ' + " ".join(args))
            self.log('debug', 'Execute: ' + " ".join(args))

        if not subprocess.Popen(args).wait() == 0:
            raise ParseError("Optipng failed at {}".format(args))
            if not subprocess.Popen(args).wait() == 0:
                raise ParseError("Optipng failed at {}".format(args))

        return out_path
            return out_path
        else:
            return in_path

    def get_optimised_thumbnail(self):
        return self.optimise_thumbnail(self.get_thumbnail())
@@ -222,11 +227,6 @@ class DocumentParser:

        return date

    def log(self, level, message):
        getattr(self.logger, level)(message, extra={
            "group": self.logging_group
        })

    def cleanup(self):
        self.log("debug", "Deleting directory {}".format(self.tempdir))
        shutil.rmtree(self.tempdir)
@@ -105,7 +105,6 @@ class DocumentSerializer(serializers.ModelSerializer):

class LogSerializer(serializers.ModelSerializer):


    class Meta:
        model = Log
        fields = (
@@ -1,5 +1,5 @@
from django.dispatch import Signal

document_consumption_started = Signal(providing_args=["filename"])
document_consumption_finished = Signal(providing_args=["document"])
document_consumer_declaration = Signal(providing_args=[])
document_consumption_started = Signal()
document_consumption_finished = Signal()
document_consumer_declaration = Signal()
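# Background (hedged, not part of the original diff): Signal's providing_args
# argument was purely documentational; Django deprecated it around 3.0/3.1
# and removed it in 4.0, hence the plain Signal() declarations above.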
@@ -6,9 +6,13 @@ from django.conf import settings
from django.contrib.admin.models import ADDITION, LogEntry
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError
from django.dispatch import receiver
from django.utils import timezone

from .. import index, matching
from ..file_handling import delete_empty_directories, generate_filename, \
    create_source_path_directory
from ..models import Document, Tag


@@ -141,17 +145,65 @@ def run_post_consume_script(sender, document, **kwargs):
    )).wait()


@receiver(models.signals.post_delete, sender=Document)
def cleanup_document_deletion(sender, instance, using, **kwargs):

    if not isinstance(instance, Document):
        return

    for f in (instance.source_path, instance.thumbnail_path):
        try:
            os.unlink(f)
        except FileNotFoundError:
            pass  # The file's already gone, so we're cool with it.

    delete_empty_directories(os.path.dirname(instance.source_path))


@receiver(models.signals.m2m_changed, sender=Document.tags.through)
@receiver(models.signals.post_save, sender=Document)
def update_filename_and_move_files(sender, instance, **kwargs):

    if not instance.filename:
        # Can't update the filename if there is no filename to begin with.
        # This happens after the consumer creates a new document.
        # The PK needs to be set first by saving the document once. When this
        # happens, the file is not yet in the ORIGINALS_DIR, and thus can't be
        # renamed anyway. In all other cases, instance.filename will be set.
        return

    old_filename = instance.filename
    old_path = instance.source_path
    new_filename = generate_filename(instance)

    if new_filename == instance.filename:
        # Don't do anything if it's the same.
        return

    new_path = os.path.join(settings.ORIGINALS_DIR, new_filename)

    if not os.path.isfile(old_path):
        # Can't do anything if the old file does not exist anymore.
        logging.getLogger(__name__).fatal('Document {}: File {} has gone.'.format(str(instance), old_path))
        return

    if os.path.isfile(new_path):
        # Can't do anything if the new file already exists. Skip updating file.
        logging.getLogger(__name__).warning('Document {}: Cannot rename file since target path {} already exists.'.format(str(instance), new_path))
        return

    create_source_path_directory(new_path)

    try:
        os.rename(old_path, new_path)
        instance.filename = new_filename
        instance.save()

    except OSError as e:
        instance.filename = old_filename
    except DatabaseError as e:
        os.rename(new_path, old_path)
        instance.filename = old_filename

    if not os.path.isfile(old_path):
        delete_empty_directories(os.path.dirname(old_path))


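# Reading of the rename logic above (comment added for clarity, no new
# behaviour): the file is renamed on disk first and the model saved second.
# An OSError leaves the database untouched, while a DatabaseError rolls the
# on-disk rename back, so the file and the database never disagree.
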
def set_log_entry(sender, document=None, logging_group=None, **kwargs):

@@ -1,20 +1,15 @@
import logging

from django.conf import settings
from django_q.tasks import async_task, result
from whoosh.writing import AsyncWriter

from documents import index
from documents.classifier import DocumentClassifier, \
    IncompatibleClassifierVersionError
from documents.mail import MailFetcher
from documents.consumer import Consumer, ConsumerError
from documents.models import Document


def consume_mail():
    MailFetcher().pull()


def index_optimize():
    index.open_index().optimize()

@@ -55,3 +50,27 @@ def train_classifier():
        logging.getLogger(__name__).error(
            "Classifier error: " + str(e)
        )


def consume_file(path,
                 override_filename=None,
                 override_title=None,
                 override_correspondent_id=None,
                 override_document_type_id=None,
                 override_tag_ids=None):

    document = Consumer().try_consume_file(
        path,
        override_filename=override_filename,
        override_title=override_title,
        override_correspondent_id=override_correspondent_id,
        override_document_type_id=override_document_type_id,
        override_tag_ids=override_tag_ids)

    if document:
        return "Success. New document id {} created".format(
            document.pk
        )
    else:
        raise ConsumerError("Unknown error: Returned document was null, but "
                            "no error message was given.")
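# Usage sketch (hedged; the exact call site is not in this hunk): with
# django-q, this task would typically be queued as
#
#   async_task("documents.tasks.consume_file", "/path/to/scan.pdf")
#
# which matches the async_task import at the top of this file.
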
File diff suppressed because it is too large
@@ -1,208 +0,0 @@
Return-Path: <sender@example.com>
X-Original-To: sender@mailbox4.mailhost.com
Delivered-To: sender@mailbox4.mailhost.com
Received: from mx8.mailhost.com (mail8.mailhost.com [75.126.24.68])
    by mailbox4.mailhost.com (Postfix) with ESMTP id B62BD5498001
    for <sender@mailbox4.mailhost.com>; Thu, 4 Feb 2016 22:01:17 +0000 (UTC)
Received: from localhost (localhost.localdomain [127.0.0.1])
    by mx8.mailhost.com (Postfix) with ESMTP id B41796F190D
    for <sender@mailbox4.mailhost.com>; Thu, 4 Feb 2016 22:01:17 +0000 (UTC)
X-Spam-Flag: NO
X-Spam-Score: 0
X-Spam-Level:
X-Spam-Status: No, score=0 tagged_above=-999 required=3
    tests=[RCVD_IN_DNSWL_NONE=-0.0001]
Received: from mx8.mailhost.com ([127.0.0.1])
    by localhost (mail8.mailhost.com [127.0.0.1]) (amavisd-new, port 10024)
    with ESMTP id 3cj6d28FXsS3 for <sender@mailbox4.mailhost.com>;
    Thu, 4 Feb 2016 22:01:17 +0000 (UTC)
Received: from smtp.mailhost.com (smtp.mailhost.com [74.55.86.74])
    by mx8.mailhost.com (Postfix) with ESMTP id 527D76F1529
    for <paperless@example.com>; Thu, 4 Feb 2016 22:01:17 +0000 (UTC)
Received: from [10.114.0.19] (nl3x.mullvad.net [46.166.136.162])
    by smtp.mailhost.com (Postfix) with ESMTP id 9C52420C6FDA
    for <paperless@example.com>; Thu, 4 Feb 2016 22:01:16 +0000 (UTC)
To: paperless@example.com
From: Daniel Quinn <sender@example.com>
Subject: Test 0
Message-ID: <56B3CA2A.6030806@example.com>
Date: Thu, 4 Feb 2016 22:01:14 +0000
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101
 Thunderbird/38.5.0
MIME-Version: 1.0
Content-Type: multipart/mixed;
 boundary="------------090701020702030809070008"

This is a multi-part message in MIME format.
--------------090701020702030809070008
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 7bit

The secret word is "paperless" :-)

--------------090701020702030809070008
Content-Type: application/pdf;
 name="test0.pdf"
Content-Transfer-Encoding: base64
Content-Disposition: attachment;
 filename="test0.pdf"

[base64-encoded PDF attachment (test0.pdf) omitted; roughly 160 lines of encoded data in the original fixture]
--------------090701020702030809070008--
src/documents/tests/test_api.py (new file)
@@ -0,0 +1,217 @@
import os
import shutil
import tempfile
from unittest import mock

from django.contrib.auth.models import User
from django.test import override_settings
from rest_framework.test import APITestCase

from documents.models import Document, Correspondent, DocumentType, Tag


class DocumentApiTest(APITestCase):

    def setUp(self):
        self.scratch_dir = tempfile.mkdtemp()
        self.media_dir = tempfile.mkdtemp()
        self.originals_dir = os.path.join(self.media_dir, "documents", "originals")
        self.thumbnail_dir = os.path.join(self.media_dir, "documents", "thumbnails")

        os.makedirs(self.originals_dir, exist_ok=True)
        os.makedirs(self.thumbnail_dir, exist_ok=True)

        override_settings(
            SCRATCH_DIR=self.scratch_dir,
            MEDIA_ROOT=self.media_dir,
            ORIGINALS_DIR=self.originals_dir,
            THUMBNAIL_DIR=self.thumbnail_dir
        ).enable()

        user = User.objects.create_superuser(username="temp_admin")
        self.client.force_login(user=user)

    def tearDown(self):
        shutil.rmtree(self.scratch_dir, ignore_errors=True)
        shutil.rmtree(self.media_dir, ignore_errors=True)

    def testDocuments(self):

        response = self.client.get("/api/documents/").data

        self.assertEqual(response['count'], 0)

        c = Correspondent.objects.create(name="c", pk=41)
        dt = DocumentType.objects.create(name="dt", pk=63)
        tag = Tag.objects.create(name="t", pk=85)

        doc = Document.objects.create(title="WOW", content="the content", correspondent=c, document_type=dt, checksum="123")

        doc.tags.add(tag)

        response = self.client.get("/api/documents/", format='json')
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data['count'], 1)

        returned_doc = response.data['results'][0]
        self.assertEqual(returned_doc['id'], doc.id)
        self.assertEqual(returned_doc['title'], doc.title)
        self.assertEqual(returned_doc['correspondent']['name'], c.name)
        self.assertEqual(returned_doc['document_type']['name'], dt.name)
        self.assertEqual(returned_doc['correspondent']['id'], c.id)
        self.assertEqual(returned_doc['document_type']['id'], dt.id)
        self.assertEqual(returned_doc['correspondent']['id'], returned_doc['correspondent_id'])
        self.assertEqual(returned_doc['document_type']['id'], returned_doc['document_type_id'])
        self.assertEqual(len(returned_doc['tags']), 1)
        self.assertEqual(returned_doc['tags'][0]['name'], tag.name)
        self.assertEqual(returned_doc['tags'][0]['id'], tag.id)
        self.assertListEqual(returned_doc['tags_id'], [tag.id])

        c2 = Correspondent.objects.create(name="c2")

        returned_doc['correspondent_id'] = c2.pk
        returned_doc['title'] = "the new title"

        response = self.client.put('/api/documents/{}/'.format(doc.pk), returned_doc, format='json')

        self.assertEqual(response.status_code, 200)

        doc_after_save = Document.objects.get(id=doc.id)

        self.assertEqual(doc_after_save.correspondent, c2)
        self.assertEqual(doc_after_save.title, "the new title")

        self.client.delete("/api/documents/{}/".format(doc_after_save.pk))

        self.assertEqual(len(Document.objects.all()), 0)

    def test_document_actions(self):

        _, filename = tempfile.mkstemp(dir=self.originals_dir)

        content = b"This is a test"
        content_thumbnail = b"thumbnail content"

        with open(filename, "wb") as f:
            f.write(content)

        doc = Document.objects.create(title="none", filename=os.path.basename(filename), file_type="pdf")

        with open(os.path.join(self.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb") as f:
            f.write(content_thumbnail)

        response = self.client.get('/api/documents/{}/download/'.format(doc.pk))

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, content)

        response = self.client.get('/api/documents/{}/preview/'.format(doc.pk))

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, content)

        response = self.client.get('/api/documents/{}/thumb/'.format(doc.pk))

        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.content, content_thumbnail)

    def test_document_actions_not_existing_file(self):

        doc = Document.objects.create(title="none", filename=os.path.basename("asd"), file_type="pdf")

        response = self.client.get('/api/documents/{}/download/'.format(doc.pk))
        self.assertEqual(response.status_code, 404)

        response = self.client.get('/api/documents/{}/preview/'.format(doc.pk))
        self.assertEqual(response.status_code, 404)

        response = self.client.get('/api/documents/{}/thumb/'.format(doc.pk))
        self.assertEqual(response.status_code, 404)

    def test_document_filters(self):

        doc1 = Document.objects.create(title="none1", checksum="A")
        doc2 = Document.objects.create(title="none2", checksum="B")
        doc3 = Document.objects.create(title="none3", checksum="C")

        tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True)
        tag_2 = Tag.objects.create(name="t2")
        tag_3 = Tag.objects.create(name="t3")

        doc1.tags.add(tag_inbox)
        doc2.tags.add(tag_2)
        doc3.tags.add(tag_2)
        doc3.tags.add(tag_3)

        response = self.client.get("/api/documents/?is_in_inbox=true")
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['id'], doc1.id)

        response = self.client.get("/api/documents/?is_in_inbox=false")
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['id'], doc2.id)
        self.assertEqual(results[1]['id'], doc3.id)

        response = self.client.get("/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id))
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['id'], doc1.id)
        self.assertEqual(results[1]['id'], doc3.id)

        response = self.client.get("/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id))
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['id'], doc3.id)

        response = self.client.get("/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id))
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 0)

        response = self.client.get("/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id))
        self.assertEqual(response.status_code, 200)
        results = response.data['results']
        self.assertEqual(len(results), 3)

    @mock.patch("documents.index.autocomplete")
    def test_search_autocomplete(self, m):
        m.side_effect = lambda ix, term, limit: [term for _ in range(limit)]

        response = self.client.get("/api/search/autocomplete/?term=test")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 10)

        response = self.client.get("/api/search/autocomplete/?term=test&limit=20")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 20)

        response = self.client.get("/api/search/autocomplete/?term=test&limit=-1")
        self.assertEqual(response.status_code, 400)

        response = self.client.get("/api/search/autocomplete/")
        self.assertEqual(response.status_code, 400)

        response = self.client.get("/api/search/autocomplete/?term=")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), 10)

    def test_statistics(self):

        doc1 = Document.objects.create(title="none1", checksum="A")
        doc2 = Document.objects.create(title="none2", checksum="B")
        doc3 = Document.objects.create(title="none3", checksum="C")

        tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True)

        doc1.tags.add(tag_inbox)

        response = self.client.get("/api/statistics/")
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.data['documents_total'], 3)
        self.assertEqual(response.data['documents_inbox'], 1)
@@ -2,9 +2,9 @@ import unittest

from django.test import TestCase

from .factories import DocumentFactory
from ..checks import changed_password_check
from ..models import Document
from .factories import DocumentFactory


class ChecksTestCase(TestCase):
src/documents/tests/test_classifier.py (new file)
@@ -0,0 +1,85 @@
import tempfile

from django.test import TestCase, override_settings

from documents.classifier import DocumentClassifier
from documents.models import Correspondent, Document, Tag, DocumentType


class TestClassifier(TestCase):

    def setUp(self):

        self.classifier = DocumentClassifier()

    def generate_test_data(self):
        self.c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
        self.c2 = Correspondent.objects.create(name="c2")
        self.t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
        self.t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True)
        self.t3 = Tag.objects.create(name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45)
        self.dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)

        self.doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=self.c1, checksum="A", document_type=self.dt)
        self.doc2 = Document.objects.create(title="doc1", content="this is another document, but from c2", correspondent=self.c2, checksum="B")
        self.doc_inbox = Document.objects.create(title="doc235", content="aa", checksum="C")

        self.doc1.tags.add(self.t1)
        self.doc2.tags.add(self.t1)
        self.doc2.tags.add(self.t3)
        self.doc_inbox.tags.add(self.t2)

    def testNoTrainingData(self):
        try:
            self.classifier.train()
        except ValueError as e:
            self.assertEqual(str(e), "No training data available.")
        else:
            self.fail("Should raise exception")

    def testEmpty(self):
        Document.objects.create(title="WOW", checksum="3457", content="ASD")
        self.classifier.train()
        self.assertIsNone(self.classifier.document_type_classifier)
        self.assertIsNone(self.classifier.tags_classifier)
        self.assertIsNone(self.classifier.correspondent_classifier)

        self.assertListEqual(self.classifier.predict_tags(""), [])
        self.assertIsNone(self.classifier.predict_document_type(""))
        self.assertIsNone(self.classifier.predict_correspondent(""))

    def testTrain(self):
        self.generate_test_data()
        self.classifier.train()
        self.assertListEqual(list(self.classifier.correspondent_classifier.classes_), [-1, self.c1.pk])
        self.assertListEqual(list(self.classifier.tags_binarizer.classes_), [self.t1.pk, self.t3.pk])

    def testPredict(self):
        self.generate_test_data()
        self.classifier.train()
        self.assertEqual(self.classifier.predict_correspondent(self.doc1.content), self.c1.pk)
        self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
        self.assertTupleEqual(self.classifier.predict_tags(self.doc1.content), (self.t1.pk,))
        self.assertTupleEqual(self.classifier.predict_tags(self.doc2.content), (self.t1.pk, self.t3.pk))
        self.assertEqual(self.classifier.predict_document_type(self.doc1.content), self.dt.pk)
        self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)

    def testDatasetHashing(self):

        self.generate_test_data()

        self.assertTrue(self.classifier.train())
        self.assertFalse(self.classifier.train())

    @override_settings(DATA_DIR=tempfile.mkdtemp())
    def testSaveClassifier(self):

        self.generate_test_data()

        self.classifier.train()

        self.classifier.save_classifier()

        new_classifier = DocumentClassifier()
        new_classifier.reload()
        self.assertFalse(new_classifier.train())
@@ -1,8 +1,15 @@
import os
import re
import shutil
import tempfile
from unittest import mock
from unittest.mock import MagicMock

from django.test import TestCase
from django.test import TestCase, override_settings

from ..models import FileInfo, Tag
from ..consumer import Consumer, ConsumerError
from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
from ..parsers import DocumentParser, ParseError


class TestAttributes(TestCase):
@@ -394,3 +401,254 @@ class TestFieldPermutations(TestCase):
        self.assertEqual(info.created.year, 2019)
        self.assertEqual(info.created.month, 9)
        self.assertEqual(info.created.day, 8)


class DummyParser(DocumentParser):

    def get_thumbnail(self):
        # not important during tests
        raise NotImplementedError()

    def __init__(self, path, logging_group, scratch_dir):
        super(DummyParser, self).__init__(path, logging_group)
        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)

    def get_optimised_thumbnail(self):
        return self.fake_thumb

    def get_text(self):
        return "The Text"


class FaultyParser(DocumentParser):

    def get_thumbnail(self):
        # not important during tests
        raise NotImplementedError()

    def __init__(self, path, logging_group, scratch_dir):
        super(FaultyParser, self).__init__(path, logging_group)
        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)

    def get_optimised_thumbnail(self):
        return self.fake_thumb

    def get_text(self):
        raise ParseError("Does not compute.")


class TestConsumer(TestCase):

    def make_dummy_parser(self, path, logging_group):
        return DummyParser(path, logging_group, self.scratch_dir)

    def make_faulty_parser(self, path, logging_group):
        return FaultyParser(path, logging_group, self.scratch_dir)

    def setUp(self):
        self.scratch_dir = tempfile.mkdtemp()
        self.media_dir = tempfile.mkdtemp()
        self.consumption_dir = tempfile.mkdtemp()

        override_settings(
            SCRATCH_DIR=self.scratch_dir,
            MEDIA_ROOT=self.media_dir,
            ORIGINALS_DIR=os.path.join(self.media_dir, "documents", "originals"),
            THUMBNAIL_DIR=os.path.join(self.media_dir, "documents", "thumbnails"),
            CONSUMPTION_DIR=self.consumption_dir
        ).enable()

        patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
        m = patcher.start()
        m.return_value = [(None, {
            "parser": self.make_dummy_parser,
            "test": lambda _: True,
            "weight": 0
        })]
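        # The mocked declaration mirrors the dict shape that real
        # document_consumer_declaration handlers return (cf. get_parser_class):
        # "parser" is a factory, "test" a predicate on the file path, and
        # "weight" the tie-breaker between competing parsers.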

        self.addCleanup(patcher.stop)

        self.consumer = Consumer()

    def tearDown(self):
        shutil.rmtree(self.scratch_dir, ignore_errors=True)
        shutil.rmtree(self.media_dir, ignore_errors=True)
        shutil.rmtree(self.consumption_dir, ignore_errors=True)

    def get_test_file(self):
        fd, f = tempfile.mkstemp(suffix=".pdf", dir=self.scratch_dir)
        return f

    def testNormalOperation(self):

        filename = self.get_test_file()
        document = self.consumer.try_consume_file(filename)

        self.assertEqual(document.content, "The Text")
        self.assertEqual(document.title, os.path.splitext(os.path.basename(filename))[0])
        self.assertIsNone(document.correspondent)
        self.assertIsNone(document.document_type)
        self.assertEqual(document.filename, "0000001.pdf")

        self.assertTrue(os.path.isfile(
            document.source_path
        ))

        self.assertTrue(os.path.isfile(
            document.thumbnail_path
        ))

        self.assertFalse(os.path.isfile(filename))

    def testOverrideFilename(self):
        filename = self.get_test_file()
        override_filename = "My Bank - Statement for November.pdf"

        document = self.consumer.try_consume_file(filename, override_filename=override_filename)

        self.assertEqual(document.correspondent.name, "My Bank")
        self.assertEqual(document.title, "Statement for November")

    def testOverrideTitle(self):

        document = self.consumer.try_consume_file(self.get_test_file(), override_title="Override Title")
        self.assertEqual(document.title, "Override Title")

    def testOverrideCorrespondent(self):
        c = Correspondent.objects.create(name="test")

        document = self.consumer.try_consume_file(self.get_test_file(), override_correspondent_id=c.pk)
        self.assertEqual(document.correspondent.id, c.id)

    def testOverrideDocumentType(self):
        dt = DocumentType.objects.create(name="test")

        document = self.consumer.try_consume_file(self.get_test_file(), override_document_type_id=dt.pk)
        self.assertEqual(document.document_type.id, dt.id)

    def testOverrideTags(self):
        t1 = Tag.objects.create(name="t1")
        t2 = Tag.objects.create(name="t2")
        t3 = Tag.objects.create(name="t3")
        document = self.consumer.try_consume_file(self.get_test_file(), override_tag_ids=[t1.id, t3.id])

        self.assertIn(t1, document.tags.all())
        self.assertNotIn(t2, document.tags.all())
        self.assertIn(t3, document.tags.all())

    def testNotAFile(self):
        try:
            self.consumer.try_consume_file("non-existing-file")
        except ConsumerError as e:
            self.assertTrue(str(e).endswith('It is not a file'))
            return

        self.fail("Should throw exception")

    @override_settings(CONSUMPTION_DIR=None)
    def testConsumptionDirUnset(self):
        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
            self.assertEqual(str(e), "The CONSUMPTION_DIR settings variable does not appear to be set.")
            return

        self.fail("Should throw exception")

    @override_settings(CONSUMPTION_DIR="asd")
    def testNoConsumptionDir(self):
        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
            self.assertEqual(str(e), "Consumption directory asd does not exist")
            return

        self.fail("Should throw exception")

    def testDuplicates(self):
        self.consumer.try_consume_file(self.get_test_file())

        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
            self.assertTrue(str(e).endswith("It is a duplicate."))
            return

        self.fail("Should throw exception")

    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def testNoParsers(self, m):
        m.return_value = []

        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
            self.assertTrue(str(e).startswith("No parsers abvailable"))
            return

        self.fail("Should throw exception")

    @mock.patch("documents.parsers.document_consumer_declaration.send")
    def testFaultyParser(self, m):
        m.return_value = [(None, {
            "parser": self.make_faulty_parser,
            "test": lambda _: True,
            "weight": 0
        })]

        try:
            self.consumer.try_consume_file(self.get_test_file())
        except ConsumerError as e:
            self.assertEqual(str(e), "Does not compute.")
            return

        self.fail("Should throw exception.")

    @mock.patch("documents.consumer.Consumer._write")
    def testPostSaveError(self, m):
        filename = self.get_test_file()
        m.side_effect = OSError("NO.")
        try:
            self.consumer.try_consume_file(filename)
        except ConsumerError as e:
            self.assertEqual(str(e), "NO.")
        else:
            self.fail("Should raise exception")

        # file not deleted
        self.assertTrue(os.path.isfile(filename))

        # Database empty
        self.assertEqual(len(Document.objects.all()), 0)

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
    def testFilenameHandling(self):
        filename = self.get_test_file()

        document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs")

        print(document.source_path)
        print("===")

        self.assertEqual(document.title, "new docs")
        self.assertEqual(document.correspondent.name, "Bank")
        self.assertEqual(document.filename, "bank/new-docs-0000001.pdf")

    @mock.patch("documents.consumer.DocumentClassifier")
    def testClassifyDocument(self, m):
        correspondent = Correspondent.objects.create(name="test")
        dtype = DocumentType.objects.create(name="test")
        t1 = Tag.objects.create(name="t1")
        t2 = Tag.objects.create(name="t2")

        m.return_value = MagicMock()
        m.return_value.predict_correspondent.return_value = correspondent.pk
        m.return_value.predict_document_type.return_value = dtype.pk
        m.return_value.predict_tags.return_value = [t1.pk]

        document = self.consumer.try_consume_file(self.get_test_file())

        self.assertEqual(document.correspondent, correspondent)
        self.assertEqual(document.document_type, dtype)
        self.assertIn(t1, document.tags.all())
        self.assertNotIn(t2, document.tags.all())
@@ -1,17 +1,14 @@
import datetime
import os
import shutil
from unittest import mock
from uuid import uuid4
from pathlib import Path
from shutil import rmtree
from uuid import uuid4

from dateutil import tz
from django.conf import settings
from django.test import TestCase, override_settings

from django.utils.text import slugify
from ..models import Tag, Document, Correspondent
from django.conf import settings
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories
from ..models import Document, Correspondent
from ..signals.handlers import update_filename_and_move_files


class TestDate(TestCase):
@@ -31,18 +28,6 @@ class TestDate(TestCase):
        for dirname in self.deletion_list:
            shutil.rmtree(dirname, ignore_errors=True)

    @override_settings(PAPERLESS_FILENAME_FORMAT="")
    def test_source_filename(self):
        document = Document()
        document.file_type = "pdf"
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        self.assertEqual(document.source_filename, "0000001.pdf")

        document.filename = "test.pdf"
        self.assertEqual(document.source_filename, "test.pdf")

    @override_settings(PAPERLESS_FILENAME_FORMAT="")
    def test_generate_source_filename(self):
        document = Document()
@@ -50,58 +35,50 @@ class TestDate(TestCase):
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        self.assertEqual(document.generate_source_filename(), "0000001.pdf")
        self.assertEqual(generate_filename(document), "{:07d}.pdf".format(document.pk))

        document.storage_type = Document.STORAGE_TYPE_GPG
        self.assertEqual(document.generate_source_filename(),
                         "0000001.pdf.gpg")
        self.assertEqual(generate_filename(document),
                         "{:07d}.pdf.gpg".format(document.pk))

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
                       "{correspondent}")
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
    def test_file_renaming(self):
        document = Document()
        document.file_type = "pdf"
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        document.save()

        # Ensure that filename is properly generated
        tmp = document.source_filename
        self.assertEqual(document.generate_source_filename(),
                         "none/none-0000001.pdf")
        document.create_source_directory()
        Path(document.source_path).touch()
        # Test default source_path
        self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/{:07d}.pdf".format(document.pk))

        # Test source_path
        self.assertEqual(document.source_path, settings.MEDIA_ROOT +
                         "/documents/originals/none/none-0000001.pdf")
        document.filename = generate_filename(document)

        # Ensure that filename is properly generated
        self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))

        # Enable encryption and check again
        document.storage_type = Document.STORAGE_TYPE_GPG
        tmp = document.source_filename
        self.assertEqual(document.generate_source_filename(),
                         "none/none-0000001.pdf.gpg")
        document.filename = generate_filename(document)
        self.assertEqual(document.filename,
                         "none/none-{:07d}.pdf.gpg".format(document.pk))

        document.save()

        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/none"), True)
        # test that creating dirs for the source_path creates the correct directory
        create_source_path_directory(document.source_path)
        Path(document.source_path).touch()
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True)

        # Set a correspondent and save the document
        document.correspondent = Correspondent.objects.get_or_create(
            name="test")[0]
        document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
        document.save()

        # Check proper handling of files
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/test"), True)
        self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
                                       "/documents/originals/none"), False)
        self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
                                        "originals/test/test-0000001.pdf.gpg"), True)
        self.assertEqual(document.generate_source_filename(),
                         "test/test-0000001.pdf.gpg")
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
        self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
        self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test-{:07d}.pdf.gpg".format(document.pk)), True)

    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
                       "{correspondent}")
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
    def test_file_renaming_missing_permissions(self):
        document = Document()
        document.file_type = "pdf"
@@ -109,34 +86,67 @@ class TestDate(TestCase):
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
tmp = document.source_filename
|
||||
self.assertEqual(document.generate_source_filename(),
|
||||
"none/none-0000001.pdf")
|
||||
document.create_source_directory()
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename,
|
||||
"none/none-{:07d}.pdf".format(document.pk))
|
||||
create_source_path_directory(document.source_path)
|
||||
Path(document.source_path).touch()
|
||||
|
||||
# Test source_path
|
||||
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
|
||||
"/documents/originals/none/none-0000001.pdf")
|
||||
self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(document.pk))
|
||||
|
||||
# Make the folder read- and execute-only (no writing and no renaming)
|
||||
os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o555)
|
||||
os.chmod(settings.ORIGINALS_DIR + "/none", 0o555)
|
||||
|
||||
# Set a correspondent and save the document
|
||||
document.correspondent = Correspondent.objects.get_or_create(
|
||||
name="test")[0]
|
||||
document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
|
||||
document.save()
|
||||
|
||||
# Check proper handling of files
|
||||
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
|
||||
"originals/none/none-0000001.pdf"), True)
|
||||
self.assertEqual(document.source_filename,
|
||||
"none/none-0000001.pdf")
|
||||
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True)
|
||||
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))
|
||||
|
||||
os.chmod(settings.MEDIA_ROOT + "/documents/originals/none", 0o777)
|
||||
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
|
||||
"{correspondent}")
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
def test_file_renaming_database_error(self):
|
||||
|
||||
document1 = Document.objects.create(file_type="pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA")
|
||||
|
||||
document = Document()
|
||||
document.file_type = "pdf"
|
||||
document.checksum = "BBBBB"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
document.save()
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename,
|
||||
"none/none-{:07d}.pdf".format(document.pk))
|
||||
create_source_path_directory(document.source_path)
|
||||
Path(document.source_path).touch()
|
||||
|
||||
# Test source_path
|
||||
self.assertTrue(os.path.isfile(document.source_path))
|
||||
|
||||
# Set a correspondent and save the document
|
||||
document.correspondent = Correspondent.objects.get_or_create(
|
||||
name="test")[0]
|
||||
|
||||
# This will cause save() to fail.
|
||||
document.checksum = document1.checksum

# Assume saving the document initially works, this gets called.
# After renaming, an error occurs, and filename is not saved:
# document should still be available at document.filename.
update_filename_and_move_files(None, document)

# Check proper handling of files
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/originals/none/none-{:07d}.pdf".format(document.pk)), True)
self.assertEqual(document.filename, "none/none-{:07d}.pdf".format(document.pk))

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_document_delete(self):
document = Document()
document.file_type = "pdf"
@@ -144,21 +154,20 @@ class TestDate(TestCase):
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
document.filename = generate_filename(document)
self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk))

create_source_path_directory(document.source_path)
Path(document.source_path).touch()

# Ensure file deletion after delete
pk = document.pk
document.delete()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none-{:07d}.pdf".format(pk)), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_document_delete_nofile(self):
document = Document()
document.file_type = "pdf"
@@ -167,8 +176,7 @@ class TestDate(TestCase):

document.delete()

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_directory_not_empty(self):
document = Document()
document.file_type = "pdf"
@@ -176,28 +184,24 @@ class TestDate(TestCase):
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
document.filename = generate_filename(document)
self.assertEqual(document.filename,
"none/none-{:07d}.pdf".format(document.pk))

create_source_path_directory(document.source_path)

Path(document.source_path).touch()
Path(document.source_path + "test").touch()
important_file = document.source_path + "test"
Path(important_file).touch()
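# The extra file in the same directory must survive the rename below and
# keep the directory from being deleted during cleanup.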

# Set a correspondent and save the document
document.correspondent = Correspondent.objects.get_or_create(
name="test")[0]
document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
document.save()

# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), True)

# Cleanup
os.remove(settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdftest")
os.rmdir(settings.MEDIA_ROOT + "/documents/originals/none")
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/test"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT + "/documents/originals/none"), True)
self.assertTrue(os.path.isfile(important_file))

@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_underscore(self):
@@ -212,13 +216,8 @@ class TestDate(TestCase):
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"demo-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()

document.delete()
self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk))

@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_with_dash(self):
@@ -233,13 +232,8 @@ class TestDate(TestCase):
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"demo-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()

document.delete()
self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk))

@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
def test_tags_malformed(self):
@@ -254,13 +248,8 @@ class TestDate(TestCase):
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()

document.delete()
self.assertEqual(generate_filename(document),
"none-{:07d}.pdf".format(document.pk))

@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
def test_tags_all(self):
@@ -274,64 +263,25 @@ class TestDate(TestCase):
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"demo-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
self.assertEqual(generate_filename(document),
"demo-{:07d}.pdf".format(document.pk))

document.delete()

@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
def test_tags_out_of_bounds_0(self):
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}")
def test_tags_out_of_bounds(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()

document.delete()

@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[10000000]}")
def test_tags_out_of_bounds_10000000(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
# Add tag to document
document.tags.create(name="demo")
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
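# tags[1] is out of range for a document that has only one tag, so the
# generated filename is expected to fall back to "none".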
self.assertEqual(generate_filename(document),
"none-{:07d}.pdf".format(document.pk))

document.delete()

@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[99]}")
def test_tags_out_of_bounds_99(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()

document.delete()

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}/{correspondent}")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}")
def test_nested_directory_cleanup(self):
document = Document()
document.file_type = "pdf"
@@ -339,153 +289,34 @@ class TestDate(TestCase):
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none/none-0000001.pdf")
document.create_source_directory()
document.filename = generate_filename(document)
self.assertEqual(document.filename, "none/none/none-{:07d}.pdf".format(document.pk))
create_source_path_directory(document.source_path)
Path(document.source_path).touch()

# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none/none"), True)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), True)

pk = document.pk
document.delete()

self.assertEqual(os.path.isfile(settings.MEDIA_ROOT +
"/documents/originals/none/none/none-0000001.pdf"),
False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals"), True)
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none-{:07d}.pdf".format(pk)), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)

@override_settings(PAPERLESS_FILENAME_FORMAT=None)
def test_format_none(self):
document = Document()
document.pk = 1
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()

self.assertEqual(document.generate_source_filename(), "0000001.pdf")

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_renamed(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()

# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")

# Rename the document "illegally"
os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test")
os.rename(settings.MEDIA_ROOT + "/documents/originals/" +
"none/none-0000001.pdf",
settings.MEDIA_ROOT + "/documents/originals/" +
"test/test-0000001.pdf")
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/test/test-0000001.pdf"), True)
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/none/none-0000001.pdf"), False)

# Set new correspondent and expect document to be saved properly
document.correspondent = Correspondent.objects.get_or_create(
name="foo")[0]
document.save()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/foo/foo-0000001.pdf"), True)

# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/foo"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), False)
self.assertEqual(document.generate_source_filename(),
"foo/foo-0000001.pdf")

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_renamed_encrypted(self):
document = Document()
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_GPG
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf.gpg")
document.create_source_directory()
Path(document.source_path).touch()

# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf.gpg")

# Rename the document "illegally"
os.makedirs(settings.MEDIA_ROOT + "/documents/originals/test")
os.rename(settings.MEDIA_ROOT + "/documents/originals/" +
"none/none-0000001.pdf.gpg",
settings.MEDIA_ROOT + "/documents/originals/" +
"test/test-0000001.pdf.gpg")
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/test/test-0000001.pdf.gpg"), True)
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/none/none-0000001.pdf"), False)

# Set new correspondent and expect document to be saved properly
document.correspondent = Correspondent.objects.get_or_create(
name="foo")[0]
document.save()
self.assertEqual(os.path.isfile(settings.MEDIA_ROOT + "/documents/" +
"originals/foo/foo-0000001.pdf.gpg"), True)

# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/foo"), True)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), False)
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/test"), False)
self.assertEqual(document.generate_source_filename(),
"foo/foo-0000001.pdf.gpg")

def test_delete_all_empty_subdirectories(self):
# Create our working directory
tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
os.makedirs(tmp)
self.add_to_deletion_list(tmp)

os.makedirs(os.path.join(tmp, "empty"))
os.makedirs(os.path.join(tmp, "empty", "subdirectory"))

os.makedirs(os.path.join(tmp, "notempty"))
Path(os.path.join(tmp, "notempty", "file")).touch()

Document.delete_all_empty_subdirectories(tmp)

self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
self.assertEqual(os.path.isdir(os.path.join(tmp, "empty")), False)
self.assertEqual(os.path.isfile(
os.path.join(tmp, "notempty", "file")), True)
self.assertEqual(generate_filename(document), "0000001.pdf")

def test_try_delete_empty_directories(self):
# Create our working directory
tmp = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
tmp = os.path.join(settings.ORIGINALS_DIR, "test_delete_empty")
os.makedirs(tmp)
self.add_to_deletion_list(tmp)

@@ -493,67 +324,27 @@ class TestDate(TestCase):
Path(os.path.join(tmp, "notempty", "file")).touch()
os.makedirs(os.path.join(tmp, "notempty", "empty"))

Document.try_delete_empty_directories(
os.path.join(tmp, "notempty", "empty"))
delete_empty_directories(os.path.join(tmp, "notempty", "empty"))
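# Only the empty leaf directory should be removed; "notempty" and the file
# inside it must survive, as the assertions below check.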
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
self.assertEqual(os.path.isfile(
os.path.join(tmp, "notempty", "file")), True)
self.assertEqual(os.path.isdir(
os.path.join(tmp, "notempty", "empty")), False)

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_document_accidentally_deleted(self):
@override_settings(PAPERLESS_FILENAME_FORMAT="{created/[title]")
def test_invalid_format(self):
document = Document()
document.pk = 1
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()
self.assertEqual(generate_filename(document), "0000001.pdf")

# Test source_path
self.assertEqual(document.source_path, settings.MEDIA_ROOT +
"/documents/originals/none/none-0000001.pdf")

# Delete the document "illegally"
os.remove(settings.MEDIA_ROOT + "/documents/originals/" +
"none/none-0000001.pdf")

# Set new correspondent and expect document to be saved properly
document.correspondent = Correspondent.objects.get_or_create(
name="foo")[0]
document.save()

# Check proper handling of files
self.assertEqual(os.path.isdir(settings.MEDIA_ROOT +
"/documents/originals/none"), True)
self.assertEqual(document.source_filename,
"none/none-0000001.pdf")

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/" +
"{correspondent}")
def test_set_filename(self):
@override_settings(PAPERLESS_FILENAME_FORMAT="{created__year}")
def test_invalid_format_key(self):
document = Document()
document.pk = 1
document.file_type = "pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()

# Ensure that filename is properly generated
tmp = document.source_filename
self.assertEqual(document.generate_source_filename(),
"none/none-0000001.pdf")
document.create_source_directory()
Path(document.source_path).touch()

# Set existing filename
document.set_filename(tmp)
self.assertEqual(document.source_filename, "none/none-0000001.pdf")

# Set non-existing filename
document.set_filename("doesnotexist")
self.assertEqual(document.source_filename, "none/none-0000001.pdf")
self.assertEqual(generate_filename(document), "0000001.pdf")

@@ -1,9 +1,8 @@
from django.core.management.base import CommandError
from django.test import TestCase

from ..management.commands.document_importer import Command

from documents.settings import EXPORTER_FILE_NAME
from ..management.commands.document_importer import Command


class TestImporter(TestCase):

@@ -1,6 +1,5 @@
import logging
import uuid

from unittest import mock

from django.test import TestCase

@@ -1,91 +0,0 @@
import base64
import os
import magic

from hashlib import md5
from unittest import mock

from django.conf import settings
from django.test import TestCase

from ..mail import Message, Attachment


class TestMessage(TestCase):

def __init__(self, *args, **kwargs):

TestCase.__init__(self, *args, **kwargs)
self.sample = os.path.join(
settings.BASE_DIR,
"documents",
"tests",
"samples",
"mail.txt"
)

def test_init(self):

with open(self.sample, "rb") as f:

with mock.patch("logging.StreamHandler.emit") as __:
message = Message(f.read())

self.assertTrue(message)
self.assertEqual(message.subject, "Test 0")

data = message.attachment.read()

self.assertEqual(
md5(data).hexdigest(), "7c89655f9e9eb7dd8cde8568e8115d59")

self.assertEqual(
message.attachment.content_type, "application/pdf")
with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
self.assertEqual(m.id_buffer(data), "application/pdf")


class TestInlineMessage(TestCase):

def __init__(self, *args, **kwargs):

TestCase.__init__(self, *args, **kwargs)
self.sample = os.path.join(
settings.BASE_DIR,
"documents",
"tests",
"samples",
"inline_mail.txt"
)

def test_init(self):

with open(self.sample, "rb") as f:

with mock.patch("logging.StreamHandler.emit") as __:
message = Message(f.read())

self.assertTrue(message)
self.assertEqual(message.subject, "Paperless Inline Image")

data = message.attachment.read()

self.assertEqual(
md5(data).hexdigest(), "30c00a7b42913e65f7fdb0be40b9eef3")

self.assertEqual(
message.attachment.content_type, "image/png")
with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
self.assertEqual(m.id_buffer(data), "image/png")


class TestAttachment(TestCase):

def test_init(self):
data = base64.encodebytes(b"0")
self.assertEqual(Attachment(data, "application/pdf").suffix, "pdf")
self.assertEqual(Attachment(data, "image/png").suffix, "png")
self.assertEqual(Attachment(data, "image/jpeg").suffix, "jpeg")
self.assertEqual(Attachment(data, "image/gif").suffix, "gif")
self.assertEqual(Attachment(data, "image/tiff").suffix, "tiff")
self.assertEqual(Attachment(data, "image/png").read(), data)
@@ -1,7 +1,7 @@
from django.test import TestCase

from ..models import Document, Correspondent
from .factories import DocumentFactory, CorrespondentFactory
from ..models import Document, Correspondent


class CorrespondentTestCase(TestCase):

@@ -14,7 +14,7 @@ class TestParserDiscovery(TestCase):
pass

m.return_value = (
(None, lambda _: {"weight": 0, "parser": DummyParser}),
(None, {"weight": 0, "parser": DummyParser, "test": lambda _: True}),
)

self.assertEqual(
@@ -32,8 +32,8 @@ class TestParserDiscovery(TestCase):
pass

m.return_value = (
(None, lambda _: {"weight": 0, "parser": DummyParser1}),
(None, lambda _: {"weight": 1, "parser": DummyParser2}),
(None, {"weight": 0, "parser": DummyParser1, "test": lambda _: True}),
(None, {"weight": 1, "parser": DummyParser2, "test": lambda _: True}),
)

self.assertEqual(
@@ -43,7 +43,7 @@ class TestParserDiscovery(TestCase):

@mock.patch("documents.parsers.document_consumer_declaration.send")
def test__get_parser_class_0_parsers(self, m, *args):
m.return_value = ((None, lambda _: None),)
m.return_value = []
with TemporaryDirectory() as tmpdir:
self.assertIsNone(
get_parser_class("doc.pdf")

@@ -1,14 +1,9 @@
from django.db.models import Count, Max
from django.http import HttpResponse, HttpResponseBadRequest
from django.http import HttpResponse, HttpResponseBadRequest, Http404
from django.views.decorators.cache import cache_control
from django.views.generic import TemplateView
from django_filters.rest_framework import DjangoFilterBackend
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.views import APIView

from paperless.db import GnuPG
from paperless.views import StandardPagination
from rest_framework.filters import OrderingFilter, SearchFilter
from rest_framework.mixins import (
DestroyModelMixin,
@@ -17,12 +12,17 @@ from rest_framework.mixins import (
UpdateModelMixin
)
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework.views import APIView
from rest_framework.viewsets import (
GenericViewSet,
ModelViewSet,
ReadOnlyModelViewSet
)

import documents.index as index
from paperless.db import GnuPG
from paperless.views import StandardPagination
from .filters import (
CorrespondentFilterSet,
DocumentFilterSet,
@@ -30,8 +30,6 @@ from .filters import (
DocumentTypeFilterSet,
LogFilterSet
)

import documents.index as index
from .forms import UploadForm
from .models import Correspondent, Document, Log, Tag, DocumentType
from .serialisers import (
@@ -54,7 +52,7 @@ class CorrespondentViewSet(ModelViewSet):
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, OrderingFilter)
filter_class = CorrespondentFilterSet
filterset_class = CorrespondentFilterSet
ordering_fields = ("name", "matching_algorithm", "match", "document_count", "last_correspondence")


@@ -65,7 +63,7 @@ class TagViewSet(ModelViewSet):
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, OrderingFilter)
filter_class = TagFilterSet
filterset_class = TagFilterSet
ordering_fields = ("name", "matching_algorithm", "match", "document_count")


@@ -76,7 +74,7 @@ class DocumentTypeViewSet(ModelViewSet):
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, OrderingFilter)
filter_class = DocumentTypeFilterSet
filterset_class = DocumentTypeFilterSet
ordering_fields = ("name", "matching_algorithm", "match", "document_count")


@@ -91,7 +89,7 @@ class DocumentViewSet(RetrieveModelMixin,
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, SearchFilter, OrderingFilter)
filter_class = DocumentFilterSet
filterset_class = DocumentFilterSet
search_fields = ("title", "correspondent__name", "content")
ordering_fields = (
"id", "title", "correspondent__name", "document_type__name", "created", "modified", "added", "archive_serial_number")
@@ -106,7 +104,7 @@ class DocumentViewSet(RetrieveModelMixin,
return super(DocumentViewSet, self).destroy(request, *args, **kwargs)

def file_response(self, pk, disposition):
#TODO: this should not be necessary here.
# TODO: this should not be necessary here.
content_types = {
Document.TYPE_PDF: "application/pdf",
Document.TYPE_PNG: "image/png",
@@ -114,7 +112,7 @@ class DocumentViewSet(RetrieveModelMixin,
Document.TYPE_GIF: "image/gif",
Document.TYPE_TIF: "image/tiff",
Document.TYPE_CSV: "text/csv",
Document.TYPE_MD: "text/markdown",
Document.TYPE_TXT: "text/plain"
}

@@ -132,7 +130,7 @@ class DocumentViewSet(RetrieveModelMixin,

@action(methods=['post'], detail=False)
def post_document(self, request, pk=None):
#TODO: is this a good implementation?
# TODO: is this a good implementation?
form = UploadForm(data=request.POST, files=request.FILES)
if form.is_valid():
form.save()
@@ -142,17 +140,26 @@ class DocumentViewSet(RetrieveModelMixin,

@action(methods=['get'], detail=True)
def preview(self, request, pk=None):
response = self.file_response(pk, "inline")
return response
try:
response = self.file_response(pk, "inline")
return response
except FileNotFoundError:
raise Http404("Document source file does not exist")

@action(methods=['get'], detail=True)
@cache_control(public=False, max_age=315360000)
def thumb(self, request, pk=None):
return HttpResponse(Document.objects.get(id=pk).thumbnail_file, content_type='image/png')
try:
return HttpResponse(Document.objects.get(id=pk).thumbnail_file, content_type='image/png')
except FileNotFoundError:
raise Http404("Document thumbnail does not exist")

@action(methods=['get'], detail=True)
def download(self, request, pk=None):
return self.file_response(pk, "attachment")
try:
return self.file_response(pk, "attachment")
except FileNotFoundError:
raise Http404("Document source file does not exist")


class LogViewSet(ReadOnlyModelViewSet):
@@ -163,7 +170,7 @@ class LogViewSet(ReadOnlyModelViewSet):
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, OrderingFilter)
filter_class = LogFilterSet
filterset_class = LogFilterSet
ordering_fields = ("created",)


@@ -191,13 +198,12 @@ class SearchView(APIView):
except (ValueError, TypeError):
page = 1

result_page = index.query_page(self.ix, query, page)

return Response(
{'count': len(result_page),
'page': result_page.pagenum,
'page_count': result_page.pagecount,
'results': list(map(self.add_infos_to_hit, result_page))})
with index.query_page(self.ix, query, page) as result_page:
return Response(
{'count': len(result_page),
'page': result_page.pagenum,
'page_count': result_page.pagecount,
'results': list(map(self.add_infos_to_hit, result_page))})
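# (query_page is used as a context manager here, presumably so that the
# underlying search resources are released once the hits are serialized.)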

else:
return Response({
@@ -217,17 +223,16 @@ class SearchAutoCompleteView(APIView):
if 'term' in request.query_params:
term = request.query_params['term']
else:
term = None
return HttpResponseBadRequest("Term required")

if 'limit' in request.query_params:
limit = int(request.query_params['limit'])
if limit <= 0:
return HttpResponseBadRequest("Invalid limit")
else:
limit = 10

if term is not None:
return Response(index.autocomplete(self.ix, term, limit))
else:
return Response([])
return Response(index.autocomplete(self.ix, term, limit))


class StatisticsView(APIView):