Merge branch 'dev' into feature-websockets-status

jonaswinkler
2021-01-23 22:22:17 +01:00
163 changed files with 5843 additions and 2520 deletions

View File

@@ -91,6 +91,11 @@ class Consumer(LoggingMixin):
if not settings.PRE_CONSUME_SCRIPT:
return
if not os.path.isfile(settings.PRE_CONSUME_SCRIPT):
raise ConsumerError(
f"Configured pre-consume script "
f"{settings.PRE_CONSUME_SCRIPT} does not exist.")
try:
Popen((settings.PRE_CONSUME_SCRIPT, self.path)).wait()
except Exception as e:
@@ -102,6 +107,11 @@ class Consumer(LoggingMixin):
if not settings.POST_CONSUME_SCRIPT:
return
if not os.path.isfile(settings.POST_CONSUME_SCRIPT):
raise ConsumerError(
f"Configured post-consume script "
f"{settings.POST_CONSUME_SCRIPT} does not exist.")
try:
Popen((
settings.POST_CONSUME_SCRIPT,
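
The new guards mean a misconfigured script path now fails consumption up front instead of failing silently inside Popen. For reference, the Popen call above hands the document path to the script as its first argument; a minimal pre-consume script could therefore look like this (everything beyond the argument layout is illustrative):

#!/usr/bin/env python3
# Sketch of a pre-consume script: paperless invokes it as
# Popen((script, path)), so the file about to be consumed is argv[1].
import sys

document_path = sys.argv[1]
print(f"About to consume {document_path}")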

View File

@@ -91,7 +91,7 @@ def generate_unique_filename(doc, root):
return new_filename
def generate_filename(doc, counter=0):
def generate_filename(doc, counter=0, append_gpg=True):
path = ""
try:
@@ -151,7 +151,7 @@ def generate_filename(doc, counter=0):
filename = f"{doc.pk:07}{counter_str}{doc.file_type}"
# Append .gpg for encrypted files
if doc.storage_type == doc.STORAGE_TYPE_GPG:
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
filename += ".gpg"
return filename
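
The new append_gpg flag lets callers ask for the undecorated filename even when the document is stored GPG-encrypted; the exporter below uses it because it writes decrypted copies. A hedged sketch of the two call styles (doc stands for any Document with storage_type == STORAGE_TYPE_GPG):

from documents.file_handling import generate_filename

name = generate_filename(doc)                     # e.g. "0000004.pdf.gpg"
plain = generate_filename(doc, append_gpg=False)  # e.g. "0000004.pdf"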

View File

@@ -11,6 +11,7 @@ from django import db
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db import transaction
from filelock import FileLock
from whoosh.writing import AsyncWriter
from documents.models import Document
@@ -47,8 +48,10 @@ def handle_document(document_id):
archive_checksum=checksum,
content=parser.get_text()
)
create_source_path_directory(document.archive_path)
shutil.move(parser.get_archive_path(), document.archive_path)
with FileLock(settings.MEDIA_LOCK):
create_source_path_directory(document.archive_path)
shutil.move(parser.get_archive_path(),
document.archive_path)
with AsyncWriter(index.open_index()) as writer:
index.update_document(writer, document)
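
The FileLock wrapper serializes filesystem changes in the media directory across processes: a second worker acquiring the same lock file blocks until the first releases it. A minimal sketch of the pattern (the lock path stands in for settings.MEDIA_LOCK):

from filelock import FileLock

with FileLock("/tmp/media.lock"):
    # Moves and directory creation in here cannot interleave with
    # other workers that acquire the same lock file.
    ...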

View File

@@ -5,7 +5,6 @@ from time import sleep
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.utils.text import slugify
from django_q.tasks import async_task
from watchdog.events import FileSystemEventHandler
from watchdog.observers.polling import PollingObserver
@@ -46,7 +45,7 @@ def _consume(filepath):
return
if not is_file_ext_supported(os.path.splitext(filepath)[1]):
logger.debug(
logger.warning(
f"Not consuming file {filepath}: Unknown file extension.")
return

View File

@@ -1,15 +1,21 @@
import hashlib
import json
import os
import shutil
import time
import tqdm
from django.conf import settings
from django.core import serializers
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from filelock import FileLock
from documents.models import Document, Correspondent, Tag, DocumentType
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
EXPORTER_ARCHIVE_NAME
from paperless.db import GnuPG
from ...file_handling import generate_filename, delete_empty_directories
from ...mixins import Renderable
@@ -24,13 +30,47 @@ class Command(Renderable, BaseCommand):
def add_arguments(self, parser):
parser.add_argument("target")
parser.add_argument(
"-c", "--compare-checksums",
default=False,
action="store_true",
help="Compare file checksums when determining whether to export "
"a file or not. If not specified, file size and time "
"modified is used instead."
)
parser.add_argument(
"-f", "--use-filename-format",
default=False,
action="store_true",
help="Use PAPERLESS_FILENAME_FORMAT for storing files in the "
"export directory, if configured."
)
parser.add_argument(
"-d", "--delete",
default=False,
action="store_true",
help="After exporting, delete files in the export directory that "
"do not belong to the current export, such as files from "
"deleted documents."
)
def __init__(self, *args, **kwargs):
BaseCommand.__init__(self, *args, **kwargs)
self.target = None
self.files_in_export_dir = []
self.exported_files = []
self.compare_checksums = False
self.use_filename_format = False
self.delete = False
def handle(self, *args, **options):
self.target = options["target"]
self.compare_checksums = options['compare_checksums']
self.use_filename_format = options['use_filename_format']
self.delete = options['delete']
if not os.path.exists(self.target):
raise CommandError("That path doesn't exist")
@@ -38,83 +78,148 @@ class Command(Renderable, BaseCommand):
if not os.access(self.target, os.W_OK):
raise CommandError("That path doesn't appear to be writable")
if os.listdir(self.target):
raise CommandError("That directory is not empty.")
self.dump()
with FileLock(settings.MEDIA_LOCK):
self.dump()
def dump(self):
# 1. Take a snapshot of what files exist in the current export folder
for root, dirs, files in os.walk(self.target):
self.files_in_export_dir.extend(
map(lambda f: os.path.abspath(os.path.join(root, f)), files)
)
documents = Document.objects.all()
document_map = {d.pk: d for d in documents}
manifest = json.loads(serializers.serialize("json", documents))
# 2. Create manifest, containing all correspondents, types, tags and
# documents
with transaction.atomic():
manifest = json.loads(
serializers.serialize("json", Correspondent.objects.all()))
for index, document_dict in enumerate(manifest):
manifest += json.loads(serializers.serialize(
"json", Tag.objects.all()))
# Force output to unencrypted as that will be the current state.
# The importer will make the decision to encrypt or not.
manifest[index]["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501
manifest += json.loads(serializers.serialize(
"json", DocumentType.objects.all()))
documents = Document.objects.order_by("id")
document_map = {d.pk: d for d in documents}
document_manifest = json.loads(
serializers.serialize("json", documents))
manifest += document_manifest
# 3. Export files from each document
for index, document_dict in tqdm.tqdm(enumerate(document_manifest),
total=len(document_manifest)):
# 3.1. store files unencrypted
document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501
document = document_map[document_dict["pk"]]
print(f"Exporting: {document}")
# 3.2. generate a unique filename
filename_counter = 0
while True:
original_name = document.get_public_filename(
counter=filename_counter)
original_target = os.path.join(self.target, original_name)
if self.use_filename_format:
base_name = generate_filename(
document, counter=filename_counter,
append_gpg=False)
else:
base_name = document.get_public_filename(
counter=filename_counter)
if not os.path.exists(original_target):
if base_name not in self.exported_files:
self.exported_files.append(base_name)
break
else:
filename_counter += 1
thumbnail_name = original_name + "-thumbnail.png"
thumbnail_target = os.path.join(self.target, thumbnail_name)
# 3.3. write filenames into manifest
original_name = base_name
original_target = os.path.join(self.target, original_name)
document_dict[EXPORTER_FILE_NAME] = original_name
thumbnail_name = base_name + "-thumbnail.png"
thumbnail_target = os.path.join(self.target, thumbnail_name)
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
if os.path.exists(document.archive_path):
archive_name = document.get_public_filename(
archive=True, counter=filename_counter, suffix="_archive")
archive_name = base_name + "-archive.pdf"
archive_target = os.path.join(self.target, archive_name)
document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
else:
archive_target = None
# 3.4. write files to target folder
t = int(time.mktime(document.created.timetuple()))
if document.storage_type == Document.STORAGE_TYPE_GPG:
os.makedirs(os.path.dirname(original_target), exist_ok=True)
with open(original_target, "wb") as f:
f.write(GnuPG.decrypted(document.source_file))
os.utime(original_target, times=(t, t))
os.makedirs(os.path.dirname(thumbnail_target), exist_ok=True)
with open(thumbnail_target, "wb") as f:
f.write(GnuPG.decrypted(document.thumbnail_file))
os.utime(thumbnail_target, times=(t, t))
if archive_target:
os.makedirs(os.path.dirname(archive_target), exist_ok=True)
with open(archive_target, "wb") as f:
f.write(GnuPG.decrypted(document.archive_path))
os.utime(archive_target, times=(t, t))
else:
self.check_and_copy(document.source_path,
document.checksum,
original_target)
shutil.copy(document.source_path, original_target)
shutil.copy(document.thumbnail_path, thumbnail_target)
self.check_and_copy(document.thumbnail_path,
None,
thumbnail_target)
if archive_target:
shutil.copy(document.archive_path, archive_target)
self.check_and_copy(document.archive_path,
document.archive_checksum,
archive_target)
manifest += json.loads(
serializers.serialize("json", Correspondent.objects.all()))
# 4. Write manifest to target folder
manifest_path = os.path.abspath(
os.path.join(self.target, "manifest.json"))
manifest += json.loads(serializers.serialize(
"json", Tag.objects.all()))
manifest += json.loads(serializers.serialize(
"json", DocumentType.objects.all()))
with open(os.path.join(self.target, "manifest.json"), "w") as f:
with open(manifest_path, "w") as f:
json.dump(manifest, f, indent=2)
if self.delete:
# 5. Remove files which we did not explicitly export in this run
if manifest_path in self.files_in_export_dir:
self.files_in_export_dir.remove(manifest_path)
for f in self.files_in_export_dir:
os.remove(f)
delete_empty_directories(os.path.abspath(os.path.dirname(f)),
os.path.abspath(self.target))
def check_and_copy(self, source, source_checksum, target):
if os.path.abspath(target) in self.files_in_export_dir:
self.files_in_export_dir.remove(os.path.abspath(target))
perform_copy = False
if os.path.exists(target):
source_stat = os.stat(source)
target_stat = os.stat(target)
if self.compare_checksums and source_checksum:
with open(target, "rb") as f:
target_checksum = hashlib.md5(f.read()).hexdigest()
perform_copy = target_checksum != source_checksum
elif source_stat.st_mtime != target_stat.st_mtime:
perform_copy = True
elif source_stat.st_size != target_stat.st_size:
perform_copy = True
else:
# Copy if it does not exist
perform_copy = True
if perform_copy:
os.makedirs(os.path.dirname(target), exist_ok=True)
shutil.copy2(source, target)
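
Taken together, these changes make the exporter incremental and safe to re-run against the same target: check_and_copy skips files whose mtime and size (or checksum, with --compare-checksums) are unchanged, and --delete prunes anything the current run did not export. A hedged invocation sketch via Django's call_command, mirroring the tests further down:

from django.core.management import call_command

# Re-export into an existing target, comparing checksums and
# removing files left over from deleted documents.
call_command("document_exporter", "/path/to/export",
             "--compare-checksums", "--delete")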

View File

@@ -148,10 +148,10 @@ class Command(Renderable, BaseCommand):
create_source_path_directory(document.source_path)
shutil.copy(document_path, document.source_path)
shutil.copy(thumbnail_path, document.thumbnail_path)
shutil.copy2(document_path, document.source_path)
shutil.copy2(thumbnail_path, document.thumbnail_path)
if archive_path:
create_source_path_directory(document.archive_path)
shutil.copy(archive_path, document.archive_path)
shutil.copy2(archive_path, document.archive_path)
document.save()
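
The switch from copy to copy2 matters for the new incremental exporter: copy2 also copies metadata, notably st_mtime, so a document imported and later re-exported does not look "changed" to the mtime comparison. A small illustration:

import os
import shutil

shutil.copy("a.pdf", "b.pdf")   # b.pdf gets a fresh mtime
shutil.copy2("a.pdf", "c.pdf")  # c.pdf keeps a.pdf's mtime
print(os.stat("a.pdf").st_mtime == os.stat("c.pdf").st_mtime)  # True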

View File

@@ -13,8 +13,14 @@ from ...parsers import get_parser_class_for_mime_type
def _process_document(doc_in):
document = Document.objects.get(id=doc_in)
parser = get_parser_class_for_mime_type(document.mime_type)(
logging_group=None)
parser_class = get_parser_class_for_mime_type(document.mime_type)
if parser_class:
parser = parser_class(logging_group=None)
else:
print(f"{document} No parser for mime type {document.mime_type}")
return
try:
thumb = parser.get_optimised_thumbnail(
document.source_path, document.mime_type)

View File

@@ -1,3 +1,4 @@
import logging
import re
from fuzzywuzzy import fuzz
@@ -5,49 +6,59 @@ from fuzzywuzzy import fuzz
from documents.models import MatchingModel, Correspondent, DocumentType, Tag
def match_correspondents(document_content, classifier):
logger = logging.getLogger(__name__)
def log_reason(matching_model, document, reason):
class_name = type(matching_model).__name__
logger.debug(
f"Assigning {class_name} {matching_model.name} to document "
f"{document} because {reason}")
def match_correspondents(document, classifier):
if classifier:
pred_id = classifier.predict_correspondent(document_content)
pred_id = classifier.predict_correspondent(document.content)
else:
pred_id = None
correspondents = Correspondent.objects.all()
return list(filter(
lambda o: matches(o, document_content) or o.pk == pred_id,
lambda o: matches(o, document) or o.pk == pred_id,
correspondents))
def match_document_types(document_content, classifier):
def match_document_types(document, classifier):
if classifier:
pred_id = classifier.predict_document_type(document_content)
pred_id = classifier.predict_document_type(document.content)
else:
pred_id = None
document_types = DocumentType.objects.all()
return list(filter(
lambda o: matches(o, document_content) or o.pk == pred_id,
lambda o: matches(o, document) or o.pk == pred_id,
document_types))
def match_tags(document_content, classifier):
def match_tags(document, classifier):
if classifier:
predicted_tag_ids = classifier.predict_tags(document_content)
predicted_tag_ids = classifier.predict_tags(document.content)
else:
predicted_tag_ids = []
tags = Tag.objects.all()
return list(filter(
lambda o: matches(o, document_content) or o.pk in predicted_tag_ids,
lambda o: matches(o, document) or o.pk in predicted_tag_ids,
tags))
def matches(matching_model, document_content):
def matches(matching_model, document):
search_kwargs = {}
document_content = document_content.lower()
document_content = document.content.lower()
# Check that match is not empty
if matching_model.match.strip() == "":
@@ -62,26 +73,54 @@ def matches(matching_model, document_content):
rf"\b{word}\b", document_content, **search_kwargs)
if not search_result:
return False
log_reason(
matching_model, document,
f"it contains all of these words: {matching_model.match}"
)
return True
elif matching_model.matching_algorithm == MatchingModel.MATCH_ANY:
for word in _split_match(matching_model):
if re.search(rf"\b{word}\b", document_content, **search_kwargs):
log_reason(
matching_model, document,
f"it contains this word: {word}"
)
return True
return False
elif matching_model.matching_algorithm == MatchingModel.MATCH_LITERAL:
return bool(re.search(
result = bool(re.search(
rf"\b{matching_model.match}\b",
document_content,
**search_kwargs
))
if result:
log_reason(
matching_model, document,
f"it contains this string: \"{matching_model.match}\""
)
return result
elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
return bool(re.search(
re.compile(matching_model.match, **search_kwargs),
document_content
))
try:
match = re.search(
re.compile(matching_model.match, **search_kwargs),
document_content
)
except re.error:
logger.error(
f"Error while processing regular expression "
f"{matching_model.match}"
)
return False
if match:
log_reason(
matching_model, document,
f"the string {match.group()} matches the regular expression "
f"{matching_model.match}"
)
return bool(match)
elif matching_model.matching_algorithm == MatchingModel.MATCH_FUZZY:
match = re.sub(r'[^\w\s]', '', matching_model.match)
@@ -89,8 +128,16 @@ def matches(matching_model, document_content):
if matching_model.is_insensitive:
match = match.lower()
text = text.lower()
return fuzz.partial_ratio(match, text) >= 90
if fuzz.partial_ratio(match, text) >= 90:
# TODO: make this better
log_reason(
matching_model, document,
f"parts of the document content somehow match the string "
f"{matching_model.match}"
)
return True
else:
return False
elif matching_model.matching_algorithm == MatchingModel.MATCH_AUTO:
# this is done elsewhere.
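
Match patterns are user input, so the regex branch now compiles inside a try/except re.error instead of letting an invalid pattern crash matching. The guarded pattern in isolation, as a sketch (names are illustrative):

import logging
import re

logger = logging.getLogger(__name__)

def safe_regex_search(pattern, content, flags=re.IGNORECASE):
    try:
        # re.compile raises re.error for syntactically invalid patterns.
        return re.search(re.compile(pattern, flags), content)
    except re.error:
        logger.error(
            f"Error while processing regular expression {pattern}")
        return None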

View File

@@ -12,6 +12,7 @@ from django.conf import settings
from django.contrib.auth.models import User
from django.db import models
from django.utils import timezone
from django.utils.timezone import is_aware
from django.utils.translation import gettext_lazy as _
@@ -62,12 +63,6 @@ class MatchingModel(models.Model):
def __str__(self):
return self.name
def save(self, *args, **kwargs):
self.match = self.match.lower()
models.Model.save(self, *args, **kwargs)
class Correspondent(MatchingModel):
@@ -233,7 +228,10 @@ class Document(models.Model):
verbose_name_plural = _("documents")
def __str__(self):
created = datetime.date.isoformat(self.created)
if is_aware(self.created):
created = timezone.localdate(self.created).isoformat()
else:
created = datetime.date.isoformat(self.created)
if self.correspondent and self.title:
return f"{created} {self.correspondent} {self.title}"
else:
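
The motivation: calling datetime.date.isoformat on an aware datetime uses its stored (usually UTC) date, which can be off by a day for users in other time zones; timezone.localdate converts to the active zone first. A quick sketch (zone and times are illustrative):

import datetime
from django.utils import timezone

aware = datetime.datetime(2021, 1, 1, 23, 30,
                          tzinfo=datetime.timezone.utc)
print(aware.date().isoformat())               # 2021-01-01
# With TIME_ZONE = "Europe/Berlin" (UTC+1) the local date rolls over:
print(timezone.localdate(aware).isoformat())  # 2021-01-02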

View File

@@ -210,6 +210,13 @@ def parse_date(filename, text):
}
)
def __filter(date):
if date and date.year > 1900 and \
date <= timezone.now() and \
date.date() not in settings.IGNORE_DATES:
return date
return None
date = None
# if filename date parsing is enabled, search there first:
@@ -223,7 +230,8 @@ def parse_date(filename, text):
# Skip all matches that do not parse to a proper date
continue
if date and date.year > 1900 and date <= timezone.now():
date = __filter(date)
if date is not None:
return date
# Iterate through all regex matches in text and try to parse the date
@@ -236,10 +244,9 @@ def parse_date(filename, text):
# Skip all matches that do not parse to a proper date
continue
if date and date.year > 1900 and date <= timezone.now():
date = __filter(date)
if date is not None:
break
else:
date = None
return date
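
Both search loops now funnel every candidate through the same __filter, which also consults the new settings.IGNORE_DATES (exercised by the test further down). A hedged sketch of what the filter accepts, assuming IGNORE_DATES is an iterable of datetime.date:

import datetime
from django.utils import timezone

IGNORE_DATES = (datetime.date(2019, 11, 3),)  # stand-in for the setting

def accepts(date):
    # Mirrors __filter: plausible year, not in the future,
    # not explicitly ignored.
    return bool(date and date.year > 1900
                and date <= timezone.now()
                and date.date() not in IGNORE_DATES)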

View File

@@ -382,13 +382,6 @@ class PostDocumentSerializer(serializers.Serializer):
return document.name, document_data
def validate_title(self, title):
if title:
return title
else:
# do not return empty strings.
return None
def validate_correspondent(self, correspondent):
if correspondent:
return correspondent.id

View File

@@ -38,7 +38,7 @@ def set_correspondent(sender,
if document.correspondent and not replace:
return
potential_correspondents = matching.match_correspondents(document.content,
potential_correspondents = matching.match_correspondents(document,
classifier)
potential_count = len(potential_correspondents)
@@ -81,7 +81,7 @@ def set_document_type(sender,
if document.document_type and not replace:
return
potential_document_type = matching.match_document_types(document.content,
potential_document_type = matching.match_document_types(document,
classifier)
potential_count = len(potential_document_type)
@@ -130,7 +130,7 @@ def set_tags(sender,
current_tags = set(document.tags.all())
matched_tags = matching.match_tags(document.content, classifier)
matched_tags = matching.match_tags(document, classifier)
relevant_tags = set(matched_tags) - current_tags

View File

@@ -1,6 +1,7 @@
<!doctype html>
{% load static %}
{% load i18n %}
<html lang="en">
<head>
@@ -16,7 +17,7 @@
<link rel="stylesheet" href="{% static styles_css %}">
</head>
<body>
<app-root>Loading...</app-root>
<app-root>{% translate "Paperless-ng is loading..." %}</app-root>
<script src="{% static runtime_js %}" defer></script>
<script src="{% static polyfills_js %}" defer></script>
<script src="{% static main_js %}" defer></script>

View File

@@ -1,6 +1,7 @@
<!doctype html>
{% load static %}
{% load i18n %}
<html lang="en">
<head>
@@ -9,7 +10,7 @@
<meta name="description" content="">
<meta name="author" content="Mark Otto, Jacob Thornton, and Bootstrap contributors">
<meta name="generator" content="Jekyll v4.1.1">
<title>Paperless Sign In</title>
<title>{% translate "Paperless-ng signed out" %}</title>
<!-- Bootstrap core CSS -->
<link href="{% static 'bootstrap.min.css' %}" rel="stylesheet">
@@ -36,9 +37,9 @@
<body class="text-center">
<div class="form-signin">
<img class="mb-4" src="{% static 'frontend/assets/logo.svg' %}" alt="" width="300">
<p>You have been successfully logged out. Bye!</p>
<a href="/">Sign in again</a>
<img class="mb-4" src="{% static 'frontend/en-US/assets/logo.svg' %}" alt="" width="300">
<p>{% translate "You have been successfully logged out. Bye!" %}</p>
<a href="/">{% translate "Sign in again" %}</a>
</div>
</body>
</html>

View File

@@ -1,6 +1,7 @@
<!doctype html>
{% load static %}
{% load i18n %}
<html lang="en">
<head>
@@ -9,7 +10,7 @@
<meta name="description" content="">
<meta name="author" content="Mark Otto, Jacob Thornton, and Bootstrap contributors">
<meta name="generator" content="Jekyll v4.1.1">
<title>Paperless Sign In</title>
<title>{% translate "Paperless-ng sign in" %}</title>
<!-- Bootstrap core CSS -->
<link href="{% static 'bootstrap.min.css' %}" rel="stylesheet">
@@ -37,18 +38,20 @@
<body class="text-center">
<form class="form-signin" method="post">
{% csrf_token %}
<img class="mb-4" src="{% static 'frontend/assets/logo.svg' %}" alt="" width="300">
<p>Please sign in.</p>
<img class="mb-4" src="{% static 'frontend/en-US/assets/logo.svg' %}" alt="" width="300">
<p>{% translate "Please sign in." %}</p>
{% if form.errors %}
<div class="alert alert-danger" role="alert">
Your username and password didn't match. Please try again.
{% translate "Your username and password didn't match. Please try again." %}
</div>
{% endif %}
<label for="inputUsername" class="sr-only">Username</label>
<input type="text" name="username" id="inputUsername" class="form-control" placeholder="Username" required autofocus>
<label for="inputPassword" class="sr-only">Password</label>
<input type="password" name="password" id="inputPassword" class="form-control" placeholder="Password" required>
<button class="btn btn-lg btn-primary btn-block" type="submit">Sign in</button>
{% translate "Username" as i18n_username %}
{% translate "Password" as i18n_password %}
<label for="inputUsername" class="sr-only">{{ i18n_username }}</label>
<input type="text" name="username" id="inputUsername" class="form-control" placeholder="{{ i18n_username }}" required autofocus>
<label for="inputPassword" class="sr-only">{{ i18n_password }}</label>
<input type="password" name="password" id="inputPassword" class="form-control" placeholder="{{ i18n_password }}" required>
<button class="btn btn-lg btn-primary btn-block" type="submit">{% translate "Sign in" %}</button>
</form>
</body>
</html>

Binary file not shown (added, 7.7 KiB)

Binary file not shown (added, 7.7 KiB)

View File

@@ -5,12 +5,14 @@ from django.test import TestCase
from django.utils import timezone
from documents.admin import DocumentAdmin
from documents.models import Document, Tag
from documents.models import Document
from documents.tests.utils import DirectoriesMixin
class TestDocumentAdmin(TestCase):
class TestDocumentAdmin(DirectoriesMixin, TestCase):
def setUp(self) -> None:
super(TestDocumentAdmin, self).setUp()
self.doc_admin = DocumentAdmin(model=Document, admin_site=AdminSite())
@mock.patch("documents.admin.index.add_or_update_document")

View File

@@ -114,8 +114,6 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
results = response.data['results']
self.assertEqual(len(results[0]), 0)
def test_document_actions(self):
_, filename = tempfile.mkstemp(dir=self.dirs.originals_dir)
@@ -230,6 +228,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(len(results), 2)
self.assertCountEqual([results[0]['id'], results[1]['id']], [doc1.id, doc3.id])
response = self.client.get("/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id))
self.assertEqual(response.status_code, 200)
results = response.data['results']
self.assertEqual(len(results), 2)
self.assertCountEqual([results[0]['id'], results[1]['id']], [doc2.id, doc3.id])
response = self.client.get("/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id))
self.assertEqual(response.status_code, 200)
results = response.data['results']
@@ -455,6 +459,23 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertIsNone(kwargs['override_document_type_id'])
self.assertIsNone(kwargs['override_tag_ids'])
@mock.patch("documents.views.async_task")
def test_upload_empty_metadata(self, m):
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
response = self.client.post("/api/documents/post_document/", {"document": f, "title": "", "correspondent": "", "document_type": ""})
self.assertEqual(response.status_code, 200)
m.assert_called_once()
args, kwargs = m.call_args
self.assertEqual(kwargs['override_filename'], "simple.pdf")
self.assertIsNone(kwargs['override_title'])
self.assertIsNone(kwargs['override_correspondent_id'])
self.assertIsNone(kwargs['override_document_type_id'])
self.assertIsNone(kwargs['override_tag_ids'])
@mock.patch("documents.views.async_task")
def test_upload_invalid_form(self, m):
@@ -908,6 +929,14 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
doc2 = Document.objects.get(id=self.doc2.id)
self.assertEqual(doc2.correspondent, self.c1)
def test_api_no_correspondent(self):
response = self.client.post("/api/documents/bulk_edit/", json.dumps({
"documents": [self.doc2.id],
"method": "set_correspondent",
"parameters": {}
}), content_type='application/json')
self.assertEqual(response.status_code, 400)
def test_api_invalid_document_type(self):
self.assertEqual(self.doc2.document_type, self.dt1)
response = self.client.post("/api/documents/bulk_edit/", json.dumps({
@@ -920,6 +949,14 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
doc2 = Document.objects.get(id=self.doc2.id)
self.assertEqual(doc2.document_type, self.dt1)
def test_api_no_document_type(self):
response = self.client.post("/api/documents/bulk_edit/", json.dumps({
"documents": [self.doc2.id],
"method": "set_document_type",
"parameters": {}
}), content_type='application/json')
self.assertEqual(response.status_code, 400)
def test_api_add_invalid_tag(self):
self.assertEqual(list(self.doc2.tags.all()), [self.t1])
response = self.client.post("/api/documents/bulk_edit/", json.dumps({
@@ -931,6 +968,14 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
self.assertEqual(list(self.doc2.tags.all()), [self.t1])
def test_api_add_tag_no_tag(self):
response = self.client.post("/api/documents/bulk_edit/", json.dumps({
"documents": [self.doc2.id],
"method": "add_tag",
"parameters": {}
}), content_type='application/json')
self.assertEqual(response.status_code, 400)
def test_api_delete_invalid_tag(self):
self.assertEqual(list(self.doc2.tags.all()), [self.t1])
response = self.client.post("/api/documents/bulk_edit/", json.dumps({
@@ -942,6 +987,14 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
self.assertEqual(list(self.doc2.tags.all()), [self.t1])
def test_api_delete_tag_no_tag(self):
response = self.client.post("/api/documents/bulk_edit/", json.dumps({
"documents": [self.doc2.id],
"method": "remove_tag",
"parameters": {}
}), content_type='application/json')
self.assertEqual(response.status_code, 400)
def test_api_modify_invalid_tags(self):
self.assertEqual(list(self.doc2.tags.all()), [self.t1])
response = self.client.post("/api/documents/bulk_edit/", json.dumps({
@@ -951,6 +1004,21 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
}), content_type='application/json')
self.assertEqual(response.status_code, 400)
def test_api_modify_tags_no_tags(self):
response = self.client.post("/api/documents/bulk_edit/", json.dumps({
"documents": [self.doc2.id],
"method": "modify_tags",
"parameters": {"remove_tags": [1123123]}
}), content_type='application/json')
self.assertEqual(response.status_code, 400)
response = self.client.post("/api/documents/bulk_edit/", json.dumps({
"documents": [self.doc2.id],
"method": "modify_tags",
"parameters": {'add_tags': [self.t2.id, 1657]}
}), content_type='application/json')
self.assertEqual(response.status_code, 400)
def test_api_selection_data_empty(self):
response = self.client.post("/api/documents/selection_data/", json.dumps({
"documents": []

View File

@@ -468,6 +468,42 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(dst))
class PreConsumeTestCase(TestCase):
@mock.patch("documents.consumer.Popen")
@override_settings(PRE_CONSUME_SCRIPT=None)
def test_no_pre_consume_script(self, m):
c = Consumer()
c.path = "path-to-file"
c.run_pre_consume_script()
m.assert_not_called()
@mock.patch("documents.consumer.Popen")
@override_settings(PRE_CONSUME_SCRIPT="does-not-exist")
def test_pre_consume_script_not_found(self, m):
c = Consumer()
c.path = "path-to-file"
self.assertRaises(ConsumerError, c.run_pre_consume_script)
@mock.patch("documents.consumer.Popen")
def test_pre_consume_script(self, m):
with tempfile.NamedTemporaryFile() as script:
with override_settings(PRE_CONSUME_SCRIPT=script.name):
c = Consumer()
c.path = "path-to-file"
c.run_pre_consume_script()
m.assert_called_once()
args, kwargs = m.call_args
command = args[0]
self.assertEqual(command[0], script.name)
self.assertEqual(command[1], "path-to-file")
class PostConsumeTestCase(TestCase):
@mock.patch("documents.consumer.Popen")
@@ -483,36 +519,45 @@ class PostConsumeTestCase(TestCase):
m.assert_not_called()
@mock.patch("documents.consumer.Popen")
@override_settings(POST_CONSUME_SCRIPT="script")
def test_post_consume_script_simple(self, m):
@override_settings(POST_CONSUME_SCRIPT="does-not-exist")
def test_post_consume_script_not_found(self):
doc = Document.objects.create(title="Test", mime_type="application/pdf")
Consumer().run_post_consume_script(doc)
m.assert_called_once()
self.assertRaises(ConsumerError, Consumer().run_post_consume_script, doc)
@mock.patch("documents.consumer.Popen")
def test_post_consume_script_simple(self, m):
with tempfile.NamedTemporaryFile() as script:
with override_settings(POST_CONSUME_SCRIPT=script.name):
doc = Document.objects.create(title="Test", mime_type="application/pdf")
Consumer().run_post_consume_script(doc)
m.assert_called_once()
@mock.patch("documents.consumer.Popen")
@override_settings(POST_CONSUME_SCRIPT="script")
def test_post_consume_script_with_correspondent(self, m):
c = Correspondent.objects.create(name="my_bank")
doc = Document.objects.create(title="Test", mime_type="application/pdf", correspondent=c)
tag1 = Tag.objects.create(name="a")
tag2 = Tag.objects.create(name="b")
doc.tags.add(tag1)
doc.tags.add(tag2)
with tempfile.NamedTemporaryFile() as script:
with override_settings(POST_CONSUME_SCRIPT=script.name):
c = Correspondent.objects.create(name="my_bank")
doc = Document.objects.create(title="Test", mime_type="application/pdf", correspondent=c)
tag1 = Tag.objects.create(name="a")
tag2 = Tag.objects.create(name="b")
doc.tags.add(tag1)
doc.tags.add(tag2)
Consumer().run_post_consume_script(doc)
Consumer().run_post_consume_script(doc)
m.assert_called_once()
m.assert_called_once()
args, kwargs = m.call_args
args, kwargs = m.call_args
command = args[0]
command = args[0]
self.assertEqual(command[0], "script")
self.assertEqual(command[1], str(doc.pk))
self.assertEqual(command[5], f"/api/documents/{doc.pk}/download/")
self.assertEqual(command[6], f"/api/documents/{doc.pk}/thumb/")
self.assertEqual(command[7], "my_bank")
self.assertCountEqual(command[8].split(","), ["a", "b"])
self.assertEqual(command[0], script.name)
self.assertEqual(command[1], str(doc.pk))
self.assertEqual(command[5], f"/api/documents/{doc.pk}/download/")
self.assertEqual(command[6], f"/api/documents/{doc.pk}/thumb/")
self.assertEqual(command[7], "my_bank")
self.assertCountEqual(command[8].split(","), ["a", "b"])

View File

@@ -138,3 +138,18 @@ class TestDate(TestCase):
@override_settings(FILENAME_DATE_ORDER="YMD")
def test_filename_date_parse_invalid(self, *args):
self.assertIsNone(parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"))
@override_settings(IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)))
def test_ignored_dates(self, *args):
text = (
"lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem "
"ipsum"
)
date = parse_date("", text)
self.assertEqual(
date,
datetime.datetime(
2018, 2, 13, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
)

View File

@@ -1,10 +1,10 @@
import shutil
import tempfile
from datetime import datetime
from pathlib import Path
from unittest import mock
from django.test import TestCase, override_settings
from django.utils import timezone
from ..models import Document, Correspondent
@@ -47,20 +47,20 @@ class TestDocument(TestCase):
def test_file_name(self):
doc = Document(mime_type="application/pdf", title="test", created=datetime(2020, 12, 25))
doc = Document(mime_type="application/pdf", title="test", created=timezone.datetime(2020, 12, 25))
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf")
def test_file_name_jpg(self):
doc = Document(mime_type="image/jpeg", title="test", created=datetime(2020, 12, 25))
doc = Document(mime_type="image/jpeg", title="test", created=timezone.datetime(2020, 12, 25))
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.jpg")
def test_file_name_unknown(self):
doc = Document(mime_type="application/zip", title="test", created=datetime(2020, 12, 25))
doc = Document(mime_type="application/zip", title="test", created=timezone.datetime(2020, 12, 25))
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.zip")
def test_file_name_invalid_type(self):
doc = Document(mime_type="image/jpegasd", title="test", created=datetime(2020, 12, 25))
doc = Document(mime_type="image/jpegasd", title="test", created=timezone.datetime(2020, 12, 25))
self.assertEqual(doc.get_public_filename(), "2020-12-25 test")

View File

@@ -70,18 +70,18 @@ class TestDecryptDocuments(TestCase):
PASSPHRASE="test"
).enable()
doc = Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
doc = Document.objects.create(checksum="82186aaa94f0b98697d704b90fd1c072", title="wow", filename="0000004.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000002.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000004.pdf.gpg"), os.path.join(originals_dir, "0000004.pdf.gpg"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000004.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"))
call_command('decrypt_documents')
doc.refresh_from_db()
self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
self.assertEqual(doc.filename, "0000002.pdf")
self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf")))
self.assertEqual(doc.filename, "0000004.pdf")
self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000004.pdf")))
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png")))
self.assertTrue(os.path.isfile(doc.thumbnail_path))

View File

@@ -3,6 +3,8 @@ import json
import os
import shutil
import tempfile
from pathlib import Path
from unittest import mock
from django.core.management import call_command
from django.test import TestCase, override_settings
@@ -10,54 +12,87 @@ from django.test import TestCase, override_settings
from documents.management.commands import document_exporter
from documents.models import Document, Tag, DocumentType, Correspondent
from documents.sanity_checker import check_sanity
from documents.settings import EXPORTER_FILE_NAME
from documents.tests.utils import DirectoriesMixin, paperless_environment
class TestExportImport(DirectoriesMixin, TestCase):
def setUp(self) -> None:
self.target = tempfile.mkdtemp()
self.addCleanup(shutil.rmtree, self.target)
self.d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow1", filename="0000001.pdf", mime_type="application/pdf")
self.d2 = Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow2", filename="0000002.pdf", mime_type="application/pdf")
self.d3 = Document.objects.create(content="Content", checksum="d38d7ed02e988e072caf924e0f3fcb76", title="wow2", filename="0000003.pdf", mime_type="application/pdf")
self.d4 = Document.objects.create(content="Content", checksum="82186aaa94f0b98697d704b90fd1c072", title="wow_dec", filename="0000004.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
self.t1 = Tag.objects.create(name="t")
self.dt1 = DocumentType.objects.create(name="dt")
self.c1 = Correspondent.objects.create(name="c")
self.d1.tags.add(self.t1)
self.d1.correspondent = self.c1
self.d1.document_type = self.dt1
self.d1.save()
super(TestExportImport, self).setUp()
def _get_document_from_manifest(self, manifest, id):
f = list(filter(lambda d: d['model'] == "documents.document" and d['pk'] == id, manifest))
if len(f) == 1:
return f[0]
else:
raise ValueError(f"document with id {id} does not exist in manifest")
@override_settings(
PASSPHRASE="test"
)
def test_exporter(self):
def _do_export(self, use_filename_format=False, compare_checksums=False, delete=False):
args = ['document_exporter', self.target]
if use_filename_format:
args += ["--use-filename-format"]
if compare_checksums:
args += ["--compare-checksums"]
if delete:
args += ["--delete"]
call_command(*args)
with open(os.path.join(self.target, "manifest.json")) as f:
manifest = json.load(f)
return manifest
def test_exporter(self, use_filename_format=False):
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
file = os.path.join(self.dirs.originals_dir, "0000001.pdf")
manifest = self._do_export(use_filename_format=use_filename_format)
d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow", filename="0000001.pdf", mime_type="application/pdf")
d2 = Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
t1 = Tag.objects.create(name="t")
dt1 = DocumentType.objects.create(name="dt")
c1 = Correspondent.objects.create(name="c")
self.assertEqual(len(manifest), 7)
self.assertEqual(len(list(filter(lambda e: e['model'] == 'documents.document', manifest))), 4)
d1.tags.add(t1)
d1.correspondents = c1
d1.document_type = dt1
d1.save()
d2.save()
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
target = tempfile.mkdtemp()
self.addCleanup(shutil.rmtree, target)
call_command('document_exporter', target)
with open(os.path.join(target, "manifest.json")) as f:
manifest = json.load(f)
self.assertEqual(len(manifest), 5)
self.assertEqual(self._get_document_from_manifest(manifest, self.d1.id)['fields']['title'], "wow1")
self.assertEqual(self._get_document_from_manifest(manifest, self.d2.id)['fields']['title'], "wow2")
self.assertEqual(self._get_document_from_manifest(manifest, self.d3.id)['fields']['title'], "wow2")
self.assertEqual(self._get_document_from_manifest(manifest, self.d4.id)['fields']['title'], "wow_dec")
for element in manifest:
if element['model'] == 'documents.document':
fname = os.path.join(target, element[document_exporter.EXPORTER_FILE_NAME])
fname = os.path.join(self.target, element[document_exporter.EXPORTER_FILE_NAME])
self.assertTrue(os.path.exists(fname))
self.assertTrue(os.path.exists(os.path.join(target, element[document_exporter.EXPORTER_THUMBNAIL_NAME])))
self.assertTrue(os.path.exists(os.path.join(self.target, element[document_exporter.EXPORTER_THUMBNAIL_NAME])))
with open(fname, "rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(checksum, element['fields']['checksum'])
self.assertEqual(element['fields']['storage_type'], Document.STORAGE_TYPE_UNENCRYPTED)
if document_exporter.EXPORTER_ARCHIVE_NAME in element:
fname = os.path.join(target, element[document_exporter.EXPORTER_ARCHIVE_NAME])
fname = os.path.join(self.target, element[document_exporter.EXPORTER_ARCHIVE_NAME])
self.assertTrue(os.path.exists(fname))
with open(fname, "rb") as f:
@@ -65,24 +100,123 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.assertEqual(checksum, element['fields']['archive_checksum'])
with paperless_environment() as dirs:
self.assertEqual(Document.objects.count(), 2)
self.assertEqual(Document.objects.count(), 4)
Document.objects.all().delete()
Correspondent.objects.all().delete()
DocumentType.objects.all().delete()
Tag.objects.all().delete()
self.assertEqual(Document.objects.count(), 0)
call_command('document_importer', target)
self.assertEqual(Document.objects.count(), 2)
call_command('document_importer', self.target)
self.assertEqual(Document.objects.count(), 4)
self.assertEqual(Tag.objects.count(), 1)
self.assertEqual(Correspondent.objects.count(), 1)
self.assertEqual(DocumentType.objects.count(), 1)
self.assertEqual(Document.objects.get(id=self.d1.id).title, "wow1")
self.assertEqual(Document.objects.get(id=self.d2.id).title, "wow2")
self.assertEqual(Document.objects.get(id=self.d3.id).title, "wow2")
self.assertEqual(Document.objects.get(id=self.d4.id).title, "wow_dec")
messages = check_sanity()
# everything is alright after the test
self.assertEqual(len(messages), 0, str([str(m) for m in messages]))
@override_settings(
PAPERLESS_FILENAME_FORMAT="{title}"
)
def test_exporter_with_filename_format(self):
self.test_exporter()
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
with override_settings(PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}"):
self.test_exporter(use_filename_format=True)
def test_update_export_changed_time(self):
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
self._do_export()
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
self._do_export()
m.assert_not_called()
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
st_mtime_2 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
Path(self.d1.source_path).touch()
with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
self._do_export()
self.assertEqual(m.call_count, 1)
st_mtime_3 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
self.assertNotEqual(st_mtime_1, st_mtime_2)
self.assertNotEqual(st_mtime_2, st_mtime_3)
def test_update_export_changed_checksum(self):
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
self._do_export()
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
self._do_export()
m.assert_not_called()
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
self.d2.checksum = "asdfasdgf3"
self.d2.save()
with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
self._do_export(compare_checksums=True)
self.assertEqual(m.call_count, 1)
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
def test_update_export_deleted_document(self):
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
manifest = self._do_export()
self.assertTrue(len(manifest), 7)
doc_from_manifest = self._get_document_from_manifest(manifest, self.d3.id)
self.assertTrue(os.path.isfile(os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])))
self.d3.delete()
manifest = self._do_export()
self.assertRaises(ValueError, self._get_document_from_manifest, manifest, self.d3.id)
self.assertTrue(os.path.isfile(os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])))
manifest = self._do_export(delete=True)
self.assertFalse(os.path.isfile(os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])))
self.assertTrue(len(manifest), 6)
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}/{correspondent}")
def test_update_export_changed_location(self):
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
m = self._do_export(use_filename_format=True)
self.assertTrue(os.path.isfile(os.path.join(self.target, "wow1", "c.pdf")))
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
self.d1.title = "new_title"
self.d1.save()
self._do_export(use_filename_format=True, delete=True)
self.assertFalse(os.path.isfile(os.path.join(self.target, "wow1", "c.pdf")))
self.assertFalse(os.path.isdir(os.path.join(self.target, "wow1")))
self.assertTrue(os.path.isfile(os.path.join(self.target, "new_title", "c.pdf")))
self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none.pdf")))
self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf")))
def test_export_missing_files(self):

View File

@@ -0,0 +1,52 @@
import os
import shutil
from unittest import mock
from django.core.management import call_command
from django.test import TestCase
from documents.management.commands.document_thumbnails import _process_document
from documents.models import Document, Tag, Correspondent, DocumentType
from documents.tests.utils import DirectoriesMixin
class TestMakeThumbnails(DirectoriesMixin, TestCase):
def make_models(self):
self.d1 = Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf", filename="test.pdf")
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), self.d1.source_path)
self.d2 = Document.objects.create(checksum="Ass", title="A", content="first document", mime_type="application/pdf", filename="test2.pdf")
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), self.d2.source_path)
def setUp(self) -> None:
super(TestMakeThumbnails, self).setUp()
self.make_models()
def test_process_document(self):
self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
_process_document(self.d1.id)
self.assertTrue(os.path.isfile(self.d1.thumbnail_path))
@mock.patch("documents.management.commands.document_thumbnails.shutil.move")
def test_process_document_invalid_mime_type(self, m):
self.d1.mime_type = "asdasdasd"
self.d1.save()
_process_document(self.d1.id)
m.assert_not_called()
def test_command(self):
self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
call_command('document_thumbnails')
self.assertTrue(os.path.isfile(self.d1.thumbnail_path))
self.assertTrue(os.path.isfile(self.d2.thumbnail_path))
def test_command_documentid(self):
self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
call_command('document_thumbnails', '-d', f"{self.d1.id}")
self.assertTrue(os.path.isfile(self.d1.thumbnail_path))
self.assertFalse(os.path.isfile(self.d2.thumbnail_path))

View File

@@ -21,13 +21,15 @@ class TestMatching(TestCase):
matching_algorithm=getattr(klass, algorithm)
)
for string in true:
doc = Document(content=string)
self.assertTrue(
matching.matches(instance, string),
matching.matches(instance, doc),
'"%s" should match "%s" but it does not' % (text, string)
)
for string in false:
doc = Document(content=string)
self.assertFalse(
matching.matches(instance, string),
matching.matches(instance, doc),
'"%s" should not match "%s" but it does' % (text, string)
)
@@ -169,7 +171,7 @@ class TestMatching(TestCase):
def test_match_regex(self):
self._test_matching(
r"alpha\w+gamma",
"alpha\w+gamma",
"MATCH_REGEX",
(
"I have alpha_and_gamma in me",
@@ -187,6 +189,16 @@ class TestMatching(TestCase):
)
)
def test_match_invalid_regex(self):
self._test_matching(
"[[",
"MATCH_REGEX",
[],
[
"Don't match this"
]
)
def test_match_fuzzy(self):
self._test_matching(

View File

@@ -98,7 +98,7 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
doc2 = Document.objects.create(checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG)
self.doc2_id = doc2.id
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), source_path_before(doc2))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000004.pdf.gpg"), source_path_before(doc2))
def testMimeTypesMigrated(self):
Document = self.apps.get_model('documents', 'Document')

View File

@@ -120,3 +120,4 @@ class TestParserAvailability(TestCase):
self.assertTrue(is_file_ext_supported('.pdf'))
self.assertFalse(is_file_ext_supported('.hsdfh'))
self.assertFalse(is_file_ext_supported(''))

View File

@@ -0,0 +1,34 @@
import logging
from unittest import mock
from django.test import TestCase
from paperless.settings import default_task_workers, default_threads_per_worker
class TestSettings(TestCase):
@mock.patch("paperless.settings.multiprocessing.cpu_count")
def test_single_core(self, cpu_count):
cpu_count.return_value = 1
default_workers = default_task_workers()
default_threads = default_threads_per_worker(default_workers)
self.assertEqual(default_workers, 1)
self.assertEqual(default_threads, 1)
def test_workers_threads(self):
for i in range(2, 64):
with mock.patch("paperless.settings.multiprocessing.cpu_count") as cpu_count:
cpu_count.return_value = i
default_workers = default_task_workers()
default_threads = default_threads_per_worker(default_workers)
self.assertTrue(default_workers >= 1)
self.assertTrue(default_threads >= 1)
self.assertTrue(default_workers * default_threads < i, f"{i}")

View File

@@ -0,0 +1,30 @@
from django.conf import settings
from django.contrib.auth.models import User
from django.test import TestCase
class TestViews(TestCase):
def setUp(self) -> None:
self.user = User.objects.create_user("testuser")
def test_login_redirect(self):
response = self.client.get('/')
self.assertEqual(response.status_code, 302)
self.assertEqual(response.url, "/accounts/login/?next=/")
def test_index(self):
self.client.force_login(self.user)
for (language_given, language_actual) in [("", "en-US"), ("en-US", "en-US"), ("de", "de"), ("en", "en-US"), ("en-us", "en-US"), ("fr", "fr"), ("jp", "en-US")]:
if language_given:
self.client.cookies.load({settings.LANGUAGE_COOKIE_NAME: language_given})
elif settings.LANGUAGE_COOKIE_NAME in self.client.cookies.keys():
self.client.cookies.pop(settings.LANGUAGE_COOKIE_NAME)
response = self.client.get('/', )
self.assertEqual(response.status_code, 200)
self.assertEqual(response.context_data['webmanifest'], f"frontend/{language_actual}/manifest.webmanifest")
self.assertEqual(response.context_data['styles_css'], f"frontend/{language_actual}/styles.css")
self.assertEqual(response.context_data['runtime_js'], f"frontend/{language_actual}/runtime.js")
self.assertEqual(response.context_data['polyfills_js'], f"frontend/{language_actual}/polyfills.js")
self.assertEqual(response.context_data['main_js'], f"frontend/{language_actual}/main.js")

View File

@@ -1,3 +1,4 @@
import logging
import os
import tempfile
from datetime import datetime
@@ -79,7 +80,7 @@ class IndexView(TemplateView):
context['runtime_js'] = f"frontend/{self.get_language()}/runtime.js"
context['polyfills_js'] = f"frontend/{self.get_language()}/polyfills.js" # NOQA: E501
context['main_js'] = f"frontend/{self.get_language()}/main.js"
context['manifest'] = f"frontend/{self.get_language()}/manifest.webmanifest" # NOQA: E501
context['webmanifest'] = f"frontend/{self.get_language()}/manifest.webmanifest" # NOQA: E501
return context
@@ -158,6 +159,9 @@ class DocumentViewSet(RetrieveModelMixin,
"added",
"archive_serial_number")
def get_queryset(self):
return Document.objects.distinct()
def get_serializer(self, *args, **kwargs):
fields_param = self.request.query_params.get('fields', None)
if fields_param:
@@ -458,12 +462,21 @@ class SearchView(APIView):
self.ix = index.open_index()
def add_infos_to_hit(self, r):
doc = Document.objects.get(id=r['id'])
try:
doc = Document.objects.get(id=r['id'])
except Document.DoesNotExist:
logging.getLogger(__name__).warning(
f"Search index returned a non-existing document: "
f"id: {r['id']}, title: {r['title']}. "
f"Search index needs reindex."
)
doc = None
return {'id': r['id'],
'highlights': r.highlights("content", text=doc.content),
'highlights': r.highlights("content", text=doc.content) if doc else None, # NOQA: E501
'score': r.score,
'rank': r.rank,
'document': DocumentSerializer(doc).data,
'document': DocumentSerializer(doc).data if doc else None,
'title': r['title']
}
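
With this, a stale search hit no longer raises Document.DoesNotExist into the API response; the hit is returned with document and highlights set to None, and a warning points at the index. Assuming the index management command name used in this codebase, the remedy the log message suggests would be:

from django.core.management import call_command

# Rebuild the search index so hits for deleted documents disappear.
call_command("document_index", "reindex")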