Merge branch 'dev' into feature-websockets-status
@@ -91,6 +91,11 @@ class Consumer(LoggingMixin):
         if not settings.PRE_CONSUME_SCRIPT:
             return
 
+        if not os.path.isfile(settings.PRE_CONSUME_SCRIPT):
+            raise ConsumerError(
+                f"Configured pre-consume script "
+                f"{settings.PRE_CONSUME_SCRIPT} does not exist.")
+
         try:
             Popen((settings.PRE_CONSUME_SCRIPT, self.path)).wait()
         except Exception as e:
@@ -102,6 +107,11 @@ class Consumer(LoggingMixin):
         if not settings.POST_CONSUME_SCRIPT:
             return
 
+        if not os.path.isfile(settings.POST_CONSUME_SCRIPT):
+            raise ConsumerError(
+                f"Configured post-consume script "
+                f"{settings.POST_CONSUME_SCRIPT} does not exist.")
+
         try:
             Popen((
                 settings.POST_CONSUME_SCRIPT,
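
Note: these guards fail fast when a configured script path does not exist, instead of failing later inside Popen. A minimal sketch of exercising the new behavior, mirroring the tests added later in this commit:

    from django.test import override_settings

    from documents.consumer import Consumer, ConsumerError

    with override_settings(PRE_CONSUME_SCRIPT="does-not-exist"):
        c = Consumer()
        c.path = "path-to-file"
        try:
            # Raises before Popen is ever reached, with a clear message.
            c.run_pre_consume_script()
        except ConsumerError as e:
            print(e)  # Configured pre-consume script does-not-exist does not exist.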
@@ -91,7 +91,7 @@ def generate_unique_filename(doc, root):
     return new_filename
 
 
-def generate_filename(doc, counter=0):
+def generate_filename(doc, counter=0, append_gpg=True):
     path = ""
 
     try:
@@ -151,7 +151,7 @@ def generate_filename(doc, counter=0):
     filename = f"{doc.pk:07}{counter_str}{doc.file_type}"
 
     # Append .gpg for encrypted files
-    if doc.storage_type == doc.STORAGE_TYPE_GPG:
+    if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
         filename += ".gpg"
 
     return filename
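
Note: a quick sketch of what the new flag changes for a GPG-encrypted document (filenames illustrative):

    from documents.file_handling import generate_filename

    generate_filename(doc)                    # e.g. "0000004.pdf.gpg" (default keeps the suffix)
    generate_filename(doc, append_gpg=False)  # e.g. "0000004.pdf" (the exporter writes decrypted content)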
@@ -11,6 +11,7 @@ from django import db
 from django.conf import settings
 from django.core.management.base import BaseCommand
 from django.db import transaction
+from filelock import FileLock
 from whoosh.writing import AsyncWriter
 
 from documents.models import Document
@@ -47,8 +48,10 @@ def handle_document(document_id):
             archive_checksum=checksum,
             content=parser.get_text()
         )
-        create_source_path_directory(document.archive_path)
-        shutil.move(parser.get_archive_path(), document.archive_path)
+        with FileLock(settings.MEDIA_LOCK):
+            create_source_path_directory(document.archive_path)
+            shutil.move(parser.get_archive_path(),
+                        document.archive_path)
 
         with AsyncWriter(index.open_index()) as writer:
             index.update_document(writer, document)
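
Note: FileLock comes from the filelock package and provides a cross-process lock tied to a lock file, so the archiver and the exporter cannot move files under the media directory at the same time. A minimal sketch of the pattern (lock path hypothetical; paperless uses settings.MEDIA_LOCK):

    from filelock import FileLock

    lock = FileLock("/tmp/media.lock")  # hypothetical path

    with lock:  # blocks until acquired, releases on exit
        # safe to create directories and move files under MEDIA_ROOT here
        ...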
@@ -5,7 +5,6 @@ from time import sleep
 
 from django.conf import settings
 from django.core.management.base import BaseCommand, CommandError
-from django.utils.text import slugify
 from django_q.tasks import async_task
 from watchdog.events import FileSystemEventHandler
 from watchdog.observers.polling import PollingObserver
@@ -46,7 +45,7 @@ def _consume(filepath):
         return
 
     if not is_file_ext_supported(os.path.splitext(filepath)[1]):
-        logger.debug(
+        logger.warning(
             f"Not consuming file {filepath}: Unknown file extension.")
         return
 
@@ -1,15 +1,21 @@
 import hashlib
 import json
 import os
 import shutil
 import time
 
+import tqdm
 from django.conf import settings
 from django.core import serializers
 from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+from filelock import FileLock
 
 from documents.models import Document, Correspondent, Tag, DocumentType
 from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
     EXPORTER_ARCHIVE_NAME
 from paperless.db import GnuPG
+from ...file_handling import generate_filename, delete_empty_directories
 from ...mixins import Renderable
@@ -24,13 +30,47 @@ class Command(Renderable, BaseCommand):
     def add_arguments(self, parser):
         parser.add_argument("target")
 
+        parser.add_argument(
+            "-c", "--compare-checksums",
+            default=False,
+            action="store_true",
+            help="Compare file checksums when determining whether to export "
+                 "a file or not. If not specified, file size and time "
+                 "modified is used instead."
+        )
+
+        parser.add_argument(
+            "-f", "--use-filename-format",
+            default=False,
+            action="store_true",
+            help="Use PAPERLESS_FILENAME_FORMAT for storing files in the "
+                 "export directory, if configured."
+        )
+
+        parser.add_argument(
+            "-d", "--delete",
+            default=False,
+            action="store_true",
+            help="After exporting, delete files in the export directory that "
+                 "do not belong to the current export, such as files from "
+                 "deleted documents."
+        )
+
     def __init__(self, *args, **kwargs):
         BaseCommand.__init__(self, *args, **kwargs)
         self.target = None
+        self.files_in_export_dir = []
+        self.exported_files = []
+        self.compare_checksums = False
+        self.use_filename_format = False
+        self.delete = False
 
     def handle(self, *args, **options):
 
         self.target = options["target"]
+        self.compare_checksums = options['compare_checksums']
+        self.use_filename_format = options['use_filename_format']
+        self.delete = options['delete']
 
         if not os.path.exists(self.target):
             raise CommandError("That path doesn't exist")
@@ -38,83 +78,148 @@ class Command(Renderable, BaseCommand):
         if not os.access(self.target, os.W_OK):
             raise CommandError("That path doesn't appear to be writable")
 
-        if os.listdir(self.target):
-            raise CommandError("That directory is not empty.")
-
-        self.dump()
+        with FileLock(settings.MEDIA_LOCK):
+            self.dump()
 
     def dump(self):
+        # 1. Take a snapshot of what files exist in the current export folder
+        for root, dirs, files in os.walk(self.target):
+            self.files_in_export_dir.extend(
+                map(lambda f: os.path.abspath(os.path.join(root, f)), files)
+            )
 
-        documents = Document.objects.all()
-        document_map = {d.pk: d for d in documents}
-        manifest = json.loads(serializers.serialize("json", documents))
+        # 2. Create manifest, containing all correspondents, types, tags and
+        # documents
+        with transaction.atomic():
+            manifest = json.loads(
+                serializers.serialize("json", Correspondent.objects.all()))
 
-        for index, document_dict in enumerate(manifest):
+            manifest += json.loads(serializers.serialize(
+                "json", Tag.objects.all()))
 
-            # Force output to unencrypted as that will be the current state.
-            # The importer will make the decision to encrypt or not.
-            manifest[index]["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED  # NOQA: E501
+            manifest += json.loads(serializers.serialize(
+                "json", DocumentType.objects.all()))
+
+            documents = Document.objects.order_by("id")
+            document_map = {d.pk: d for d in documents}
+            document_manifest = json.loads(
+                serializers.serialize("json", documents))
+            manifest += document_manifest
+
+        # 3. Export files from each document
+        for index, document_dict in tqdm.tqdm(enumerate(document_manifest),
+                                              total=len(document_manifest)):
+            # 3.1. store files unencrypted
+            document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED  # NOQA: E501
 
             document = document_map[document_dict["pk"]]
 
-            print(f"Exporting: {document}")
-
+            # 3.2. generate a unique filename
             filename_counter = 0
             while True:
-                original_name = document.get_public_filename(
-                    counter=filename_counter)
-                original_target = os.path.join(self.target, original_name)
+                if self.use_filename_format:
+                    base_name = generate_filename(
+                        document, counter=filename_counter,
+                        append_gpg=False)
+                else:
+                    base_name = document.get_public_filename(
+                        counter=filename_counter)
 
-                if not os.path.exists(original_target):
+                if base_name not in self.exported_files:
+                    self.exported_files.append(base_name)
                     break
                 else:
                     filename_counter += 1
 
-            thumbnail_name = original_name + "-thumbnail.png"
-            thumbnail_target = os.path.join(self.target, thumbnail_name)
+            # 3.3. write filenames into manifest
+            original_name = base_name
+            original_target = os.path.join(self.target, original_name)
             document_dict[EXPORTER_FILE_NAME] = original_name
+
+            thumbnail_name = base_name + "-thumbnail.png"
+            thumbnail_target = os.path.join(self.target, thumbnail_name)
             document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
 
             if os.path.exists(document.archive_path):
-                archive_name = document.get_public_filename(
-                    archive=True, counter=filename_counter, suffix="_archive")
+                archive_name = base_name + "-archive.pdf"
                 archive_target = os.path.join(self.target, archive_name)
                 document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
             else:
                 archive_target = None
 
+            # 3.4. write files to target folder
+            t = int(time.mktime(document.created.timetuple()))
             if document.storage_type == Document.STORAGE_TYPE_GPG:
 
+                os.makedirs(os.path.dirname(original_target), exist_ok=True)
                 with open(original_target, "wb") as f:
                     f.write(GnuPG.decrypted(document.source_file))
+                os.utime(original_target, times=(t, t))
 
+                os.makedirs(os.path.dirname(thumbnail_target), exist_ok=True)
                 with open(thumbnail_target, "wb") as f:
                     f.write(GnuPG.decrypted(document.thumbnail_file))
+                os.utime(thumbnail_target, times=(t, t))
 
                 if archive_target:
+                    os.makedirs(os.path.dirname(archive_target), exist_ok=True)
                     with open(archive_target, "wb") as f:
                         f.write(GnuPG.decrypted(document.archive_path))
+                    os.utime(archive_target, times=(t, t))
             else:
-                shutil.copy(document.source_path, original_target)
-                shutil.copy(document.thumbnail_path, thumbnail_target)
+                self.check_and_copy(document.source_path,
+                                    document.checksum,
+                                    original_target)
+
+                self.check_and_copy(document.thumbnail_path,
+                                    None,
+                                    thumbnail_target)
 
                 if archive_target:
-                    shutil.copy(document.archive_path, archive_target)
+                    self.check_and_copy(document.archive_path,
+                                        document.archive_checksum,
+                                        archive_target)
 
-        manifest += json.loads(
-            serializers.serialize("json", Correspondent.objects.all()))
+        # 4. write manifest to target folder
+        manifest_path = os.path.abspath(
+            os.path.join(self.target, "manifest.json"))
 
-        manifest += json.loads(serializers.serialize(
-            "json", Tag.objects.all()))
-
-        manifest += json.loads(serializers.serialize(
-            "json", DocumentType.objects.all()))
-
-        with open(os.path.join(self.target, "manifest.json"), "w") as f:
+        with open(manifest_path, "w") as f:
             json.dump(manifest, f, indent=2)
 
+        if self.delete:
+            # 5. Remove files which we did not explicitly export in this run
+
+            if manifest_path in self.files_in_export_dir:
+                self.files_in_export_dir.remove(manifest_path)
+
+            for f in self.files_in_export_dir:
+                os.remove(f)
+
+                delete_empty_directories(os.path.abspath(os.path.dirname(f)),
+                                         os.path.abspath(self.target))
+
+    def check_and_copy(self, source, source_checksum, target):
+        if os.path.abspath(target) in self.files_in_export_dir:
+            self.files_in_export_dir.remove(os.path.abspath(target))
+
+        perform_copy = False
+
+        if os.path.exists(target):
+            source_stat = os.stat(source)
+            target_stat = os.stat(target)
+            if self.compare_checksums and source_checksum:
+                with open(target, "rb") as f:
+                    target_checksum = hashlib.md5(f.read()).hexdigest()
+                perform_copy = target_checksum != source_checksum
+            elif source_stat.st_mtime != target_stat.st_mtime:
+                perform_copy = True
+            elif source_stat.st_size != target_stat.st_size:
+                perform_copy = True
        else:
            # Copy if it does not exist
            perform_copy = True

        if perform_copy:
            os.makedirs(os.path.dirname(target), exist_ok=True)
            shutil.copy2(source, target)
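
Note: with these options the exporter becomes incremental: unchanged files are skipped based on size and mtime (or MD5 checksums with -c), and -d prunes files whose documents no longer exist. A usage sketch (export path hypothetical):

    from django.core.management import call_command

    call_command("document_exporter", "/path/to/export")
    call_command("document_exporter", "/path/to/export", "--compare-checksums")
    call_command("document_exporter", "/path/to/export", "--delete")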
@@ -148,10 +148,10 @@ class Command(Renderable, BaseCommand):
 
         create_source_path_directory(document.source_path)
 
-        shutil.copy(document_path, document.source_path)
-        shutil.copy(thumbnail_path, document.thumbnail_path)
+        shutil.copy2(document_path, document.source_path)
+        shutil.copy2(thumbnail_path, document.thumbnail_path)
         if archive_path:
             create_source_path_directory(document.archive_path)
-            shutil.copy(archive_path, document.archive_path)
+            shutil.copy2(archive_path, document.archive_path)
 
         document.save()
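
Note: the switch to shutil.copy2 matters because it preserves file metadata such as mtime, which the exporter's change detection above compares. A quick illustration (file names hypothetical):

    import os
    import shutil

    shutil.copy("a.pdf", "b.pdf")   # contents only; b.pdf gets a fresh mtime
    shutil.copy2("a.pdf", "c.pdf")  # contents plus metadata; mtimes match
    assert os.stat("a.pdf").st_mtime == os.stat("c.pdf").st_mtime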
@@ -13,8 +13,14 @@ from ...parsers import get_parser_class_for_mime_type
 
 def _process_document(doc_in):
     document = Document.objects.get(id=doc_in)
-    parser = get_parser_class_for_mime_type(document.mime_type)(
-        logging_group=None)
+    parser_class = get_parser_class_for_mime_type(document.mime_type)
+
+    if parser_class:
+        parser = parser_class(logging_group=None)
+    else:
+        print(f"{document} No parser for mime type {document.mime_type}")
+        return
 
     try:
         thumb = parser.get_optimised_thumbnail(
             document.source_path, document.mime_type)
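
Note: for context, the command this fixes can be invoked for all documents or a single one; a sketch based on the tests added in this commit (document id hypothetical):

    from django.core.management import call_command

    call_command("document_thumbnails")              # regenerate every thumbnail
    call_command("document_thumbnails", "-d", "42")  # only document 42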
@@ -1,3 +1,4 @@
+import logging
 import re
 
 from fuzzywuzzy import fuzz
@@ -5,49 +6,59 @@ from fuzzywuzzy import fuzz
 from documents.models import MatchingModel, Correspondent, DocumentType, Tag
 
 
-def match_correspondents(document_content, classifier):
+logger = logging.getLogger(__name__)
+
+
+def log_reason(matching_model, document, reason):
+    class_name = type(matching_model).__name__
+    logger.debug(
+        f"Assigning {class_name} {matching_model.name} to document "
+        f"{document} because {reason}")
+
+
+def match_correspondents(document, classifier):
     if classifier:
-        pred_id = classifier.predict_correspondent(document_content)
+        pred_id = classifier.predict_correspondent(document.content)
     else:
         pred_id = None
 
     correspondents = Correspondent.objects.all()
 
     return list(filter(
-        lambda o: matches(o, document_content) or o.pk == pred_id,
+        lambda o: matches(o, document) or o.pk == pred_id,
         correspondents))
 
 
-def match_document_types(document_content, classifier):
+def match_document_types(document, classifier):
     if classifier:
-        pred_id = classifier.predict_document_type(document_content)
+        pred_id = classifier.predict_document_type(document.content)
     else:
         pred_id = None
 
     document_types = DocumentType.objects.all()
 
     return list(filter(
-        lambda o: matches(o, document_content) or o.pk == pred_id,
+        lambda o: matches(o, document) or o.pk == pred_id,
         document_types))
 
 
-def match_tags(document_content, classifier):
+def match_tags(document, classifier):
     if classifier:
-        predicted_tag_ids = classifier.predict_tags(document_content)
+        predicted_tag_ids = classifier.predict_tags(document.content)
     else:
         predicted_tag_ids = []
 
     tags = Tag.objects.all()
 
     return list(filter(
-        lambda o: matches(o, document_content) or o.pk in predicted_tag_ids,
+        lambda o: matches(o, document) or o.pk in predicted_tag_ids,
         tags))
 
 
-def matches(matching_model, document_content):
+def matches(matching_model, document):
     search_kwargs = {}
 
-    document_content = document_content.lower()
+    document_content = document.content.lower()
 
     # Check that match is not empty
     if matching_model.match.strip() == "":
@@ -62,26 +73,54 @@ def matches(matching_model, document_content):
                 rf"\b{word}\b", document_content, **search_kwargs)
             if not search_result:
                 return False
+        log_reason(
+            matching_model, document,
+            f"it contains all of these words: {matching_model.match}"
+        )
         return True
 
     elif matching_model.matching_algorithm == MatchingModel.MATCH_ANY:
         for word in _split_match(matching_model):
             if re.search(rf"\b{word}\b", document_content, **search_kwargs):
+                log_reason(
+                    matching_model, document,
+                    f"it contains this word: {word}"
+                )
                 return True
         return False
 
     elif matching_model.matching_algorithm == MatchingModel.MATCH_LITERAL:
-        return bool(re.search(
+        result = bool(re.search(
             rf"\b{matching_model.match}\b",
             document_content,
             **search_kwargs
         ))
+        if result:
+            log_reason(
+                matching_model, document,
+                f"it contains this string: \"{matching_model.match}\""
+            )
+        return result
 
     elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
-        return bool(re.search(
-            re.compile(matching_model.match, **search_kwargs),
-            document_content
-        ))
+        try:
+            match = re.search(
+                re.compile(matching_model.match, **search_kwargs),
+                document_content
+            )
+        except re.error:
+            logger.error(
+                f"Error while processing regular expression "
+                f"{matching_model.match}"
+            )
+            return False
+        if match:
+            log_reason(
+                matching_model, document,
+                f"the string {match.group()} matches the regular expression "
+                f"{matching_model.match}"
+            )
+        return bool(match)
 
     elif matching_model.matching_algorithm == MatchingModel.MATCH_FUZZY:
         match = re.sub(r'[^\w\s]', '', matching_model.match)
@@ -89,8 +128,16 @@ def matches(matching_model, document_content):
         if matching_model.is_insensitive:
             match = match.lower()
             text = text.lower()
-
-        return fuzz.partial_ratio(match, text) >= 90
+        if fuzz.partial_ratio(match, text) >= 90:
+            # TODO: make this better
+            log_reason(
+                matching_model, document,
+                f"parts of the document content somehow match the string "
+                f"{matching_model.match}"
+            )
+            return True
+        else:
+            return False
 
     elif matching_model.matching_algorithm == MatchingModel.MATCH_AUTO:
         # this is done elsewhere.
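
Note: matchers now receive the Document itself and log why they fired. A minimal sketch of the new calling convention (mirroring the updated tests later in this commit):

    from documents import matching
    from documents.models import Document, MatchingModel, Tag

    tag = Tag(name="invoice", match="invoice",
              matching_algorithm=MatchingModel.MATCH_ANY)
    doc = Document(content="Your invoice for March is attached.")

    # matches() reads doc.content internally and calls log_reason() on a hit.
    print(matching.matches(tag, doc))  # True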
@@ -12,6 +12,7 @@ from django.conf import settings
 from django.contrib.auth.models import User
 from django.db import models
 from django.utils import timezone
+from django.utils.timezone import is_aware
 
 from django.utils.translation import gettext_lazy as _
 
@@ -62,12 +63,6 @@ class MatchingModel(models.Model):
     def __str__(self):
         return self.name
 
-    def save(self, *args, **kwargs):
-
-        self.match = self.match.lower()
-
-        models.Model.save(self, *args, **kwargs)
-
 
 class Correspondent(MatchingModel):
 
@@ -233,7 +228,10 @@ class Document(models.Model):
         verbose_name_plural = _("documents")
 
     def __str__(self):
-        created = datetime.date.isoformat(self.created)
+        if is_aware(self.created):
+            created = timezone.localdate(self.created).isoformat()
+        else:
+            created = datetime.date.isoformat(self.created)
         if self.correspondent and self.title:
             return f"{created} {self.correspondent} {self.title}"
         else:
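
Note: the __str__ change converts aware timestamps into the local date instead of the UTC date. A quick sketch of the difference (assuming Django settings with a non-UTC TIME_ZONE are configured):

    import datetime

    from django.utils import timezone

    aware = datetime.datetime(2020, 12, 25, 0, 30, tzinfo=datetime.timezone.utc)

    print(timezone.localdate(aware))  # may be 2020-12-24 in a zone west of UTC
    print(aware.date().isoformat())   # always 2020-12-25, ignoring the zone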
@@ -210,6 +210,13 @@ def parse_date(filename, text):
         }
     )
 
+    def __filter(date):
+        if date and date.year > 1900 and \
+                date <= timezone.now() and \
+                date.date() not in settings.IGNORE_DATES:
+            return date
+        return None
+
     date = None
 
     # if filename date parsing is enabled, search there first:
@@ -223,7 +230,8 @@ def parse_date(filename, text):
             # Skip all matches that do not parse to a proper date
             continue
 
-        if date and date.year > 1900 and date <= timezone.now():
+        date = __filter(date)
+        if date is not None:
             return date
 
     # Iterate through all regex matches in text and try to parse the date
@@ -236,10 +244,9 @@ def parse_date(filename, text):
             # Skip all matches that do not parse to a proper date
             continue
 
-        if date and date.year > 1900 and date <= timezone.now():
+        date = __filter(date)
+        if date is not None:
             break
-        else:
-            date = None
 
     return date
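
Note: the shared __filter() also honors the new IGNORE_DATES setting. A sketch of the behavior pinned down by test_ignored_dates later in this commit:

    import datetime

    from django.test import override_settings

    from documents.parsers import parse_date

    text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem ipsum"
    with override_settings(IGNORE_DATES=(datetime.date(2019, 11, 3),
                                         datetime.date(2020, 1, 17))):
        # The first two candidates are ignored, so 13.02.2018 wins.
        print(parse_date("", text))  # 2018-02-13 00:00 in the configured TIME_ZONE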
@@ -382,13 +382,6 @@ class PostDocumentSerializer(serializers.Serializer):
 
         return document.name, document_data
 
-    def validate_title(self, title):
-        if title:
-            return title
-        else:
-            # do not return empty strings.
-            return None
-
     def validate_correspondent(self, correspondent):
         if correspondent:
             return correspondent.id
@@ -38,7 +38,7 @@ def set_correspondent(sender,
     if document.correspondent and not replace:
         return
 
-    potential_correspondents = matching.match_correspondents(document.content,
+    potential_correspondents = matching.match_correspondents(document,
                                                              classifier)
 
     potential_count = len(potential_correspondents)
@@ -81,7 +81,7 @@ def set_document_type(sender,
     if document.document_type and not replace:
         return
 
-    potential_document_type = matching.match_document_types(document.content,
+    potential_document_type = matching.match_document_types(document,
                                                             classifier)
 
     potential_count = len(potential_document_type)
@@ -130,7 +130,7 @@ def set_tags(sender,
 
     current_tags = set(document.tags.all())
 
-    matched_tags = matching.match_tags(document.content, classifier)
+    matched_tags = matching.match_tags(document, classifier)
 
     relevant_tags = set(matched_tags) - current_tags
 
@@ -1,6 +1,7 @@
 <!doctype html>
 
 {% load static %}
+{% load i18n %}
 
 <html lang="en">
 <head>
@@ -16,7 +17,7 @@
   <link rel="stylesheet" href="{% static styles_css %}">
 </head>
 <body>
-  <app-root>Loading...</app-root>
+  <app-root>{% translate "Paperless-ng is loading..." %}</app-root>
   <script src="{% static runtime_js %}" defer></script>
   <script src="{% static polyfills_js %}" defer></script>
   <script src="{% static main_js %}" defer></script>
@@ -1,6 +1,7 @@
 <!doctype html>
 
 {% load static %}
+{% load i18n %}
 
 <html lang="en">
 <head>
@@ -9,7 +10,7 @@
     <meta name="description" content="">
     <meta name="author" content="Mark Otto, Jacob Thornton, and Bootstrap contributors">
     <meta name="generator" content="Jekyll v4.1.1">
-    <title>Paperless Sign In</title>
+    <title>{% translate "Paperless-ng signed out" %}</title>
 
     <!-- Bootstrap core CSS -->
     <link href="{% static 'bootstrap.min.css' %}" rel="stylesheet">
@@ -36,9 +37,9 @@
 
   <body class="text-center">
     <div class="form-signin">
-      <img class="mb-4" src="{% static 'frontend/assets/logo.svg' %}" alt="" width="300">
-      <p>You have been successfully logged out. Bye!</p>
-      <a href="/">Sign in again</a>
+      <img class="mb-4" src="{% static 'frontend/en-US/assets/logo.svg' %}" alt="" width="300">
+      <p>{% translate "You have been successfully logged out. Bye!" %}</p>
+      <a href="/">{% translate "Sign in again" %}</a>
     </div>
   </body>
 </html>
@@ -1,6 +1,7 @@
 <!doctype html>
 
 {% load static %}
+{% load i18n %}
 
 <html lang="en">
 <head>
@@ -9,7 +10,7 @@
     <meta name="description" content="">
     <meta name="author" content="Mark Otto, Jacob Thornton, and Bootstrap contributors">
     <meta name="generator" content="Jekyll v4.1.1">
-    <title>Paperless Sign In</title>
+    <title>{% translate "Paperless-ng sign in" %}</title>
 
     <!-- Bootstrap core CSS -->
     <link href="{% static 'bootstrap.min.css' %}" rel="stylesheet">
@@ -37,18 +38,20 @@
   <body class="text-center">
     <form class="form-signin" method="post">
       {% csrf_token %}
-      <img class="mb-4" src="{% static 'frontend/assets/logo.svg' %}" alt="" width="300">
-      <p>Please sign in.</p>
+      <img class="mb-4" src="{% static 'frontend/en-US/assets/logo.svg' %}" alt="" width="300">
+      <p>{% translate "Please sign in." %}</p>
       {% if form.errors %}
         <div class="alert alert-danger" role="alert">
-          Your username and password didn't match. Please try again.
+          {% translate "Your username and password didn't match. Please try again." %}
         </div>
       {% endif %}
-      <label for="inputUsername" class="sr-only">Username</label>
-      <input type="text" name="username" id="inputUsername" class="form-control" placeholder="Username" required autofocus>
-      <label for="inputPassword" class="sr-only">Password</label>
-      <input type="password" name="password" id="inputPassword" class="form-control" placeholder="Password" required>
-      <button class="btn btn-lg btn-primary btn-block" type="submit">Sign in</button>
+      {% translate "Username" as i18n_username %}
+      {% translate "Password" as i18n_password %}
+      <label for="inputUsername" class="sr-only">{{ i18n_username }}</label>
+      <input type="text" name="username" id="inputUsername" class="form-control" placeholder="{{ i18n_username }}" required autofocus>
+      <label for="inputPassword" class="sr-only">{{ i18n_password }}</label>
+      <input type="password" name="password" id="inputPassword" class="form-control" placeholder="{{ i18n_password }}" required>
+      <button class="btn btn-lg btn-primary btn-block" type="submit">{% translate "Sign in" %}</button>
     </form>
   </body>
 </html>
BIN  src/documents/tests/samples/documents/originals/0000002.pdf (new file, binary not shown)
BIN  src/documents/tests/samples/documents/originals/0000003.pdf (new file, binary not shown)
BIN  src/documents/tests/samples/documents/originals/0000004.pdf.gpg (new file, binary not shown)
BIN  src/documents/tests/samples/documents/thumbnails/0000002.png (new file, 7.7 KiB)
BIN  src/documents/tests/samples/documents/thumbnails/0000003.png (new file, 7.7 KiB)
@@ -5,12 +5,14 @@ from django.test import TestCase
 from django.utils import timezone
 
 from documents.admin import DocumentAdmin
-from documents.models import Document, Tag
+from documents.models import Document
+from documents.tests.utils import DirectoriesMixin
 
 
-class TestDocumentAdmin(TestCase):
+class TestDocumentAdmin(DirectoriesMixin, TestCase):
 
     def setUp(self) -> None:
+        super(TestDocumentAdmin, self).setUp()
         self.doc_admin = DocumentAdmin(model=Document, admin_site=AdminSite())
 
     @mock.patch("documents.admin.index.add_or_update_document")
@@ -114,8 +114,6 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         results = response.data['results']
         self.assertEqual(len(results[0]), 0)
 
-
-
     def test_document_actions(self):
 
         _, filename = tempfile.mkstemp(dir=self.dirs.originals_dir)
@@ -230,6 +228,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertEqual(len(results), 2)
         self.assertCountEqual([results[0]['id'], results[1]['id']], [doc1.id, doc3.id])
 
+        response = self.client.get("/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id))
+        self.assertEqual(response.status_code, 200)
+        results = response.data['results']
+        self.assertEqual(len(results), 2)
+        self.assertCountEqual([results[0]['id'], results[1]['id']], [doc2.id, doc3.id])
+
         response = self.client.get("/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id))
         self.assertEqual(response.status_code, 200)
         results = response.data['results']
@@ -455,6 +459,23 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertIsNone(kwargs['override_document_type_id'])
         self.assertIsNone(kwargs['override_tag_ids'])
 
+    @mock.patch("documents.views.async_task")
+    def test_upload_empty_metadata(self, m):
+
+        with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
+            response = self.client.post("/api/documents/post_document/", {"document": f, "title": "", "correspondent": "", "document_type": ""})
+
+        self.assertEqual(response.status_code, 200)
+
+        m.assert_called_once()
+
+        args, kwargs = m.call_args
+        self.assertEqual(kwargs['override_filename'], "simple.pdf")
+        self.assertIsNone(kwargs['override_title'])
+        self.assertIsNone(kwargs['override_correspondent_id'])
+        self.assertIsNone(kwargs['override_document_type_id'])
+        self.assertIsNone(kwargs['override_tag_ids'])
+
     @mock.patch("documents.views.async_task")
     def test_upload_invalid_form(self, m):
 
@@ -908,6 +929,14 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
         doc2 = Document.objects.get(id=self.doc2.id)
         self.assertEqual(doc2.correspondent, self.c1)
 
+    def test_api_no_correspondent(self):
+        response = self.client.post("/api/documents/bulk_edit/", json.dumps({
+            "documents": [self.doc2.id],
+            "method": "set_correspondent",
+            "parameters": {}
+        }), content_type='application/json')
+        self.assertEqual(response.status_code, 400)
+
     def test_api_invalid_document_type(self):
         self.assertEqual(self.doc2.document_type, self.dt1)
         response = self.client.post("/api/documents/bulk_edit/", json.dumps({
@@ -920,6 +949,14 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
         doc2 = Document.objects.get(id=self.doc2.id)
         self.assertEqual(doc2.document_type, self.dt1)
 
+    def test_api_no_document_type(self):
+        response = self.client.post("/api/documents/bulk_edit/", json.dumps({
+            "documents": [self.doc2.id],
+            "method": "set_document_type",
+            "parameters": {}
+        }), content_type='application/json')
+        self.assertEqual(response.status_code, 400)
+
     def test_api_add_invalid_tag(self):
         self.assertEqual(list(self.doc2.tags.all()), [self.t1])
         response = self.client.post("/api/documents/bulk_edit/", json.dumps({
@@ -931,6 +968,14 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
 
         self.assertEqual(list(self.doc2.tags.all()), [self.t1])
 
+    def test_api_add_tag_no_tag(self):
+        response = self.client.post("/api/documents/bulk_edit/", json.dumps({
+            "documents": [self.doc2.id],
+            "method": "add_tag",
+            "parameters": {}
+        }), content_type='application/json')
+        self.assertEqual(response.status_code, 400)
+
     def test_api_delete_invalid_tag(self):
         self.assertEqual(list(self.doc2.tags.all()), [self.t1])
         response = self.client.post("/api/documents/bulk_edit/", json.dumps({
@@ -942,6 +987,14 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
 
         self.assertEqual(list(self.doc2.tags.all()), [self.t1])
 
+    def test_api_delete_tag_no_tag(self):
+        response = self.client.post("/api/documents/bulk_edit/", json.dumps({
+            "documents": [self.doc2.id],
+            "method": "remove_tag",
+            "parameters": {}
+        }), content_type='application/json')
+        self.assertEqual(response.status_code, 400)
+
     def test_api_modify_invalid_tags(self):
         self.assertEqual(list(self.doc2.tags.all()), [self.t1])
         response = self.client.post("/api/documents/bulk_edit/", json.dumps({
@@ -951,6 +1004,21 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
         }), content_type='application/json')
         self.assertEqual(response.status_code, 400)
 
+    def test_api_modify_tags_no_tags(self):
+        response = self.client.post("/api/documents/bulk_edit/", json.dumps({
+            "documents": [self.doc2.id],
+            "method": "modify_tags",
+            "parameters": {"remove_tags": [1123123]}
+        }), content_type='application/json')
+        self.assertEqual(response.status_code, 400)
+
+        response = self.client.post("/api/documents/bulk_edit/", json.dumps({
+            "documents": [self.doc2.id],
+            "method": "modify_tags",
+            "parameters": {'add_tags': [self.t2.id, 1657]}
+        }), content_type='application/json')
+        self.assertEqual(response.status_code, 400)
+
     def test_api_selection_data_empty(self):
         response = self.client.post("/api/documents/selection_data/", json.dumps({
             "documents": []
@@ -468,6 +468,42 @@ class TestConsumer(DirectoriesMixin, TestCase):
         self.assertTrue(os.path.isfile(dst))
 
 
+class PreConsumeTestCase(TestCase):
+
+    @mock.patch("documents.consumer.Popen")
+    @override_settings(PRE_CONSUME_SCRIPT=None)
+    def test_no_pre_consume_script(self, m):
+        c = Consumer()
+        c.path = "path-to-file"
+        c.run_pre_consume_script()
+        m.assert_not_called()
+
+    @mock.patch("documents.consumer.Popen")
+    @override_settings(PRE_CONSUME_SCRIPT="does-not-exist")
+    def test_pre_consume_script_not_found(self, m):
+        c = Consumer()
+        c.path = "path-to-file"
+        self.assertRaises(ConsumerError, c.run_pre_consume_script)
+
+    @mock.patch("documents.consumer.Popen")
+    def test_pre_consume_script(self, m):
+        with tempfile.NamedTemporaryFile() as script:
+            with override_settings(PRE_CONSUME_SCRIPT=script.name):
+                c = Consumer()
+                c.path = "path-to-file"
+                c.run_pre_consume_script()
+
+                m.assert_called_once()
+
+                args, kwargs = m.call_args
+
+                command = args[0]
+
+                self.assertEqual(command[0], script.name)
+                self.assertEqual(command[1], "path-to-file")
+
+
 class PostConsumeTestCase(TestCase):
 
     @mock.patch("documents.consumer.Popen")
@@ -483,36 +519,45 @@ class PostConsumeTestCase(TestCase):
 
         m.assert_not_called()
 
-    @mock.patch("documents.consumer.Popen")
-    @override_settings(POST_CONSUME_SCRIPT="script")
-    def test_post_consume_script_simple(self, m):
-
-        doc = Document.objects.create(title="Test", mime_type="application/pdf")
-
-        Consumer().run_post_consume_script(doc)
-
-        m.assert_called_once()
+    @override_settings(POST_CONSUME_SCRIPT="does-not-exist")
+    def test_post_consume_script_not_found(self):
+        doc = Document.objects.create(title="Test", mime_type="application/pdf")
+        self.assertRaises(ConsumerError, Consumer().run_post_consume_script, doc)
+
+    @mock.patch("documents.consumer.Popen")
+    def test_post_consume_script_simple(self, m):
+        with tempfile.NamedTemporaryFile() as script:
+            with override_settings(POST_CONSUME_SCRIPT=script.name):
+                doc = Document.objects.create(title="Test", mime_type="application/pdf")
+
+                Consumer().run_post_consume_script(doc)
+
+                m.assert_called_once()
 
     @mock.patch("documents.consumer.Popen")
-    @override_settings(POST_CONSUME_SCRIPT="script")
     def test_post_consume_script_with_correspondent(self, m):
-        c = Correspondent.objects.create(name="my_bank")
-        doc = Document.objects.create(title="Test", mime_type="application/pdf", correspondent=c)
-        tag1 = Tag.objects.create(name="a")
-        tag2 = Tag.objects.create(name="b")
-        doc.tags.add(tag1)
-        doc.tags.add(tag2)
-
-        Consumer().run_post_consume_script(doc)
-
-        m.assert_called_once()
-
-        args, kwargs = m.call_args
-
-        command = args[0]
-
-        self.assertEqual(command[0], "script")
-        self.assertEqual(command[1], str(doc.pk))
-        self.assertEqual(command[5], f"/api/documents/{doc.pk}/download/")
-        self.assertEqual(command[6], f"/api/documents/{doc.pk}/thumb/")
-        self.assertEqual(command[7], "my_bank")
-        self.assertCountEqual(command[8].split(","), ["a", "b"])
+        with tempfile.NamedTemporaryFile() as script:
+            with override_settings(POST_CONSUME_SCRIPT=script.name):
+                c = Correspondent.objects.create(name="my_bank")
+                doc = Document.objects.create(title="Test", mime_type="application/pdf", correspondent=c)
+                tag1 = Tag.objects.create(name="a")
+                tag2 = Tag.objects.create(name="b")
+                doc.tags.add(tag1)
+                doc.tags.add(tag2)
+
+                Consumer().run_post_consume_script(doc)
+
+                m.assert_called_once()
+
+                args, kwargs = m.call_args
+
+                command = args[0]
+
+                self.assertEqual(command[0], script.name)
+                self.assertEqual(command[1], str(doc.pk))
+                self.assertEqual(command[5], f"/api/documents/{doc.pk}/download/")
+                self.assertEqual(command[6], f"/api/documents/{doc.pk}/thumb/")
+                self.assertEqual(command[7], "my_bank")
+                self.assertCountEqual(command[8].split(","), ["a", "b"])
@@ -138,3 +138,18 @@ class TestDate(TestCase):
     @override_settings(FILENAME_DATE_ORDER="YMD")
     def test_filename_date_parse_invalid(self, *args):
         self.assertIsNone(parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"))
+
+    @override_settings(IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)))
+    def test_ignored_dates(self, *args):
+        text = (
+            "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem "
+            "ipsum"
+        )
+        date = parse_date("", text)
+        self.assertEqual(
+            date,
+            datetime.datetime(
+                2018, 2, 13, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
+        )
@@ -1,10 +1,10 @@
 import shutil
 import tempfile
-from datetime import datetime
 from pathlib import Path
 from unittest import mock
 
 from django.test import TestCase, override_settings
+from django.utils import timezone
 
 from ..models import Document, Correspondent
 
@@ -47,20 +47,20 @@ class TestDocument(TestCase):
 
     def test_file_name(self):
 
-        doc = Document(mime_type="application/pdf", title="test", created=datetime(2020, 12, 25))
+        doc = Document(mime_type="application/pdf", title="test", created=timezone.datetime(2020, 12, 25))
         self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf")
 
     def test_file_name_jpg(self):
 
-        doc = Document(mime_type="image/jpeg", title="test", created=datetime(2020, 12, 25))
+        doc = Document(mime_type="image/jpeg", title="test", created=timezone.datetime(2020, 12, 25))
         self.assertEqual(doc.get_public_filename(), "2020-12-25 test.jpg")
 
     def test_file_name_unknown(self):
 
-        doc = Document(mime_type="application/zip", title="test", created=datetime(2020, 12, 25))
+        doc = Document(mime_type="application/zip", title="test", created=timezone.datetime(2020, 12, 25))
         self.assertEqual(doc.get_public_filename(), "2020-12-25 test.zip")
 
     def test_file_name_invalid_type(self):
 
-        doc = Document(mime_type="image/jpegasd", title="test", created=datetime(2020, 12, 25))
+        doc = Document(mime_type="image/jpegasd", title="test", created=timezone.datetime(2020, 12, 25))
         self.assertEqual(doc.get_public_filename(), "2020-12-25 test")
@@ -70,18 +70,18 @@ class TestDecryptDocuments(TestCase):
             PASSPHRASE="test"
         ).enable()
 
-        doc = Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
+        doc = Document.objects.create(checksum="82186aaa94f0b98697d704b90fd1c072", title="wow", filename="0000004.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
 
-        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg"))
-        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000002.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"))
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000004.pdf.gpg"), os.path.join(originals_dir, "0000004.pdf.gpg"))
+        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000004.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"))
 
         call_command('decrypt_documents')
 
         doc.refresh_from_db()
 
         self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
-        self.assertEqual(doc.filename, "0000002.pdf")
-        self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf")))
+        self.assertEqual(doc.filename, "0000004.pdf")
+        self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000004.pdf")))
         self.assertTrue(os.path.isfile(doc.source_path))
         self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png")))
         self.assertTrue(os.path.isfile(doc.thumbnail_path))
@@ -3,6 +3,8 @@ import json
 import os
 import shutil
 import tempfile
+from pathlib import Path
+from unittest import mock
 
 from django.core.management import call_command
 from django.test import TestCase, override_settings
@@ -10,54 +12,87 @@ from django.test import TestCase, override_settings
 from documents.management.commands import document_exporter
 from documents.models import Document, Tag, DocumentType, Correspondent
+from documents.sanity_checker import check_sanity
+from documents.settings import EXPORTER_FILE_NAME
 from documents.tests.utils import DirectoriesMixin, paperless_environment
 
 
 class TestExportImport(DirectoriesMixin, TestCase):
 
+    def setUp(self) -> None:
+        self.target = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.target)
+
+        self.d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow1", filename="0000001.pdf", mime_type="application/pdf")
+        self.d2 = Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow2", filename="0000002.pdf", mime_type="application/pdf")
+        self.d3 = Document.objects.create(content="Content", checksum="d38d7ed02e988e072caf924e0f3fcb76", title="wow2", filename="0000003.pdf", mime_type="application/pdf")
+        self.d4 = Document.objects.create(content="Content", checksum="82186aaa94f0b98697d704b90fd1c072", title="wow_dec", filename="0000004.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
+
+        self.t1 = Tag.objects.create(name="t")
+        self.dt1 = DocumentType.objects.create(name="dt")
+        self.c1 = Correspondent.objects.create(name="c")
+
+        self.d1.tags.add(self.t1)
+        self.d1.correspondent = self.c1
+        self.d1.document_type = self.dt1
+        self.d1.save()
+        super(TestExportImport, self).setUp()
+
+    def _get_document_from_manifest(self, manifest, id):
+        f = list(filter(lambda d: d['model'] == "documents.document" and d['pk'] == id, manifest))
+        if len(f) == 1:
+            return f[0]
+        else:
+            raise ValueError(f"document with id {id} does not exist in manifest")
+
     @override_settings(
         PASSPHRASE="test"
     )
-    def test_exporter(self):
+    def _do_export(self, use_filename_format=False, compare_checksums=False, delete=False):
+        args = ['document_exporter', self.target]
+        if use_filename_format:
+            args += ["--use-filename-format"]
+        if compare_checksums:
+            args += ["--compare-checksums"]
+        if delete:
+            args += ["--delete"]
+
+        call_command(*args)
+
+        with open(os.path.join(self.target, "manifest.json")) as f:
+            manifest = json.load(f)
+
+        return manifest
+
+    def test_exporter(self, use_filename_format=False):
         shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
         shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
 
-        file = os.path.join(self.dirs.originals_dir, "0000001.pdf")
+        manifest = self._do_export(use_filename_format=use_filename_format)
 
-        d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow", filename="0000001.pdf", mime_type="application/pdf")
-        d2 = Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
-        t1 = Tag.objects.create(name="t")
-        dt1 = DocumentType.objects.create(name="dt")
-        c1 = Correspondent.objects.create(name="c")
+        self.assertEqual(len(manifest), 7)
+        self.assertEqual(len(list(filter(lambda e: e['model'] == 'documents.document', manifest))), 4)
 
-        d1.tags.add(t1)
-        d1.correspondents = c1
-        d1.document_type = dt1
-        d1.save()
-        d2.save()
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
 
-        target = tempfile.mkdtemp()
-        self.addCleanup(shutil.rmtree, target)
-
-        call_command('document_exporter', target)
-
-        with open(os.path.join(target, "manifest.json")) as f:
-            manifest = json.load(f)
-
-        self.assertEqual(len(manifest), 5)
+        self.assertEqual(self._get_document_from_manifest(manifest, self.d1.id)['fields']['title'], "wow1")
+        self.assertEqual(self._get_document_from_manifest(manifest, self.d2.id)['fields']['title'], "wow2")
+        self.assertEqual(self._get_document_from_manifest(manifest, self.d3.id)['fields']['title'], "wow2")
+        self.assertEqual(self._get_document_from_manifest(manifest, self.d4.id)['fields']['title'], "wow_dec")
 
         for element in manifest:
             if element['model'] == 'documents.document':
-                fname = os.path.join(target, element[document_exporter.EXPORTER_FILE_NAME])
+                fname = os.path.join(self.target, element[document_exporter.EXPORTER_FILE_NAME])
                 self.assertTrue(os.path.exists(fname))
-                self.assertTrue(os.path.exists(os.path.join(target, element[document_exporter.EXPORTER_THUMBNAIL_NAME])))
+                self.assertTrue(os.path.exists(os.path.join(self.target, element[document_exporter.EXPORTER_THUMBNAIL_NAME])))
 
                 with open(fname, "rb") as f:
                     checksum = hashlib.md5(f.read()).hexdigest()
                 self.assertEqual(checksum, element['fields']['checksum'])
 
                 self.assertEqual(element['fields']['storage_type'], Document.STORAGE_TYPE_UNENCRYPTED)
 
                 if document_exporter.EXPORTER_ARCHIVE_NAME in element:
-                    fname = os.path.join(target, element[document_exporter.EXPORTER_ARCHIVE_NAME])
+                    fname = os.path.join(self.target, element[document_exporter.EXPORTER_ARCHIVE_NAME])
                     self.assertTrue(os.path.exists(fname))
 
                     with open(fname, "rb") as f:
@@ -65,24 +100,123 @@ class TestExportImport(DirectoriesMixin, TestCase):
                     self.assertEqual(checksum, element['fields']['archive_checksum'])
 
         with paperless_environment() as dirs:
-            self.assertEqual(Document.objects.count(), 2)
+            self.assertEqual(Document.objects.count(), 4)
             Document.objects.all().delete()
             Correspondent.objects.all().delete()
             DocumentType.objects.all().delete()
             Tag.objects.all().delete()
             self.assertEqual(Document.objects.count(), 0)
 
-            call_command('document_importer', target)
-            self.assertEqual(Document.objects.count(), 2)
+            call_command('document_importer', self.target)
+            self.assertEqual(Document.objects.count(), 4)
+            self.assertEqual(Tag.objects.count(), 1)
+            self.assertEqual(Correspondent.objects.count(), 1)
+            self.assertEqual(DocumentType.objects.count(), 1)
+            self.assertEqual(Document.objects.get(id=self.d1.id).title, "wow1")
+            self.assertEqual(Document.objects.get(id=self.d2.id).title, "wow2")
+            self.assertEqual(Document.objects.get(id=self.d3.id).title, "wow2")
+            self.assertEqual(Document.objects.get(id=self.d4.id).title, "wow_dec")
+            messages = check_sanity()
+            # everything is alright after the test
+            self.assertEqual(len(messages), 0, str([str(m) for m in messages]))
 
     @override_settings(
         PAPERLESS_FILENAME_FORMAT="{title}"
     )
     def test_exporter_with_filename_format(self):
-        self.test_exporter()
+        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
+        shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
+
+        with override_settings(PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}"):
+            self.test_exporter(use_filename_format=True)
+
+    def test_update_export_changed_time(self):
+        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
+        shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
+
+        self._do_export()
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+
+        st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
+
+        with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
+            self._do_export()
+            m.assert_not_called()
+
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+        st_mtime_2 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
+
+        Path(self.d1.source_path).touch()
+
+        with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
+            self._do_export()
+            self.assertEqual(m.call_count, 1)
+
+        st_mtime_3 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+
+        self.assertNotEqual(st_mtime_1, st_mtime_2)
+        self.assertNotEqual(st_mtime_2, st_mtime_3)
+
+    def test_update_export_changed_checksum(self):
+        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
+        shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
+
+        self._do_export()
+
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+
+        with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
+            self._do_export()
+            m.assert_not_called()
+
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+
+        self.d2.checksum = "asdfasdgf3"
+        self.d2.save()
+
+        with mock.patch("documents.management.commands.document_exporter.shutil.copy2") as m:
+            self._do_export(compare_checksums=True)
+            self.assertEqual(m.call_count, 1)
+
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+
+    def test_update_export_deleted_document(self):
+        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
+        shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
+
+        manifest = self._do_export()
+
+        self.assertTrue(len(manifest), 7)
+        doc_from_manifest = self._get_document_from_manifest(manifest, self.d3.id)
+        self.assertTrue(os.path.isfile(os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])))
+
+        self.d3.delete()
+
+        manifest = self._do_export()
+        self.assertRaises(ValueError, self._get_document_from_manifest, manifest, self.d3.id)
+        self.assertTrue(os.path.isfile(os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])))
+
+        manifest = self._do_export(delete=True)
+        self.assertFalse(os.path.isfile(os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])))
+
+        self.assertTrue(len(manifest), 6)
+
+    @override_settings(PAPERLESS_FILENAME_FORMAT="{title}/{correspondent}")
+    def test_update_export_changed_location(self):
+        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
+        shutil.copytree(os.path.join(os.path.dirname(__file__), "samples", "documents"), os.path.join(self.dirs.media_dir, "documents"))
+
+        m = self._do_export(use_filename_format=True)
+        self.assertTrue(os.path.isfile(os.path.join(self.target, "wow1", "c.pdf")))
+
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+
+        self.d1.title = "new_title"
+        self.d1.save()
+        self._do_export(use_filename_format=True, delete=True)
+        self.assertFalse(os.path.isfile(os.path.join(self.target, "wow1", "c.pdf")))
+        self.assertFalse(os.path.isdir(os.path.join(self.target, "wow1")))
+        self.assertTrue(os.path.isfile(os.path.join(self.target, "new_title", "c.pdf")))
+        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
+        self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none.pdf")))
+        self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf")))
 
     def test_export_missing_files(self):
 
src/documents/tests/test_management_thumbnails.py (new file, 52 lines)
@@ -0,0 +1,52 @@
import os
import shutil
from unittest import mock

from django.core.management import call_command
from django.test import TestCase

from documents.management.commands.document_thumbnails import _process_document
from documents.models import Document, Tag, Correspondent, DocumentType
from documents.tests.utils import DirectoriesMixin


class TestMakeThumbnails(DirectoriesMixin, TestCase):

    def make_models(self):
        self.d1 = Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf", filename="test.pdf")
        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), self.d1.source_path)

        self.d2 = Document.objects.create(checksum="Ass", title="A", content="first document", mime_type="application/pdf", filename="test2.pdf")
        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), self.d2.source_path)

    def setUp(self) -> None:
        super(TestMakeThumbnails, self).setUp()
        self.make_models()

    def test_process_document(self):
        self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
        _process_document(self.d1.id)
        self.assertTrue(os.path.isfile(self.d1.thumbnail_path))

    @mock.patch("documents.management.commands.document_thumbnails.shutil.move")
    def test_process_document_invalid_mime_type(self, m):
        self.d1.mime_type = "asdasdasd"
        self.d1.save()

        _process_document(self.d1.id)

        m.assert_not_called()

    def test_command(self):
        self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
        self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
        call_command('document_thumbnails')
        self.assertTrue(os.path.isfile(self.d1.thumbnail_path))
        self.assertTrue(os.path.isfile(self.d2.thumbnail_path))

    def test_command_documentid(self):
        self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
        self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
        call_command('document_thumbnails', '-d', f"{self.d1.id}")
        self.assertTrue(os.path.isfile(self.d1.thumbnail_path))
        self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
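test_process_document_invalid_mime_type above only works if _process_document returns before touching the filesystem when no parser handles the mime type. A minimal sketch of that guard; apart from _process_document itself, the helper names are assumptions, not quotes from the command's source:

from documents.models import Document
from documents.parsers import get_parser_class_for_mime_type

def _process_document_sketch(document_id):
    document = Document.objects.get(id=document_id)
    parser_class = get_parser_class_for_mime_type(document.mime_type)
    if not parser_class:
        # Unsupported mime type: bail out before any thumbnail is
        # written, so the patched shutil.move is never reached.
        return
    # ... otherwise parse, render the thumbnail, and move it into place.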
@@ -21,13 +21,15 @@ class TestMatching(TestCase):
            matching_algorithm=getattr(klass, algorithm)
        )
        for string in true:
            doc = Document(content=string)
            self.assertTrue(
                matching.matches(instance, string),
                matching.matches(instance, doc),
                '"%s" should match "%s" but it does not' % (text, string)
            )
        for string in false:
            doc = Document(content=string)
            self.assertFalse(
                matching.matches(instance, string),
                matching.matches(instance, doc),
                '"%s" should not match "%s" but it does' % (text, string)
            )
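The paired lines in this hunk show matching.matches switching from a raw string to a Document argument, so matchers now read the document's content field. A minimal sketch of the new call shape, with the tag values assumed for illustration:

from documents import matching
from documents.models import Document, Tag

tag = Tag(match="invoice", matching_algorithm=Tag.MATCH_ANY)
doc = Document(content="this is an invoice from ACME")
assert matching.matches(tag, doc)  # the matcher inspects doc.content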
@@ -169,7 +171,7 @@ class TestMatching(TestCase):
    def test_match_regex(self):

        self._test_matching(
            r"alpha\w+gamma",
            "alpha\w+gamma",
            "MATCH_REGEX",
            (
                "I have alpha_and_gamma in me",
@@ -187,6 +189,16 @@ class TestMatching(TestCase):
            )
        )

    def test_match_invalid_regex(self):
        self._test_matching(
            "[[",
            "MATCH_REGEX",
            [],
            [
                "Don't match this"
            ]
        )

    def test_match_fuzzy(self):

        self._test_matching(
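The invalid-regex case added above expects the pattern "[[" to match nothing rather than raise, which implies the matcher swallows re.error. A generic sketch of that defensive pattern, not the project's literal implementation:

import logging
import re

def safe_regex_search(pattern, text):
    # An invalid pattern such as "[[" should match nothing, not crash.
    try:
        return bool(re.search(pattern, text, re.IGNORECASE))
    except re.error:
        logging.getLogger(__name__).warning(f"Invalid regex: {pattern}")
        return False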
@@ -98,7 +98,7 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations):

        doc2 = Document.objects.create(checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG)
        self.doc2_id = doc2.id
        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), source_path_before(doc2))
        shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000004.pdf.gpg"), source_path_before(doc2))

    def testMimeTypesMigrated(self):
        Document = self.apps.get_model('documents', 'Document')
@@ -120,3 +120,4 @@ class TestParserAvailability(TestCase):

        self.assertTrue(is_file_ext_supported('.pdf'))
        self.assertFalse(is_file_ext_supported('.hsdfh'))
        self.assertFalse(is_file_ext_supported(''))
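The new empty-string case above matters for paths with no suffix at all: os.path.splitext("README") yields "", which must be rejected rather than treated as supported. A minimal sketch of the consumer-side guard this protects, with the wrapper name assumed:

import os
from documents.parsers import is_file_ext_supported

def should_consume(filepath):
    # Extensionless files produce ext == "", which the new case rejects.
    ext = os.path.splitext(filepath)[1]
    return is_file_ext_supported(ext)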
34  src/documents/tests/test_settings.py  Normal file
@@ -0,0 +1,34 @@
import logging
from unittest import mock

from django.test import TestCase

from paperless.settings import default_task_workers, default_threads_per_worker


class TestSettings(TestCase):

    @mock.patch("paperless.settings.multiprocessing.cpu_count")
    def test_single_core(self, cpu_count):
        cpu_count.return_value = 1

        default_workers = default_task_workers()
        default_threads = default_threads_per_worker(default_workers)

        self.assertEqual(default_workers, 1)
        self.assertEqual(default_threads, 1)

    def test_workers_threads(self):
        for i in range(2, 64):
            with mock.patch("paperless.settings.multiprocessing.cpu_count") as cpu_count:
                cpu_count.return_value = i

                default_workers = default_task_workers()
                default_threads = default_threads_per_worker(default_workers)

                self.assertTrue(default_workers >= 1)
                self.assertTrue(default_threads >= 1)

                self.assertTrue(default_workers * default_threads <= i, f"{i}")
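The invariant asserted above, workers times threads never exceeding the CPU count, falls out naturally if workers default to roughly the square root of the core count and each worker gets cores divided by workers threads. A minimal sketch of such a scheme; this is an assumption about the shape of the defaults, not a copy of paperless.settings:

import math
import multiprocessing

def sketch_task_workers():
    # Roughly sqrt(cores) workers, never fewer than one.
    return max(math.floor(math.sqrt(multiprocessing.cpu_count())), 1)

def sketch_threads_per_worker(workers):
    # Split the cores across workers; flooring keeps workers * threads <= cores.
    return max(math.floor(multiprocessing.cpu_count() / workers), 1)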
30  src/documents/tests/test_views.py  Normal file
@@ -0,0 +1,30 @@
from django.conf import settings
from django.contrib.auth.models import User
from django.test import TestCase


class TestViews(TestCase):

    def setUp(self) -> None:
        self.user = User.objects.create_user("testuser")

    def test_login_redirect(self):
        response = self.client.get('/')
        self.assertEqual(response.status_code, 302)
        self.assertEqual(response.url, "/accounts/login/?next=/")

    def test_index(self):
        self.client.force_login(self.user)
        for (language_given, language_actual) in [("", "en-US"), ("en-US", "en-US"), ("de", "de"), ("en", "en-US"), ("en-us", "en-US"), ("fr", "fr"), ("jp", "en-US")]:
            if language_given:
                self.client.cookies.load({settings.LANGUAGE_COOKIE_NAME: language_given})
            elif settings.LANGUAGE_COOKIE_NAME in self.client.cookies.keys():
                self.client.cookies.pop(settings.LANGUAGE_COOKIE_NAME)

            response = self.client.get('/')
            self.assertEqual(response.status_code, 200)
            self.assertEqual(response.context_data['webmanifest'], f"frontend/{language_actual}/manifest.webmanifest")
            self.assertEqual(response.context_data['styles_css'], f"frontend/{language_actual}/styles.css")
            self.assertEqual(response.context_data['runtime_js'], f"frontend/{language_actual}/runtime.js")
            self.assertEqual(response.context_data['polyfills_js'], f"frontend/{language_actual}/polyfills.js")
            self.assertEqual(response.context_data['main_js'], f"frontend/{language_actual}/main.js")
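The (cookie, resolved) pairs driven through test_index above imply a fallback chain: use the cookie value when a matching frontend build exists, normalize English variants to en-US, and fall back to en-US for anything unknown. A hypothetical resolver matching those expectations; the set of built locales is assumed:

FRONTEND_LANGUAGES = {"en-US", "de", "fr"}  # assumed set of built frontend locales

def resolve_frontend_language(cookie_value):
    if cookie_value in FRONTEND_LANGUAGES:
        return cookie_value
    if cookie_value and cookie_value.lower() in ("en", "en-us"):
        return "en-US"  # "en" and "en-us" both map to the en-US bundle
    return "en-US"      # empty cookie or unknown locale such as "jp"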
@@ -1,3 +1,4 @@
import logging
import os
import tempfile
from datetime import datetime
@@ -79,7 +80,7 @@ class IndexView(TemplateView):
        context['runtime_js'] = f"frontend/{self.get_language()}/runtime.js"
        context['polyfills_js'] = f"frontend/{self.get_language()}/polyfills.js"  # NOQA: E501
        context['main_js'] = f"frontend/{self.get_language()}/main.js"
        context['manifest'] = f"frontend/{self.get_language()}/manifest.webmanifest"  # NOQA: E501
        context['webmanifest'] = f"frontend/{self.get_language()}/manifest.webmanifest"  # NOQA: E501
        return context
@@ -158,6 +159,9 @@ class DocumentViewSet(RetrieveModelMixin,
        "added",
        "archive_serial_number")

    def get_queryset(self):
        return Document.objects.distinct()

    def get_serializer(self, *args, **kwargs):
        fields_param = self.request.query_params.get('fields', None)
        if fields_param:
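The get_queryset override added above returns a distinct queryset because filtering across a many-to-many relation such as tags can emit one row per matching tag. A minimal sketch of the duplicate it prevents, with the filter values assumed:

from documents.models import Document

# Without distinct(), a document carrying both tags would appear twice.
docs = Document.objects.filter(tags__name__in=["tax", "invoice"]).distinct()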
@@ -458,12 +462,21 @@ class SearchView(APIView):
        self.ix = index.open_index()

    def add_infos_to_hit(self, r):
        doc = Document.objects.get(id=r['id'])
        try:
            doc = Document.objects.get(id=r['id'])
        except Document.DoesNotExist:
            logging.getLogger(__name__).warning(
                f"Search index returned a non-existing document: "
                f"id: {r['id']}, title: {r['title']}. "
                f"Search index needs reindex."
            )
            doc = None

        return {'id': r['id'],
                'highlights': r.highlights("content", text=doc.content),
                'highlights': r.highlights("content", text=doc.content) if doc else None,  # NOQA: E501
                'score': r.score,
                'rank': r.rank,
                'document': DocumentSerializer(doc).data,
                'document': DocumentSerializer(doc).data if doc else None,
                'title': r['title']
                }
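The hunk above swaps a hard Document.objects.get for a tolerant lookup, so a stale index entry degrades to a hit with a None document instead of an unhandled exception. The same pattern in isolation, as a generic sketch rather than the view's exact code:

def get_or_none(model, **kwargs):
    # Stale-reference guard: missing rows become None, and callers decide
    # how to degrade (here, null highlights and a null document).
    try:
        return model.objects.get(**kwargs)
    except model.DoesNotExist:
        return None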