Merge branch 'dev' into feature-consume-eml

Author: phail
Date: 2022-05-20 19:29:52 +02:00
189 changed files with 34855 additions and 17114 deletions

View File

@@ -5,6 +5,7 @@ from .models import Document
from .models import DocumentType
from .models import SavedView
from .models import SavedViewFilterRule
from .models import StoragePath
from .models import Tag
@@ -74,19 +75,19 @@ class DocumentAdmin(admin.ModelAdmin):
for o in queryset:
index.remove_document(writer, o)
super(DocumentAdmin, self).delete_queryset(request, queryset)
super().delete_queryset(request, queryset)
def delete_model(self, request, obj):
from documents import index
index.remove_document_from_index(obj)
super(DocumentAdmin, self).delete_model(request, obj)
super().delete_model(request, obj)
def save_model(self, request, obj, form, change):
from documents import index
index.add_or_update_document(obj)
super(DocumentAdmin, self).save_model(request, obj, form, change)
super().save_model(request, obj, form, change)
class RuleInline(admin.TabularInline):
@@ -100,8 +101,19 @@ class SavedViewAdmin(admin.ModelAdmin):
inlines = [RuleInline]
class StoragePathInline(admin.TabularInline):
model = StoragePath
class StoragePathAdmin(admin.ModelAdmin):
list_display = ("name", "path", "match", "matching_algorithm")
list_filter = ("path", "matching_algorithm")
list_editable = ("path", "match", "matching_algorithm")
admin.site.register(Correspondent, CorrespondentAdmin)
admin.site.register(Tag, TagAdmin)
admin.site.register(DocumentType, DocumentTypeAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(SavedView, SavedViewAdmin)
admin.site.register(StoragePath, StoragePathAdmin)

View File

@@ -16,6 +16,7 @@ class DocumentsConfig(AppConfig):
set_correspondent,
set_document_type,
set_tags,
set_storage_path,
add_to_index,
)
@@ -23,6 +24,7 @@ class DocumentsConfig(AppConfig):
document_consumption_finished.connect(set_correspondent)
document_consumption_finished.connect(set_document_type)
document_consumption_finished.connect(set_tags)
document_consumption_finished.connect(set_storage_path)
document_consumption_finished.connect(set_log_entry)
document_consumption_finished.connect(add_to_index)

View File

@@ -32,7 +32,7 @@ class OriginalsOnlyStrategy(BulkArchiveStrategy):
class ArchiveOnlyStrategy(BulkArchiveStrategy):
def __init__(self, zipf):
super(ArchiveOnlyStrategy, self).__init__(zipf)
super().__init__(zipf)
def add_document(self, doc: Document):
if doc.has_archive_version:

View File

@@ -5,6 +5,7 @@ from django_q.tasks import async_task
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
def set_correspondent(doc_ids, correspondent):
@@ -20,6 +21,24 @@ def set_correspondent(doc_ids, correspondent):
return "OK"
def set_storage_path(doc_ids, storage_path):
if storage_path:
storage_path = StoragePath.objects.get(id=storage_path)
qs = Document.objects.filter(
Q(id__in=doc_ids) & ~Q(storage_path=storage_path),
)
affected_docs = [doc.id for doc in qs]
qs.update(storage_path=storage_path)
async_task(
"documents.tasks.bulk_update_documents",
document_ids=affected_docs,
)
return "OK"
def set_document_type(doc_ids, document_type):
if document_type:
document_type = DocumentType.objects.get(id=document_type)
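Editor's note: with this in place, assigning a storage path in bulk mirrors the other bulk-edit actions — pass the document IDs plus the StoragePath primary key. A minimal sketch with hypothetical IDs:

    from documents import bulk_edit
    from documents.models import StoragePath

    # hypothetical storage path; any existing primary key works
    invoices = StoragePath.objects.get(name="Invoices")

    # documents that already have this path are filtered out before the
    # update, and the affected ids are re-indexed via an async task
    bulk_edit.set_storage_path(doc_ids=[1, 2, 3], storage_path=invoices.id)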

View File

@@ -57,10 +57,10 @@ def load_classifier():
return classifier
class DocumentClassifier(object):
class DocumentClassifier:
# v7 - Updated scikit-learn package version
FORMAT_VERSION = 7
# v8 - Added storage path classifier
FORMAT_VERSION = 8
def __init__(self):
# hash of the training data. used to prevent re-training when the
@@ -72,6 +72,7 @@ class DocumentClassifier(object):
self.tags_classifier = None
self.correspondent_classifier = None
self.document_type_classifier = None
self.storage_path_classifier = None
def load(self):
with open(settings.MODEL_FILE, "rb") as f:
@@ -90,6 +91,7 @@ class DocumentClassifier(object):
self.tags_classifier = pickle.load(f)
self.correspondent_classifier = pickle.load(f)
self.document_type_classifier = pickle.load(f)
self.storage_path_classifier = pickle.load(f)
except Exception:
raise ClassifierModelCorruptError()
@@ -107,6 +109,7 @@ class DocumentClassifier(object):
pickle.dump(self.tags_classifier, f)
pickle.dump(self.correspondent_classifier, f)
pickle.dump(self.document_type_classifier, f)
pickle.dump(self.storage_path_classifier, f)
if os.path.isfile(target_file):
os.unlink(target_file)
@@ -118,6 +121,7 @@ class DocumentClassifier(object):
labels_tags = list()
labels_correspondent = list()
labels_document_type = list()
labels_storage_path = list()
# Step 1: Extract and preprocess training data from the database.
logger.debug("Gathering data from database...")
@@ -144,17 +148,22 @@ class DocumentClassifier(object):
labels_correspondent.append(y)
tags = sorted(
[
tag.pk
for tag in doc.tags.filter(
matching_algorithm=MatchingModel.MATCH_AUTO,
)
],
tag.pk
for tag in doc.tags.filter(
matching_algorithm=MatchingModel.MATCH_AUTO,
)
)
for tag in tags:
m.update(tag.to_bytes(4, "little", signed=True))
labels_tags.append(tags)
y = -1
sd = doc.storage_path
if sd and sd.matching_algorithm == MatchingModel.MATCH_AUTO:
y = sd.pk
m.update(y.to_bytes(4, "little", signed=True))
labels_storage_path.append(y)
if not data:
raise ValueError("No training data available.")
@@ -163,7 +172,7 @@ class DocumentClassifier(object):
if self.data_hash and new_data_hash == self.data_hash:
return False
labels_tags_unique = set([tag for tags in labels_tags for tag in tags])
labels_tags_unique = {tag for tags in labels_tags for tag in tags}
num_tags = len(labels_tags_unique)
@@ -174,14 +183,16 @@ class DocumentClassifier(object):
# it usually is.
num_correspondents = len(set(labels_correspondent) | {-1}) - 1
num_document_types = len(set(labels_document_type) | {-1}) - 1
num_storage_paths = len(set(labels_storage_path) | {-1}) - 1
logger.debug(
"{} documents, {} tag(s), {} correspondent(s), "
"{} document type(s).".format(
"{} document type(s). {} storage path(es)".format(
len(data),
num_tags,
num_correspondents,
num_document_types,
num_storage_paths,
),
)
@@ -244,6 +255,21 @@ class DocumentClassifier(object):
"classifier.",
)
if num_storage_paths > 0:
logger.debug(
"Training storage paths classifier...",
)
self.storage_path_classifier = MLPClassifier(tol=0.01)
self.storage_path_classifier.fit(
data_vectorized,
labels_storage_path,
)
else:
self.storage_path_classifier = None
logger.debug(
"There are no storage paths. Not training storage path classifier.",
)
self.data_hash = new_data_hash
return True
@@ -290,3 +316,14 @@ class DocumentClassifier(object):
return []
else:
return []
def predict_storage_path(self, content):
if self.storage_path_classifier:
X = self.data_vectorizer.transform([preprocess_content(content)])
storage_path_id = self.storage_path_classifier.predict(X)
if storage_path_id != -1:
return storage_path_id
else:
return None
else:
return None
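Editor's note: a short sketch of consuming the new prediction hook, assuming a model trained at FORMAT_VERSION 8 exists:

    from documents.classifier import load_classifier

    classifier = load_classifier()  # None when no valid model file exists
    if classifier is not None:
        # returns the predicted StoragePath pk, or None when the classifier
        # was not trained or predicts the "no storage path" class (-1)
        pk = classifier.predict_storage_path("ACME Corp invoice 2022-05 ...")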

View File

@@ -3,6 +3,8 @@ import hashlib
import os
import uuid
from subprocess import Popen
from typing import Optional
from typing import Type
import magic
from asgiref.sync import async_to_sync
@@ -23,6 +25,7 @@ from .models import Document
from .models import DocumentType
from .models import FileInfo
from .models import Tag
from .parsers import DocumentParser
from .parsers import get_parser_class_for_mime_type
from .parsers import parse_date
from .parsers import ParseError
@@ -186,7 +189,8 @@ class Consumer(LoggingMixin):
override_document_type_id=None,
override_tag_ids=None,
task_id=None,
):
override_created=None,
) -> Document:
"""
Return the document object if it was successfully created.
"""
@@ -198,6 +202,7 @@ class Consumer(LoggingMixin):
self.override_document_type_id = override_document_type_id
self.override_tag_ids = override_tag_ids
self.task_id = task_id or str(uuid.uuid4())
self.override_created = override_created
self._send_progress(0, 100, "STARTING", MESSAGE_NEW_FILE)
@@ -220,7 +225,10 @@ class Consumer(LoggingMixin):
self.log("debug", f"Detected mime type: {mime_type}")
parser_class = get_parser_class_for_mime_type(mime_type)
# Based on the mime type, get the parser for that type
parser_class: Optional[Type[DocumentParser]] = get_parser_class_for_mime_type(
mime_type,
)
if not parser_class:
self._fail(MESSAGE_UNSUPPORTED_TYPE, f"Unsupported mime type {mime_type}")
@@ -241,7 +249,10 @@ class Consumer(LoggingMixin):
# This doesn't parse the document yet, but gives us a parser.
document_parser = parser_class(self.logging_group, progress_callback)
document_parser: DocumentParser = parser_class(
self.logging_group,
progress_callback,
)
self.log("debug", f"Parser: {type(document_parser).__name__}")
@@ -257,7 +268,7 @@ class Consumer(LoggingMixin):
try:
self._send_progress(20, 100, "WORKING", MESSAGE_PARSING_DOCUMENT)
self.log("debug", "Parsing {}...".format(self.filename))
self.log("debug", f"Parsing {self.filename}...")
document_parser.parse(self.path, mime_type, self.filename)
self.log("debug", f"Generating thumbnail for {self.filename}...")
@@ -270,7 +281,7 @@ class Consumer(LoggingMixin):
text = document_parser.get_text()
date = document_parser.get_date()
if not date:
if date is None:
self._send_progress(90, 100, "WORKING", MESSAGE_PARSE_DATE)
date = parse_date(self.filename, text)
archive_path = document_parser.get_archive_path()
@@ -342,11 +353,11 @@ class Consumer(LoggingMixin):
).hexdigest()
# Don't save with the lock active. Saving will cause the file
# renaming logic to aquire the lock as well.
# renaming logic to acquire the lock as well.
document.save()
# Delete the file only if it was successfully consumed
self.log("debug", "Deleting file {}".format(self.path))
self.log("debug", f"Deleting file {self.path}")
os.unlink(self.path)
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
@@ -356,13 +367,14 @@ class Consumer(LoggingMixin):
)
if os.path.isfile(shadow_file):
self.log("debug", "Deleting file {}".format(shadow_file))
self.log("debug", f"Deleting file {shadow_file}")
os.unlink(shadow_file)
except Exception as e:
self._fail(
str(e),
f"The following error occured while consuming " f"{self.filename}: {e}",
f"The following error occurred while consuming "
f"{self.filename}: {e}",
exc_info=True,
)
finally:
@@ -370,27 +382,38 @@ class Consumer(LoggingMixin):
self.run_post_consume_script(document)
self.log("info", "Document {} consumption finished".format(document))
self.log("info", f"Document {document} consumption finished")
self._send_progress(100, 100, "SUCCESS", MESSAGE_FINISHED, document.id)
return document
def _store(self, text, date, mime_type):
def _store(self, text, date, mime_type) -> Document:
# If someone gave us the original filename, use it instead of doc.
file_info = FileInfo.from_filename(self.filename)
stats = os.stat(self.path)
self.log("debug", "Saving record to database")
created = (
file_info.created
or date
or timezone.make_aware(datetime.datetime.fromtimestamp(stats.st_mtime))
)
if self.override_created is not None:
create_date = self.override_created
self.log(
"debug",
f"Creation date from post_documents parameter: {create_date}",
)
elif file_info.created is not None:
create_date = file_info.created
self.log("debug", f"Creation date from FileInfo: {create_date}")
elif date is not None:
create_date = date
self.log("debug", f"Creation date from parse_date: {create_date}")
else:
stats = os.stat(self.path)
create_date = timezone.make_aware(
datetime.datetime.fromtimestamp(stats.st_mtime),
)
self.log("debug", f"Creation date from st_mtime: {create_date}")
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
@@ -400,8 +423,8 @@ class Consumer(LoggingMixin):
content=text,
mime_type=mime_type,
checksum=hashlib.md5(f.read()).hexdigest(),
created=created,
modified=created,
created=create_date,
modified=create_date,
storage_type=storage_type,
)

View File

@@ -6,6 +6,7 @@ from collections import defaultdict
import pathvalidate
from django.conf import settings
from django.template.defaultfilters import slugify
from django.utils import timezone
logger = logging.getLogger("paperless.filehandling")
@@ -127,13 +128,26 @@ def generate_unique_filename(doc, archive_filename=False):
def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
path = ""
filename_format = settings.FILENAME_FORMAT
try:
if settings.PAPERLESS_FILENAME_FORMAT is not None:
tags = defaultdictNoStr(lambda: slugify(None), many_to_dictionary(doc.tags))
if doc.storage_path is not None:
logger.debug(
f"Document has storage_path {doc.storage_path.id} "
f"({doc.storage_path.path}) set",
)
filename_format = doc.storage_path.path
if filename_format is not None:
tags = defaultdictNoStr(
lambda: slugify(None),
many_to_dictionary(doc.tags),
)
tag_list = pathvalidate.sanitize_filename(
",".join(sorted([tag.name for tag in doc.tags.all()])),
",".join(
sorted(tag.name for tag in doc.tags.all()),
),
replacement_text="-",
)
@@ -143,7 +157,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
replacement_text="-",
)
else:
correspondent = "none"
correspondent = "-none-"
if doc.document_type:
document_type = pathvalidate.sanitize_filename(
@@ -151,36 +165,45 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
replacement_text="-",
)
else:
document_type = "none"
document_type = "-none-"
if doc.archive_serial_number:
asn = str(doc.archive_serial_number)
else:
asn = "none"
asn = "-none-"
path = settings.PAPERLESS_FILENAME_FORMAT.format(
# Convert UTC database date to localized date
local_added = timezone.localdate(doc.added)
local_created = timezone.localdate(doc.created)
path = filename_format.format(
title=pathvalidate.sanitize_filename(doc.title, replacement_text="-"),
correspondent=correspondent,
document_type=document_type,
created=datetime.date.isoformat(doc.created),
created_year=doc.created.year if doc.created else "none",
created_month=f"{doc.created.month:02}" if doc.created else "none",
created_day=f"{doc.created.day:02}" if doc.created else "none",
added=datetime.date.isoformat(doc.added),
added_year=doc.added.year if doc.added else "none",
added_month=f"{doc.added.month:02}" if doc.added else "none",
added_day=f"{doc.added.day:02}" if doc.added else "none",
created=datetime.date.isoformat(local_created),
created_year=local_created.year,
created_month=f"{local_created.month:02}",
created_day=f"{local_created.day:02}",
added=datetime.date.isoformat(local_added),
added_year=local_added.year,
added_month=f"{local_added.month:02}",
added_day=f"{local_added.day:02}",
asn=asn,
tags=tags,
tag_list=tag_list,
).strip()
if settings.FILENAME_FORMAT_REMOVE_NONE:
path = path.replace("-none-/", "") # remove empty directories
path = path.replace(" -none-", "") # remove when spaced, with space
path = path.replace("-none-", "") # remove rest of the occurences
path = path.replace("-none-", "none") # backward compatibility
path = path.strip(os.sep)
except (ValueError, KeyError, IndexError):
logger.warning(
f"Invalid PAPERLESS_FILENAME_FORMAT: "
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default",
f"Invalid filename_format '{filename_format}', falling back to default",
)
counter_str = f"_{counter:02}" if counter else ""
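Editor's note: because generate_filename now prefers doc.storage_path.path over the global format, each storage path can carry its own template. A hypothetical example built from the placeholders accepted above:

    from documents.models import StoragePath

    StoragePath.objects.create(
        name="Tax records",
        # same placeholders as the filename format; created/added are
        # localized via timezone.localdate() before formatting
        path="{created_year}/{correspondent}/{created}-{title}",
    )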

View File

@@ -7,6 +7,7 @@ from .models import Correspondent
from .models import Document
from .models import DocumentType
from .models import Log
from .models import StoragePath
from .models import Tag
CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"]
@@ -35,7 +36,7 @@ class DocumentTypeFilterSet(FilterSet):
class TagsFilter(Filter):
def __init__(self, exclude=False, in_list=False):
super(TagsFilter, self).__init__()
super().__init__()
self.exclude = exclude
self.in_list = in_list
@@ -114,6 +115,9 @@ class DocumentFilterSet(FilterSet):
"document_type": ["isnull"],
"document_type__id": ID_KWARGS,
"document_type__name": CHAR_KWARGS,
"storage_path": ["isnull"],
"storage_path__id": ID_KWARGS,
"storage_path__name": CHAR_KWARGS,
}
@@ -121,3 +125,12 @@ class LogFilterSet(FilterSet):
class Meta:
model = Log
fields = {"level": INT_KWARGS, "created": DATE_KWARGS, "group": ID_KWARGS}
class StoragePathFilterSet(FilterSet):
class Meta:
model = StoragePath
fields = {
"name": CHAR_KWARGS,
"path": CHAR_KWARGS,
}
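Editor's note: the new filter set and document filters surface directly as REST query parameters. A sketch against a hypothetical local instance (endpoint paths assumed):

    import requests

    base = "http://localhost:8000"  # hypothetical instance
    auth = ("user", "password")

    # documents assigned to storage path 5
    requests.get(f"{base}/api/documents/", params={"storage_path__id": 5}, auth=auth)

    # documents without any storage path
    requests.get(f"{base}/api/documents/", params={"storage_path__isnull": "true"}, auth=auth)

    # storage paths whose name contains "tax"
    requests.get(f"{base}/api/storage_paths/", params={"name__icontains": "tax"}, auth=auth)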

View File

@@ -46,6 +46,9 @@ def get_schema():
created=DATETIME(sortable=True),
modified=DATETIME(sortable=True),
added=DATETIME(sortable=True),
path=TEXT(sortable=True),
path_id=NUMERIC(),
has_path=BOOLEAN(),
)
@@ -104,6 +107,9 @@ def update_document(writer, doc):
added=doc.added,
asn=doc.archive_serial_number,
modified=doc.modified,
path=doc.storage_path.name if doc.storage_path else None,
path_id=doc.storage_path.id if doc.storage_path else None,
has_path=doc.storage_path is not None,
)
@@ -157,6 +163,11 @@ class DelayedQuery:
criterias.append(query.DateRange("added", start=isoparse(v), end=None))
elif k == "added__date__lt":
criterias.append(query.DateRange("added", start=None, end=isoparse(v)))
elif k == "storage_path__id":
criterias.append(query.Term("path_id", v))
elif k == "storage_path__isnull":
criterias.append(query.Term("has_path", v == "false"))
if len(criterias) > 0:
return query.And(criterias)
else:

View File

@@ -55,7 +55,7 @@ class Command(BaseCommand):
for document in encrypted_files:
print("Decrypting {}".format(document).encode("utf-8"))
print(f"Decrypting {document}".encode())
old_paths = [document.source_path, document.thumbnail_path]

View File

@@ -152,4 +152,4 @@ class Command(BaseCommand):
),
)
except KeyboardInterrupt:
print("Aborting...")
self.stdout.write(self.style.NOTICE("Aborting..."))

View File

@@ -28,8 +28,11 @@ def _tags_from_path(filepath):
"""Walk up the directory tree from filepath to CONSUMPTION_DIR
and get or create Tag IDs for every directory.
"""
normalized_consumption_dir = os.path.abspath(
os.path.normpath(settings.CONSUMPTION_DIR),
)
tag_ids = set()
path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts
path_parts = Path(filepath).relative_to(normalized_consumption_dir).parent.parts
for part in path_parts:
tag_ids.add(
Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk,
@@ -39,7 +42,10 @@ def _tags_from_path(filepath):
def _is_ignored(filepath: str) -> bool:
filepath_relative = PurePath(filepath).relative_to(settings.CONSUMPTION_DIR)
normalized_consumption_dir = os.path.abspath(
os.path.normpath(settings.CONSUMPTION_DIR),
)
filepath_relative = PurePath(filepath).relative_to(normalized_consumption_dir)
return any(filepath_relative.match(p) for p in settings.CONSUMER_IGNORE_PATTERNS)
@@ -160,6 +166,8 @@ class Command(BaseCommand):
if not directory:
raise CommandError("CONSUMPTION_DIR does not appear to be set.")
directory = os.path.abspath(directory)
if not os.path.isdir(directory):
raise CommandError(f"Consumption directory {directory} does not exist")
@@ -208,7 +216,7 @@ class Command(BaseCommand):
try:
inotify_debounce: Final[float] = 0.5
inotify_debounce: Final[float] = settings.CONSUMER_INOTIFY_DELAY
notified_files = {}
while not self.stop_flag:
@@ -226,10 +234,23 @@ class Command(BaseCommand):
for filepath in notified_files:
# Time of the last inotify event for this file
last_event_time = notified_files[filepath]
if (monotonic() - last_event_time) > inotify_debounce:
# Time since the last event must exceed the configured debounce timeout
waited_long_enough = (
monotonic() - last_event_time
) > inotify_debounce
# Also make sure the file still exists; some scanners might write a
# temporary file first
file_still_exists = os.path.exists(filepath) and os.path.isfile(
filepath,
)
if waited_long_enough and file_still_exists:
_consume(filepath)
else:
elif file_still_exists:
still_waiting[filepath] = last_event_time
# These files are still waiting to hit the timeout
notified_files = still_waiting
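Editor's note: the debounce window is now configurable rather than the fixed 0.5 s. A sketch of the settings plumbing, assuming the usual PAPERLESS_ prefix for the environment variable:

    # paperless/settings.py (sketch)
    import os

    CONSUMER_INOTIFY_DELAY: float = float(
        os.getenv("PAPERLESS_CONSUMER_INOTIFY_DELAY", 0.5),
    )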

View File

@@ -18,10 +18,12 @@ from documents.models import DocumentType
from documents.models import SavedView
from documents.models import SavedViewFilterRule
from documents.models import Tag
from documents.models import UiSettings
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from filelock import FileLock
from paperless import version
from paperless.db import GnuPG
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
@@ -111,8 +113,8 @@ class Command(BaseCommand):
map(lambda f: os.path.abspath(os.path.join(root, f)), files),
)
# 2. Create manifest, containing all correspondents, types, tags and
# documents
# 2. Create manifest, containing all correspondents, types, tags,
# documents and ui_settings
with transaction.atomic():
manifest = json.loads(
serializers.serialize("json", Correspondent.objects.all()),
@@ -149,6 +151,10 @@ class Command(BaseCommand):
manifest += json.loads(serializers.serialize("json", User.objects.all()))
manifest += json.loads(
serializers.serialize("json", UiSettings.objects.all()),
)
# 3. Export files from each document
for index, document_dict in tqdm.tqdm(
enumerate(document_manifest),
@@ -232,12 +238,18 @@ class Command(BaseCommand):
archive_target,
)
# 4. write manifest to target forlder
# 4.1 write manifest to target folder
manifest_path = os.path.abspath(os.path.join(self.target, "manifest.json"))
with open(manifest_path, "w") as f:
json.dump(manifest, f, indent=2)
# 4.2 write version information to target folder
version_path = os.path.abspath(os.path.join(self.target, "version.json"))
with open(version_path, "w") as f:
json.dump({"version": version.__full_version_str__}, f, indent=2)
if self.delete:
# 5. Remove files which we did not explicitly export in this run

View File

@@ -6,9 +6,11 @@ from contextlib import contextmanager
import tqdm
from django.conf import settings
from django.core.exceptions import FieldDoesNotExist
from django.core.management import call_command
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.core.serializers.base import DeserializationError
from django.db.models.signals import m2m_changed
from django.db.models.signals import post_save
from documents.models import Document
@@ -16,6 +18,7 @@ from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from filelock import FileLock
from paperless import version
from ...file_handling import create_source_path_directory
from ...signals.handlers import update_filename_and_move_files
@@ -53,6 +56,7 @@ class Command(BaseCommand):
BaseCommand.__init__(self, *args, **kwargs)
self.source = None
self.manifest = None
self.version = None
def handle(self, *args, **options):
@@ -66,12 +70,30 @@ class Command(BaseCommand):
if not os.access(self.source, os.R_OK):
raise CommandError("That path doesn't appear to be readable")
manifest_path = os.path.join(self.source, "manifest.json")
manifest_path = os.path.normpath(os.path.join(self.source, "manifest.json"))
self._check_manifest_exists(manifest_path)
with open(manifest_path) as f:
self.manifest = json.load(f)
version_path = os.path.normpath(os.path.join(self.source, "version.json"))
if os.path.exists(version_path):
with open(version_path) as f:
self.version = json.load(f)["version"]
# Provide an initial warning if needed to the user
if self.version != version.__full_version_str__:
self.stdout.write(
self.style.WARNING(
"Version mismatch: "
f"Currently {version.__full_version_str__},"
f" importing {self.version}."
" Continuing, but import may fail.",
),
)
else:
self.stdout.write(self.style.NOTICE("No version.json file located"))
self._check_manifest()
with disable_signal(
post_save,
@@ -84,12 +106,36 @@ class Command(BaseCommand):
sender=Document.tags.through,
):
# Fill up the database with whatever is in the manifest
call_command("loaddata", manifest_path)
try:
call_command("loaddata", manifest_path)
except (FieldDoesNotExist, DeserializationError) as e:
self.stdout.write(self.style.ERROR("Database import failed"))
if (
self.version is not None
and self.version != version.__full_version_str__
):
self.stdout.write(
self.style.ERROR(
"Version mismatch: "
f"Currently {version.__full_version_str__},"
f" importing {self.version}",
),
)
raise e
else:
self.stdout.write(
self.style.ERROR("No version information present"),
)
raise e
self._import_files_from_manifest(options["no_progress_bar"])
print("Updating search index...")
call_command("document_index", "reindex")
self.stdout.write("Updating search index...")
call_command(
"document_index",
"reindex",
no_progress_bar=options["no_progress_bar"],
)
@staticmethod
def _check_manifest_exists(path):
@@ -132,7 +178,7 @@ class Command(BaseCommand):
os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True)
os.makedirs(settings.ARCHIVE_DIR, exist_ok=True)
print("Copy files into paperless...")
self.stdout.write("Copy files into paperless...")
manifest_documents = list(
filter(lambda r: r["model"] == "documents.document", self.manifest),

View File

@@ -17,4 +17,4 @@ class Command(LoadDataCommand):
def find_fixtures(self, fixture_label):
if fixture_label == "-":
return [("-", None, "-")]
return super(Command, self).find_fixtures(fixture_label)
return super().find_fixtures(fixture_label)

View File

@@ -11,7 +11,14 @@ logger = logging.getLogger("paperless.management.superuser")
class Command(BaseCommand):
help = """
Creates a Django superuser based on env variables.
Creates a Django superuser:
User named: admin
Email: root@localhost
with a password based on env variables.
No superuser will be created when:
- The username already exists
- A superuser already exists
- PAPERLESS_ADMIN_PASSWORD is not set
""".replace(
" ",
"",
@@ -19,26 +26,41 @@ class Command(BaseCommand):
def handle(self, *args, **options):
username = os.getenv("PAPERLESS_ADMIN_USER")
if not username:
return
username = os.getenv("PAPERLESS_ADMIN_USER", "admin")
mail = os.getenv("PAPERLESS_ADMIN_MAIL", "root@localhost")
password = os.getenv("PAPERLESS_ADMIN_PASSWORD")
# Check if user exists already, leave as is if it does
# Check if there's already a user called admin
if User.objects.filter(username=username).exists():
user: User = User.objects.get_by_natural_key(username)
user.set_password(password)
user.save()
self.stdout.write(f"Changed password of user {username}.")
elif password:
# Create superuser based on env variables
User.objects.create_superuser(username, mail, password)
self.stdout.write(f'Created superuser "{username}" with provided password.')
else:
self.stdout.write(f'Did not create superuser "{username}".')
self.stdout.write(
'Make sure you specified "PAPERLESS_ADMIN_PASSWORD" in your '
'"docker-compose.env" file.',
self.style.NOTICE(
f"Did not create superuser, a user {username} already exists",
),
)
return
# Check if any superuser already exists;
# leave as-is if it does
if User.objects.filter(is_superuser=True).count() > 0:
self.stdout.write(
self.style.NOTICE(
"Did not create superuser, the DB already contains superusers",
),
)
return
if password is None:
self.stdout.write(
self.style.ERROR(
"Please check if PAPERLESS_ADMIN_PASSWORD has been"
" set in the environment",
),
)
else:
# Create superuser with password based on env variable
User.objects.create_superuser(username, mail, password)
self.stdout.write(
self.style.SUCCESS(
f'Created superuser "{username}" with provided password.',
),
)
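Editor's note: the command is driven entirely by environment variables. A hypothetical invocation — the management command's name is not shown in this diff, so the name below is assumed:

    import os
    from django.core.management import call_command

    os.environ.setdefault("PAPERLESS_ADMIN_USER", "admin")  # defaults to admin
    os.environ.setdefault("PAPERLESS_ADMIN_MAIL", "root@localhost")
    os.environ.setdefault("PAPERLESS_ADMIN_PASSWORD", "change-me")

    call_command("manage_superuser")  # assumed command name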

View File

@@ -4,6 +4,7 @@ import re
from documents.models import Correspondent
from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import StoragePath
from documents.models import Tag
@@ -57,6 +58,22 @@ def match_tags(document, classifier):
)
def match_storage_paths(document, classifier):
if classifier:
pred_id = classifier.predict_storage_path(document.content)
else:
pred_id = None
storage_paths = StoragePath.objects.all()
return list(
filter(
lambda o: matches(o, document) or o.pk == pred_id,
storage_paths,
),
)
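Editor's note: like the other match_* helpers, the result is every StoragePath whose rule matches plus the classifier's suggestion. A condensed sketch of how a caller (such as the consumption signal handler further below) uses it:

    from documents import matching
    from documents.classifier import load_classifier

    # document: an existing documents.models.Document instance (assumed)
    classifier = load_classifier()
    candidates = matching.match_storage_paths(document, classifier)
    if candidates:
        document.storage_path = candidates[0]
        document.save(update_fields=("storage_path",))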
def matches(matching_model, document):
search_kwargs = {}

View File

@@ -83,7 +83,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
path = ""
try:
if settings.PAPERLESS_FILENAME_FORMAT is not None:
if settings.FILENAME_FORMAT is not None:
tags = defaultdictNoStr(lambda: slugify(None), many_to_dictionary(doc.tags))
tag_list = pathvalidate.sanitize_filename(
@@ -105,7 +105,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
else:
document_type = "none"
path = settings.PAPERLESS_FILENAME_FORMAT.format(
path = settings.FILENAME_FORMAT.format(
title=pathvalidate.sanitize_filename(doc.title, replacement_text="-"),
correspondent=correspondent,
document_type=document_type,
@@ -128,7 +128,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
except (ValueError, KeyError, IndexError):
logger.warning(
f"Invalid PAPERLESS_FILENAME_FORMAT: "
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default"
f"{settings.FILENAME_FORMAT}, falling back to default"
)
counter_str = f"_{counter:02}" if counter else ""

View File

@@ -0,0 +1,73 @@
# Generated by Django 4.0.4 on 2022-05-02 15:56
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
("documents", "1018_alter_savedviewfilterrule_value"),
]
operations = [
migrations.CreateModel(
name="StoragePath",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"name",
models.CharField(max_length=128, unique=True, verbose_name="name"),
),
(
"match",
models.CharField(blank=True, max_length=256, verbose_name="match"),
),
(
"matching_algorithm",
models.PositiveIntegerField(
choices=[
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
(
"is_insensitive",
models.BooleanField(default=True, verbose_name="is insensitive"),
),
("path", models.CharField(max_length=512, verbose_name="path")),
],
options={
"verbose_name": "storage path",
"verbose_name_plural": "storage paths",
"ordering": ("name",),
},
),
migrations.AddField(
model_name="document",
name="storage_path",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="documents",
to="documents.storagepath",
verbose_name="storage path",
),
),
]
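Editor's note: after migrating, storage paths behave like the other matching models. A hypothetical rule created through the ORM:

    from documents.models import MatchingModel, StoragePath

    StoragePath.objects.create(
        name="Invoices",
        path="Invoices/{created_year}/{title}",
        match="invoice",
        matching_algorithm=MatchingModel.MATCH_ANY,  # choice (1, "Any word")
    )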

View File

@@ -0,0 +1,39 @@
# Generated by Django 4.0.4 on 2022-05-07 05:10
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("documents", "1018_alter_savedviewfilterrule_value"),
]
operations = [
migrations.CreateModel(
name="UiSettings",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("settings", models.JSONField(null=True)),
(
"user",
models.OneToOneField(
on_delete=django.db.models.deletion.CASCADE,
related_name="ui_settings",
to=settings.AUTH_USER_MODEL,
),
),
],
),
]

View File

@@ -0,0 +1,13 @@
# Generated by Django 4.0.4 on 2022-05-18 18:39
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("documents", "1019_storagepath_document_storage_path"),
("documents", "1019_uisettings"),
]
operations = []

View File

@@ -1,4 +1,3 @@
# coding=utf-8
import datetime
import logging
import os
@@ -11,7 +10,6 @@ from django.conf import settings
from django.contrib.auth.models import User
from django.db import models
from django.utils import timezone
from django.utils.timezone import is_aware
from django.utils.translation import gettext_lazy as _
from documents.parsers import get_default_file_extension
@@ -85,6 +83,18 @@ class DocumentType(MatchingModel):
verbose_name_plural = _("document types")
class StoragePath(MatchingModel):
path = models.CharField(
_("path"),
max_length=512,
)
class Meta:
ordering = ("name",)
verbose_name = _("storage path")
verbose_name_plural = _("storage paths")
class Document(models.Model):
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
@@ -103,6 +113,15 @@ class Document(models.Model):
verbose_name=_("correspondent"),
)
storage_path = models.ForeignKey(
StoragePath,
blank=True,
null=True,
related_name="documents",
on_delete=models.SET_NULL,
verbose_name=_("storage path"),
)
title = models.CharField(_("title"), max_length=128, blank=True, db_index=True)
document_type = models.ForeignKey(
@@ -210,10 +229,10 @@ class Document(models.Model):
verbose_name_plural = _("documents")
def __str__(self):
if is_aware(self.created):
created = timezone.localdate(self.created).isoformat()
else:
created = datetime.date.isoformat(self.created)
# Convert UTC database time to local time
created = datetime.date.isoformat(timezone.localdate(self.created))
if self.correspondent and self.title:
return f"{created} {self.correspondent} {self.title}"
else:
@@ -224,7 +243,7 @@ class Document(models.Model):
if self.filename:
fname = str(self.filename)
else:
fname = "{:07}{}".format(self.pk, self.file_type)
fname = f"{self.pk:07}{self.file_type}"
if self.storage_type == self.STORAGE_TYPE_GPG:
fname += ".gpg" # pragma: no cover
@@ -271,7 +290,7 @@ class Document(models.Model):
@property
def thumbnail_path(self):
file_name = "{:07}.png".format(self.pk)
file_name = f"{self.pk:07}.png"
if self.storage_type == self.STORAGE_TYPE_GPG:
file_name += ".gpg"
@@ -383,6 +402,10 @@ class SavedViewFilterRule(models.Model):
# TODO: why is this in the models file?
# TODO: how about, what is this and where is it documented?
# It appears to parse JSON from an environment variable to get a title and
# date from the filename, if possible, with higher priority than either
# document filename or content parsing
class FileInfo:
REGEXES = OrderedDict(
@@ -390,8 +413,7 @@ class FileInfo:
(
"created-title",
re.compile(
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
r"(?P<title>.*)$",
r"^(?P<created>\d{8}(\d{6})?Z) - " r"(?P<title>.*)$",
flags=re.IGNORECASE,
),
),
@@ -417,7 +439,7 @@ class FileInfo:
@classmethod
def _get_created(cls, created):
try:
return dateutil.parser.parse("{:0<14}Z".format(created[:-1]))
return dateutil.parser.parse(f"{created[:-1]:0<14}Z")
except ValueError:
return None
@@ -428,10 +450,10 @@ class FileInfo:
@classmethod
def _mangle_property(cls, properties, name):
if name in properties:
properties[name] = getattr(cls, "_get_{}".format(name))(properties[name])
properties[name] = getattr(cls, f"_get_{name}")(properties[name])
@classmethod
def from_filename(cls, filename):
def from_filename(cls, filename) -> "FileInfo":
# Mutate filename in-place before parsing its components
# by applying at most one of the configured transformations.
for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS:
@@ -464,3 +486,17 @@ class FileInfo:
cls._mangle_property(properties, "created")
cls._mangle_property(properties, "title")
return cls(**properties)
# Extending User Model Using a One-To-One Link
class UiSettings(models.Model):
user = models.OneToOneField(
User,
on_delete=models.CASCADE,
related_name="ui_settings",
)
settings = models.JSONField(null=True)
def __str__(self):
return self.user.username

View File

@@ -1,3 +1,4 @@
import datetime
import logging
import mimetypes
import os
@@ -5,6 +6,8 @@ import re
import shutil
import subprocess
import tempfile
from typing import Optional
from typing import Set
import magic
from django.conf import settings
@@ -40,11 +43,11 @@ DATE_REGEX = re.compile(
logger = logging.getLogger("paperless.parsing")
def is_mime_type_supported(mime_type):
def is_mime_type_supported(mime_type) -> bool:
return get_parser_class_for_mime_type(mime_type) is not None
def get_default_file_extension(mime_type):
def get_default_file_extension(mime_type) -> str:
for response in document_consumer_declaration.send(None):
parser_declaration = response[1]
supported_mime_types = parser_declaration["mime_types"]
@@ -59,14 +62,14 @@ def get_default_file_extension(mime_type):
return ""
def is_file_ext_supported(ext):
def is_file_ext_supported(ext) -> bool:
if ext:
return ext.lower() in get_supported_file_extensions()
else:
return False
def get_supported_file_extensions():
def get_supported_file_extensions() -> Set[str]:
extensions = set()
for response in document_consumer_declaration.send(None):
parser_declaration = response[1]
@@ -121,7 +124,7 @@ def run_convert(
auto_orient=False,
extra=None,
logging_group=None,
):
) -> None:
environment = os.environ.copy()
if settings.CONVERT_MEMORY_LIMIT:
@@ -143,14 +146,14 @@ def run_convert(
logger.debug("Execute: " + " ".join(args), extra={"group": logging_group})
if not subprocess.Popen(args, env=environment).wait() == 0:
raise ParseError("Convert failed at {}".format(args))
raise ParseError(f"Convert failed at {args}")
def get_default_thumbnail():
def get_default_thumbnail() -> str:
return os.path.join(os.path.dirname(__file__), "resources", "document.png")
def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None):
def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -> str:
out_path = os.path.join(temp_dir, "convert_gs.png")
# if convert fails, fall back to extracting
@@ -164,7 +167,7 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None):
cmd = [settings.GS_BINARY, "-q", "-sDEVICE=pngalpha", "-o", gs_out_path, in_path]
try:
if not subprocess.Popen(cmd).wait() == 0:
raise ParseError("Thumbnail (gs) failed at {}".format(cmd))
raise ParseError(f"Thumbnail (gs) failed at {cmd}")
# then run convert on the output from gs
run_convert(
density=300,
@@ -184,7 +187,7 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None):
return get_default_thumbnail()
def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None):
def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None) -> str:
"""
The thumbnail of a PDF is just a 500px wide image of the first page.
"""
@@ -199,7 +202,7 @@ def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None):
strip=True,
trim=False,
auto_orient=True,
input_file="{}[0]".format(in_path),
input_file=f"{in_path}[0]",
output_file=out_path,
logging_group=logging_group,
)
@@ -209,12 +212,12 @@ def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None):
return out_path
def parse_date(filename, text):
def parse_date(filename, text) -> Optional[datetime.datetime]:
"""
Returns the date of the document.
"""
def __parser(ds, date_order):
def __parser(ds: str, date_order: str) -> datetime.datetime:
"""
Call dateparser.parse with a particular date ordering
"""
@@ -230,9 +233,9 @@ def parse_date(filename, text):
},
)
def __filter(date):
def __filter(date: datetime.datetime) -> Optional[datetime.datetime]:
if (
date
date is not None
and date.year > 1900
and date <= timezone.now()
and date.date() not in settings.IGNORE_DATES
@@ -269,7 +272,7 @@ def parse_date(filename, text):
date = __filter(date)
if date is not None:
break
return date
return date
@@ -294,7 +297,7 @@ class DocumentParser(LoggingMixin):
self.archive_path = None
self.text = None
self.date = None
self.date: Optional[datetime.datetime] = None
self.progress_callback = progress_callback
def progress(self, current_progress, max_progress):
@@ -333,7 +336,7 @@ class DocumentParser(LoggingMixin):
self.log("debug", f"Execute: {' '.join(args)}")
if not subprocess.Popen(args).wait() == 0:
raise ParseError("Optipng failed at {}".format(args))
raise ParseError(f"Optipng failed at {args}")
return out_path
else:
@@ -342,7 +345,7 @@ class DocumentParser(LoggingMixin):
def get_text(self):
return self.text
def get_date(self):
def get_date(self) -> Optional[datetime.datetime]:
return self.date
def cleanup(self):

View File

@@ -14,7 +14,9 @@ from .models import DocumentType
from .models import MatchingModel
from .models import SavedView
from .models import SavedViewFilterRule
from .models import StoragePath
from .models import Tag
from .models import UiSettings
from .parsers import is_mime_type_supported
@@ -30,7 +32,7 @@ class DynamicFieldsModelSerializer(serializers.ModelSerializer):
fields = kwargs.pop("fields", None)
# Instantiate the superclass normally
super(DynamicFieldsModelSerializer, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)
if fields is not None:
# Drop any fields that are not specified in the `fields` argument.
@@ -198,11 +200,17 @@ class DocumentTypeField(serializers.PrimaryKeyRelatedField):
return DocumentType.objects.all()
class StoragePathField(serializers.PrimaryKeyRelatedField):
def get_queryset(self):
return StoragePath.objects.all()
class DocumentSerializer(DynamicFieldsModelSerializer):
correspondent = CorrespondentField(allow_null=True)
tags = TagsField(many=True)
document_type = DocumentTypeField(allow_null=True)
storage_path = StoragePathField(allow_null=True)
original_file_name = SerializerMethodField()
archived_file_name = SerializerMethodField()
@@ -223,6 +231,7 @@ class DocumentSerializer(DynamicFieldsModelSerializer):
"id",
"correspondent",
"document_type",
"storage_path",
"title",
"content",
"tags",
@@ -263,7 +272,7 @@ class SavedViewSerializer(serializers.ModelSerializer):
rules_data = validated_data.pop("filter_rules")
else:
rules_data = None
super(SavedViewSerializer, self).update(instance, validated_data)
super().update(instance, validated_data)
if rules_data is not None:
SavedViewFilterRule.objects.filter(saved_view=instance).delete()
for rule_data in rules_data:
@@ -309,6 +318,7 @@ class BulkEditSerializer(DocumentListSerializer):
choices=[
"set_correspondent",
"set_document_type",
"set_storage_path",
"add_tag",
"remove_tag",
"modify_tags",
@@ -336,6 +346,8 @@ class BulkEditSerializer(DocumentListSerializer):
return bulk_edit.set_correspondent
elif method == "set_document_type":
return bulk_edit.set_document_type
elif method == "set_storage_path":
return bulk_edit.set_storage_path
elif method == "add_tag":
return bulk_edit.add_tag
elif method == "remove_tag":
@@ -382,6 +394,20 @@ class BulkEditSerializer(DocumentListSerializer):
else:
raise serializers.ValidationError("correspondent not specified")
def _validate_storage_path(self, parameters):
if "storage_path" in parameters:
storage_path_id = parameters["storage_path"]
if storage_path_id is None:
return
try:
StoragePath.objects.get(id=storage_path_id)
except StoragePath.DoesNotExist:
raise serializers.ValidationError(
"Storage path does not exist",
)
else:
raise serializers.ValidationError("storage path not specified")
def _validate_parameters_modify_tags(self, parameters):
if "add_tags" in parameters:
self._validate_tag_id_list(parameters["add_tags"], "add_tags")
@@ -406,12 +432,21 @@ class BulkEditSerializer(DocumentListSerializer):
self._validate_parameters_tags(parameters)
elif method == bulk_edit.modify_tags:
self._validate_parameters_modify_tags(parameters)
elif method == bulk_edit.set_storage_path:
self._validate_storage_path(parameters)
return attrs
class PostDocumentSerializer(serializers.Serializer):
created = serializers.DateTimeField(
label="Created",
allow_null=True,
write_only=True,
required=False,
)
document = serializers.FileField(
label="Document",
write_only=True,
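Editor's note: the new optional created field lets API uploads carry a creation date, which the consumer now prefers over all other sources (see the _store() priority chain above). A sketch against the assumed upload endpoint:

    import requests

    with open("invoice.pdf", "rb") as fh:
        requests.post(
            "http://localhost:8000/api/documents/post_document/",  # assumed endpoint
            data={"created": "2022-05-01T10:00:00+02:00"},
            files={"document": fh},
            auth=("user", "password"),
        )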
@@ -498,3 +533,65 @@ class BulkDownloadSerializer(DocumentListSerializer):
"bzip2": zipfile.ZIP_BZIP2,
"lzma": zipfile.ZIP_LZMA,
}[compression]
class StoragePathSerializer(MatchingModelSerializer):
document_count = serializers.IntegerField(read_only=True)
class Meta:
model = StoragePath
fields = (
"id",
"slug",
"name",
"path",
"match",
"matching_algorithm",
"is_insensitive",
"document_count",
)
def validate_path(self, path):
try:
path.format(
title="title",
correspondent="correspondent",
document_type="document_type",
created="created",
created_year="created_year",
created_month="created_month",
created_day="created_day",
added="added",
added_year="added_year",
added_month="added_month",
added_day="added_day",
asn="asn",
tags="tags",
tag_list="tag_list",
)
except KeyError:
raise serializers.ValidationError(_("Invalid variable detected."))
return path
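Editor's note: validate_path does a dry-run format() with every known placeholder, so unknown variables fail fast. A sketch (serializer module name assumed):

    from documents.serialisers import StoragePathSerializer

    s = StoragePathSerializer(data={"name": "Broken", "path": "{nonexistent}/{title}"})
    # format() raises KeyError on {nonexistent}, surfaced as
    # "Invalid variable detected."
    assert not s.is_valid()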
class UiSettingsViewSerializer(serializers.ModelSerializer):
class Meta:
model = UiSettings
depth = 1
fields = [
"id",
"settings",
]
def update(self, instance, validated_data):
super().update(instance, validated_data)
return instance
def create(self, validated_data):
# update_or_create returns an (object, created) tuple; unpack it
ui_settings, _ = UiSettings.objects.update_or_create(
user=validated_data.get("user"),
defaults={"settings": validated_data.get("settings", None)},
)
return ui_settings

View File

@@ -230,6 +230,76 @@ def set_tags(
document.tags.add(*relevant_tags)
def set_storage_path(
sender,
document=None,
logging_group=None,
classifier=None,
replace=False,
use_first=True,
suggest=False,
base_url=None,
color=False,
**kwargs,
):
if document.storage_path and not replace:
return
potential_storage_path = matching.match_storage_paths(
document,
classifier,
)
potential_count = len(potential_storage_path)
if potential_storage_path:
selected = potential_storage_path[0]
else:
selected = None
if potential_count > 1:
if use_first:
logger.info(
f"Detected {potential_count} potential storage paths, "
f"so we've opted for {selected}",
extra={"group": logging_group},
)
else:
logger.info(
f"Detected {potential_count} potential storage paths, "
f"not assigning any storage directory",
extra={"group": logging_group},
)
return
if selected or replace:
if suggest:
if base_url:
print(
termcolors.colorize(str(document), fg="green")
if color
else str(document),
)
print(f"{base_url}/documents/{document.pk}")
else:
print(
(
termcolors.colorize(str(document), fg="green")
if color
else str(document)
)
+ f" [{document.pk}]",
)
print(f"Sugest storage directory {selected}")
else:
logger.info(
f"Assigning storage path {selected} to {document}",
extra={"group": logging_group},
)
document.storage_path = selected
document.save(update_fields=("storage_path",))
@receiver(models.signals.post_delete, sender=Document)
def cleanup_document_deletion(sender, instance, using, **kwargs):
with FileLock(settings.MEDIA_LOCK):

View File

@@ -4,6 +4,7 @@ import shutil
import tempfile
from typing import List # for type hinting. Can be removed, if only Python >3.8 is used
import magic
import tqdm
from asgiref.sync import async_to_sync
from channels.layers import get_channel_layer
@@ -18,6 +19,7 @@ from documents.consumer import ConsumerError
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.models import Tag
from documents.sanity_checker import SanityCheckFailedException
from pdf2image import convert_from_path
@@ -52,6 +54,7 @@ def train_classifier():
not Tag.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
and not DocumentType.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
and not Correspondent.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
and not StoragePath.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
):
return
@@ -64,7 +67,7 @@ def train_classifier():
try:
if classifier.train():
logger.info(
"Saving updated classifier model to {}...".format(settings.MODEL_FILE),
f"Saving updated classifier model to {settings.MODEL_FILE}...",
)
classifier.save()
else:
@@ -95,19 +98,33 @@ def barcode_reader(image) -> List[str]:
return barcodes
def get_file_type(path: str) -> str:
"""
Determines the file type based on its MIME type
and returns that MIME type.
"""
mime_type = magic.from_file(path, mime=True)
logger.debug(f"Detected mime type: {mime_type}")
return mime_type
def convert_from_tiff_to_pdf(filepath: str) -> str:
"""
converts a given TIFF image file to pdf into a temp. directory.
converts a given TIFF image file to pdf into a temporary directory.
Returns the new pdf file.
"""
file_name = os.path.splitext(os.path.basename(filepath))[0]
file_extension = os.path.splitext(os.path.basename(filepath))[1].lower()
mime_type = get_file_type(filepath)
tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
# use old file name with pdf extension
if file_extension == ".tif" or file_extension == ".tiff":
if mime_type == "image/tiff":
newpath = os.path.join(tempdir, file_name + ".pdf")
else:
logger.warning(f"Cannot convert from {str(file_extension)} to pdf.")
logger.warning(
f"Cannot convert mime type {str(mime_type)} from {str(filepath)} to pdf.",
)
return None
with Image.open(filepath) as image:
images = []
@@ -165,7 +182,7 @@ def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]:
for n, page in enumerate(pdf.pages):
if n < pages_to_split_on[0]:
dst.pages.append(page)
output_filename = "{}_document_0.pdf".format(fname)
output_filename = f"{fname}_document_0.pdf"
savepath = os.path.join(tempdir, output_filename)
with open(savepath, "wb") as out:
dst.save(out)
@@ -185,7 +202,7 @@ def separate_pages(filepath: str, pages_to_split_on: List[int]) -> List[str]:
f"page_number: {str(page_number)} next_page: {str(next_page)}",
)
dst.pages.append(pdf.pages[page])
output_filename = "{}_document_{}.pdf".format(fname, str(count + 1))
output_filename = f"{fname}_document_{str(count + 1)}.pdf"
logger.debug(f"pdf no:{str(count)} has {str(len(dst.pages))} pages")
savepath = os.path.join(tempdir, output_filename)
with open(savepath, "wb") as out:
@@ -223,6 +240,7 @@ def consume_file(
override_document_type_id=None,
override_tag_ids=None,
task_id=None,
override_created=None,
):
# check for separators in current document
@@ -231,17 +249,17 @@ def consume_file(
document_list = []
converted_tiff = None
if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
supported_extensions = [".pdf", ".tiff", ".tif"]
supported_mime = ["image/tiff", "application/pdf"]
else:
supported_extensions = [".pdf"]
file_extension = os.path.splitext(os.path.basename(path))[1].lower()
if file_extension not in supported_extensions:
supported_mime = ["application/pdf"]
mime_type = get_file_type(path)
if mime_type not in supported_mime:
# if not supported, skip this routine
logger.warning(
f"Unsupported file format for barcode reader: {str(file_extension)}",
f"Unsupported file format for barcode reader: {str(mime_type)}",
)
else:
if file_extension in {".tif", ".tiff"}:
if mime_type == "image/tiff":
file_to_process = convert_from_tiff_to_pdf(path)
else:
file_to_process = path
@@ -266,9 +284,9 @@ def consume_file(
# if we got here, the document was successfully split
# and can safely be deleted
if converted_tiff:
logger.debug("Deleting file {}".format(file_to_process))
logger.debug(f"Deleting file {file_to_process}")
os.unlink(file_to_process)
logger.debug("Deleting file {}".format(path))
logger.debug(f"Deleting file {path}")
os.unlink(path)
# notify the sender, otherwise the progress bar
# in the UI stays stuck
@@ -303,10 +321,11 @@ def consume_file(
override_document_type_id=override_document_type_id,
override_tag_ids=override_tag_ids,
task_id=task_id,
override_created=override_created,
)
if document:
return "Success. New document id {} created".format(document.pk)
return f"Success. New document id {document.pk} created"
else:
raise ConsumerError(
"Unknown error: Returned document was null, but "

View File

@@ -9,8 +9,6 @@
<title>Paperless-ngx</title>
<base href="{% url 'base' %}">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="username" content="{{username}}">
<meta name="full_name" content="{{full_name}}">
<meta name="cookie_prefix" content="{{cookie_prefix}}">
<meta name="robots" content="noindex,nofollow">
<link rel="icon" type="image/x-icon" href="favicon.ico">

Binary file not shown.

Binary file not shown.

View File

@@ -16,7 +16,7 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
return searcher.document(id=doc.id)
def setUp(self) -> None:
super(TestDocumentAdmin, self).setUp()
super().setUp()
self.doc_admin = DocumentAdmin(model=Document, admin_site=AdminSite())
def test_save_model(self):

View File

@@ -4,8 +4,15 @@ import json
import os
import shutil
import tempfile
import urllib.request
import zipfile
from unittest import mock
from unittest.mock import MagicMock
try:
import zoneinfo
except ImportError:
import backports.zoneinfo as zoneinfo
import pytest
from django.conf import settings
@@ -19,15 +26,19 @@ from documents.models import Document
from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import SavedView
from documents.models import StoragePath
from documents.models import Tag
from documents.models import UiSettings
from documents.tests.utils import DirectoriesMixin
from paperless import version
from rest_framework.test import APITestCase
from whoosh.writing import AsyncWriter
class TestDocumentApi(DirectoriesMixin, APITestCase):
def setUp(self):
super(TestDocumentApi, self).setUp()
super().setUp()
self.user = User.objects.create_superuser(username="temp_admin")
self.client.force_login(user=self.user)
@@ -70,7 +81,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
returned_doc["title"] = "the new title"
response = self.client.put(
"/api/documents/{}/".format(doc.pk),
f"/api/documents/{doc.pk}/",
returned_doc,
format="json",
)
@@ -82,7 +93,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(doc_after_save.correspondent, c2)
self.assertEqual(doc_after_save.title, "the new title")
self.client.delete("/api/documents/{}/".format(doc_after_save.pk))
self.client.delete(f"/api/documents/{doc_after_save.pk}/")
self.assertEqual(len(Document.objects.all()), 0)
@@ -90,6 +101,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
c = Correspondent.objects.create(name="c", pk=41)
dt = DocumentType.objects.create(name="dt", pk=63)
tag = Tag.objects.create(name="t", pk=85)
storage_path = StoragePath.objects.create(name="sp", pk=77, path="p")
doc = Document.objects.create(
title="WOW",
content="the content",
@@ -97,6 +109,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
document_type=dt,
checksum="123",
mime_type="application/pdf",
storage_path=storage_path,
)
response = self.client.get("/api/documents/", format="json")
@@ -163,27 +176,27 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
)
with open(
os.path.join(self.dirs.thumbnail_dir, "{:07d}.png".format(doc.pk)),
os.path.join(self.dirs.thumbnail_dir, f"{doc.pk:07d}.png"),
"wb",
) as f:
f.write(content_thumbnail)
response = self.client.get("/api/documents/{}/download/".format(doc.pk))
response = self.client.get(f"/api/documents/{doc.pk}/download/")
self.assertEqual(response.status_code, 200)
self.assertEqual(response.content, content)
response = self.client.get("/api/documents/{}/preview/".format(doc.pk))
response = self.client.get(f"/api/documents/{doc.pk}/preview/")
self.assertEqual(response.status_code, 200)
self.assertEqual(response.content, content)
response = self.client.get("/api/documents/{}/thumb/".format(doc.pk))
response = self.client.get(f"/api/documents/{doc.pk}/thumb/")
self.assertEqual(response.status_code, 200)
self.assertEqual(response.content, content_thumbnail)
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
def test_download_with_archive(self):
content = b"This is a test"
@@ -202,25 +215,25 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
with open(doc.archive_path, "wb") as f:
f.write(content_archive)
response = self.client.get("/api/documents/{}/download/".format(doc.pk))
response = self.client.get(f"/api/documents/{doc.pk}/download/")
self.assertEqual(response.status_code, 200)
self.assertEqual(response.content, content_archive)
response = self.client.get(
"/api/documents/{}/download/?original=true".format(doc.pk),
f"/api/documents/{doc.pk}/download/?original=true",
)
self.assertEqual(response.status_code, 200)
self.assertEqual(response.content, content)
response = self.client.get("/api/documents/{}/preview/".format(doc.pk))
response = self.client.get(f"/api/documents/{doc.pk}/preview/")
self.assertEqual(response.status_code, 200)
self.assertEqual(response.content, content_archive)
response = self.client.get(
"/api/documents/{}/preview/?original=true".format(doc.pk),
f"/api/documents/{doc.pk}/preview/?original=true",
)
self.assertEqual(response.status_code, 200)
@@ -234,13 +247,13 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
mime_type="application/pdf",
)
response = self.client.get("/api/documents/{}/download/".format(doc.pk))
response = self.client.get(f"/api/documents/{doc.pk}/download/")
self.assertEqual(response.status_code, 404)
response = self.client.get("/api/documents/{}/preview/".format(doc.pk))
response = self.client.get(f"/api/documents/{doc.pk}/preview/")
self.assertEqual(response.status_code, 404)
response = self.client.get("/api/documents/{}/thumb/".format(doc.pk))
response = self.client.get(f"/api/documents/{doc.pk}/thumb/")
self.assertEqual(response.status_code, 404)
def test_document_filters(self):
@@ -283,7 +296,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id])
response = self.client.get(
"/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id),
f"/api/documents/?tags__id__in={tag_inbox.id},{tag_3.id}",
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@@ -291,7 +304,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc3.id])
response = self.client.get(
"/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id),
f"/api/documents/?tags__id__in={tag_2.id},{tag_3.id}",
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@@ -299,7 +312,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id])
response = self.client.get(
"/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id),
f"/api/documents/?tags__id__all={tag_2.id},{tag_3.id}",
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@@ -307,27 +320,27 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(results[0]["id"], doc3.id)
response = self.client.get(
"/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id),
f"/api/documents/?tags__id__all={tag_inbox.id},{tag_3.id}",
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
self.assertEqual(len(results), 0)
response = self.client.get(
"/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id),
f"/api/documents/?tags__id__all={tag_inbox.id}a{tag_3.id}",
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
self.assertEqual(len(results), 3)
response = self.client.get("/api/documents/?tags__id__none={}".format(tag_3.id))
response = self.client.get(f"/api/documents/?tags__id__none={tag_3.id}")
self.assertEqual(response.status_code, 200)
results = response.data["results"]
self.assertEqual(len(results), 2)
self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc2.id])
response = self.client.get(
"/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id),
f"/api/documents/?tags__id__none={tag_3.id},{tag_2.id}",
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@@ -335,7 +348,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(results[0]["id"], doc1.id)
response = self.client.get(
"/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id),
f"/api/documents/?tags__id__none={tag_2.id},{tag_inbox.id}",
)
self.assertEqual(response.status_code, 200)
results = response.data["results"]
@@ -571,10 +584,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
t2 = Tag.objects.create(name="tag2")
c = Correspondent.objects.create(name="correspondent")
dt = DocumentType.objects.create(name="type")
sp = StoragePath.objects.create(name="path")
d1 = Document.objects.create(checksum="1", correspondent=c, content="test")
d2 = Document.objects.create(checksum="2", document_type=dt, content="test")
d3 = Document.objects.create(checksum="3", content="test")
d3.tags.add(t)
d3.tags.add(t2)
d4 = Document.objects.create(
@@ -589,6 +604,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
content="test",
)
d6 = Document.objects.create(checksum="6", content="test2")
d7 = Document.objects.create(checksum="7", storage_path=sp, content="test")
with AsyncWriter(index.open_index()) as writer:
for doc in Document.objects.all():
@@ -599,18 +615,30 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(r.status_code, 200)
return [hit["id"] for hit in r.data["results"]]
self.assertCountEqual(search_query(""), [d1.id, d2.id, d3.id, d4.id, d5.id])
self.assertCountEqual(
search_query(""),
[d1.id, d2.id, d3.id, d4.id, d5.id, d7.id],
)
self.assertCountEqual(search_query("&is_tagged=true"), [d3.id, d4.id])
self.assertCountEqual(search_query("&is_tagged=false"), [d1.id, d2.id, d5.id])
self.assertCountEqual(
search_query("&is_tagged=false"),
[d1.id, d2.id, d5.id, d7.id],
)
self.assertCountEqual(search_query("&correspondent__id=" + str(c.id)), [d1.id])
self.assertCountEqual(search_query("&document_type__id=" + str(dt.id)), [d2.id])
self.assertCountEqual(search_query("&storage_path__id=" + str(sp.id)), [d7.id])
self.assertCountEqual(
search_query("&storage_path__isnull"),
[d1.id, d2.id, d3.id, d4.id, d5.id],
)
self.assertCountEqual(
search_query("&correspondent__isnull"),
[d2.id, d3.id, d4.id, d5.id],
[d2.id, d3.id, d4.id, d5.id, d7.id],
)
self.assertCountEqual(
search_query("&document_type__isnull"),
[d1.id, d3.id, d4.id, d5.id],
[d1.id, d3.id, d4.id, d5.id, d7.id],
)
self.assertCountEqual(
search_query("&tags__id__all=" + str(t.id) + "," + str(t2.id)),
@@ -952,6 +980,34 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
async_task.assert_not_called()
@mock.patch("documents.views.async_task")
def test_upload_with_created(self, async_task):
created = datetime.datetime(
2022,
5,
12,
0,
0,
0,
0,
tzinfo=zoneinfo.ZoneInfo("America/Los_Angeles"),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
response = self.client.post(
"/api/documents/post_document/",
{"document": f, "created": created},
)
self.assertEqual(response.status_code, 200)
async_task.assert_called_once()
args, kwargs = async_task.call_args
self.assertEqual(kwargs["override_created"], created)
def test_get_metadata(self):
doc = Document.objects.create(
title="test",
@@ -1043,35 +1099,49 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 200)
self.assertEqual(
response.data,
{"correspondents": [], "tags": [], "document_types": []},
{
"correspondents": [],
"tags": [],
"document_types": [],
"storage_paths": [],
},
)
def test_get_suggestions_invalid_doc(self):
response = self.client.get(f"/api/documents/34676/suggestions/")
self.assertEqual(response.status_code, 404)
@mock.patch("documents.views.match_correspondents")
@mock.patch("documents.views.match_tags")
@mock.patch("documents.views.match_storage_paths")
@mock.patch("documents.views.match_document_types")
@mock.patch("documents.views.match_tags")
@mock.patch("documents.views.match_correspondents")
def test_get_suggestions(
self,
match_document_types,
match_tags,
match_correspondents,
match_tags,
match_document_types,
match_storage_paths,
):
doc = Document.objects.create(
title="test",
mime_type="application/pdf",
content="this is an invoice!",
)
match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
match_tags.return_value = [Tag(id=56), Tag(id=123)]
match_document_types.return_value = [DocumentType(id=23)]
match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
match_storage_paths.return_value = [StoragePath(id=99), StoragePath(id=77)]
response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
self.assertEqual(
response.data,
{"correspondents": [88, 2], "tags": [56, 123], "document_types": [23]},
{
"correspondents": [88, 2],
"tags": [56, 123],
"document_types": [23],
"storage_paths": [99, 77],
},
)
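For reference, the response shape asserted here can be produced by mapping each matcher's results to primary keys. A minimal sketch, assuming each match_* helper takes the document and a trained classifier; the real view code in documents/views.py may differ:

# Hedged sketch only: mirrors the mocked match_* helpers above; the
# (document, classifier) signature is an assumption, not confirmed here.
def build_suggestions(document, classifier):
    return {
        "correspondents": [c.id for c in match_correspondents(document, classifier)],
        "tags": [t.id for t in match_tags(document, classifier)],
        "document_types": [d.id for d in match_document_types(document, classifier)],
        "storage_paths": [s.id for s in match_storage_paths(document, classifier)],
    }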
def test_saved_views(self):
@@ -1284,7 +1354,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
class TestDocumentApiV2(DirectoriesMixin, APITestCase):
def setUp(self):
super(TestDocumentApiV2, self).setUp()
super().setUp()
self.user = User.objects.create_superuser(username="temp_admin")
@@ -1362,10 +1432,45 @@ class TestDocumentApiV2(DirectoriesMixin, APITestCase):
"#000000",
)
def test_ui_settings(self):
test_user = User.objects.create_superuser(username="test")
self.client.force_login(user=test_user)
response = self.client.get("/api/ui_settings/", format="json")
self.assertEqual(response.status_code, 200)
self.assertDictEqual(
response.data["settings"],
{},
)
settings = {
"settings": {
"dark_mode": {
"enabled": True,
},
},
}
response = self.client.post(
"/api/ui_settings/",
json.dumps(settings),
content_type="application/json",
)
self.assertEqual(response.status_code, 200)
response = self.client.get("/api/ui_settings/", format="json")
self.assertEqual(response.status_code, 200)
self.assertDictEqual(
response.data["settings"],
settings["settings"],
)
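The round trip above implies a simple per-user persistence layer behind /api/ui_settings/. A hedged sketch of a view that would satisfy both assertions; the UiSettings model and its field names are assumptions, not the project's actual code:

from rest_framework.generics import GenericAPIView
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response

class UiSettingsSketchView(GenericAPIView):
    # Illustrative only; assumes a hypothetical UiSettings model with a
    # JSON "settings" field keyed by user.
    permission_classes = [IsAuthenticated]

    def get(self, request):
        obj, _ = UiSettings.objects.get_or_create(user=request.user)
        return Response({"settings": obj.settings or {}})

    def post(self, request):
        obj, _ = UiSettings.objects.get_or_create(user=request.user)
        obj.settings = request.data["settings"]
        obj.save()
        return Response({"success": True})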
class TestBulkEdit(DirectoriesMixin, APITestCase):
def setUp(self):
super(TestBulkEdit, self).setUp()
super().setUp()
user = User.objects.create_superuser(username="temp_admin")
self.client.force_login(user=user)
@@ -1397,6 +1502,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
self.doc2.tags.add(self.t1)
self.doc3.tags.add(self.t2)
self.doc4.tags.add(self.t1, self.t2)
self.sp1 = StoragePath.objects.create(name="sp1", path="Something/{checksum}")
def test_set_correspondent(self):
self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 1)
@@ -1436,6 +1542,60 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
args, kwargs = self.async_task.call_args
self.assertCountEqual(kwargs["document_ids"], [self.doc2.id, self.doc3.id])
def test_set_document_storage_path(self):
"""
GIVEN:
- 5 documents without a defined storage path
WHEN:
- Bulk edit called to add storage path to 1 document
THEN:
- Single document storage path update
"""
self.assertEqual(Document.objects.filter(storage_path=None).count(), 5)
bulk_edit.set_storage_path(
[self.doc1.id],
self.sp1.id,
)
self.assertEqual(Document.objects.filter(storage_path=None).count(), 4)
self.async_task.assert_called_once()
args, kwargs = self.async_task.call_args
self.assertCountEqual(kwargs["document_ids"], [self.doc1.id])
def test_unset_document_storage_path(self):
"""
GIVEN:
- 4 documents without a defined storage path
- 1 document with a defined storage path
WHEN:
- Bulk edit called to remove storage path from 1 document
THEN:
- Single document storage path removed
"""
self.assertEqual(Document.objects.filter(storage_path=None).count(), 5)
bulk_edit.set_storage_path(
[self.doc1.id],
self.sp1.id,
)
self.assertEqual(Document.objects.filter(storage_path=None).count(), 4)
bulk_edit.set_storage_path(
[self.doc1.id],
None,
)
self.assertEqual(Document.objects.filter(storage_path=None).count(), 5)
self.async_task.assert_called()
args, kwargs = self.async_task.call_args
self.assertCountEqual(kwargs["document_ids"], [self.doc1.id])
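Passing None as the storage path is the documented way to clear the field; the same bulk_edit.set_storage_path call serves both directions, as the two tests above show.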
def test_add_tag(self):
self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 2)
bulk_edit.add_tag(
@@ -1886,7 +2046,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
class TestBulkDownload(DirectoriesMixin, APITestCase):
def setUp(self):
super(TestBulkDownload, self).setUp()
super().setUp()
user = User.objects.create_superuser(username="temp_admin")
self.client.force_login(user=user)
@@ -2094,3 +2254,170 @@ class TestApiAuth(APITestCase):
response = self.client.get("/api/")
self.assertIn("X-Api-Version", response)
self.assertIn("X-Version", response)
class TestRemoteVersion(APITestCase):
ENDPOINT = "/api/remote_version/"
def setUp(self):
super().setUp()
def test_remote_version_default(self):
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertDictEqual(
response.data,
{
"version": "0.0.0",
"update_available": False,
"feature_is_set": False,
},
)
@override_settings(
ENABLE_UPDATE_CHECK=False,
)
def test_remote_version_disabled(self):
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertDictEqual(
response.data,
{
"version": "0.0.0",
"update_available": False,
"feature_is_set": True,
},
)
@override_settings(
ENABLE_UPDATE_CHECK=True,
)
@mock.patch("urllib.request.urlopen")
def test_remote_version_enabled_no_update_prefix(self, urlopen_mock):
cm = MagicMock()
cm.getcode.return_value = 200
cm.read.return_value = json.dumps({"tag_name": "ngx-1.6.0"}).encode()
cm.__enter__.return_value = cm
urlopen_mock.return_value = cm
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertDictEqual(
response.data,
{
"version": "1.6.0",
"update_available": False,
"feature_is_set": True,
},
)
@override_settings(
ENABLE_UPDATE_CHECK=True,
)
@mock.patch("urllib.request.urlopen")
def test_remote_version_enabled_no_update_no_prefix(self, urlopen_mock):
cm = MagicMock()
cm.getcode.return_value = 200
cm.read.return_value = json.dumps(
{"tag_name": version.__full_version_str__},
).encode()
cm.__enter__.return_value = cm
urlopen_mock.return_value = cm
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertDictEqual(
response.data,
{
"version": version.__full_version_str__,
"update_available": False,
"feature_is_set": True,
},
)
@override_settings(
ENABLE_UPDATE_CHECK=True,
)
@mock.patch("urllib.request.urlopen")
def test_remote_version_enabled_update(self, urlopen_mock):
new_version = (
version.__version__[0],
version.__version__[1],
version.__version__[2] + 1,
)
new_version_str = ".".join(map(str, new_version))
cm = MagicMock()
cm.getcode.return_value = 200
cm.read.return_value = json.dumps(
{"tag_name": new_version_str},
).encode()
cm.__enter__.return_value = cm
urlopen_mock.return_value = cm
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertDictEqual(
response.data,
{
"version": new_version_str,
"update_available": True,
"feature_is_set": True,
},
)
@override_settings(
ENABLE_UPDATE_CHECK=True,
)
@mock.patch("urllib.request.urlopen")
def test_remote_version_bad_json(self, urlopen_mock):
cm = MagicMock()
cm.getcode.return_value = 200
cm.read.return_value = b'{ "blah":'
cm.__enter__.return_value = cm
urlopen_mock.return_value = cm
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertDictEqual(
response.data,
{
"version": "0.0.0",
"update_available": False,
"feature_is_set": True,
},
)
@override_settings(
ENABLE_UPDATE_CHECK=True,
)
@mock.patch("urllib.request.urlopen")
def test_remote_version_exception(self, urlopen_mock):
cm = MagicMock()
cm.getcode.return_value = 200
cm.read.side_effect = urllib.error.URLError("an error")
cm.__enter__.return_value = cm
urlopen_mock.return_value = cm
response = self.client.get(self.ENDPOINT)
self.assertEqual(response.status_code, 200)
self.assertDictEqual(
response.data,
{
"version": "0.0.0",
"update_available": False,
"feature_is_set": True,
},
)
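Taken together, these tests pin down the update-check contract: fetch the latest release tag, strip an optional "ngx-" prefix, and fall back to the sentinel "0.0.0" on any network or parsing failure. A minimal sketch under those assumptions; the URL and function names are illustrative, not the actual view code:

import json
import urllib.error
import urllib.request

def fetch_remote_version(url):
    # Returns the remote version string; "0.0.0" is the error sentinel.
    remote = "0.0.0"
    try:
        with urllib.request.urlopen(url) as response:
            remote = json.loads(response.read().decode())["tag_name"]
            if remote.startswith("ngx-"):
                remote = remote[len("ngx-"):]  # release tags may carry an "ngx-" prefix
    except (urllib.error.URLError, json.JSONDecodeError, KeyError):
        pass
    return remote

def is_update_available(remote, local):
    # Compare dotted version strings numerically, e.g. "1.6.1" > "1.6.0".
    parse = lambda v: tuple(int(p) for p in v.split("."))
    try:
        return parse(remote) > parse(local)
    except ValueError:
        return False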

View File

@@ -13,13 +13,14 @@ from documents.classifier import load_classifier
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
class TestClassifier(DirectoriesMixin, TestCase):
def setUp(self):
super(TestClassifier, self).setUp()
super().setUp()
self.classifier = DocumentClassifier()
def generate_test_data(self):
@@ -56,6 +57,16 @@ class TestClassifier(DirectoriesMixin, TestCase):
name="dt2",
matching_algorithm=DocumentType.MATCH_AUTO,
)
self.sp1 = StoragePath.objects.create(
name="sp1",
path="path1",
matching_algorithm=StoragePath.MATCH_AUTO,
)
self.sp2 = StoragePath.objects.create(
name="sp2",
path="path2",
matching_algorithm=StoragePath.MATCH_AUTO,
)
self.doc1 = Document.objects.create(
title="doc1",
@@ -64,12 +75,14 @@ class TestClassifier(DirectoriesMixin, TestCase):
checksum="A",
document_type=self.dt,
)
self.doc2 = Document.objects.create(
title="doc1",
content="this is another document, but from c2",
correspondent=self.c2,
checksum="B",
)
self.doc_inbox = Document.objects.create(
title="doc235",
content="aa",
@@ -81,6 +94,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.doc2.tags.add(self.t3)
self.doc_inbox.tags.add(self.t2)
self.doc1.storage_path = self.sp1
def testNoTrainingData(self):
try:
self.classifier.train()
@@ -177,6 +192,14 @@ class TestClassifier(DirectoriesMixin, TestCase):
new_classifier.load()
self.assertFalse(new_classifier.train())
# @override_settings(
# MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"),
# )
# def test_create_test_load_and_classify(self):
# self.generate_test_data()
# self.classifier.train()
# self.classifier.save()
@override_settings(
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"),
)
@@ -263,6 +286,45 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.assertEqual(self.classifier.predict_document_type(doc1.content), dt.pk)
self.assertIsNone(self.classifier.predict_document_type(doc2.content))
def test_one_path_predict(self):
sp = StoragePath.objects.create(
name="sp",
matching_algorithm=StoragePath.MATCH_AUTO,
)
doc1 = Document.objects.create(
title="doc1",
content="this is a document from c1",
checksum="A",
storage_path=sp,
)
self.classifier.train()
self.assertEqual(self.classifier.predict_storage_path(doc1.content), sp.pk)
def test_one_path_predict_manydocs(self):
sp = StoragePath.objects.create(
name="sp",
matching_algorithm=StoragePath.MATCH_AUTO,
)
doc1 = Document.objects.create(
title="doc1",
content="this is a document from c1",
checksum="A",
storage_path=sp,
)
doc2 = Document.objects.create(
title="doc1",
content="this is a document from c2",
checksum="B",
)
self.classifier.train()
self.assertEqual(self.classifier.predict_storage_path(doc1.content), sp.pk)
self.assertIsNone(self.classifier.predict_storage_path(doc2.content))
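The prediction API exercised here follows the same pattern as the other object types: train on whatever MATCH_AUTO data exists, then query by document content. A short usage sketch; it requires a configured Django environment with documents in the database:

from documents.classifier import DocumentClassifier

classifier = DocumentClassifier()
classifier.train()  # needs at least one StoragePath with MATCH_AUTO assigned
pk = classifier.predict_storage_path("this is a document from c1")
if pk is not None:
    print(f"suggested StoragePath pk: {pk}")  # None means no confident match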
def test_one_tag_predict(self):
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)

View File

@@ -1,3 +1,4 @@
import datetime
import os
import re
import shutil
@@ -5,6 +6,8 @@ import tempfile
from unittest import mock
from unittest.mock import MagicMock
from dateutil import tz
try:
import zoneinfo
except ImportError:
@@ -41,7 +44,7 @@ class TestAttributes(TestCase):
self.assertEqual(file_info.title, title, filename)
self.assertEqual(tuple([t.name for t in file_info.tags]), tags, filename)
self.assertEqual(tuple(t.name for t in file_info.tags), tags, filename)
def test_guess_attributes_from_name_when_title_starts_with_dash(self):
self._test_guess_attributes_from_name(
@@ -176,7 +179,7 @@ class DummyParser(DocumentParser):
raise NotImplementedError()
def __init__(self, logging_group, scratch_dir, archive_path):
super(DummyParser, self).__init__(logging_group, None)
super().__init__(logging_group, None)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
self.archive_path = archive_path
@@ -195,7 +198,7 @@ class CopyParser(DocumentParser):
return self.fake_thumb
def __init__(self, logging_group, progress_callback=None):
super(CopyParser, self).__init__(logging_group, progress_callback)
super().__init__(logging_group, progress_callback)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=self.tempdir)
def parse(self, document_path, mime_type, file_name=None):
@@ -210,7 +213,7 @@ class FaultyParser(DocumentParser):
raise NotImplementedError()
def __init__(self, logging_group, scratch_dir):
super(FaultyParser, self).__init__(logging_group)
super().__init__(logging_group)
_, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
@@ -270,7 +273,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
return FaultyParser(logging_group, self.dirs.scratch_dir)
def setUp(self):
super(TestConsumer, self).setUp()
super().setUp()
patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
m = patcher.start()
@@ -317,7 +320,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
shutil.copy(src, dst)
return dst
@override_settings(PAPERLESS_FILENAME_FORMAT=None, TIME_ZONE="America/Chicago")
@override_settings(FILENAME_FORMAT=None, TIME_ZONE="America/Chicago")
def testNormalOperation(self):
filename = self.get_test_file()
@@ -348,7 +351,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertEqual(document.created.tzinfo, zoneinfo.ZoneInfo("America/Chicago"))
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
@override_settings(FILENAME_FORMAT=None)
def testDeleteMacFiles(self):
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
@@ -502,7 +505,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertRaisesMessage(
ConsumerError,
"sample.pdf: The following error occured while consuming sample.pdf: NO.",
"sample.pdf: The following error occurred while consuming sample.pdf: NO.",
self.consumer.try_consume_file,
filename,
)
@@ -515,7 +518,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
# Database empty
self.assertEqual(len(Document.objects.all()), 0)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
def testFilenameHandling(self):
filename = self.get_test_file()
@@ -527,7 +530,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
self._assert_first_last_send_progress()
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
@mock.patch("documents.signals.handlers.generate_unique_filename")
def testFilenameHandlingUnstableFormat(self, m):
@@ -609,7 +612,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
self._assert_first_last_send_progress(last_status="FAILED")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test_similar_filenames(self, m):
shutil.copy(
@@ -654,6 +657,127 @@ class TestConsumer(DirectoriesMixin, TestCase):
sanity_check()
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
class TestConsumerCreatedDate(DirectoriesMixin, TestCase):
def setUp(self):
super().setUp()
# this prevents websocket message reports during testing.
patcher = mock.patch("documents.consumer.Consumer._send_progress")
self._send_progress = patcher.start()
self.addCleanup(patcher.stop)
self.consumer = Consumer()
def test_consume_date_from_content(self):
"""
GIVEN:
- File content with date in DMY (default) format
THEN:
- Should parse the date from the file content
"""
src = os.path.join(
os.path.dirname(__file__),
"samples",
"documents",
"originals",
"0000005.pdf",
)
dst = os.path.join(self.dirs.scratch_dir, "sample.pdf")
shutil.copy(src, dst)
document = self.consumer.try_consume_file(dst)
self.assertEqual(
document.created,
datetime.datetime(1996, 2, 20, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
@override_settings(FILENAME_DATE_ORDER="YMD")
def test_consume_date_from_filename(self):
"""
GIVEN:
- File content with date in DMY (default) format
- Filename with date in YMD format
THEN:
- Should parse the date from the filename
"""
src = os.path.join(
os.path.dirname(__file__),
"samples",
"documents",
"originals",
"0000005.pdf",
)
dst = os.path.join(self.dirs.scratch_dir, "Scan - 2022-02-01.pdf")
shutil.copy(src, dst)
document = self.consumer.try_consume_file(dst)
self.assertEqual(
document.created,
datetime.datetime(2022, 2, 1, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
def test_consume_date_filename_date_use_content(self):
"""
GIVEN:
- File content with date in DMY (default) format
- Filename date parsing disabled
- Filename with date in YMD format
THEN:
- Should parse the date from the content
"""
src = os.path.join(
os.path.dirname(__file__),
"samples",
"documents",
"originals",
"0000005.pdf",
)
dst = os.path.join(self.dirs.scratch_dir, "Scan - 2022-02-01.pdf")
shutil.copy(src, dst)
document = self.consumer.try_consume_file(dst)
self.assertEqual(
document.created,
datetime.datetime(1996, 2, 20, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
@override_settings(
IGNORE_DATES=(datetime.date(2010, 12, 13), datetime.date(2011, 11, 12)),
)
def test_consume_date_use_content_with_ignore(self):
"""
GIVEN:
- File content with dates in DMY (default) format
- File content includes ignored dates
THEN:
- Should parse a non-ignored date from the file content
"""
src = os.path.join(
os.path.dirname(__file__),
"samples",
"documents",
"originals",
"0000006.pdf",
)
dst = os.path.join(self.dirs.scratch_dir, "0000006.pdf")
shutil.copy(src, dst)
document = self.consumer.try_consume_file(dst)
self.assertEqual(
document.created,
datetime.datetime(1997, 2, 20, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
class PreConsumeTestCase(TestCase):
@mock.patch("documents.consumer.Popen")
@override_settings(PRE_CONSUME_SCRIPT=None)

View File

@@ -8,6 +8,7 @@ from django.conf import settings
from django.test import override_settings
from django.test import TestCase
from documents.parsers import parse_date
from paperless.settings import DATE_ORDER
class TestDate(TestCase):
@@ -16,7 +17,7 @@ class TestDate(TestCase):
os.path.dirname(__file__),
"../../paperless_tesseract/tests/samples",
)
SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
SCRATCH = f"/tmp/paperless-tests-{str(uuid4())[:8]}"
def setUp(self):
os.makedirs(self.SCRATCH, exist_ok=True)
@@ -160,19 +161,112 @@ class TestDate(TestCase):
def test_crazy_date_with_spaces(self, *args):
self.assertIsNone(parse_date("", "20 408000l 2475"))
@override_settings(FILENAME_DATE_ORDER="YMD")
def test_filename_date_parse_valid_ymd(self, *args):
"""
GIVEN:
- Date parsing from the filename is enabled
- Filename date format is with Year Month Day (YMD)
- Filename contains date matching the format
THEN:
- Should parse the date from the filename
"""
self.assertEqual(
parse_date("/tmp/Scan-2022-04-01.pdf", "No date in here"),
datetime.datetime(2022, 4, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
@override_settings(FILENAME_DATE_ORDER="DMY")
def test_filename_date_parse_valid_dmy(self, *args):
"""
GIVEN:
- Date parsing from the filename is enabled
- Filename date format is with Day Month Year (DMY)
- Filename contains date matching the format
THEN:
- Should parse the date from the filename
"""
self.assertEqual(
parse_date("/tmp/Scan-10.01.2021.pdf", "No date in here"),
datetime.datetime(2021, 1, 10, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
@override_settings(FILENAME_DATE_ORDER="YMD")
def test_filename_date_parse_invalid(self, *args):
"""
GIVEN:
- Date parsing from the filename is enabled
- Filename includes no date
- File content includes no date
THEN:
- No date is parsed
"""
self.assertIsNone(
parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"),
)
@override_settings(
FILENAME_DATE_ORDER="YMD",
IGNORE_DATES=(datetime.date(2022, 4, 1),),
)
def test_filename_date_ignored_use_content(self, *args):
"""
GIVEN:
- Date parsing from the filename is enabled
- Filename date format is with Year Month Day (YMD)
- Date order is Day Month Year (DMY, the default)
- Filename contains date matching the format
- Filename date is an ignored date
- File content includes a date
THEN:
- Should parse the date from the content, not the filename
"""
self.assertEqual(
parse_date("/tmp/Scan-2022-04-01.pdf", "The matching date is 24.03.2022"),
datetime.datetime(2022, 3, 24, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
@override_settings(
IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)),
)
def test_ignored_dates(self, *args):
def test_ignored_dates_default_order(self, *args):
"""
GIVEN:
- Ignore dates have been set
- File content includes ignored dates
- File content includes 1 non-ignored date
THEN:
- Should parse the non-ignored date from the content
"""
text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem " "ipsum"
date = parse_date("", text)
self.assertEqual(
date,
parse_date("", text),
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
@override_settings(
IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)),
DATE_ORDER="YMD",
)
def test_ignored_dates_order_ymd(self, *args):
"""
GIVEN:
- Ignore dates have been set
- Date order is Year Month Date (YMD)
- File content includes ignored dates
- File content includes 1 non-ignored date
THEN:
- Should parse the non-ignored date from the content
"""
text = "lorem ipsum 190311, 20200117 and lorem 13.02.2018 lorem " "ipsum"
self.assertEqual(
parse_date("", text),
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
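The precedence these tests establish can be summarised as: filename dates (when FILENAME_DATE_ORDER is set) win over content dates, and ignored dates are skipped at either stage. A hedged sketch of that decision order, with find_dates standing in as a hypothetical placeholder for the real candidate extraction in documents/parsers.py:

def parse_date_sketch(filepath, text, settings, find_dates):
    # find_dates is a hypothetical helper yielding timezone-aware datetimes
    # found in a string, interpreted with the given date order.
    if settings.FILENAME_DATE_ORDER:
        for candidate in find_dates(filepath, order=settings.FILENAME_DATE_ORDER):
            if candidate.date() not in settings.IGNORE_DATES:
                return candidate  # filename wins unless the date is ignored
    for candidate in find_dates(text, order=settings.DATE_ORDER):
        if candidate.date() not in settings.IGNORE_DATES:
            return candidate  # first non-ignored content date
    return None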

View File

@@ -3,6 +3,11 @@ import tempfile
from pathlib import Path
from unittest import mock
try:
import zoneinfo
except ImportError:
import backports.zoneinfo as zoneinfo
from django.test import override_settings
from django.test import TestCase
from django.utils import timezone
@@ -51,16 +56,62 @@ class TestDocument(TestCase):
doc = Document(
mime_type="application/pdf",
title="test",
created=timezone.datetime(2020, 12, 25),
created=timezone.datetime(2020, 12, 25, tzinfo=zoneinfo.ZoneInfo("UTC")),
)
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf")
@override_settings(
TIME_ZONE="Europe/Berlin",
)
def test_file_name_with_timezone(self):
# See https://docs.djangoproject.com/en/4.0/ref/utils/#django.utils.timezone.now
# The default for created is an aware datetime in UTC
# This does that, just manually, with a fixed date
local_create_date = timezone.datetime(
2020,
12,
25,
tzinfo=zoneinfo.ZoneInfo("Europe/Berlin"),
)
utc_create_date = local_create_date.astimezone(zoneinfo.ZoneInfo("UTC"))
doc = Document(
mime_type="application/pdf",
title="test",
created=utc_create_date,
)
# Ensure the create date would cause an off by 1 if not properly created above
self.assertEqual(utc_create_date.date().day, 24)
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf")
local_create_date = timezone.datetime(
2020,
1,
1,
tzinfo=zoneinfo.ZoneInfo("Europe/Berlin"),
)
utc_create_date = local_create_date.astimezone(zoneinfo.ZoneInfo("UTC"))
doc = Document(
mime_type="application/pdf",
title="test",
created=utc_create_date,
)
# Ensure the create date would cause an off by 1 in the year if not properly created above
self.assertEqual(utc_create_date.date().year, 2019)
self.assertEqual(doc.get_public_filename(), "2020-01-01 test.pdf")
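The off-by-one being guarded against is easy to reproduce standalone: midnight in Europe/Berlin corresponds to the previous day in UTC, so formatting the UTC value's date directly would shift the filename by a day, or by a year at new year:

import datetime
try:
    import zoneinfo
except ImportError:
    import backports.zoneinfo as zoneinfo

local = datetime.datetime(2020, 12, 25, tzinfo=zoneinfo.ZoneInfo("Europe/Berlin"))
utc = local.astimezone(zoneinfo.ZoneInfo("UTC"))
assert utc.date() == datetime.date(2020, 12, 24)  # naive .date() loses a day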
def test_file_name_jpg(self):
doc = Document(
mime_type="image/jpeg",
title="test",
created=timezone.datetime(2020, 12, 25),
created=timezone.datetime(2020, 12, 25, tzinfo=zoneinfo.ZoneInfo("UTC")),
)
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.jpg")
@@ -69,7 +120,7 @@ class TestDocument(TestCase):
doc = Document(
mime_type="application/zip",
title="test",
created=timezone.datetime(2020, 12, 25),
created=timezone.datetime(2020, 12, 25, tzinfo=zoneinfo.ZoneInfo("UTC")),
)
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.zip")
@@ -78,6 +129,6 @@ class TestDocument(TestCase):
doc = Document(
mime_type="image/jpegasd",
title="test",
created=timezone.datetime(2020, 12, 25),
created=timezone.datetime(2020, 12, 25, tzinfo=zoneinfo.ZoneInfo("UTC")),
)
self.assertEqual(doc.get_public_filename(), "2020-12-25 test")

View File

@@ -20,27 +20,27 @@ from ..file_handling import generate_unique_filename
from ..models import Correspondent
from ..models import Document
from ..models import DocumentType
from ..models import Tag
from ..models import StoragePath
from .utils import DirectoriesMixin
class TestFileHandling(DirectoriesMixin, TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
def test_generate_source_filename(self):
document = Document()
document.mime_type = "application/pdf"
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
document.save()
self.assertEqual(generate_filename(document), "{:07d}.pdf".format(document.pk))
self.assertEqual(generate_filename(document), f"{document.pk:07d}.pdf")
document.storage_type = Document.STORAGE_TYPE_GPG
self.assertEqual(
generate_filename(document),
"{:07d}.pdf.gpg".format(document.pk),
f"{document.pk:07d}.pdf.gpg",
)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_file_renaming(self):
document = Document()
document.mime_type = "application/pdf"
@@ -50,7 +50,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Test default source_path
self.assertEqual(
document.source_path,
settings.ORIGINALS_DIR + "/{:07d}.pdf".format(document.pk),
settings.ORIGINALS_DIR + f"/{document.pk:07d}.pdf",
)
document.filename = generate_filename(document)
@@ -82,7 +82,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
True,
)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_file_renaming_missing_permissions(self):
document = Document()
document.mime_type = "application/pdf"
@@ -117,7 +117,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_file_renaming_database_error(self):
document1 = Document.objects.create(
@@ -156,7 +156,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
)
self.assertEqual(document.filename, "none/none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_document_delete(self):
document = Document()
document.mime_type = "application/pdf"
@@ -180,7 +180,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
@override_settings(
PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}",
FILENAME_FORMAT="{correspondent}/{correspondent}",
TRASH_DIR=tempfile.mkdtemp(),
)
def test_document_delete_trash(self):
@@ -218,7 +218,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
document.delete()
self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none_01.pdf"), True)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_document_delete_nofile(self):
document = Document()
document.mime_type = "application/pdf"
@@ -227,7 +227,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_directory_not_empty(self):
document = Document()
document.mime_type = "application/pdf"
@@ -253,7 +253,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True)
self.assertTrue(os.path.isfile(important_file))
@override_settings(PAPERLESS_FILENAME_FORMAT="{document_type} - {title}")
@override_settings(FILENAME_FORMAT="{document_type} - {title}")
def test_document_type(self):
dt = DocumentType.objects.create(name="my_doc_type")
d = Document.objects.create(title="the_doc", mime_type="application/pdf")
@@ -264,7 +264,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(d), "my_doc_type - the_doc.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{asn} - {title}")
@override_settings(FILENAME_FORMAT="{asn} - {title}")
def test_asn(self):
d1 = Document.objects.create(
title="the_doc",
@@ -281,7 +281,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(d1), "652 - the_doc.pdf")
self.assertEqual(generate_filename(d2), "none - the_doc.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
@override_settings(FILENAME_FORMAT="{tags[type]}")
def test_tags_with_underscore(self):
document = Document()
document.mime_type = "application/pdf"
@@ -296,7 +296,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document), "demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
@override_settings(FILENAME_FORMAT="{tags[type]}")
def test_tags_with_dash(self):
document = Document()
document.mime_type = "application/pdf"
@@ -311,7 +311,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document), "demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
@override_settings(FILENAME_FORMAT="{tags[type]}")
def test_tags_malformed(self):
document = Document()
document.mime_type = "application/pdf"
@@ -326,7 +326,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document), "none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
@override_settings(FILENAME_FORMAT="{tags[0]}")
def test_tags_all(self):
document = Document()
document.mime_type = "application/pdf"
@@ -340,7 +340,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document), "demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}")
@override_settings(FILENAME_FORMAT="{tags[1]}")
def test_tags_out_of_bounds(self):
document = Document()
document.mime_type = "application/pdf"
@@ -354,7 +354,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document), "none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags}")
@override_settings(FILENAME_FORMAT="{tags}")
def test_tags_without_args(self):
document = Document()
document.mime_type = "application/pdf"
@@ -363,7 +363,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(document), f"{document.pk:07}.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title} {tag_list}")
@override_settings(FILENAME_FORMAT="{title} {tag_list}")
def test_tag_list(self):
doc = Document.objects.create(title="doc1", mime_type="application/pdf")
doc.tags.create(name="tag2")
@@ -379,7 +379,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(doc), "doc2.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="//etc/something/{title}")
@override_settings(FILENAME_FORMAT="//etc/something/{title}")
def test_filename_relative(self):
doc = Document.objects.create(title="doc1", mime_type="application/pdf")
doc.filename = generate_filename(doc)
@@ -391,7 +391,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
)
@override_settings(
PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}",
FILENAME_FORMAT="{created_year}-{created_month}-{created_day}",
)
def test_created_year_month_day(self):
d1 = timezone.make_aware(datetime.datetime(2020, 3, 6, 1, 1, 1))
@@ -408,7 +408,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
@override_settings(
PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}",
FILENAME_FORMAT="{added_year}-{added_month}-{added_day}",
)
def test_added_year_month_day(self):
d1 = timezone.make_aware(datetime.datetime(232, 1, 9, 1, 1, 1))
@@ -425,7 +425,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
@override_settings(
PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}",
FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}",
)
def test_nested_directory_cleanup(self):
document = Document()
@@ -453,7 +453,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
@override_settings(FILENAME_FORMAT=None)
def test_format_none(self):
document = Document()
document.pk = 1
@@ -479,7 +479,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(os.path.isfile(os.path.join(tmp, "notempty", "file")), True)
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty", "empty")), False)
@override_settings(PAPERLESS_FILENAME_FORMAT="{created/[title]")
@override_settings(FILENAME_FORMAT="{created/[title]")
def test_invalid_format(self):
document = Document()
document.pk = 1
@@ -488,7 +488,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(document), "0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{created__year}")
@override_settings(FILENAME_FORMAT="{created__year}")
def test_invalid_format_key(self):
document = Document()
document.pk = 1
@@ -497,7 +497,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(document), "0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
def test_duplicates(self):
document = Document.objects.create(
mime_type="application/pdf",
@@ -548,7 +548,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document2.filename, "qwe.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
@mock.patch("documents.signals.handlers.Document.objects.filter")
def test_no_update_without_change(self, m):
doc = Document.objects.create(
@@ -568,7 +568,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
@override_settings(FILENAME_FORMAT=None)
def test_create_no_format(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
@@ -587,7 +587,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
def test_create_with_format(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
@@ -615,7 +615,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"),
)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
def test_move_archive_gone(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
@@ -634,7 +634,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertFalse(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
def test_move_archive_exists(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
@@ -659,7 +659,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(existing_archive_file))
self.assertEqual(doc.archive_filename, "none/my_doc_01.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
def test_move_original_only(self):
original = os.path.join(settings.ORIGINALS_DIR, "document_01.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "document.pdf")
@@ -681,7 +681,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
def test_move_archive_only(self):
original = os.path.join(settings.ORIGINALS_DIR, "document.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "document_01.pdf")
@@ -703,7 +703,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
@mock.patch("documents.signals.handlers.os.rename")
def test_move_archive_error(self, m):
def fake_rename(src, dst):
@@ -734,7 +734,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
def test_move_file_gone(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
@@ -754,7 +754,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertFalse(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
@mock.patch("documents.signals.handlers.os.rename")
def test_move_file_error(self, m):
def fake_rename(src, dst):
@@ -785,7 +785,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
def test_archive_deleted(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
@@ -812,7 +812,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertFalse(os.path.isfile(doc.source_path))
self.assertFalse(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
def test_archive_deleted2(self):
original = os.path.join(settings.ORIGINALS_DIR, "document.png")
original2 = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
@@ -846,7 +846,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc1.archive_path))
self.assertFalse(os.path.isfile(doc2.source_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
def test_database_error(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
@@ -872,7 +872,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
class TestFilenameGeneration(TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
def test_invalid_characters(self):
doc = Document.objects.create(
@@ -891,7 +891,7 @@ class TestFilenameGeneration(TestCase):
)
self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{created}")
@override_settings(FILENAME_FORMAT="{created}")
def test_date(self):
doc = Document.objects.create(
title="does not matter",
@@ -902,6 +902,140 @@ class TestFilenameGeneration(TestCase):
)
self.assertEqual(generate_filename(doc), "2020-05-21.pdf")
def test_dynamic_path(self):
"""
GIVEN:
- A document with a defined storage path
WHEN:
- the filename is generated for the document
THEN:
- the generated filename uses the defined storage path for the document
"""
doc = Document.objects.create(
title="does not matter",
created=timezone.make_aware(datetime.datetime(2020, 6, 25, 7, 36, 51, 153)),
mime_type="application/pdf",
pk=2,
checksum="2",
storage_path=StoragePath.objects.create(path="TestFolder/{created}"),
)
self.assertEqual(generate_filename(doc), "TestFolder/2020-06-25.pdf")
def test_dynamic_path_with_none(self):
"""
GIVEN:
- A document with a defined storage path
- The defined storage path uses an undefined field for the document
WHEN:
- the filename is generated for the document
THEN:
- the generated filename uses the defined storage path for the document
- the generated filename includes "none" in the place undefined field
"""
doc = Document.objects.create(
title="does not matter",
created=timezone.make_aware(datetime.datetime(2020, 6, 25, 7, 36, 51, 153)),
mime_type="application/pdf",
pk=2,
checksum="2",
storage_path=StoragePath.objects.create(path="{asn} - {created}"),
)
self.assertEqual(generate_filename(doc), "none - 2020-06-25.pdf")
@override_settings(
FILENAME_FORMAT_REMOVE_NONE=True,
)
def test_dynamic_path_remove_none(self):
"""
GIVEN:
- A document with a defined storage path
- The defined storage path uses an undefined field for the document
- The setting for removing undefined fields is enabled
WHEN:
- the filename is generated for the document
THEN:
- the generated filename uses the defined storage path for the document
- the generated filename does not include "none" in the place undefined field
"""
doc = Document.objects.create(
title="does not matter",
created=timezone.make_aware(datetime.datetime(2020, 6, 25, 7, 36, 51, 153)),
mime_type="application/pdf",
pk=2,
checksum="2",
storage_path=StoragePath.objects.create(path="TestFolder/{asn}/{created}"),
)
self.assertEqual(generate_filename(doc), "TestFolder/2020-06-25.pdf")
def test_multiple_doc_paths(self):
"""
GIVEN:
- Two documents, each with different storage paths
WHEN:
- the filename is generated for the documents
THEN:
- Each document's generated filename uses its storage path
"""
doc_a = Document.objects.create(
title="does not matter",
created=timezone.make_aware(datetime.datetime(2020, 6, 25, 7, 36, 51, 153)),
mime_type="application/pdf",
pk=2,
checksum="2",
archive_serial_number=4,
storage_path=StoragePath.objects.create(
name="sp1",
path="ThisIsAFolder/{asn}/{created}",
),
)
doc_b = Document.objects.create(
title="does not matter",
created=timezone.make_aware(datetime.datetime(2020, 7, 25, 7, 36, 51, 153)),
mime_type="application/pdf",
pk=5,
checksum="abcde",
storage_path=StoragePath.objects.create(
name="sp2",
path="SomeImportantNone/{created}",
),
)
self.assertEqual(generate_filename(doc_a), "ThisIsAFolder/4/2020-06-25.pdf")
self.assertEqual(generate_filename(doc_b), "SomeImportantNone/2020-07-25.pdf")
def test_no_path_fallback(self):
"""
GIVEN:
- Two documents, one with defined storage path, the other not
WHEN:
- the filename is generated for the documents
THEN:
- Document with defined path uses its format
- Document without defined path uses the default path
"""
doc_a = Document.objects.create(
title="does not matter",
created=timezone.make_aware(datetime.datetime(2020, 6, 25, 7, 36, 51, 153)),
mime_type="application/pdf",
pk=2,
checksum="2",
archive_serial_number=4,
)
doc_b = Document.objects.create(
title="does not matter",
created=timezone.make_aware(datetime.datetime(2020, 7, 25, 7, 36, 51, 153)),
mime_type="application/pdf",
pk=5,
checksum="abcde",
storage_path=StoragePath.objects.create(
name="sp2",
path="SomeImportantNone/{created}",
),
)
self.assertEqual(generate_filename(doc_a), "0000002.pdf")
self.assertEqual(generate_filename(doc_b), "SomeImportantNone/2020-07-25.pdf")
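Read together, these cases document the template resolution order. A hedged sketch of that order, not the actual file_handling.py logic:

def resolve_filename_template(document, settings):
    # Per-document storage path template wins, then the global
    # FILENAME_FORMAT, then the zero-padded pk default.
    if document.storage_path is not None:
        return document.storage_path.path
    if settings.FILENAME_FORMAT:
        return settings.FILENAME_FORMAT
    return None  # caller falls back to f"{document.pk:07d}" naming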
def run():
doc = Document.objects.create(

View File

@@ -18,7 +18,7 @@ from documents.tests.utils import DirectoriesMixin
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
class TestArchiver(DirectoriesMixin, TestCase):
def make_models(self):
return Document.objects.create(
@@ -72,7 +72,7 @@ class TestArchiver(DirectoriesMixin, TestCase):
self.assertIsNone(doc.archive_filename)
self.assertTrue(os.path.isfile(doc.source_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
def test_naming_priorities(self):
doc1 = Document.objects.create(
checksum="A",
@@ -109,7 +109,7 @@ class TestDecryptDocuments(TestCase):
ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
PASSPHRASE="test",
PAPERLESS_FILENAME_FORMAT=None,
FILENAME_FORMAT=None,
)
@mock.patch("documents.management.commands.decrypt_documents.input")
def test_decrypt(self, m):
@@ -184,7 +184,7 @@ class TestMakeIndex(TestCase):
class TestRenamer(DirectoriesMixin, TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
def test_rename(self):
doc = Document.objects.create(title="test", mime_type="image/jpeg")
doc.filename = generate_filename(doc)
@@ -194,7 +194,7 @@ class TestRenamer(DirectoriesMixin, TestCase):
Path(doc.source_path).touch()
Path(doc.archive_path).touch()
with override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}"):
with override_settings(FILENAME_FORMAT="{correspondent}/{title}"):
call_command("document_renamer")
doc2 = Document.objects.get(id=doc.id)

View File

@@ -39,7 +39,7 @@ class ConsumerMixin:
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
def setUp(self) -> None:
super(ConsumerMixin, self).setUp()
super().setUp()
self.t = None
patcher = mock.patch(
"documents.management.commands.document_consumer.async_task",
@@ -60,7 +60,7 @@ class ConsumerMixin:
# wait for the consumer to exit.
self.t.join()
super(ConsumerMixin, self).tearDown()
super().tearDown()
def wait_for_task_mock_call(self, expected_call_count=1):
n = 0
@@ -98,6 +98,9 @@ class ConsumerMixin:
print("file completed.")
@override_settings(
CONSUMER_INOTIFY_DELAY=0.01,
)
class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
def test_consume_file(self):
self.t_start()
@@ -286,7 +289,7 @@ class TestConsumerPolling(TestConsumer):
pass
@override_settings(CONSUMER_RECURSIVE=True)
@override_settings(CONSUMER_INOTIFY_DELAY=0.01, CONSUMER_RECURSIVE=True)
class TestConsumerRecursive(TestConsumer):
# just do all the tests with recursive
pass

View File

@@ -65,7 +65,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.d1.correspondent = self.c1
self.d1.document_type = self.dt1
self.d1.save()
super(TestExportImport, self).setUp()
super().setUp()
def _get_document_from_manifest(self, manifest, id):
f = list(
@@ -200,7 +200,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
)
with override_settings(
PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}",
FILENAME_FORMAT="{created_year}/{correspondent}/{title}",
):
self.test_exporter(use_filename_format=True)
@@ -309,7 +309,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.assertTrue(len(manifest), 6)
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}/{correspondent}")
@override_settings(FILENAME_FORMAT="{title}/{correspondent}")
def test_update_export_changed_location(self):
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(

View File

@@ -82,7 +82,7 @@ class TestRetagger(DirectoriesMixin, TestCase):
)
def setUp(self) -> None:
super(TestRetagger, self).setUp()
super().setUp()
self.make_models()
def test_add_tags(self):

View File

@@ -1,67 +1,180 @@
import os
import shutil
from io import StringIO
from unittest import mock
from django.contrib.auth.models import User
from django.core.management import call_command
from django.test import TestCase
from documents.management.commands.document_thumbnails import _process_document
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
class TestManageSuperUser(DirectoriesMixin, TestCase):
def reset_environment(self):
if "PAPERLESS_ADMIN_USER" in os.environ:
del os.environ["PAPERLESS_ADMIN_USER"]
if "PAPERLESS_ADMIN_PASSWORD" in os.environ:
del os.environ["PAPERLESS_ADMIN_PASSWORD"]
def setUp(self) -> None:
super().setUp()
self.reset_environment()
def tearDown(self) -> None:
super().tearDown()
self.reset_environment()
def call_command(self, environ):
out = StringIO()
with mock.patch.dict(os.environ, environ):
call_command(
"manage_superuser",
"--no-color",
stdout=out,
stderr=StringIO(),
)
return out.getvalue()
def test_no_user(self):
call_command("manage_superuser")
"""
GIVEN:
- Environment does not contain admin user info
THEN:
- No admin user is created
"""
# just the consumer user.
out = self.call_command(environ={})
# just the consumer user which is created
# during migration
self.assertEqual(User.objects.count(), 1)
self.assertTrue(User.objects.filter(username="consumer").exists())
self.assertEqual(User.objects.filter(is_superuser=True).count(), 0)
self.assertEqual(
out,
"Please check if PAPERLESS_ADMIN_PASSWORD has been set in the environment\n",
)
def test_create(self):
os.environ["PAPERLESS_ADMIN_USER"] = "new_user"
os.environ["PAPERLESS_ADMIN_PASSWORD"] = "123456"
"""
GIVEN:
- Environment does contain admin user password
THEN:
- admin user is created
"""
call_command("manage_superuser")
out = self.call_command(environ={"PAPERLESS_ADMIN_PASSWORD": "123456"})
user: User = User.objects.get_by_natural_key("new_user")
self.assertTrue(user.check_password("123456"))
# count is 2 as there's the consumer
# user already created during migration
user: User = User.objects.get_by_natural_key("admin")
self.assertEqual(User.objects.count(), 2)
self.assertTrue(user.is_superuser)
self.assertEqual(user.email, "root@localhost")
self.assertEqual(out, 'Created superuser "admin" with provided password.\n')
def test_update(self):
os.environ["PAPERLESS_ADMIN_USER"] = "new_user"
os.environ["PAPERLESS_ADMIN_PASSWORD"] = "123456"
def test_some_superuser_exists(self):
"""
GIVEN:
- A super user already exists
- Environment does contain admin user password
THEN:
- admin user is NOT created
"""
User.objects.create_superuser("someuser", "root@localhost", "password")
call_command("manage_superuser")
out = self.call_command(environ={"PAPERLESS_ADMIN_PASSWORD": "123456"})
os.environ["PAPERLESS_ADMIN_USER"] = "new_user"
os.environ["PAPERLESS_ADMIN_PASSWORD"] = "more_secure_pwd_7645"
self.assertEqual(User.objects.count(), 2)
with self.assertRaises(User.DoesNotExist):
User.objects.get_by_natural_key("admin")
self.assertEqual(
out,
"Did not create superuser, the DB already contains superusers\n",
)
call_command("manage_superuser")
def test_admin_superuser_exists(self):
"""
GIVEN:
- A super user already exists
- The existing superuser's username is admin
- Environment does contain admin user password
THEN:
- Password remains unchanged
"""
User.objects.create_superuser("admin", "root@localhost", "password")
user: User = User.objects.get_by_natural_key("new_user")
self.assertTrue(user.check_password("more_secure_pwd_7645"))
out = self.call_command(environ={"PAPERLESS_ADMIN_PASSWORD": "123456"})
self.assertEqual(User.objects.count(), 2)
user: User = User.objects.get_by_natural_key("admin")
self.assertTrue(user.check_password("password"))
self.assertEqual(out, "Did not create superuser, a user admin already exists\n")
def test_admin_user_exists(self):
"""
GIVEN:
- A user already exists with the username admin
- Environment does contain admin user password
THEN:
- Password remains unchanged
- User is not upgraded to superuser
"""
User.objects.create_user("admin", "root@localhost", "password")
out = self.call_command(environ={"PAPERLESS_ADMIN_PASSWORD": "123456"})
self.assertEqual(User.objects.count(), 2)
user: User = User.objects.get_by_natural_key("admin")
self.assertTrue(user.check_password("password"))
self.assertFalse(user.is_superuser)
self.assertEqual(out, "Did not create superuser, a user admin already exists\n")
def test_no_password(self):
os.environ["PAPERLESS_ADMIN_USER"] = "new_user"
call_command("manage_superuser")
"""
GIVEN:
- No environment data is set
THEN:
- No user is created
"""
out = self.call_command(environ={})
with self.assertRaises(User.DoesNotExist):
User.objects.get_by_natural_key("new_user")
User.objects.get_by_natural_key("admin")
self.assertEqual(
out,
"Please check if PAPERLESS_ADMIN_PASSWORD has been set in the environment\n",
)
def test_user_email(self):
"""
GIVEN:
- Environment does contain admin user password
- Environment contains user email
THEN:
- admin user is created
"""
out = self.call_command(
environ={
"PAPERLESS_ADMIN_PASSWORD": "123456",
"PAPERLESS_ADMIN_MAIL": "hello@world.com",
},
)
user: User = User.objects.get_by_natural_key("admin")
self.assertEqual(User.objects.count(), 2)
self.assertTrue(user.is_superuser)
self.assertEqual(user.email, "hello@world.com")
self.assertEqual(user.username, "admin")
self.assertEqual(out, 'Created superuser "admin" with provided password.\n')
def test_user_username(self):
"""
GIVEN:
- Environment does contain admin user password
- Environment contains user username
THEN:
- admin user is created
"""
out = self.call_command(
environ={
"PAPERLESS_ADMIN_PASSWORD": "123456",
"PAPERLESS_ADMIN_MAIL": "hello@world.com",
"PAPERLESS_ADMIN_USER": "super",
},
)
user: User = User.objects.get_by_natural_key("super")
self.assertEqual(User.objects.count(), 2)
self.assertTrue(user.is_superuser)
self.assertEqual(user.email, "hello@world.com")
self.assertEqual(user.username, "super")
self.assertEqual(out, 'Created superuser "super" with provided password.\n')
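# Taken together, the tests above pin down manage_superuser's decision
# order: an existing user with the target username wins, then any existing
# superuser, then a missing password, and only then is the account created.
# A hedged reconstruction of that order (a sketch, not the command source):
def manage_superuser_sketch() -> str:
    import os

    from django.contrib.auth.models import User

    username = os.getenv("PAPERLESS_ADMIN_USER", "admin")
    mail = os.getenv("PAPERLESS_ADMIN_MAIL", "root@localhost")
    password = os.getenv("PAPERLESS_ADMIN_PASSWORD")
    if User.objects.filter(username=username).exists():
        return f"Did not create superuser, a user {username} already exists"
    if User.objects.filter(is_superuser=True).exists():
        return "Did not create superuser, the DB already contains superusers"
    if not password:
        return (
            "Please check if PAPERLESS_ADMIN_PASSWORD has been set "
            "in the environment"
        )
    User.objects.create_superuser(username, mail, password)
    return f'Created superuser "{username}" with provided password.'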

View File

@@ -39,7 +39,7 @@ class TestMakeThumbnails(DirectoriesMixin, TestCase):
)
def setUp(self) -> None:
super(TestMakeThumbnails, self).setUp()
super().setUp()
self.make_models()
def test_process_document(self):

View File

@@ -36,13 +36,13 @@ class _TestMatchingBase(TestCase):
doc = Document(content=string)
self.assertTrue(
matching.matches(instance, doc),
'"%s" should match "%s" but it does not' % (match_text, string),
f'"{match_text}" should match "{string}" but it does not',
)
for string in no_match:
doc = Document(content=string)
self.assertFalse(
matching.matches(instance, doc),
'"%s" should not match "%s" but it does' % (match_text, string),
f'"{match_text}" should not match "{string}" but it does',
)

View File

@@ -22,7 +22,7 @@ def archive_path_old(self):
if self.filename:
fname = archive_name_from_filename(self.filename)
else:
fname = "{:07}.pdf".format(self.pk)
fname = f"{self.pk:07}.pdf"
return os.path.join(settings.ARCHIVE_DIR, fname)
@@ -38,7 +38,7 @@ def source_path(doc):
if doc.filename:
fname = str(doc.filename)
else:
fname = "{:07}{}".format(doc.pk, doc.file_type)
fname = f"{doc.pk:07}{doc.file_type}"
if doc.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg" # pragma: no cover
@@ -46,7 +46,7 @@ def source_path(doc):
def thumbnail_path(doc):
file_name = "{:07}.png".format(doc.pk)
file_name = f"{doc.pk:07}.png"
if doc.storage_type == STORAGE_TYPE_GPG:
file_name += ".gpg"
@@ -111,7 +111,7 @@ simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.
simple_png2 = os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
migrate_from = "1011_auto_20210101_2340"
@@ -240,7 +240,7 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
def test_filenames(self):
Document = self.apps.get_model("documents", "Document")
@@ -279,7 +279,7 @@ def fake_parse_wrapper(parser, path, mime_type, file_name):
parser.text = "the text"
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
migrate_from = "1011_auto_20210101_2340"
@@ -447,7 +447,7 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
self.assertIsNone(doc2.archive_filename)
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
migrate_from = "1012_fix_archive_files"
@@ -505,14 +505,14 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
class TestMigrateArchiveFilesBackwardsWithFilenameFormat(
TestMigrateArchiveFilesBackwards,
):
pass
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations):
migrate_from = "1012_fix_archive_files"

View File

@@ -15,7 +15,7 @@ def source_path_before(self):
if self.filename:
fname = str(self.filename)
else:
fname = "{:07}.{}".format(self.pk, self.file_type)
fname = f"{self.pk:07}.{self.file_type}"
if self.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg"
@@ -30,7 +30,7 @@ def source_path_after(doc):
if doc.filename:
fname = str(doc.filename)
else:
fname = "{:07}{}".format(doc.pk, file_type_after(doc))
fname = f"{doc.pk:07}{file_type_after(doc)}"
if doc.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg" # pragma: no cover

View File

@@ -31,7 +31,7 @@ def fake_magic_from_file(file, mime=False):
class TestParserDiscovery(TestCase):
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test__get_parser_class_1_parser(self, m, *args):
class DummyParser(object):
class DummyParser:
pass
m.return_value = (
@@ -49,10 +49,10 @@ class TestParserDiscovery(TestCase):
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test__get_parser_class_n_parsers(self, m, *args):
class DummyParser1(object):
class DummyParser1:
pass
class DummyParser2(object):
class DummyParser2:
pass
m.return_value = (

View File

@@ -204,6 +204,34 @@ class TestTasks(DirectoriesMixin, TestCase):
img = Image.open(test_file)
self.assertEqual(tasks.barcode_reader(img), ["CUSTOM BARCODE"])
def test_get_mime_type(self):
tiff_file = os.path.join(
os.path.dirname(__file__),
"samples",
"simple.tiff",
)
pdf_file = os.path.join(
os.path.dirname(__file__),
"samples",
"simple.pdf",
)
png_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
"barcode-128-custom.png",
)
tiff_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile1")
pdf_file_no_extension = os.path.join(settings.SCRATCH_DIR, "testfile2")
shutil.copy(tiff_file, tiff_file_no_extension)
shutil.copy(pdf_file, pdf_file_no_extension)
self.assertEqual(tasks.get_file_type(tiff_file), "image/tiff")
self.assertEqual(tasks.get_file_type(pdf_file), "application/pdf")
self.assertEqual(tasks.get_file_type(tiff_file_no_extension), "image/tiff")
self.assertEqual(tasks.get_file_type(pdf_file_no_extension), "application/pdf")
self.assertEqual(tasks.get_file_type(png_file), "image/png")
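# The expected values above come from content sniffing, not file
# extensions, which is why the extension-less copies still resolve.
# A minimal sketch of such a probe with python-magic (the real
# tasks.get_file_type may differ):
import magic

def sniff_mime_type(path: str) -> str:
    # Inspect the file's bytes; a missing extension does not matter.
    return magic.from_file(path, mime=True)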
def test_convert_from_tiff_to_pdf(self):
test_file = os.path.join(
os.path.dirname(__file__),
@@ -469,7 +497,7 @@ class TestTasks(DirectoriesMixin, TestCase):
self.assertEqual(
cm.output,
[
"WARNING:paperless.tasks:Unsupported file format for barcode reader: .jpg",
"WARNING:paperless.tasks:Unsupported file format for barcode reader: image/jpeg",
],
)
m.assert_called_once()
@@ -481,6 +509,26 @@ class TestTasks(DirectoriesMixin, TestCase):
self.assertIsNone(kwargs["override_document_type_id"])
self.assertIsNone(kwargs["override_tag_ids"])
@override_settings(
CONSUMER_ENABLE_BARCODES=True,
CONSUMER_BARCODE_TIFF_SUPPORT=True,
)
def test_consume_barcode_supported_no_extension_file(self):
"""
This test assumes barcode and TIFF support are enabled and
the user uploads a supported image file, but without extension
"""
test_file = os.path.join(
os.path.dirname(__file__),
"samples",
"barcodes",
"patch-code-t-middle.tiff",
)
dst = os.path.join(settings.SCRATCH_DIR, "patch-code-t-middle")
shutil.copy(test_file, dst)
self.assertEqual(tasks.consume_file(dst), "File successfully split")
@mock.patch("documents.tasks.sanity_checker.check_sanity")
def test_sanity_check_success(self, m):
m.return_value = SanityCheckMessages()

View File

@@ -76,10 +76,10 @@ class DirectoriesMixin:
def setUp(self) -> None:
self.dirs = setup_directories()
super(DirectoriesMixin, self).setUp()
super().setUp()
def tearDown(self) -> None:
super(DirectoriesMixin, self).tearDown()
super().tearDown()
remove_dirs(self.dirs)
@@ -93,7 +93,7 @@ class TestMigrations(TransactionTestCase):
auto_migrate = True
def setUp(self):
super(TestMigrations, self).setUp()
super().setUp()
assert (
self.migrate_from and self.migrate_to

View File

@@ -11,6 +11,7 @@ from unicodedata import normalize
from urllib.parse import quote
from django.conf import settings
from django.contrib.auth.models import User
from django.db.models import Case
from django.db.models import Count
from django.db.models import IntegerField
@@ -54,14 +55,17 @@ from .classifier import load_classifier
from .filters import CorrespondentFilterSet
from .filters import DocumentFilterSet
from .filters import DocumentTypeFilterSet
from .filters import StoragePathFilterSet
from .filters import TagFilterSet
from .matching import match_correspondents
from .matching import match_document_types
from .matching import match_storage_paths
from .matching import match_tags
from .models import Correspondent
from .models import Document
from .models import DocumentType
from .models import SavedView
from .models import StoragePath
from .models import Tag
from .parsers import get_parser_class_for_mime_type
from .serialisers import BulkDownloadSerializer
@@ -72,8 +76,10 @@ from .serialisers import DocumentSerializer
from .serialisers import DocumentTypeSerializer
from .serialisers import PostDocumentSerializer
from .serialisers import SavedViewSerializer
from .serialisers import StoragePathSerializer
from .serialisers import TagSerializer
from .serialisers import TagSerializerVersion1
from .serialisers import UiSettingsViewSerializer
logger = logging.getLogger("paperless.api")
@@ -81,12 +87,18 @@ logger = logging.getLogger("paperless.api")
class IndexView(TemplateView):
template_name = "index.html"
def get_language(self):
def get_frontend_language(self):
if hasattr(
self.request.user,
"ui_settings",
) and self.request.user.ui_settings.settings.get("language"):
lang = self.request.user.ui_settings.settings.get("language")
else:
lang = get_language()
# This is here for the following reason:
# Django identifies languages in the form "en-us".
# However, Angular generates locales as "en-US".
# This translates between the two forms.
lang = get_language()
if "-" in lang:
first = lang[: lang.index("-")]
second = lang[lang.index("-") + 1 :]
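# A minimal sketch of the normalization described above, assuming the
# region suffix is simply upper-cased:
def normalize_locale(lang: str) -> str:
    # Django reports "en-us"; the Angular bundles live under "en-US".
    if "-" in lang:
        first, second = lang.split("-", 1)
        return f"{first}-{second.upper()}"
    return lang

assert normalize_locale("en-us") == "en-US"
assert normalize_locale("de") == "de"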
@@ -99,16 +111,18 @@ class IndexView(TemplateView):
context["cookie_prefix"] = settings.COOKIE_PREFIX
context["username"] = self.request.user.username
context["full_name"] = self.request.user.get_full_name()
context["styles_css"] = f"frontend/{self.get_language()}/styles.css"
context["runtime_js"] = f"frontend/{self.get_language()}/runtime.js"
context["polyfills_js"] = f"frontend/{self.get_language()}/polyfills.js"
context["main_js"] = f"frontend/{self.get_language()}/main.js"
context["styles_css"] = f"frontend/{self.get_frontend_language()}/styles.css"
context["runtime_js"] = f"frontend/{self.get_frontend_language()}/runtime.js"
context[
"polyfills_js"
] = f"frontend/{self.get_frontend_language()}/polyfills.js"
context["main_js"] = f"frontend/{self.get_frontend_language()}/main.js"
context[
"webmanifest"
] = f"frontend/{self.get_language()}/manifest.webmanifest" # noqa: E501
] = f"frontend/{self.get_frontend_language()}/manifest.webmanifest" # noqa: E501
context[
"apple_touch_icon"
] = f"frontend/{self.get_language()}/apple-touch-icon.png" # noqa: E501
] = f"frontend/{self.get_frontend_language()}/apple-touch-icon.png" # noqa: E501
return context
@@ -210,7 +224,7 @@ class DocumentViewSet(
return serializer_class(*args, **kwargs)
def update(self, request, *args, **kwargs):
response = super(DocumentViewSet, self).update(request, *args, **kwargs)
response = super().update(request, *args, **kwargs)
from documents import index
index.add_or_update_document(self.get_object())
@@ -220,7 +234,7 @@ class DocumentViewSet(
from documents import index
index.remove_document_from_index(self.get_object())
return super(DocumentViewSet, self).destroy(request, *args, **kwargs)
return super().destroy(request, *args, **kwargs)
@staticmethod
def original_requested(request):
@@ -325,6 +339,7 @@ class DocumentViewSet(
"document_types": [
dt.id for dt in match_document_types(doc, classifier)
],
"storage_paths": [dt.id for dt in match_storage_paths(doc, classifier)],
},
)
@@ -362,7 +377,7 @@ class DocumentViewSet(
class SearchResultSerializer(DocumentSerializer):
def to_representation(self, instance):
doc = Document.objects.get(id=instance["id"])
r = super(SearchResultSerializer, self).to_representation(doc)
r = super().to_representation(doc)
r["__search_hit__"] = {
"score": instance.score,
"highlights": instance.highlights("content", text=doc.content)
@@ -376,7 +391,7 @@ class SearchResultSerializer(DocumentSerializer):
class UnifiedSearchViewSet(DocumentViewSet):
def __init__(self, *args, **kwargs):
super(UnifiedSearchViewSet, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)
self.searcher = None
def get_serializer_class(self):
@@ -408,7 +423,7 @@ class UnifiedSearchViewSet(DocumentViewSet):
self.paginator.get_page_size(self.request),
)
else:
return super(UnifiedSearchViewSet, self).filter_queryset(queryset)
return super().filter_queryset(queryset)
def list(self, request, *args, **kwargs):
if self._is_search_request():
@@ -417,13 +432,13 @@ class UnifiedSearchViewSet(DocumentViewSet):
try:
with index.open_index_searcher() as s:
self.searcher = s
return super(UnifiedSearchViewSet, self).list(request)
return super().list(request)
except NotFound:
raise
except Exception as e:
return HttpResponseBadRequest(str(e))
else:
return super(UnifiedSearchViewSet, self).list(request)
return super().list(request)
class LogViewSet(ViewSet):
@@ -441,7 +456,7 @@ class LogViewSet(ViewSet):
if not os.path.isfile(filename):
raise Http404()
with open(filename, "r") as f:
with open(filename) as f:
lines = [line.rstrip() for line in f.readlines()]
return Response(lines)
@@ -504,6 +519,7 @@ class PostDocumentView(GenericAPIView):
document_type_id = serializer.validated_data.get("document_type")
tag_ids = serializer.validated_data.get("tags")
title = serializer.validated_data.get("title")
created = serializer.validated_data.get("created")
t = int(mktime(datetime.now().timetuple()))
@@ -530,6 +546,7 @@ class PostDocumentView(GenericAPIView):
override_tag_ids=tag_ids,
task_id=task_id,
task_name=os.path.basename(doc_name)[:100],
override_created=created,
)
return Response("OK")
@@ -565,6 +582,12 @@ class SelectionDataView(GenericAPIView):
),
)
storage_paths = StoragePath.objects.annotate(
document_count=Count(
Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
),
)
r = Response(
{
"selected_correspondents": [
@@ -577,6 +600,10 @@ class SelectionDataView(GenericAPIView):
"selected_document_types": [
{"id": t.id, "document_count": t.document_count} for t in types
],
"selected_storage_paths": [
{"id": t.id, "document_count": t.document_count}
for t in storage_paths
],
},
)
@@ -692,7 +719,10 @@ class RemoteVersionView(GenericAPIView):
remote = response.read().decode("utf-8")
try:
remote_json = json.loads(remote)
remote_version = remote_json["tag_name"].removeprefix("ngx-")
remote_version = remote_json["tag_name"]
# Basically str.removeprefix() from PEP 616, but that only landed in 3.9
if remote_version.startswith("ngx-"):
remote_version = remote_version[len("ngx-") :]
except ValueError:
logger.debug("An error occurred parsing remote version json")
except urllib.error.URLError:
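# The prefix stripping above mirrors str.removeprefix(); a standalone
# sketch of the backport:
def removeprefix(value: str, prefix: str) -> str:
    # Equivalent to str.removeprefix(), which needs Python >= 3.9.
    if value.startswith(prefix):
        return value[len(prefix):]
    return value

assert removeprefix("ngx-1.7.1", "ngx-") == "1.7.1"
assert removeprefix("v1.7.1", "ngx-") == "v1.7.1"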
@@ -712,3 +742,56 @@ class RemoteVersionView(GenericAPIView):
"feature_is_set": feature_is_set,
},
)
class StoragePathViewSet(ModelViewSet):
model = StoragePath
queryset = StoragePath.objects.annotate(document_count=Count("documents")).order_by(
Lower("name"),
)
serializer_class = StoragePathSerializer
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)
filter_backends = (DjangoFilterBackend, OrderingFilter)
filterset_class = StoragePathFilterSet
ordering_fields = ("name", "path", "matching_algorithm", "match", "document_count")
class UiSettingsView(GenericAPIView):
permission_classes = (IsAuthenticated,)
serializer_class = UiSettingsViewSerializer
def get(self, request, format=None):
serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=True)
user = User.objects.get(pk=request.user.id)
displayname = user.username
if user.first_name or user.last_name:
displayname = " ".join([user.first_name, user.last_name])
settings = {}
if hasattr(user, "ui_settings"):
settings = user.ui_settings.settings
return Response(
{
"user_id": user.id,
"username": user.username,
"display_name": displayname,
"settings": settings,
},
)
def post(self, request, format=None):
serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=True)
serializer.save(user=self.request.user)
return Response(
{
"success": True,
},
)
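# A hypothetical round trip against the new view; the /api mount point,
# token, and payload shape are assumptions, not confirmed by this diff:
import requests

session = requests.Session()
session.headers["Authorization"] = "Token <token>"  # placeholder
base = "http://localhost:8000/api"  # assumed mount point

# Store a frontend language preference, then read the profile back.
session.post(f"{base}/ui_settings/", json={"settings": {"language": "en-US"}})
print(session.get(f"{base}/ui_settings/").json()["display_name"])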

View File

@@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-03-02 11:20-0800\n"
"POT-Creation-Date: 2022-05-19 15:24-0700\n"
"PO-Revision-Date: 2022-02-17 04:17\n"
"Last-Translator: \n"
"Language-Team: English\n"
@@ -17,373 +17,397 @@ msgstr ""
"X-Crowdin-File: /dev/src/locale/en_US/LC_MESSAGES/django.po\n"
"X-Crowdin-File-ID: 14\n"
#: documents/apps.py:10
#: documents/apps.py:9
msgid "Documents"
msgstr ""
#: documents/models.py:32
#: documents/models.py:27
msgid "Any word"
msgstr ""
#: documents/models.py:33
#: documents/models.py:28
msgid "All words"
msgstr ""
#: documents/models.py:34
#: documents/models.py:29
msgid "Exact match"
msgstr ""
#: documents/models.py:35
#: documents/models.py:30
msgid "Regular expression"
msgstr ""
#: documents/models.py:36
#: documents/models.py:31
msgid "Fuzzy word"
msgstr ""
#: documents/models.py:37
#: documents/models.py:32
msgid "Automatic"
msgstr ""
#: documents/models.py:40 documents/models.py:314 paperless_mail/models.py:23
#: paperless_mail/models.py:107
#: documents/models.py:35 documents/models.py:343 paperless_mail/models.py:16
#: paperless_mail/models.py:79
msgid "name"
msgstr ""
#: documents/models.py:42
#: documents/models.py:37
msgid "match"
msgstr ""
#: documents/models.py:45
#: documents/models.py:40
msgid "matching algorithm"
msgstr ""
#: documents/models.py:48
#: documents/models.py:45
msgid "is insensitive"
msgstr ""
#: documents/models.py:61 documents/models.py:104
#: documents/models.py:58 documents/models.py:113
msgid "correspondent"
msgstr ""
#: documents/models.py:62
#: documents/models.py:59
msgid "correspondents"
msgstr ""
#: documents/models.py:67
#: documents/models.py:64
msgid "color"
msgstr ""
#: documents/models.py:70
#: documents/models.py:67
msgid "is inbox tag"
msgstr ""
#: documents/models.py:73
#: documents/models.py:70
msgid ""
"Marks this tag as an inbox tag: All newly consumed documents will be tagged "
"with inbox tags."
msgstr ""
#: documents/models.py:79
#: documents/models.py:76
msgid "tag"
msgstr ""
#: documents/models.py:80 documents/models.py:130
#: documents/models.py:77 documents/models.py:151
msgid "tags"
msgstr ""
#: documents/models.py:85 documents/models.py:115
#: documents/models.py:82 documents/models.py:133
msgid "document type"
msgstr ""
#: documents/models.py:86
#: documents/models.py:83
msgid "document types"
msgstr ""
#: documents/models.py:94
msgid "Unencrypted"
#: documents/models.py:88
msgid "path"
msgstr ""
#: documents/models.py:94 documents/models.py:122
msgid "storage path"
msgstr ""
#: documents/models.py:95
msgid "storage paths"
msgstr ""
#: documents/models.py:103
msgid "Unencrypted"
msgstr ""
#: documents/models.py:104
msgid "Encrypted with GNU Privacy Guard"
msgstr ""
#: documents/models.py:107
#: documents/models.py:125
msgid "title"
msgstr ""
#: documents/models.py:119
#: documents/models.py:137
msgid "content"
msgstr ""
#: documents/models.py:122
#: documents/models.py:140
msgid ""
"The raw, text-only data of the document. This field is primarily used for "
"searching."
msgstr ""
#: documents/models.py:127
#: documents/models.py:145
msgid "mime type"
msgstr ""
#: documents/models.py:134
#: documents/models.py:155
msgid "checksum"
msgstr ""
#: documents/models.py:138
#: documents/models.py:159
msgid "The checksum of the original document."
msgstr ""
#: documents/models.py:142
#: documents/models.py:163
msgid "archive checksum"
msgstr ""
#: documents/models.py:147
#: documents/models.py:168
msgid "The checksum of the archived document."
msgstr ""
#: documents/models.py:150 documents/models.py:295
#: documents/models.py:171 documents/models.py:324
msgid "created"
msgstr ""
#: documents/models.py:153
#: documents/models.py:174
msgid "modified"
msgstr ""
#: documents/models.py:157
#: documents/models.py:181
msgid "storage type"
msgstr ""
#: documents/models.py:165
#: documents/models.py:189
msgid "added"
msgstr ""
#: documents/models.py:169
#: documents/models.py:196
msgid "filename"
msgstr ""
#: documents/models.py:175
#: documents/models.py:202
msgid "Current filename in storage"
msgstr ""
#: documents/models.py:179
#: documents/models.py:206
msgid "archive filename"
msgstr ""
#: documents/models.py:185
#: documents/models.py:212
msgid "Current archive filename in storage"
msgstr ""
#: documents/models.py:189
#: documents/models.py:216
msgid "archive serial number"
msgstr ""
#: documents/models.py:195
#: documents/models.py:222
msgid "The position of this document in your physical document archive."
msgstr ""
#: documents/models.py:201
#: documents/models.py:228
msgid "document"
msgstr ""
#: documents/models.py:202
#: documents/models.py:229
msgid "documents"
msgstr ""
#: documents/models.py:280
#: documents/models.py:307
msgid "debug"
msgstr ""
#: documents/models.py:281
#: documents/models.py:308
msgid "information"
msgstr ""
#: documents/models.py:282
#: documents/models.py:309
msgid "warning"
msgstr ""
#: documents/models.py:283
#: documents/models.py:310
msgid "error"
msgstr ""
#: documents/models.py:284
#: documents/models.py:311
msgid "critical"
msgstr ""
#: documents/models.py:287
#: documents/models.py:314
msgid "group"
msgstr ""
#: documents/models.py:289
#: documents/models.py:316
msgid "message"
msgstr ""
#: documents/models.py:292
#: documents/models.py:319
msgid "level"
msgstr ""
#: documents/models.py:299
#: documents/models.py:328
msgid "log"
msgstr ""
#: documents/models.py:300
#: documents/models.py:329
msgid "logs"
msgstr ""
#: documents/models.py:310 documents/models.py:360
#: documents/models.py:339 documents/models.py:392
msgid "saved view"
msgstr ""
#: documents/models.py:311
#: documents/models.py:340
msgid "saved views"
msgstr ""
#: documents/models.py:313
#: documents/models.py:342
msgid "user"
msgstr ""
#: documents/models.py:317
#: documents/models.py:346
msgid "show on dashboard"
msgstr ""
#: documents/models.py:320
#: documents/models.py:349
msgid "show in sidebar"
msgstr ""
#: documents/models.py:324
#: documents/models.py:353
msgid "sort field"
msgstr ""
#: documents/models.py:326
#: documents/models.py:358
msgid "sort reverse"
msgstr ""
#: documents/models.py:331
#: documents/models.py:363
msgid "title contains"
msgstr ""
#: documents/models.py:332
#: documents/models.py:364
msgid "content contains"
msgstr ""
#: documents/models.py:333
#: documents/models.py:365
msgid "ASN is"
msgstr ""
#: documents/models.py:334
#: documents/models.py:366
msgid "correspondent is"
msgstr ""
#: documents/models.py:335
#: documents/models.py:367
msgid "document type is"
msgstr ""
#: documents/models.py:336
#: documents/models.py:368
msgid "is in inbox"
msgstr ""
#: documents/models.py:337
#: documents/models.py:369
msgid "has tag"
msgstr ""
#: documents/models.py:338
#: documents/models.py:370
msgid "has any tag"
msgstr ""
#: documents/models.py:339
#: documents/models.py:371
msgid "created before"
msgstr ""
#: documents/models.py:340
#: documents/models.py:372
msgid "created after"
msgstr ""
#: documents/models.py:341
#: documents/models.py:373
msgid "created year is"
msgstr ""
#: documents/models.py:342
#: documents/models.py:374
msgid "created month is"
msgstr ""
#: documents/models.py:343
#: documents/models.py:375
msgid "created day is"
msgstr ""
#: documents/models.py:344
#: documents/models.py:376
msgid "added before"
msgstr ""
#: documents/models.py:345
#: documents/models.py:377
msgid "added after"
msgstr ""
#: documents/models.py:346
#: documents/models.py:378
msgid "modified before"
msgstr ""
#: documents/models.py:347
#: documents/models.py:379
msgid "modified after"
msgstr ""
#: documents/models.py:348
#: documents/models.py:380
msgid "does not have tag"
msgstr ""
#: documents/models.py:349
#: documents/models.py:381
msgid "does not have ASN"
msgstr ""
#: documents/models.py:350
#: documents/models.py:382
msgid "title or content contains"
msgstr ""
#: documents/models.py:351
#: documents/models.py:383
msgid "fulltext query"
msgstr ""
#: documents/models.py:352
#: documents/models.py:384
msgid "more like this"
msgstr ""
#: documents/models.py:353
#: documents/models.py:385
msgid "has tags in"
msgstr ""
#: documents/models.py:363
#: documents/models.py:395
msgid "rule type"
msgstr ""
#: documents/models.py:365
#: documents/models.py:397
msgid "value"
msgstr ""
#: documents/models.py:368
#: documents/models.py:400
msgid "filter rule"
msgstr ""
#: documents/models.py:369
#: documents/models.py:401
msgid "filter rules"
msgstr ""
#: documents/serialisers.py:64
#: documents/serialisers.py:63
#, python-format
msgid "Invalid regular expression: %(error)s"
msgstr ""
#: documents/serialisers.py:185
#: documents/serialisers.py:184
msgid "Invalid color."
msgstr ""
#: documents/serialisers.py:459
#: documents/serialisers.py:491
#, python-format
msgid "File type %(type)s not supported"
msgstr ""
#: documents/templates/index.html:22
#: documents/serialisers.py:574
msgid "Invalid variable detected."
msgstr ""
#: documents/templates/index.html:78
msgid "Paperless-ngx is loading..."
msgstr ""
#: documents/templates/index.html:79
msgid "Still here?! Hmm, something might be wrong."
msgstr ""
#: documents/templates/index.html:79
msgid "Here's a link to the docs."
msgstr ""
#: documents/templates/registration/logged_out.html:14
msgid "Paperless-ngx signed out"
msgstr ""
@@ -420,71 +444,91 @@ msgstr ""
msgid "Sign in"
msgstr ""
#: paperless/settings.py:299
#: paperless/settings.py:338
msgid "English (US)"
msgstr ""
#: paperless/settings.py:300
#: paperless/settings.py:339
msgid "Belarusian"
msgstr ""
#: paperless/settings.py:340
msgid "Czech"
msgstr ""
#: paperless/settings.py:301
#: paperless/settings.py:341
msgid "Danish"
msgstr ""
#: paperless/settings.py:302
#: paperless/settings.py:342
msgid "German"
msgstr ""
#: paperless/settings.py:303
#: paperless/settings.py:343
msgid "English (GB)"
msgstr ""
#: paperless/settings.py:304
#: paperless/settings.py:344
msgid "Spanish"
msgstr ""
#: paperless/settings.py:305
#: paperless/settings.py:345
msgid "French"
msgstr ""
#: paperless/settings.py:306
#: paperless/settings.py:346
msgid "Italian"
msgstr ""
#: paperless/settings.py:307
#: paperless/settings.py:347
msgid "Luxembourgish"
msgstr ""
#: paperless/settings.py:308
#: paperless/settings.py:348
msgid "Dutch"
msgstr ""
#: paperless/settings.py:309
#: paperless/settings.py:349
msgid "Polish"
msgstr ""
#: paperless/settings.py:310
#: paperless/settings.py:350
msgid "Portuguese (Brazil)"
msgstr ""
#: paperless/settings.py:311
#: paperless/settings.py:351
msgid "Portuguese"
msgstr ""
#: paperless/settings.py:312
#: paperless/settings.py:352
msgid "Romanian"
msgstr ""
#: paperless/settings.py:313
#: paperless/settings.py:353
msgid "Russian"
msgstr ""
#: paperless/settings.py:314
#: paperless/settings.py:354
msgid "Slovenian"
msgstr ""
#: paperless/settings.py:355
msgid "Serbian"
msgstr ""
#: paperless/settings.py:356
msgid "Swedish"
msgstr ""
#: paperless/urls.py:139
#: paperless/settings.py:357
msgid "Turkish"
msgstr ""
#: paperless/settings.py:358
msgid "Chinese Simplified"
msgstr ""
#: paperless/urls.py:153
msgid "Paperless-ngx administration"
msgstr ""
@@ -527,208 +571,210 @@ msgid ""
"process all matching rules that you have defined."
msgstr ""
#: paperless_mail/apps.py:9
#: paperless_mail/apps.py:8
msgid "Paperless mail"
msgstr ""
#: paperless_mail/models.py:10
#: paperless_mail/models.py:8
msgid "mail account"
msgstr ""
#: paperless_mail/models.py:11
#: paperless_mail/models.py:9
msgid "mail accounts"
msgstr ""
#: paperless_mail/models.py:18
#: paperless_mail/models.py:12
msgid "No encryption"
msgstr ""
#: paperless_mail/models.py:19
#: paperless_mail/models.py:13
msgid "Use SSL"
msgstr ""
#: paperless_mail/models.py:20
#: paperless_mail/models.py:14
msgid "Use STARTTLS"
msgstr ""
#: paperless_mail/models.py:25
#: paperless_mail/models.py:18
msgid "IMAP server"
msgstr ""
#: paperless_mail/models.py:28
#: paperless_mail/models.py:21
msgid "IMAP port"
msgstr ""
#: paperless_mail/models.py:32
#: paperless_mail/models.py:25
msgid ""
"This is usually 143 for unencrypted and STARTTLS connections, and 993 for "
"SSL connections."
msgstr ""
#: paperless_mail/models.py:38
#: paperless_mail/models.py:31
msgid "IMAP security"
msgstr ""
#: paperless_mail/models.py:41
#: paperless_mail/models.py:36
msgid "username"
msgstr ""
#: paperless_mail/models.py:43
#: paperless_mail/models.py:38
msgid "password"
msgstr ""
#: paperless_mail/models.py:46
#: paperless_mail/models.py:41
msgid "character set"
msgstr ""
#: paperless_mail/models.py:50
#: paperless_mail/models.py:45
msgid ""
"The character set to use when communicating with the mail server, such as "
"'UTF-8' or 'US-ASCII'."
msgstr ""
#: paperless_mail/models.py:61
#: paperless_mail/models.py:56
msgid "mail rule"
msgstr ""
#: paperless_mail/models.py:62
#: paperless_mail/models.py:57
msgid "mail rules"
msgstr ""
#: paperless_mail/models.py:68
#: paperless_mail/models.py:60
msgid "Only process attachments."
msgstr ""
#: paperless_mail/models.py:71
#: paperless_mail/models.py:61
msgid "Process all files, including 'inline' attachments."
msgstr ""
#: paperless_mail/models.py:81
msgid "Mark as read, don't process read mails"
msgstr ""
#: paperless_mail/models.py:82
msgid "Flag the mail, don't process flagged mails"
msgstr ""
#: paperless_mail/models.py:83
msgid "Move to specified folder"
msgstr ""
#: paperless_mail/models.py:84
#: paperless_mail/models.py:64
msgid "Delete"
msgstr ""
#: paperless_mail/models.py:91
#: paperless_mail/models.py:65
msgid "Move to specified folder"
msgstr ""
#: paperless_mail/models.py:66
msgid "Mark as read, don't process read mails"
msgstr ""
#: paperless_mail/models.py:67
msgid "Flag the mail, don't process flagged mails"
msgstr ""
#: paperless_mail/models.py:70
msgid "Use subject as title"
msgstr ""
#: paperless_mail/models.py:92
#: paperless_mail/models.py:71
msgid "Use attachment filename as title"
msgstr ""
#: paperless_mail/models.py:101
#: paperless_mail/models.py:74
msgid "Do not assign a correspondent"
msgstr ""
#: paperless_mail/models.py:102
#: paperless_mail/models.py:75
msgid "Use mail address"
msgstr ""
#: paperless_mail/models.py:103
#: paperless_mail/models.py:76
msgid "Use name (or mail address if not available)"
msgstr ""
#: paperless_mail/models.py:104
#: paperless_mail/models.py:77
msgid "Use correspondent selected below"
msgstr ""
#: paperless_mail/models.py:109
#: paperless_mail/models.py:81
msgid "order"
msgstr ""
#: paperless_mail/models.py:115
#: paperless_mail/models.py:87
msgid "account"
msgstr ""
#: paperless_mail/models.py:119
#: paperless_mail/models.py:91
msgid "folder"
msgstr ""
#: paperless_mail/models.py:122
msgid "Subfolders must be separated by dots."
#: paperless_mail/models.py:95
msgid ""
"Subfolders must be separated by a delimiter, often a dot ('.') or slash "
"('/'), but it varies by mail server."
msgstr ""
#: paperless_mail/models.py:126
#: paperless_mail/models.py:101
msgid "filter from"
msgstr ""
#: paperless_mail/models.py:129
#: paperless_mail/models.py:107
msgid "filter subject"
msgstr ""
#: paperless_mail/models.py:132
#: paperless_mail/models.py:113
msgid "filter body"
msgstr ""
#: paperless_mail/models.py:136
#: paperless_mail/models.py:120
msgid "filter attachment filename"
msgstr ""
#: paperless_mail/models.py:141
#: paperless_mail/models.py:125
msgid ""
"Only consume documents which entirely match this filename if specified. "
"Wildcards such as *.pdf or *invoice* are allowed. Case insensitive."
msgstr ""
#: paperless_mail/models.py:148
#: paperless_mail/models.py:132
msgid "maximum age"
msgstr ""
#: paperless_mail/models.py:148
#: paperless_mail/models.py:134
msgid "Specified in days."
msgstr ""
#: paperless_mail/models.py:152
#: paperless_mail/models.py:138
msgid "attachment type"
msgstr ""
#: paperless_mail/models.py:156
#: paperless_mail/models.py:142
msgid ""
"Inline attachments include embedded images, so it's best to combine this "
"option with a filename filter."
msgstr ""
#: paperless_mail/models.py:162
#: paperless_mail/models.py:148
msgid "action"
msgstr ""
#: paperless_mail/models.py:168
#: paperless_mail/models.py:154
msgid "action parameter"
msgstr ""
#: paperless_mail/models.py:173
#: paperless_mail/models.py:159
msgid ""
"Additional parameter for the action selected above, i.e., the target folder "
"of the move to folder action. Subfolders must be separated by dots."
msgstr ""
#: paperless_mail/models.py:181
#: paperless_mail/models.py:167
msgid "assign title from"
msgstr ""
#: paperless_mail/models.py:189
#: paperless_mail/models.py:175
msgid "assign this tag"
msgstr ""
#: paperless_mail/models.py:197
#: paperless_mail/models.py:183
msgid "assign this document type"
msgstr ""
#: paperless_mail/models.py:201
#: paperless_mail/models.py:187
msgid "assign correspondent from"
msgstr ""
#: paperless_mail/models.py:211
#: paperless_mail/models.py:197
msgid "assign this correspondent"
msgstr ""

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-03-02 11:20-0800\n"
"PO-Revision-Date: 2022-04-12 15:26\n"
"PO-Revision-Date: 2022-05-13 03:55\n"
"Last-Translator: \n"
"Language-Team: Polish\n"
"Language: pl_PL\n"
@@ -638,7 +638,7 @@ msgstr "konto"
#: paperless_mail/models.py:119
msgid "folder"
msgstr "folder"
msgstr "katalog"
#: paperless_mail/models.py:122
msgid "Subfolders must be separated by dots."

View File

@@ -3,7 +3,7 @@ msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-03-02 11:20-0800\n"
"PO-Revision-Date: 2022-03-27 17:08\n"
"PO-Revision-Date: 2022-05-13 03:55\n"
"Last-Translator: \n"
"Language-Team: Serbian (Latin)\n"
"Language: sr_CS\n"
@@ -356,11 +356,11 @@ msgstr "vrednost"
#: documents/models.py:368
msgid "filter rule"
msgstr ""
msgstr "filter pravilo"
#: documents/models.py:369
msgid "filter rules"
msgstr ""
msgstr "filter pravila"
#: documents/serialisers.py:64
#, python-format
@@ -369,7 +369,7 @@ msgstr ""
#: documents/serialisers.py:185
msgid "Invalid color."
msgstr ""
msgstr "Nevažeća boja."
#: documents/serialisers.py:459
#, python-format
@@ -378,7 +378,7 @@ msgstr ""
#: documents/templates/index.html:22
msgid "Paperless-ngx is loading..."
msgstr ""
msgstr "Paperless-ngx se učitava..."
#: documents/templates/registration/logged_out.html:14
msgid "Paperless-ngx signed out"

View File

@@ -27,7 +27,7 @@ class AngularApiAuthenticationOverride(authentication.BaseAuthentication):
and request.headers["Referer"].startswith("http://localhost:4200/")
):
user = User.objects.filter(is_staff=True).first()
print("Auto-Login with user {}".format(user))
print(f"Auto-Login with user {user}")
return (user, None)
else:
return None

View File

@@ -1,9 +1,11 @@
import datetime
import json
import math
import multiprocessing
import os
import re
from typing import Final
from typing import Set
from urllib.parse import urlparse
from concurrent_log_handler.queue import setup_logging_queues
@@ -46,6 +48,13 @@ def __get_int(key: str, default: int) -> int:
return int(os.getenv(key, default))
def __get_float(key: str, default: float) -> float:
"""
Return a float value based on the environment variable or a default
"""
return float(os.getenv(key, default))
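# For illustration, the helper resolves values like so (same pattern as
# __get_int above):
#   with PAPERLESS_CONSUMER_INOTIFY_DELAY="0.01" set:
#       __get_float("PAPERLESS_CONSUMER_INOTIFY_DELAY", 0.5)  # -> 0.01
#   with the variable unset:
#       __get_float("PAPERLESS_CONSUMER_INOTIFY_DELAY", 0.5)  # -> 0.5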
# NEVER RUN WITH DEBUG IN PRODUCTION.
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
@@ -483,6 +492,11 @@ CONSUMER_POLLING_RETRY_COUNT = int(
os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5),
)
CONSUMER_INOTIFY_DELAY: Final[float] = __get_float(
"PAPERLESS_CONSUMER_INOTIFY_DELAY",
0.5,
)
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")
@@ -583,15 +597,22 @@ FILENAME_PARSE_TRANSFORMS = []
for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
FILENAME_PARSE_TRANSFORMS.append((re.compile(t["pattern"]), t["repl"]))
# TODO: this should not have a prefix.
# Specify the filename format for output files
PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
# If this is enabled, variables in filename format will resolve to empty-string instead of 'none'.
# Directories with 'empty names' are omitted, too.
FILENAME_FORMAT_REMOVE_NONE = __get_boolean(
"PAPERLESS_FILENAME_FORMAT_REMOVE_NONE",
"NO",
)
THUMBNAIL_FONT_NAME = os.getenv(
"PAPERLESS_THUMBNAIL_FONT_NAME",
"/usr/share/fonts/liberation/LiberationSerif-Regular.ttf",
)
# TODO: this should not have a prefix.
# Tika settings
PAPERLESS_TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
PAPERLESS_TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
@@ -603,16 +624,42 @@ PAPERLESS_TIKA_GOTENBERG_ENDPOINT = os.getenv(
if PAPERLESS_TIKA_ENABLED:
INSTALLED_APPS.append("paperless_tika.apps.PaperlessTikaConfig")
# List dates that should be ignored when trying to parse date from document text
IGNORE_DATES = set()
if os.getenv("PAPERLESS_IGNORE_DATES", ""):
def _parse_ignore_dates(
env_ignore: str,
date_order: str = DATE_ORDER,
) -> Set[datetime.date]:
"""
If the PAPERLESS_IGNORE_DATES environment variable is set, parse the
user-provided string(s) into dates
Args:
env_ignore (str): The value of the environment variable, comma-separated dates
date_order (str, optional): The format of the date strings. Defaults to DATE_ORDER.
Returns:
Set[datetime.date]: The set of parsed dates
"""
import dateparser
for s in os.getenv("PAPERLESS_IGNORE_DATES", "").split(","):
d = dateparser.parse(s)
ignored_dates = set()
for s in env_ignore.split(","):
d = dateparser.parse(
s,
settings={
"DATE_ORDER": date_order,
},
)
if d:
IGNORE_DATES.add(d.date())
ignored_dates.add(d.date())
return ignored_dates
# List dates that should be ignored when trying to parse date from document text
IGNORE_DATES: Set[datetime.date] = set()
if os.getenv("PAPERLESS_IGNORE_DATES") is not None:
IGNORE_DATES = _parse_ignore_dates(os.getenv("PAPERLESS_IGNORE_DATES"))
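# For example (values taken from the new tests below), with a day-first
# date order:
#   _parse_ignore_dates("11.01.2001,15-06-1996", "DMY")
#   == {datetime.date(2001, 1, 11), datetime.date(1996, 6, 15)}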
ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default")
if ENABLE_UPDATE_CHECK != "default":

View File

@@ -0,0 +1,58 @@
import datetime
from unittest import TestCase
from paperless.settings import _parse_ignore_dates
class TestIgnoreDateParsing(TestCase):
"""
Tests the parsing of the PAPERLESS_IGNORE_DATES setting value
"""
def _parse_checker(self, test_cases):
"""
Helper function to check ignore date parsing
Args:
test_cases (list): Tuples of (env string, date order, expected date set)
"""
for env_str, date_format, expected_date_set in test_cases:
self.assertSetEqual(
_parse_ignore_dates(env_str, date_format),
expected_date_set,
)
def test_no_ignore_dates_set(self):
"""
GIVEN:
- No ignore dates are set
THEN:
- No ignore dates are parsed
"""
self.assertSetEqual(_parse_ignore_dates(""), set())
def test_single_ignore_dates_set(self):
"""
GIVEN:
- Ignore dates are set per certain inputs
THEN:
- All ignore dates are parsed
"""
test_cases = [
("1985-05-01", "YMD", {datetime.date(1985, 5, 1)}),
(
"1985-05-01,1991-12-05",
"YMD",
{datetime.date(1985, 5, 1), datetime.date(1991, 12, 5)},
),
("2010-12-13", "YMD", {datetime.date(2010, 12, 13)}),
("11.01.10", "DMY", {datetime.date(2010, 1, 11)}),
(
"11.01.2001,15-06-1996",
"DMY",
{datetime.date(2001, 1, 11), datetime.date(1996, 6, 15)},
),
]
self._parse_checker(test_cases)

View File

@@ -19,7 +19,9 @@ from documents.views import SavedViewViewSet
from documents.views import SearchAutoCompleteView
from documents.views import SelectionDataView
from documents.views import StatisticsView
from documents.views import StoragePathViewSet
from documents.views import TagViewSet
from documents.views import UiSettingsView
from documents.views import UnifiedSearchViewSet
from paperless.consumers import StatusConsumer
from paperless.views import FaviconView
@@ -33,6 +35,7 @@ api_router.register(r"documents", UnifiedSearchViewSet)
api_router.register(r"logs", LogViewSet, basename="logs")
api_router.register(r"tags", TagViewSet)
api_router.register(r"saved_views", SavedViewViewSet)
api_router.register(r"storage_paths", StoragePathViewSet)
urlpatterns = [
@@ -78,6 +81,11 @@ urlpatterns = [
RemoteVersionView.as_view(),
name="remoteversion",
),
re_path(
r"^ui_settings/",
UiSettingsView.as_view(),
name="ui_settings",
),
path("token/", views.obtain_auth_token),
]
+ api_router.urls,
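# A hedged usage sketch of the newly registered storage_paths route; the
# host, token, and response fields here are assumptions:
import requests

resp = requests.get(
    "http://localhost:8000/api/storage_paths/",
    headers={"Authorization": "Token <token>"},  # placeholder token
)
resp.raise_for_status()
for storage_path in resp.json()["results"]:
    print(storage_path["name"], storage_path["path"])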

View File

@@ -1,7 +1,7 @@
from typing import Final
from typing import Tuple
__version__: Final[Tuple[int, int, int]] = (1, 7, 0)
__version__: Final[Tuple[int, int, int]] = (1, 7, 1)
# Version string like X.Y.Z
__full_version_str__: Final[str] = ".".join(map(str, __version__))
# Version string like X.Y

View File

@@ -28,7 +28,7 @@ from paperless_mail.models import MailRule
@dataclasses.dataclass
class _AttachmentDef(object):
class _AttachmentDef:
filename: str = "a_file.pdf"
maintype: str = "application/pdf"
subtype: str = "pdf"
@@ -45,7 +45,7 @@ class BogusFolderManager:
self.current_folder = new_folder
class BogusClient(object):
class BogusClient:
def authenticate(self, mechanism, authobject):
# authobject must be a callable object
auth_bytes = authobject(None)
@@ -205,7 +205,7 @@ class TestMail(DirectoriesMixin, TestCase):
self.reset_bogus_mailbox()
self.mail_account_handler = MailAccountHandler()
super(TestMail, self).setUp()
super().setUp()
def reset_bogus_mailbox(self):
self.bogus_mailbox.messages = []
@@ -473,7 +473,7 @@ class TestMail(DirectoriesMixin, TestCase):
self.assertEqual(result, len(matches), f"Error with pattern: {pattern}")
filenames = sorted(
[a[1]["override_filename"] for a in self.async_task.call_args_list],
a[1]["override_filename"] for a in self.async_task.call_args_list
)
self.assertListEqual(filenames, matches)

View File

@@ -98,7 +98,7 @@ class RasterisedDocumentParser(DocumentParser):
def extract_text(self, sidecar_file, pdf_file):
if sidecar_file and os.path.isfile(sidecar_file):
with open(sidecar_file, "r") as f:
with open(sidecar_file) as f:
text = f.read()
if "[OCR skipped on page" not in text:

View File

@@ -18,7 +18,7 @@ class TextDocumentParser(DocumentParser):
def get_thumbnail(self, document_path, mime_type, file_name=None):
def read_text():
with open(document_path, "r") as src:
with open(document_path) as src:
lines = [line.strip() for line in src.readlines()]
text = "\n".join(lines[:50])
return text
@@ -38,5 +38,5 @@ class TextDocumentParser(DocumentParser):
return out_path
def parse(self, document_path, mime_type, file_name=None):
with open(document_path, "r") as f:
with open(document_path) as f:
self.text = f.read()