Feature: Dynamic document storage paths (#916)

* Added devcontainer

* Add feature storage paths

* Exclude tests and add versioning

* Check escaping

* Check escaping

* Check quoting

* Echo

* Escape

* Escape :

* Double escape \

* Escaping

* Remove if

* Escape colon

* Missing \

* Escape :

* Escape all

* test

* Remove sed

* Fix exclude

* Remove SED command

* Add LD_LIBRARY_PATH

* Adjusted to v1.7

* Updated test-cases

* Remove devcontainer

* Removed internal build-file

* Run pre-commit

* Corrected flake8 error

* Adjusted to v1.7

* Updated test-cases

* Corrected flake8 error

* Adjusted to new plural translations

* Small adjustments due to code-review backend

* Adjusted line-break

* Removed PAPERLESS prefix from settings variables

* Corrected style change due to search+replace

* First documentation draft

* Revert changes to Pipfile

* Add sphinx-autobuild with keep-outdated

* Revert merge error that resulted in the wrong storage path being evaluated

* Adjust styles of generated files ...

* Adds additional testing to cover dynamic storage path functionality

* Remove unnecessary condition

* Add hint to edit storage path dialog

* Correct spelling of pathes to paths

* Minor documentation tweaks

* Minor typo

* improving wrapping of filter editor buttons with new storage path button

* Update .gitignore

* Fix select border radius in non input-groups

* Better storage path edit hint

* Add note to edit storage path dialog re document_renamer

* Add note to bulk edit storage path re document_renamer

* Rename FILTER_STORAGE_DIRECTORY to PATH

* Fix broken filter rule parsing

* Show default storage if unspecified

* Remove note re storage path on bulk edit

* Add basic validation of filename variables

Co-authored-by: Markus Kling <markus@markus-kling.net>
Co-authored-by: Trenton Holmes <holmes.trenton@gmail.com>
Co-authored-by: Michael Shamoon <4887959+shamoon@users.noreply.github.com>
Co-authored-by: Quinn Casey <quinn@quinncasey.com>
This commit is contained in:
Markus
2022-05-19 23:42:25 +02:00
committed by GitHub
parent c3997c9f26
commit 69ef26dab0
67 changed files with 1427 additions and 203 deletions

View File

@@ -5,6 +5,7 @@ from .models import Document
from .models import DocumentType
from .models import SavedView
from .models import SavedViewFilterRule
from .models import StoragePath
from .models import Tag
@@ -100,8 +101,19 @@ class SavedViewAdmin(admin.ModelAdmin):
inlines = [RuleInline]
class StoragePathInline(admin.TabularInline):
    """Tabular inline admin editor bound to the ``StoragePath`` model."""

    model = StoragePath
class StoragePathAdmin(admin.ModelAdmin):
    """Admin change-list configuration for ``StoragePath``."""

    # Columns shown in the change list; "name" comes first and is not editable
    # in place, the remaining three are (see list_editable below).
    list_display = ("name", "path", "match", "matching_algorithm")
    # Sidebar filters offered on the change list.
    list_filter = ("path", "matching_algorithm")
    # Fields that can be edited directly from the change list.
    list_editable = ("path", "match", "matching_algorithm")
admin.site.register(Correspondent, CorrespondentAdmin)
admin.site.register(Tag, TagAdmin)
admin.site.register(DocumentType, DocumentTypeAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(SavedView, SavedViewAdmin)
admin.site.register(StoragePath, StoragePathAdmin)

View File

@@ -16,6 +16,7 @@ class DocumentsConfig(AppConfig):
set_correspondent,
set_document_type,
set_tags,
set_storage_path,
add_to_index,
)
@@ -23,6 +24,7 @@ class DocumentsConfig(AppConfig):
document_consumption_finished.connect(set_correspondent)
document_consumption_finished.connect(set_document_type)
document_consumption_finished.connect(set_tags)
document_consumption_finished.connect(set_storage_path)
document_consumption_finished.connect(set_log_entry)
document_consumption_finished.connect(add_to_index)

View File

@@ -5,6 +5,7 @@ from django_q.tasks import async_task
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
def set_correspondent(doc_ids, correspondent):
@@ -20,6 +21,24 @@ def set_correspondent(doc_ids, correspondent):
return "OK"
def set_storage_path(doc_ids, storage_path):
    """
    Bulk-assign a storage path to the given documents.

    ``storage_path`` is a ``StoragePath`` id; a falsy value is used as-is
    (i.e. it is not looked up). Only documents whose current storage path
    differs from the target are updated, and exactly those document ids are
    handed to the ``documents.tasks.bulk_update_documents`` task.

    Returns the string ``"OK"``.
    """
    # Resolve the id to a model instance only when one was actually given.
    target = StoragePath.objects.get(id=storage_path) if storage_path else storage_path

    needs_change = Q(id__in=doc_ids) & ~Q(storage_path=target)
    documents = Document.objects.filter(needs_change)

    # Collect the ids *before* updating: afterwards the filter above would no
    # longer match the rows that were just changed.
    changed_ids = [document.id for document in documents]
    documents.update(storage_path=target)

    async_task(
        "documents.tasks.bulk_update_documents",
        document_ids=changed_ids,
    )

    return "OK"
def set_document_type(doc_ids, document_type):
if document_type:
document_type = DocumentType.objects.get(id=document_type)

View File

@@ -59,8 +59,8 @@ def load_classifier():
class DocumentClassifier:
# v7 - Updated scikit-learn package version
FORMAT_VERSION = 7
# v8 - Added storage path classifier
FORMAT_VERSION = 8
def __init__(self):
# hash of the training data. used to prevent re-training when the
@@ -72,6 +72,7 @@ class DocumentClassifier:
self.tags_classifier = None
self.correspondent_classifier = None
self.document_type_classifier = None
self.storage_path_classifier = None
def load(self):
with open(settings.MODEL_FILE, "rb") as f:
@@ -90,6 +91,7 @@ class DocumentClassifier:
self.tags_classifier = pickle.load(f)
self.correspondent_classifier = pickle.load(f)
self.document_type_classifier = pickle.load(f)
self.storage_path_classifier = pickle.load(f)
except Exception:
raise ClassifierModelCorruptError()
@@ -107,6 +109,7 @@ class DocumentClassifier:
pickle.dump(self.tags_classifier, f)
pickle.dump(self.correspondent_classifier, f)
pickle.dump(self.document_type_classifier, f)
pickle.dump(self.storage_path_classifier, f)
if os.path.isfile(target_file):
os.unlink(target_file)
@@ -118,6 +121,7 @@ class DocumentClassifier:
labels_tags = list()
labels_correspondent = list()
labels_document_type = list()
labels_storage_path = list()
# Step 1: Extract and preprocess training data from the database.
logger.debug("Gathering data from database...")
@@ -153,6 +157,13 @@ class DocumentClassifier:
m.update(tag.to_bytes(4, "little", signed=True))
labels_tags.append(tags)
y = -1
sd = doc.storage_path
if sd and sd.matching_algorithm == MatchingModel.MATCH_AUTO:
y = sd.pk
m.update(y.to_bytes(4, "little", signed=True))
labels_storage_path.append(y)
if not data:
raise ValueError("No training data available.")
@@ -172,14 +183,16 @@ class DocumentClassifier:
# it usually is.
num_correspondents = len(set(labels_correspondent) | {-1}) - 1
num_document_types = len(set(labels_document_type) | {-1}) - 1
num_storage_paths = len(set(labels_storage_path) | {-1}) - 1
logger.debug(
"{} documents, {} tag(s), {} correspondent(s), "
"{} document type(s).".format(
"{} document type(s). {} storage path(es)".format(
len(data),
num_tags,
num_correspondents,
num_document_types,
num_storage_paths,
),
)
@@ -242,6 +255,21 @@ class DocumentClassifier:
"classifier.",
)
if num_storage_paths > 0:
logger.debug(
"Training storage paths classifier...",
)
self.storage_path_classifier = MLPClassifier(tol=0.01)
self.storage_path_classifier.fit(
data_vectorized,
labels_storage_path,
)
else:
self.storage_path_classifier = None
logger.debug(
"There are no storage paths. Not training storage path classifier.",
)
self.data_hash = new_data_hash
return True
@@ -288,3 +316,14 @@ class DocumentClassifier:
return []
else:
return []
def predict_storage_path(self, content):
    """
    Predict the storage path id for a document's text content.

    Returns ``None`` when no storage path classifier is available or when the
    classifier predicts the ``-1`` label; otherwise returns the value produced
    by the classifier's ``predict`` call unchanged.
    """
    if not self.storage_path_classifier:
        return None

    features = self.data_vectorizer.transform([preprocess_content(content)])
    predicted = self.storage_path_classifier.predict(features)
    return predicted if predicted != -1 else None

View File

@@ -128,13 +128,26 @@ def generate_unique_filename(doc, archive_filename=False):
def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
path = ""
filename_format = settings.FILENAME_FORMAT
try:
if settings.PAPERLESS_FILENAME_FORMAT is not None:
tags = defaultdictNoStr(lambda: slugify(None), many_to_dictionary(doc.tags))
if doc.storage_path is not None:
logger.debug(
f"Document has storage_path {doc.storage_path.id} "
f"({doc.storage_path.path}) set",
)
filename_format = doc.storage_path.path
if filename_format is not None:
tags = defaultdictNoStr(
lambda: slugify(None),
many_to_dictionary(doc.tags),
)
tag_list = pathvalidate.sanitize_filename(
",".join(sorted(tag.name for tag in doc.tags.all())),
",".join(
sorted(tag.name for tag in doc.tags.all()),
),
replacement_text="-",
)
@@ -144,7 +157,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
replacement_text="-",
)
else:
correspondent = "none"
correspondent = "-none-"
if doc.document_type:
document_type = pathvalidate.sanitize_filename(
@@ -152,18 +165,18 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
replacement_text="-",
)
else:
document_type = "none"
document_type = "-none-"
if doc.archive_serial_number:
asn = str(doc.archive_serial_number)
else:
asn = "none"
asn = "-none-"
# Convert UTC database date to localized date
local_added = timezone.localdate(doc.added)
local_created = timezone.localdate(doc.created)
path = settings.PAPERLESS_FILENAME_FORMAT.format(
path = filename_format.format(
title=pathvalidate.sanitize_filename(doc.title, replacement_text="-"),
correspondent=correspondent,
document_type=document_type,
@@ -180,12 +193,17 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
tag_list=tag_list,
).strip()
if settings.FILENAME_FORMAT_REMOVE_NONE:
path = path.replace("-none-/", "") # remove empty directories
path = path.replace(" -none-", "") # remove when spaced, with space
path = path.replace("-none-", "") # remove rest of the occurences
path = path.replace("-none-", "none") # backward compatibility
path = path.strip(os.sep)
except (ValueError, KeyError, IndexError):
logger.warning(
f"Invalid PAPERLESS_FILENAME_FORMAT: "
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default",
f"Invalid filename_format '{filename_format}', falling back to default",
)
counter_str = f"_{counter:02}" if counter else ""

View File

@@ -7,6 +7,7 @@ from .models import Correspondent
from .models import Document
from .models import DocumentType
from .models import Log
from .models import StoragePath
from .models import Tag
CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"]
@@ -114,6 +115,9 @@ class DocumentFilterSet(FilterSet):
"document_type": ["isnull"],
"document_type__id": ID_KWARGS,
"document_type__name": CHAR_KWARGS,
"storage_path": ["isnull"],
"storage_path__id": ID_KWARGS,
"storage_path__name": CHAR_KWARGS,
}
@@ -121,3 +125,12 @@ class LogFilterSet(FilterSet):
class Meta:
model = Log
fields = {"level": INT_KWARGS, "created": DATE_KWARGS, "group": ID_KWARGS}
class StoragePathFilterSet(FilterSet):
    """Filter set for ``StoragePath`` exposing lookups on ``name`` and ``path``."""

    class Meta:
        model = StoragePath
        # Both fields accept the character lookups listed in CHAR_KWARGS.
        fields = {
            "name": CHAR_KWARGS,
            "path": CHAR_KWARGS,
        }

View File

@@ -46,6 +46,9 @@ def get_schema():
created=DATETIME(sortable=True),
modified=DATETIME(sortable=True),
added=DATETIME(sortable=True),
path=TEXT(sortable=True),
path_id=NUMERIC(),
has_path=BOOLEAN(),
)
@@ -104,6 +107,9 @@ def update_document(writer, doc):
added=doc.added,
asn=doc.archive_serial_number,
modified=doc.modified,
path=doc.storage_path.name if doc.storage_path else None,
path_id=doc.storage_path.id if doc.storage_path else None,
has_path=doc.storage_path is not None,
)
@@ -157,6 +163,11 @@ class DelayedQuery:
criterias.append(query.DateRange("added", start=isoparse(v), end=None))
elif k == "added__date__lt":
criterias.append(query.DateRange("added", start=None, end=isoparse(v)))
elif k == "storage_path__id":
criterias.append(query.Term("path_id", v))
elif k == "storage_path__isnull":
criterias.append(query.Term("has_path", v == "false"))
if len(criterias) > 0:
return query.And(criterias)
else:

View File

@@ -152,4 +152,4 @@ class Command(BaseCommand):
),
)
except KeyboardInterrupt:
self.stdout.write(self.style.NOTICE(("Aborting...")))
self.stdout.write(self.style.NOTICE("Aborting..."))

View File

@@ -4,6 +4,7 @@ import re
from documents.models import Correspondent
from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import StoragePath
from documents.models import Tag
@@ -57,6 +58,22 @@ def match_tags(document, classifier):
)
def match_storage_paths(document, classifier):
    """
    Return every ``StoragePath`` that applies to ``document``.

    A storage path is included when ``matches()`` accepts it for the document
    or when its primary key equals the classifier's prediction. Without a
    classifier there is no prediction and only ``matches()`` decides.
    """
    pred_id = classifier.predict_storage_path(document.content) if classifier else None

    return [
        storage_path
        for storage_path in StoragePath.objects.all()
        if matches(storage_path, document) or storage_path.pk == pred_id
    ]
def matches(matching_model, document):
search_kwargs = {}

View File

@@ -83,7 +83,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
path = ""
try:
if settings.PAPERLESS_FILENAME_FORMAT is not None:
if settings.FILENAME_FORMAT is not None:
tags = defaultdictNoStr(lambda: slugify(None), many_to_dictionary(doc.tags))
tag_list = pathvalidate.sanitize_filename(
@@ -105,7 +105,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
else:
document_type = "none"
path = settings.PAPERLESS_FILENAME_FORMAT.format(
path = settings.FILENAME_FORMAT.format(
title=pathvalidate.sanitize_filename(doc.title, replacement_text="-"),
correspondent=correspondent,
document_type=document_type,
@@ -128,7 +128,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
except (ValueError, KeyError, IndexError):
logger.warning(
f"Invalid PAPERLESS_FILENAME_FORMAT: "
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default"
f"{settings.FILENAME_FORMAT}, falling back to default"
)
counter_str = f"_{counter:02}" if counter else ""

View File

@@ -0,0 +1,73 @@
# Generated by Django 4.0.4 on 2022-05-02 15:56
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Create the ``StoragePath`` model and add ``Document.storage_path``."""

    dependencies = [
        ("documents", "1018_alter_savedviewfilterrule_value"),
    ]

    operations = [
        # New table for storage paths: a unique name, the matching
        # configuration (match / matching_algorithm / is_insensitive) and the
        # path itself.
        migrations.CreateModel(
            name="StoragePath",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "name",
                    models.CharField(max_length=128, unique=True, verbose_name="name"),
                ),
                (
                    "match",
                    models.CharField(blank=True, max_length=256, verbose_name="match"),
                ),
                (
                    "matching_algorithm",
                    models.PositiveIntegerField(
                        choices=[
                            (1, "Any word"),
                            (2, "All words"),
                            (3, "Exact match"),
                            (4, "Regular expression"),
                            (5, "Fuzzy word"),
                            (6, "Automatic"),
                        ],
                        default=1,
                        verbose_name="matching algorithm",
                    ),
                ),
                (
                    "is_insensitive",
                    models.BooleanField(default=True, verbose_name="is insensitive"),
                ),
                ("path", models.CharField(max_length=512, verbose_name="path")),
            ],
            options={
                "verbose_name": "storage path",
                "verbose_name_plural": "storage paths",
                "ordering": ("name",),
            },
        ),
        # Optional link from a document to its storage path; deleting a
        # storage path sets the column to NULL instead of deleting documents.
        migrations.AddField(
            model_name="document",
            name="storage_path",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                related_name="documents",
                to="documents.storagepath",
                verbose_name="storage path",
            ),
        ),
    ]

View File

@@ -0,0 +1,13 @@
# Generated by Django 4.0.4 on 2022-05-18 18:39
from django.db import migrations
class Migration(migrations.Migration):
    """
    Migration without operations that depends on both ``1019_*`` migrations.

    NOTE(review): presumably a merge migration joining the two 1019 branches
    into a single history - confirm against the migration graph.
    """

    dependencies = [
        ("documents", "1019_storagepath_document_storage_path"),
        ("documents", "1019_uisettings"),
    ]

    # Nothing to apply; only the dependency edges above matter.
    operations = []

View File

@@ -83,6 +83,18 @@ class DocumentType(MatchingModel):
verbose_name_plural = _("document types")
class StoragePath(MatchingModel):
    """
    A matchable model that carries a ``path`` string of up to 512 characters.

    Instances are ordered by ``name`` by default.
    """

    # NOTE(review): generate_filename uses this value as the filename format
    # template for documents that have a storage path - confirm placeholders
    # against the serializer's validate_path.
    path = models.CharField(
        _("path"),
        max_length=512,
    )

    class Meta:
        ordering = ("name",)
        verbose_name = _("storage path")
        verbose_name_plural = _("storage paths")
class Document(models.Model):
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
@@ -101,6 +113,15 @@ class Document(models.Model):
verbose_name=_("correspondent"),
)
storage_path = models.ForeignKey(
StoragePath,
blank=True,
null=True,
related_name="documents",
on_delete=models.SET_NULL,
verbose_name=_("storage path"),
)
title = models.CharField(_("title"), max_length=128, blank=True, db_index=True)
document_type = models.ForeignKey(

View File

@@ -14,6 +14,7 @@ from .models import DocumentType
from .models import MatchingModel
from .models import SavedView
from .models import SavedViewFilterRule
from .models import StoragePath
from .models import Tag
from .models import UiSettings
from .parsers import is_mime_type_supported
@@ -199,11 +200,17 @@ class DocumentTypeField(serializers.PrimaryKeyRelatedField):
return DocumentType.objects.all()
class StoragePathField(serializers.PrimaryKeyRelatedField):
    """Primary-key related field that resolves against all ``StoragePath`` rows."""

    def get_queryset(self):
        """Return the unfiltered ``StoragePath`` queryset."""
        return StoragePath.objects.all()
class DocumentSerializer(DynamicFieldsModelSerializer):
correspondent = CorrespondentField(allow_null=True)
tags = TagsField(many=True)
document_type = DocumentTypeField(allow_null=True)
storage_path = StoragePathField(allow_null=True)
original_file_name = SerializerMethodField()
archived_file_name = SerializerMethodField()
@@ -224,6 +231,7 @@ class DocumentSerializer(DynamicFieldsModelSerializer):
"id",
"correspondent",
"document_type",
"storage_path",
"title",
"content",
"tags",
@@ -310,6 +318,7 @@ class BulkEditSerializer(DocumentListSerializer):
choices=[
"set_correspondent",
"set_document_type",
"set_storage_path",
"add_tag",
"remove_tag",
"modify_tags",
@@ -337,6 +346,8 @@ class BulkEditSerializer(DocumentListSerializer):
return bulk_edit.set_correspondent
elif method == "set_document_type":
return bulk_edit.set_document_type
elif method == "set_storage_path":
return bulk_edit.set_storage_path
elif method == "add_tag":
return bulk_edit.add_tag
elif method == "remove_tag":
@@ -383,6 +394,20 @@ class BulkEditSerializer(DocumentListSerializer):
else:
raise serializers.ValidationError("correspondent not specified")
def _validate_storage_path(self, parameters):
    """
    Validate the ``storage_path`` parameter of a bulk edit request.

    The key must be present. ``None`` is accepted without a lookup; any other
    value must be the id of an existing ``StoragePath``.

    Raises ``serializers.ValidationError`` when the key is missing or the id
    does not exist.
    """
    if "storage_path" not in parameters:
        raise serializers.ValidationError("storage path not specified")

    storage_path_id = parameters["storage_path"]
    if storage_path_id is None:
        # Explicit None is allowed and needs no existence check.
        return

    try:
        StoragePath.objects.get(id=storage_path_id)
    except StoragePath.DoesNotExist:
        raise serializers.ValidationError(
            "Storage path does not exist",
        )
def _validate_parameters_modify_tags(self, parameters):
if "add_tags" in parameters:
self._validate_tag_id_list(parameters["add_tags"], "add_tags")
@@ -407,6 +432,8 @@ class BulkEditSerializer(DocumentListSerializer):
self._validate_parameters_tags(parameters)
elif method == bulk_edit.modify_tags:
self._validate_parameters_modify_tags(parameters)
elif method == bulk_edit.set_storage_path:
self._validate_storage_path(parameters)
return attrs
@@ -508,6 +535,47 @@ class BulkDownloadSerializer(DocumentListSerializer):
}[compression]
class StoragePathSerializer(MatchingModelSerializer):
    """Serializer for ``StoragePath`` including a read-only document count."""

    document_count = serializers.IntegerField(read_only=True)

    class Meta:
        model = StoragePath
        fields = (
            "id",
            "slug",
            "name",
            "path",
            "match",
            "matching_algorithm",
            "is_insensitive",
            "document_count",
        )

    def validate_path(self, path):
        """
        Check that ``path`` is a usable format template.

        The template is rendered once with dummy string values for every
        supported placeholder. Any failure of that rendering is reported as a
        validation error instead of escaping as an unhandled exception:

        - ``KeyError``: unknown placeholder name
        - ``IndexError``: positional placeholder such as ``{}`` or ``{0}``
        - ``ValueError``: malformed template (e.g. unbalanced braces) or an
          invalid format spec
        - ``AttributeError`` / ``TypeError``: attribute or index access on a
          placeholder that the value does not support

        Returns the unmodified ``path`` when rendering succeeds.
        """
        try:
            path.format(
                title="title",
                correspondent="correspondent",
                document_type="document_type",
                created="created",
                created_year="created_year",
                created_month="created_month",
                created_day="created_day",
                added="added",
                added_year="added_year",
                added_month="added_month",
                added_day="added_day",
                asn="asn",
                tags="tags",
                tag_list="tag_list",
            )
        except (KeyError, IndexError, ValueError, AttributeError, TypeError) as err:
            raise serializers.ValidationError(_("Invalid variable detected.")) from err

        return path
class UiSettingsViewSerializer(serializers.ModelSerializer):
class Meta:
model = UiSettings

View File

@@ -230,6 +230,76 @@ def set_tags(
document.tags.add(*relevant_tags)
def set_storage_path(
    sender,
    document=None,
    logging_group=None,
    classifier=None,
    replace=False,
    use_first=True,
    suggest=False,
    base_url=None,
    color=False,
    **kwargs,
):
    """
    Assign, or only suggest, a storage path for ``document``.

    Parameters:
        sender: signal sender; not used here.
        document: the document to process.
        logging_group: passed as ``extra={"group": ...}`` to log calls.
        classifier: forwarded to ``matching.match_storage_paths``.
        replace: when false, documents that already have a storage path are
            left untouched; when true, the result (possibly ``None``) is
            applied even if nothing matched.
        use_first: with several candidates, pick the first one when true,
            otherwise assign nothing.
        suggest: print the suggestion instead of saving it.
        base_url: when set, the suggestion output includes a document URL.
        color: colorize the document name in the suggestion output.

    Returns ``None`` in every case.
    """
    if document.storage_path and not replace:
        return

    potential_storage_path = matching.match_storage_paths(
        document,
        classifier,
    )

    potential_count = len(potential_storage_path)
    selected = potential_storage_path[0] if potential_storage_path else None

    if potential_count > 1:
        if use_first:
            logger.info(
                f"Detected {potential_count} potential storage paths, "
                f"so we've opted for {selected}",
                extra={"group": logging_group},
            )
        else:
            logger.info(
                f"Detected {potential_count} potential storage paths, "
                f"not assigning any storage directory",
                extra={"group": logging_group},
            )
            return

    if selected or replace:
        if suggest:
            if base_url:
                print(
                    termcolors.colorize(str(document), fg="green")
                    if color
                    else str(document),
                )
                print(f"{base_url}/documents/{document.pk}")
            else:
                print(
                    (
                        termcolors.colorize(str(document), fg="green")
                        if color
                        else str(document)
                    )
                    + f" [{document.pk}]",
                )
            # Fixed user-facing typo ("Sugest" -> "Suggest").
            print(f"Suggest storage directory {selected}")
        else:
            logger.info(
                f"Assigning storage path {selected} to {document}",
                extra={"group": logging_group},
            )

            document.storage_path = selected
            # Only the storage_path column is written back.
            document.save(update_fields=("storage_path",))
@receiver(models.signals.post_delete, sender=Document)
def cleanup_document_deletion(sender, instance, using, **kwargs):
with FileLock(settings.MEDIA_LOCK):

View File

@@ -19,6 +19,7 @@ from documents.consumer import ConsumerError
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.models import Tag
from documents.sanity_checker import SanityCheckFailedException
from pdf2image import convert_from_path
@@ -53,6 +54,7 @@ def train_classifier():
not Tag.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
and not DocumentType.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
and not Correspondent.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
and not StoragePath.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
):
return

View File

@@ -26,8 +26,10 @@ from documents.models import Document
from documents.models import DocumentType
from documents.models import MatchingModel
from documents.models import SavedView
from documents.models import StoragePath
from documents.models import Tag
from documents.models import UiSettings
from documents.models import StoragePath
from documents.tests.utils import DirectoriesMixin
from paperless import version
from rest_framework.test import APITestCase
@@ -99,6 +101,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
c = Correspondent.objects.create(name="c", pk=41)
dt = DocumentType.objects.create(name="dt", pk=63)
tag = Tag.objects.create(name="t", pk=85)
storage_path = StoragePath.objects.create(name="sp", pk=77, path="p")
doc = Document.objects.create(
title="WOW",
content="the content",
@@ -106,6 +109,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
document_type=dt,
checksum="123",
mime_type="application/pdf",
storage_path=storage_path,
)
response = self.client.get("/api/documents/", format="json")
@@ -192,7 +196,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 200)
self.assertEqual(response.content, content_thumbnail)
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
def test_download_with_archive(self):
content = b"This is a test"
@@ -580,10 +584,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
t2 = Tag.objects.create(name="tag2")
c = Correspondent.objects.create(name="correspondent")
dt = DocumentType.objects.create(name="type")
sp = StoragePath.objects.create(name="path")
d1 = Document.objects.create(checksum="1", correspondent=c, content="test")
d2 = Document.objects.create(checksum="2", document_type=dt, content="test")
d3 = Document.objects.create(checksum="3", content="test")
d3.tags.add(t)
d3.tags.add(t2)
d4 = Document.objects.create(
@@ -598,6 +604,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
content="test",
)
d6 = Document.objects.create(checksum="6", content="test2")
d7 = Document.objects.create(checksum="7", storage_path=sp, content="test")
with AsyncWriter(index.open_index()) as writer:
for doc in Document.objects.all():
@@ -608,18 +615,30 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(r.status_code, 200)
return [hit["id"] for hit in r.data["results"]]
self.assertCountEqual(search_query(""), [d1.id, d2.id, d3.id, d4.id, d5.id])
self.assertCountEqual(
search_query(""),
[d1.id, d2.id, d3.id, d4.id, d5.id, d7.id],
)
self.assertCountEqual(search_query("&is_tagged=true"), [d3.id, d4.id])
self.assertCountEqual(search_query("&is_tagged=false"), [d1.id, d2.id, d5.id])
self.assertCountEqual(
search_query("&is_tagged=false"),
[d1.id, d2.id, d5.id, d7.id],
)
self.assertCountEqual(search_query("&correspondent__id=" + str(c.id)), [d1.id])
self.assertCountEqual(search_query("&document_type__id=" + str(dt.id)), [d2.id])
self.assertCountEqual(search_query("&storage_path__id=" + str(sp.id)), [d7.id])
self.assertCountEqual(
search_query("&storage_path__isnull"),
[d1.id, d2.id, d3.id, d4.id, d5.id],
)
self.assertCountEqual(
search_query("&correspondent__isnull"),
[d2.id, d3.id, d4.id, d5.id],
[d2.id, d3.id, d4.id, d5.id, d7.id],
)
self.assertCountEqual(
search_query("&document_type__isnull"),
[d1.id, d3.id, d4.id, d5.id],
[d1.id, d3.id, d4.id, d5.id, d7.id],
)
self.assertCountEqual(
search_query("&tags__id__all=" + str(t.id) + "," + str(t2.id)),
@@ -1080,35 +1099,49 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, 200)
self.assertEqual(
response.data,
{"correspondents": [], "tags": [], "document_types": []},
{
"correspondents": [],
"tags": [],
"document_types": [],
"storage_paths": [],
},
)
def test_get_suggestions_invalid_doc(self):
response = self.client.get(f"/api/documents/34676/suggestions/")
self.assertEqual(response.status_code, 404)
@mock.patch("documents.views.match_correspondents")
@mock.patch("documents.views.match_tags")
@mock.patch("documents.views.match_storage_paths")
@mock.patch("documents.views.match_document_types")
@mock.patch("documents.views.match_tags")
@mock.patch("documents.views.match_correspondents")
def test_get_suggestions(
self,
match_document_types,
match_tags,
match_correspondents,
match_tags,
match_document_types,
match_storage_paths,
):
doc = Document.objects.create(
title="test",
mime_type="application/pdf",
content="this is an invoice!",
)
match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
match_tags.return_value = [Tag(id=56), Tag(id=123)]
match_document_types.return_value = [DocumentType(id=23)]
match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
match_storage_paths.return_value = [StoragePath(id=99), StoragePath(id=77)]
response = self.client.get(f"/api/documents/{doc.pk}/suggestions/")
self.assertEqual(
response.data,
{"correspondents": [88, 2], "tags": [56, 123], "document_types": [23]},
{
"correspondents": [88, 2],
"tags": [56, 123],
"document_types": [23],
"storage_paths": [99, 77],
},
)
def test_saved_views(self):
@@ -1469,6 +1502,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
self.doc2.tags.add(self.t1)
self.doc3.tags.add(self.t2)
self.doc4.tags.add(self.t1, self.t2)
self.sp1 = StoragePath.objects.create(name="sp1", path="Something/{checksum}")
def test_set_correspondent(self):
self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 1)
@@ -1508,6 +1542,60 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
args, kwargs = self.async_task.call_args
self.assertCountEqual(kwargs["document_ids"], [self.doc2.id, self.doc3.id])
def test_set_document_storage_path(self):
    """
    GIVEN:
        - 5 documents, none of which has a storage path
    WHEN:
        - Bulk edit assigns a storage path to one document
    THEN:
        - Exactly that document is updated and queued for the bulk update
    """

    def unassigned_count():
        return Document.objects.filter(storage_path=None).count()

    self.assertEqual(unassigned_count(), 5)

    bulk_edit.set_storage_path([self.doc1.id], self.sp1.id)

    self.assertEqual(unassigned_count(), 4)

    self.async_task.assert_called_once()
    _, call_kwargs = self.async_task.call_args
    self.assertCountEqual(call_kwargs["document_ids"], [self.doc1.id])
def test_unset_document_storage_path(self):
    """
    GIVEN:
        - 5 documents without a storage path
        - One of them is then given a storage path via bulk edit
    WHEN:
        - Bulk edit is called with None for that document
    THEN:
        - The storage path is removed again and the document is queued
          for the bulk update
    """

    def unassigned_count():
        return Document.objects.filter(storage_path=None).count()

    target_ids = [self.doc1.id]

    self.assertEqual(unassigned_count(), 5)

    bulk_edit.set_storage_path(target_ids, self.sp1.id)
    self.assertEqual(unassigned_count(), 4)

    bulk_edit.set_storage_path(target_ids, None)
    self.assertEqual(unassigned_count(), 5)

    self.async_task.assert_called()
    # call_args reflects the most recent call, i.e. the unset operation.
    _, call_kwargs = self.async_task.call_args
    self.assertCountEqual(call_kwargs["document_ids"], [self.doc1.id])
def test_add_tag(self):
self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 2)
bulk_edit.add_tag(

View File

@@ -13,6 +13,7 @@ from documents.classifier import load_classifier
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import StoragePath
from documents.models import Tag
from documents.tests.utils import DirectoriesMixin
@@ -56,6 +57,16 @@ class TestClassifier(DirectoriesMixin, TestCase):
name="dt2",
matching_algorithm=DocumentType.MATCH_AUTO,
)
self.sp1 = StoragePath.objects.create(
name="sp1",
path="path1",
matching_algorithm=DocumentType.MATCH_AUTO,
)
self.sp2 = StoragePath.objects.create(
name="sp2",
path="path2",
matching_algorithm=DocumentType.MATCH_AUTO,
)
self.doc1 = Document.objects.create(
title="doc1",
@@ -64,12 +75,14 @@ class TestClassifier(DirectoriesMixin, TestCase):
checksum="A",
document_type=self.dt,
)
self.doc2 = Document.objects.create(
title="doc1",
content="this is another document, but from c2",
correspondent=self.c2,
checksum="B",
)
self.doc_inbox = Document.objects.create(
title="doc235",
content="aa",
@@ -81,6 +94,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.doc2.tags.add(self.t3)
self.doc_inbox.tags.add(self.t2)
self.doc1.storage_path = self.sp1
def testNoTrainingData(self):
try:
self.classifier.train()
@@ -177,6 +192,14 @@ class TestClassifier(DirectoriesMixin, TestCase):
new_classifier.load()
self.assertFalse(new_classifier.train())
# @override_settings(
# MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"),
# )
# def test_create_test_load_and_classify(self):
# self.generate_test_data()
# self.classifier.train()
# self.classifier.save()
@override_settings(
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"),
)
@@ -263,6 +286,45 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.assertEqual(self.classifier.predict_document_type(doc1.content), dt.pk)
self.assertIsNone(self.classifier.predict_document_type(doc2.content))
def test_one_path_predict(self):
    """
    GIVEN:
        - One auto-matching storage path assigned to a single document
    WHEN:
        - The classifier is trained
    THEN:
        - The document's content is predicted to belong to that storage path
    """
    storage_path = StoragePath.objects.create(
        name="sp",
        matching_algorithm=StoragePath.MATCH_AUTO,
    )
    document = Document.objects.create(
        title="doc1",
        content="this is a document from c1",
        checksum="A",
        storage_path=storage_path,
    )

    self.classifier.train()

    prediction = self.classifier.predict_storage_path(document.content)
    self.assertEqual(prediction, storage_path.pk)
def test_one_path_predict_manydocs(self):
    """
    GIVEN:
        - One auto-matching storage path
        - One document assigned to it and one document without storage path
    WHEN:
        - The classifier is trained
    THEN:
        - The assigned document is predicted to the storage path
        - The unassigned document gets no prediction
    """
    storage_path = StoragePath.objects.create(
        name="sp",
        matching_algorithm=StoragePath.MATCH_AUTO,
    )
    with_path = Document.objects.create(
        title="doc1",
        content="this is a document from c1",
        checksum="A",
        storage_path=storage_path,
    )
    without_path = Document.objects.create(
        title="doc1",
        content="this is a document from c2",
        checksum="B",
    )

    self.classifier.train()

    self.assertEqual(
        self.classifier.predict_storage_path(with_path.content),
        storage_path.pk,
    )
    self.assertIsNone(self.classifier.predict_storage_path(without_path.content))
def test_one_tag_predict(self):
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)

View File

@@ -320,7 +320,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
shutil.copy(src, dst)
return dst
@override_settings(PAPERLESS_FILENAME_FORMAT=None, TIME_ZONE="America/Chicago")
@override_settings(FILENAME_FORMAT=None, TIME_ZONE="America/Chicago")
def testNormalOperation(self):
filename = self.get_test_file()
@@ -351,7 +351,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
self.assertEqual(document.created.tzinfo, zoneinfo.ZoneInfo("America/Chicago"))
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
@override_settings(FILENAME_FORMAT=None)
def testDeleteMacFiles(self):
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
@@ -518,7 +518,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
# Database empty
self.assertEqual(len(Document.objects.all()), 0)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
def testFilenameHandling(self):
filename = self.get_test_file()
@@ -530,7 +530,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
self._assert_first_last_send_progress()
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
@mock.patch("documents.signals.handlers.generate_unique_filename")
def testFilenameHandlingUnstableFormat(self, m):
@@ -612,7 +612,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
self._assert_first_last_send_progress(last_status="FAILED")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test_similar_filenames(self, m):
shutil.copy(
@@ -660,7 +660,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
class TestConsumerCreatedDate(DirectoriesMixin, TestCase):
def setUp(self):
super(TestConsumerCreatedDate, self).setUp()
super().setUp()
# this prevents websocket message reports during testing.
patcher = mock.patch("documents.consumer.Consumer._send_progress")

View File

@@ -20,12 +20,12 @@ from ..file_handling import generate_unique_filename
from ..models import Correspondent
from ..models import Document
from ..models import DocumentType
from ..models import Tag
from ..models import StoragePath
from .utils import DirectoriesMixin
class TestFileHandling(DirectoriesMixin, TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
def test_generate_source_filename(self):
document = Document()
document.mime_type = "application/pdf"
@@ -40,7 +40,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
f"{document.pk:07d}.pdf.gpg",
)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_file_renaming(self):
document = Document()
document.mime_type = "application/pdf"
@@ -82,7 +82,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
True,
)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_file_renaming_missing_permissions(self):
document = Document()
document.mime_type = "application/pdf"
@@ -117,7 +117,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_file_renaming_database_error(self):
document1 = Document.objects.create(
@@ -156,7 +156,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
)
self.assertEqual(document.filename, "none/none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_document_delete(self):
document = Document()
document.mime_type = "application/pdf"
@@ -180,7 +180,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
@override_settings(
PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}",
FILENAME_FORMAT="{correspondent}/{correspondent}",
TRASH_DIR=tempfile.mkdtemp(),
)
def test_document_delete_trash(self):
@@ -218,7 +218,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
document.delete()
self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none_01.pdf"), True)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_document_delete_nofile(self):
document = Document()
document.mime_type = "application/pdf"
@@ -227,7 +227,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
document.delete()
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
def test_directory_not_empty(self):
document = Document()
document.mime_type = "application/pdf"
@@ -253,7 +253,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True)
self.assertTrue(os.path.isfile(important_file))
@override_settings(PAPERLESS_FILENAME_FORMAT="{document_type} - {title}")
@override_settings(FILENAME_FORMAT="{document_type} - {title}")
def test_document_type(self):
dt = DocumentType.objects.create(name="my_doc_type")
d = Document.objects.create(title="the_doc", mime_type="application/pdf")
@@ -264,7 +264,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(d), "my_doc_type - the_doc.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{asn} - {title}")
@override_settings(FILENAME_FORMAT="{asn} - {title}")
def test_asn(self):
d1 = Document.objects.create(
title="the_doc",
@@ -281,7 +281,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(d1), "652 - the_doc.pdf")
self.assertEqual(generate_filename(d2), "none - the_doc.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
@override_settings(FILENAME_FORMAT="{tags[type]}")
def test_tags_with_underscore(self):
document = Document()
document.mime_type = "application/pdf"
@@ -296,7 +296,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document), "demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
@override_settings(FILENAME_FORMAT="{tags[type]}")
def test_tags_with_dash(self):
document = Document()
document.mime_type = "application/pdf"
@@ -311,7 +311,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document), "demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
@override_settings(FILENAME_FORMAT="{tags[type]}")
def test_tags_malformed(self):
document = Document()
document.mime_type = "application/pdf"
@@ -326,7 +326,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document), "none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
@override_settings(FILENAME_FORMAT="{tags[0]}")
def test_tags_all(self):
document = Document()
document.mime_type = "application/pdf"
@@ -340,7 +340,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document), "demo.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}")
@override_settings(FILENAME_FORMAT="{tags[1]}")
def test_tags_out_of_bounds(self):
document = Document()
document.mime_type = "application/pdf"
@@ -354,7 +354,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
# Ensure that filename is properly generated
self.assertEqual(generate_filename(document), "none.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags}")
@override_settings(FILENAME_FORMAT="{tags}")
def test_tags_without_args(self):
document = Document()
document.mime_type = "application/pdf"
@@ -363,7 +363,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(document), f"{document.pk:07}.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title} {tag_list}")
@override_settings(FILENAME_FORMAT="{title} {tag_list}")
def test_tag_list(self):
doc = Document.objects.create(title="doc1", mime_type="application/pdf")
doc.tags.create(name="tag2")
@@ -379,7 +379,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(doc), "doc2.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="//etc/something/{title}")
@override_settings(FILENAME_FORMAT="//etc/something/{title}")
def test_filename_relative(self):
doc = Document.objects.create(title="doc1", mime_type="application/pdf")
doc.filename = generate_filename(doc)
@@ -391,7 +391,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
)
@override_settings(
PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}",
FILENAME_FORMAT="{created_year}-{created_month}-{created_day}",
)
def test_created_year_month_day(self):
d1 = timezone.make_aware(datetime.datetime(2020, 3, 6, 1, 1, 1))
@@ -408,7 +408,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
@override_settings(
PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}",
FILENAME_FORMAT="{added_year}-{added_month}-{added_day}",
)
def test_added_year_month_day(self):
d1 = timezone.make_aware(datetime.datetime(232, 1, 9, 1, 1, 1))
@@ -425,7 +425,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
@override_settings(
PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}",
FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}",
)
def test_nested_directory_cleanup(self):
document = Document()
@@ -453,7 +453,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
@override_settings(FILENAME_FORMAT=None)
def test_format_none(self):
document = Document()
document.pk = 1
@@ -479,7 +479,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(os.path.isfile(os.path.join(tmp, "notempty", "file")), True)
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty", "empty")), False)
@override_settings(PAPERLESS_FILENAME_FORMAT="{created/[title]")
@override_settings(FILENAME_FORMAT="{created/[title]")
def test_invalid_format(self):
document = Document()
document.pk = 1
@@ -488,7 +488,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(document), "0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{created__year}")
@override_settings(FILENAME_FORMAT="{created__year}")
def test_invalid_format_key(self):
document = Document()
document.pk = 1
@@ -497,7 +497,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertEqual(generate_filename(document), "0000001.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
def test_duplicates(self):
document = Document.objects.create(
mime_type="application/pdf",
@@ -548,7 +548,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(document.source_path))
self.assertEqual(document2.filename, "qwe.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
@mock.patch("documents.signals.handlers.Document.objects.filter")
def test_no_update_without_change(self, m):
doc = Document.objects.create(
@@ -568,7 +568,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
@override_settings(FILENAME_FORMAT=None)
def test_create_no_format(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
@@ -587,7 +587,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
def test_create_with_format(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
@@ -615,7 +615,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"),
)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
def test_move_archive_gone(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
@@ -634,7 +634,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertFalse(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
def test_move_archive_exists(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
@@ -659,7 +659,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(existing_archive_file))
self.assertEqual(doc.archive_filename, "none/my_doc_01.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
def test_move_original_only(self):
original = os.path.join(settings.ORIGINALS_DIR, "document_01.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "document.pdf")
@@ -681,7 +681,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
def test_move_archive_only(self):
original = os.path.join(settings.ORIGINALS_DIR, "document.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "document_01.pdf")
@@ -703,7 +703,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
@mock.patch("documents.signals.handlers.os.rename")
def test_move_archive_error(self, m):
def fake_rename(src, dst):
@@ -734,7 +734,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
def test_move_file_gone(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
@@ -754,7 +754,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertFalse(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
@mock.patch("documents.signals.handlers.os.rename")
def test_move_file_error(self, m):
def fake_rename(src, dst):
@@ -785,7 +785,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc.source_path))
self.assertTrue(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
def test_archive_deleted(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
@@ -812,7 +812,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertFalse(os.path.isfile(doc.source_path))
self.assertFalse(os.path.isfile(doc.archive_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
def test_archive_deleted2(self):
original = os.path.join(settings.ORIGINALS_DIR, "document.png")
original2 = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
@@ -846,7 +846,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
self.assertTrue(os.path.isfile(doc1.archive_path))
self.assertFalse(os.path.isfile(doc2.source_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
def test_database_error(self):
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
@@ -872,7 +872,7 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
class TestFilenameGeneration(TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
def test_invalid_characters(self):
doc = Document.objects.create(
@@ -891,7 +891,7 @@ class TestFilenameGeneration(TestCase):
)
self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{created}")
@override_settings(FILENAME_FORMAT="{created}")
def test_date(self):
doc = Document.objects.create(
title="does not matter",
@@ -902,6 +902,140 @@ class TestFilenameGeneration(TestCase):
)
self.assertEqual(generate_filename(doc), "2020-05-21.pdf")
def test_dynamic_path(self):
    """
    GIVEN:
        - A document assigned to a storage path
    WHEN:
        - The filename for the document is generated
    THEN:
        - The filename is built from the storage path's template
    """
    created_at = timezone.make_aware(datetime.datetime(2020, 6, 25, 7, 36, 51, 153))
    storage_path = StoragePath.objects.create(path="TestFolder/{created}")
    document = Document.objects.create(
        title="does not matter",
        created=created_at,
        mime_type="application/pdf",
        pk=2,
        checksum="2",
        storage_path=storage_path,
    )

    filename = generate_filename(document)

    self.assertEqual(filename, "TestFolder/2020-06-25.pdf")
@override_settings(
    # Pin the setting explicitly: its default is read from the
    # PAPERLESS_FILENAME_FORMAT_REMOVE_NONE environment variable, and the
    # expected "none" placeholder below only appears while it is disabled.
    FILENAME_FORMAT_REMOVE_NONE=False,
)
def test_dynamic_path_with_none(self):
    """
    GIVEN:
        - A document with a defined storage path
        - The defined storage path uses a field that is undefined for the document
        - The setting for removing undefined fields is disabled
    WHEN:
        - the filename is generated for the document
    THEN:
        - the generated filename uses the defined storage path for the document
        - the generated filename includes "none" in place of the undefined field
    """
    doc = Document.objects.create(
        title="does not matter",
        created=timezone.make_aware(datetime.datetime(2020, 6, 25, 7, 36, 51, 153)),
        mime_type="application/pdf",
        pk=2,
        checksum="2",
        storage_path=StoragePath.objects.create(path="{asn} - {created}"),
    )

    self.assertEqual(generate_filename(doc), "none - 2020-06-25.pdf")
@override_settings(FILENAME_FORMAT_REMOVE_NONE=True)
def test_dynamic_path_remove_none(self):
    """
    GIVEN:
        - A document assigned to a storage path
        - The storage path template references a field the document does not define
        - Removal of undefined fields is switched on
    WHEN:
        - The filename for the document is generated
    THEN:
        - The filename is built from the storage path's template
        - The path segment for the undefined field is dropped instead of reading "none"
    """
    created_at = timezone.make_aware(datetime.datetime(2020, 6, 25, 7, 36, 51, 153))
    storage_path = StoragePath.objects.create(path="TestFolder/{asn}/{created}")
    document = Document.objects.create(
        title="does not matter",
        created=created_at,
        mime_type="application/pdf",
        pk=2,
        checksum="2",
        storage_path=storage_path,
    )

    filename = generate_filename(document)

    self.assertEqual(filename, "TestFolder/2020-06-25.pdf")
def test_multiple_doc_paths(self):
    """
    GIVEN:
        - Two documents, each assigned to its own storage path
    WHEN:
        - The filename is generated for both documents
    THEN:
        - Every filename is built from the storage path of its own document
    """
    # Objects are created in the same order as before: path, document, path, document.
    first_path = StoragePath.objects.create(
        name="sp1",
        path="ThisIsAFolder/{asn}/{created}",
    )
    first_doc = Document.objects.create(
        title="does not matter",
        created=timezone.make_aware(datetime.datetime(2020, 6, 25, 7, 36, 51, 153)),
        mime_type="application/pdf",
        pk=2,
        checksum="2",
        archive_serial_number=4,
        storage_path=first_path,
    )

    second_path = StoragePath.objects.create(
        name="sp2",
        path="SomeImportantNone/{created}",
    )
    second_doc = Document.objects.create(
        title="does not matter",
        created=timezone.make_aware(datetime.datetime(2020, 7, 25, 7, 36, 51, 153)),
        mime_type="application/pdf",
        pk=5,
        checksum="abcde",
        storage_path=second_path,
    )

    first_name = generate_filename(first_doc)
    self.assertEqual(first_name, "ThisIsAFolder/4/2020-06-25.pdf")

    second_name = generate_filename(second_doc)
    self.assertEqual(second_name, "SomeImportantNone/2020-07-25.pdf")
@override_settings(
    # Pin the global format: its default is read from the
    # PAPERLESS_FILENAME_FORMAT environment variable, and the fallback name
    # asserted below ("0000002.pdf") is only produced when no format is set.
    FILENAME_FORMAT=None,
)
def test_no_path_fallback(self):
    """
    GIVEN:
        - Two documents, one with defined storage path, the other not
        - No global filename format is configured
    WHEN:
        - the filename is generated for the documents
    THEN:
        - Document with defined path uses its format
        - Document without defined path uses the default, primary-key based name
    """
    doc_a = Document.objects.create(
        title="does not matter",
        created=timezone.make_aware(datetime.datetime(2020, 6, 25, 7, 36, 51, 153)),
        mime_type="application/pdf",
        pk=2,
        checksum="2",
        archive_serial_number=4,
    )
    doc_b = Document.objects.create(
        title="does not matter",
        created=timezone.make_aware(datetime.datetime(2020, 7, 25, 7, 36, 51, 153)),
        mime_type="application/pdf",
        pk=5,
        checksum="abcde",
        storage_path=StoragePath.objects.create(
            name="sp2",
            path="SomeImportantNone/{created}",
        ),
    )

    self.assertEqual(generate_filename(doc_a), "0000002.pdf")
    self.assertEqual(generate_filename(doc_b), "SomeImportantNone/2020-07-25.pdf")
def run():
doc = Document.objects.create(

View File

@@ -18,7 +18,7 @@ from documents.tests.utils import DirectoriesMixin
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
class TestArchiver(DirectoriesMixin, TestCase):
def make_models(self):
return Document.objects.create(
@@ -72,7 +72,7 @@ class TestArchiver(DirectoriesMixin, TestCase):
self.assertIsNone(doc.archive_filename)
self.assertTrue(os.path.isfile(doc.source_path))
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@override_settings(FILENAME_FORMAT="{title}")
def test_naming_priorities(self):
doc1 = Document.objects.create(
checksum="A",
@@ -109,7 +109,7 @@ class TestDecryptDocuments(TestCase):
ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
PASSPHRASE="test",
PAPERLESS_FILENAME_FORMAT=None,
FILENAME_FORMAT=None,
)
@mock.patch("documents.management.commands.decrypt_documents.input")
def test_decrypt(self, m):
@@ -184,7 +184,7 @@ class TestMakeIndex(TestCase):
class TestRenamer(DirectoriesMixin, TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
def test_rename(self):
doc = Document.objects.create(title="test", mime_type="image/jpeg")
doc.filename = generate_filename(doc)
@@ -194,7 +194,7 @@ class TestRenamer(DirectoriesMixin, TestCase):
Path(doc.source_path).touch()
Path(doc.archive_path).touch()
with override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}"):
with override_settings(FILENAME_FORMAT="{correspondent}/{title}"):
call_command("document_renamer")
doc2 = Document.objects.get(id=doc.id)

View File

@@ -200,7 +200,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
)
with override_settings(
PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}",
FILENAME_FORMAT="{created_year}/{correspondent}/{title}",
):
self.test_exporter(use_filename_format=True)
@@ -309,7 +309,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
self.assertTrue(len(manifest), 6)
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}/{correspondent}")
@override_settings(FILENAME_FORMAT="{title}/{correspondent}")
def test_update_export_changed_location(self):
shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
shutil.copytree(

View File

@@ -111,7 +111,7 @@ simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.
simple_png2 = os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
migrate_from = "1011_auto_20210101_2340"
@@ -240,7 +240,7 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
def test_filenames(self):
Document = self.apps.get_model("documents", "Document")
@@ -279,7 +279,7 @@ def fake_parse_wrapper(parser, path, mime_type, file_name):
parser.text = "the text"
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
migrate_from = "1011_auto_20210101_2340"
@@ -447,7 +447,7 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
self.assertIsNone(doc2.archive_filename)
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
migrate_from = "1012_fix_archive_files"
@@ -505,14 +505,14 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
)
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@override_settings(FILENAME_FORMAT="{correspondent}/{title}")
class TestMigrateArchiveFilesBackwardsWithFilenameFormat(
TestMigrateArchiveFilesBackwards,
):
pass
@override_settings(PAPERLESS_FILENAME_FORMAT="")
@override_settings(FILENAME_FORMAT="")
class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations):
migrate_from = "1012_fix_archive_files"

View File

@@ -55,14 +55,17 @@ from .classifier import load_classifier
from .filters import CorrespondentFilterSet
from .filters import DocumentFilterSet
from .filters import DocumentTypeFilterSet
from .filters import StoragePathFilterSet
from .filters import TagFilterSet
from .matching import match_correspondents
from .matching import match_document_types
from .matching import match_storage_paths
from .matching import match_tags
from .models import Correspondent
from .models import Document
from .models import DocumentType
from .models import SavedView
from .models import StoragePath
from .models import Tag
from .parsers import get_parser_class_for_mime_type
from .serialisers import BulkDownloadSerializer
@@ -73,6 +76,7 @@ from .serialisers import DocumentSerializer
from .serialisers import DocumentTypeSerializer
from .serialisers import PostDocumentSerializer
from .serialisers import SavedViewSerializer
from .serialisers import StoragePathSerializer
from .serialisers import TagSerializer
from .serialisers import TagSerializerVersion1
from .serialisers import UiSettingsViewSerializer
@@ -335,6 +339,7 @@ class DocumentViewSet(
"document_types": [
dt.id for dt in match_document_types(doc, classifier)
],
"storage_paths": [dt.id for dt in match_storage_paths(doc, classifier)],
},
)
@@ -577,6 +582,12 @@ class SelectionDataView(GenericAPIView):
),
)
storage_paths = StoragePath.objects.annotate(
document_count=Count(
Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
),
)
r = Response(
{
"selected_correspondents": [
@@ -589,6 +600,10 @@ class SelectionDataView(GenericAPIView):
"selected_document_types": [
{"id": t.id, "document_count": t.document_count} for t in types
],
"selected_storage_paths": [
{"id": t.id, "document_count": t.document_count}
for t in storage_paths
],
},
)
@@ -729,6 +744,21 @@ class RemoteVersionView(GenericAPIView):
)
class StoragePathViewSet(ModelViewSet):
    """
    API endpoint for listing, creating, updating and deleting storage paths.

    Every returned storage path is annotated with ``document_count``, the
    number of documents related to it, and results are ordered
    case-insensitively by name unless the client requests another ordering.
    Access requires an authenticated user.
    """

    # Must name the model the queryset, serializer and filter set below are
    # built for; this previously pointed at DocumentType (copy-paste slip).
    model = StoragePath

    queryset = StoragePath.objects.annotate(document_count=Count("documents")).order_by(
        Lower("name"),
    )

    serializer_class = StoragePathSerializer
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated,)
    filter_backends = (DjangoFilterBackend, OrderingFilter)
    filterset_class = StoragePathFilterSet
    # Fields clients may sort by through the ordering filter.
    ordering_fields = ("name", "path", "matching_algorithm", "match", "document_count")
class UiSettingsView(GenericAPIView):
permission_classes = (IsAuthenticated,)

View File

@@ -597,15 +597,22 @@ FILENAME_PARSE_TRANSFORMS = []
for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
FILENAME_PARSE_TRANSFORMS.append((re.compile(t["pattern"]), t["repl"]))
# TODO: this should not have a prefix.
# Specify the filename format for output files
PAPERLESS_FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")
# If this is enabled, variables in filename format will resolve to empty-string instead of 'none'.
# Directories with 'empty names' are omitted, too.
FILENAME_FORMAT_REMOVE_NONE = __get_boolean(
"PAPERLESS_FILENAME_FORMAT_REMOVE_NONE",
"NO",
)
THUMBNAIL_FONT_NAME = os.getenv(
"PAPERLESS_THUMBNAIL_FONT_NAME",
"/usr/share/fonts/liberation/LiberationSerif-Regular.ttf",
)
# TODO: this should not have a prefix.
# Tika settings
PAPERLESS_TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
PAPERLESS_TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")

View File

@@ -19,6 +19,7 @@ from documents.views import SavedViewViewSet
from documents.views import SearchAutoCompleteView
from documents.views import SelectionDataView
from documents.views import StatisticsView
from documents.views import StoragePathViewSet
from documents.views import TagViewSet
from documents.views import UiSettingsView
from documents.views import UnifiedSearchViewSet
@@ -34,6 +35,7 @@ api_router.register(r"documents", UnifiedSearchViewSet)
api_router.register(r"logs", LogViewSet, basename="logs")
api_router.register(r"tags", TagViewSet)
api_router.register(r"saved_views", SavedViewViewSet)
api_router.register(r"storage_paths", StoragePathViewSet)
urlpatterns = [