mirror of https://github.com/paperless-ngx/paperless-ngx.git (synced 2026-01-30 23:08:59 -06:00)

Merge branch 'dev' into feature-better-asn-operations
@@ -13,6 +13,7 @@ from documents.models import PaperlessTask
 from documents.models import SavedView
 from documents.models import SavedViewFilterRule
 from documents.models import ShareLink
+from documents.models import ShareLinkBundle
 from documents.models import StoragePath
 from documents.models import Tag
 from documents.tasks import update_document_parent_tags
@@ -184,6 +185,22 @@ class ShareLinksAdmin(GuardedModelAdmin):
         return super().get_queryset(request).select_related("document__correspondent")


+class ShareLinkBundleAdmin(GuardedModelAdmin):
+    list_display = ("created", "status", "expiration", "owner", "slug")
+    list_filter = ("status", "created", "expiration", "owner")
+    search_fields = ("slug",)
+
+    def get_queryset(self, request):  # pragma: no cover
+        return (
+            super()
+            .get_queryset(request)
+            .select_related("owner")
+            .prefetch_related(
+                "documents",
+            )
+        )
+
+
 class CustomFieldsAdmin(GuardedModelAdmin):
     fields = ("name", "created", "data_type")
     readonly_fields = ("created", "data_type")
@@ -215,6 +232,7 @@ admin.site.register(StoragePath, StoragePathAdmin)
 admin.site.register(PaperlessTask, TaskAdmin)
 admin.site.register(Note, NotesAdmin)
 admin.site.register(ShareLink, ShareLinksAdmin)
+admin.site.register(ShareLinkBundle, ShareLinkBundleAdmin)
 admin.site.register(CustomField, CustomFieldsAdmin)
 admin.site.register(CustomFieldInstance, CustomFieldInstancesAdmin)
@@ -779,19 +779,45 @@ class ConsumerPreflightPlugin(
             Q(checksum=checksum) | Q(archive_checksum=checksum),
         )
         if existing_doc.exists():
-            msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
-            log_msg = f"Not consuming {self.filename}: It is a duplicate of {existing_doc.get().title} (#{existing_doc.get().pk})."
+            existing_doc = existing_doc.order_by("-created")
+            duplicates_in_trash = existing_doc.filter(deleted_at__isnull=False)
+            log_msg = (
+                f"Consuming duplicate {self.filename}: "
+                f"{existing_doc.count()} existing document(s) share the same content."
+            )

-            if existing_doc.first().deleted_at is not None:
-                msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
-                log_msg += " Note: existing document is in the trash."
+            if duplicates_in_trash.exists():
+                log_msg += " Note: at least one existing document is in the trash."
+
+            self.log.warning(log_msg)

             if settings.CONSUMER_DELETE_DUPLICATES:
+                duplicate = existing_doc.first()
+                duplicate_label = (
+                    duplicate.title
+                    or duplicate.original_filename
+                    or (Path(duplicate.filename).name if duplicate.filename else None)
+                    or str(duplicate.pk)
+                )
+
                 Path(self.input_doc.original_file).unlink()
-            self._fail(
-                msg,
-                log_msg,
-            )
+
+                failure_msg = (
+                    f"Not consuming {self.filename}: "
+                    f"It is a duplicate of {duplicate_label} (#{duplicate.pk})"
+                )
+                status_msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
+
+                if duplicates_in_trash.exists():
+                    status_msg = (
+                        ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
+                    )
+                    failure_msg += " Note: existing document is in the trash."
+
+                self._fail(
+                    status_msg,
+                    failure_msg,
+                )

     def pre_check_directories(self):
         """
@@ -118,7 +118,7 @@ class DocumentMetadataOverrides:
             ).values_list("id", flat=True),
         )
         overrides.custom_fields = {
-            custom_field.id: custom_field.value
+            custom_field.field.id: custom_field.value
             for custom_field in doc.custom_fields.all()
         }
@@ -39,6 +39,7 @@ from documents.models import Document
 from documents.models import DocumentType
 from documents.models import PaperlessTask
 from documents.models import ShareLink
+from documents.models import ShareLinkBundle
 from documents.models import StoragePath
 from documents.models import Tag

@@ -796,6 +797,29 @@ class ShareLinkFilterSet(FilterSet):
         }


+class ShareLinkBundleFilterSet(FilterSet):
+    documents = Filter(method="filter_documents")
+
+    class Meta:
+        model = ShareLinkBundle
+        fields = {
+            "created": DATETIME_KWARGS,
+            "expiration": DATETIME_KWARGS,
+            "status": ["exact"],
+        }
+
+    def filter_documents(self, queryset, name, value):
+        ids = []
+        if value:
+            try:
+                ids = [int(item) for item in value.split(",") if item]
+            except ValueError:
+                return queryset.none()
+        if not ids:
+            return queryset
+        return queryset.filter(documents__in=ids).distinct()
+
+
 class PaperlessTaskFilterSet(FilterSet):
     acknowledged = BooleanFilter(
         label="Acknowledged",
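For orientation (a sketch, not part of the diff): the documents filter takes a comma-separated id list, is a no-op when the value is empty, and returns an empty queryset on non-numeric input. Assuming a configured Django environment:

from documents.filters import ShareLinkBundleFilterSet
from documents.models import ShareLinkBundle

qs = ShareLinkBundle.objects.all()
fs = ShareLinkBundleFilterSet()

fs.filter_documents(qs, "documents", "1,2")  # bundles containing document 1 or 2
fs.filter_documents(qs, "documents", "")     # empty value: queryset unchanged
fs.filter_documents(qs, "documents", "1,x")  # non-numeric id: queryset.none()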
@@ -602,7 +602,7 @@ def rewrite_natural_date_keywords(query_string: str) -> str:

         case "this year":
             start = datetime(local_now.year, 1, 1, 0, 0, 0, tzinfo=tz)
-            end = datetime.combine(today, time.max, tzinfo=tz)
+            end = datetime(local_now.year, 12, 31, 23, 59, 59, tzinfo=tz)

         case "previous week":
             days_since_monday = local_now.weekday()
@@ -403,6 +403,18 @@ def existing_document_matches_workflow(
             f"Document tags {list(document.tags.all())} include excluded tags {list(trigger_has_not_tags_qs)}",
         )

+    allowed_correspondent_ids = set(
+        trigger.filter_has_any_correspondents.values_list("id", flat=True),
+    )
+    if (
+        allowed_correspondent_ids
+        and document.correspondent_id not in allowed_correspondent_ids
+    ):
+        return (
+            False,
+            f"Document correspondent {document.correspondent} is not one of {list(trigger.filter_has_any_correspondents.all())}",
+        )
+
     # Document correspondent vs trigger has_correspondent
     if (
         trigger.filter_has_correspondent_id is not None
@@ -424,6 +436,17 @@ def existing_document_matches_workflow(
             f"Document correspondent {document.correspondent} is excluded by {list(trigger.filter_has_not_correspondents.all())}",
         )

+    allowed_document_type_ids = set(
+        trigger.filter_has_any_document_types.values_list("id", flat=True),
+    )
+    if allowed_document_type_ids and (
+        document.document_type_id not in allowed_document_type_ids
+    ):
+        return (
+            False,
+            f"Document doc type {document.document_type} is not one of {list(trigger.filter_has_any_document_types.all())}",
+        )
+
     # Document document_type vs trigger has_document_type
     if (
         trigger.filter_has_document_type_id is not None
@@ -445,6 +468,17 @@ def existing_document_matches_workflow(
             f"Document doc type {document.document_type} is excluded by {list(trigger.filter_has_not_document_types.all())}",
         )

+    allowed_storage_path_ids = set(
+        trigger.filter_has_any_storage_paths.values_list("id", flat=True),
+    )
+    if allowed_storage_path_ids and (
+        document.storage_path_id not in allowed_storage_path_ids
+    ):
+        return (
+            False,
+            f"Document storage path {document.storage_path} is not one of {list(trigger.filter_has_any_storage_paths.all())}",
+        )
+
     # Document storage_path vs trigger has_storage_path
     if (
         trigger.filter_has_storage_path_id is not None
@@ -532,6 +566,10 @@ def prefilter_documents_by_workflowtrigger(

     # Correspondent, DocumentType, etc. filtering

+    if trigger.filter_has_any_correspondents.exists():
+        documents = documents.filter(
+            correspondent__in=trigger.filter_has_any_correspondents.all(),
+        )
     if trigger.filter_has_correspondent is not None:
         documents = documents.filter(
             correspondent=trigger.filter_has_correspondent,
@@ -541,6 +579,10 @@ def prefilter_documents_by_workflowtrigger(
             correspondent__in=trigger.filter_has_not_correspondents.all(),
         )

+    if trigger.filter_has_any_document_types.exists():
+        documents = documents.filter(
+            document_type__in=trigger.filter_has_any_document_types.all(),
+        )
     if trigger.filter_has_document_type is not None:
         documents = documents.filter(
             document_type=trigger.filter_has_document_type,
@@ -550,6 +592,10 @@ def prefilter_documents_by_workflowtrigger(
             document_type__in=trigger.filter_has_not_document_types.all(),
         )

+    if trigger.filter_has_any_storage_paths.exists():
+        documents = documents.filter(
+            storage_path__in=trigger.filter_has_any_storage_paths.all(),
+        )
     if trigger.filter_has_storage_path is not None:
         documents = documents.filter(
             storage_path=trigger.filter_has_storage_path,
@@ -604,8 +650,11 @@ def document_matches_workflow(
             "filter_has_tags",
             "filter_has_all_tags",
             "filter_has_not_tags",
+            "filter_has_any_document_types",
             "filter_has_not_document_types",
+            "filter_has_any_correspondents",
             "filter_has_not_correspondents",
+            "filter_has_any_storage_paths",
             "filter_has_not_storage_paths",
         )
     )
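The three new allow-list checks above share one pattern, which a hypothetical helper distills (a sketch, not code from this branch): an empty set means the trigger imposes no restriction on that attribute; otherwise the document's foreign-key id must be a member.

def passes_any_of(allowed_ids: set[int], actual_id: int | None) -> bool:
    # Empty allow-list: the trigger does not restrict on this attribute.
    return not allowed_ids or actual_id in allowed_ids

assert passes_any_of(set(), None)       # no restriction configured
assert not passes_any_of({1, 2}, None)  # restriction set, document has no value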
@@ -0,0 +1,43 @@
# Generated by Django 5.2.7 on 2025-12-17 22:25

from django.db import migrations
from django.db import models


class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0004_remove_document_storage_type"),
    ]

    operations = [
        migrations.AddField(
            model_name="workflowtrigger",
            name="filter_has_any_correspondents",
            field=models.ManyToManyField(
                blank=True,
                related_name="workflowtriggers_has_any_correspondent",
                to="documents.correspondent",
                verbose_name="has one of these correspondents",
            ),
        ),
        migrations.AddField(
            model_name="workflowtrigger",
            name="filter_has_any_document_types",
            field=models.ManyToManyField(
                blank=True,
                related_name="workflowtriggers_has_any_document_type",
                to="documents.documenttype",
                verbose_name="has one of these document types",
            ),
        ),
        migrations.AddField(
            model_name="workflowtrigger",
            name="filter_has_any_storage_paths",
            field=models.ManyToManyField(
                blank=True,
                related_name="workflowtriggers_has_any_storage_path",
                to="documents.storagepath",
                verbose_name="has one of these storage paths",
            ),
        ),
    ]
@@ -0,0 +1,23 @@
# Generated by Django 5.2.7 on 2026-01-14 17:45

from django.db import migrations
from django.db import models


class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0005_workflowtrigger_filter_has_any_correspondents_and_more"),
    ]

    operations = [
        migrations.AlterField(
            model_name="document",
            name="checksum",
            field=models.CharField(
                editable=False,
                max_length=32,
                verbose_name="checksum",
                help_text="The checksum of the original document.",
            ),
        ),
    ]
src/documents/migrations/0007_document_content_length.py (new file, 25 lines)
@@ -0,0 +1,25 @@
# Generated by Django 5.2.6 on 2026-01-24 07:33

import django.db.models.functions.text
from django.db import migrations
from django.db import models


class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0006_alter_document_checksum_unique"),
    ]

    operations = [
        migrations.AddField(
            model_name="document",
            name="content_length",
            field=models.GeneratedField(
                db_persist=True,
                expression=django.db.models.functions.text.Length("content"),
                null=False,
                help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
                output_field=models.PositiveIntegerField(default=0),
            ),
        ),
    ]
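For context (a sketch, not part of the diff): because content_length is a persisted generated column, statistics can be aggregated entirely in SQL without loading document content into Python. Assuming the migrated schema:

from django.db.models import Avg, Sum
from documents.models import Document

stats = Document.objects.aggregate(
    total_chars=Sum("content_length"),   # maintained by the database
    avg_chars=Avg("content_length"),
)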
src/documents/migrations/0008_sharelinkbundle.py (new file, 177 lines)
@@ -0,0 +1,177 @@
# Generated by Django 5.2.9 on 2026-01-27 01:09

import django.db.models.deletion
import django.db.models.functions.text
import django.utils.timezone
from django.conf import settings
from django.contrib.auth.management import create_permissions
from django.contrib.auth.models import Group
from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
from django.db import migrations
from django.db import models


def grant_share_link_bundle_permissions(apps, schema_editor):
    # Ensure newly introduced permissions are created for all apps
    for app_config in apps.get_app_configs():
        app_config.models_module = True
        create_permissions(app_config, apps=apps, verbosity=0)
        app_config.models_module = None

    add_document_perm = Permission.objects.filter(codename="add_document").first()
    share_bundle_permissions = Permission.objects.filter(
        codename__contains="sharelinkbundle",
    )

    users = User.objects.filter(user_permissions=add_document_perm).distinct()
    for user in users:
        user.user_permissions.add(*share_bundle_permissions)

    groups = Group.objects.filter(permissions=add_document_perm).distinct()
    for group in groups:
        group.permissions.add(*share_bundle_permissions)


def revoke_share_link_bundle_permissions(apps, schema_editor):
    share_bundle_permissions = Permission.objects.filter(
        codename__contains="sharelinkbundle",
    )
    for user in User.objects.all():
        user.user_permissions.remove(*share_bundle_permissions)
    for group in Group.objects.all():
        group.permissions.remove(*share_bundle_permissions)


class Migration(migrations.Migration):
    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ("documents", "0007_document_content_length"),
    ]

    operations = [
        migrations.CreateModel(
            name="ShareLinkBundle",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "created",
                    models.DateTimeField(
                        blank=True,
                        db_index=True,
                        default=django.utils.timezone.now,
                        editable=False,
                        verbose_name="created",
                    ),
                ),
                (
                    "expiration",
                    models.DateTimeField(
                        blank=True,
                        db_index=True,
                        null=True,
                        verbose_name="expiration",
                    ),
                ),
                (
                    "slug",
                    models.SlugField(
                        blank=True,
                        editable=False,
                        unique=True,
                        verbose_name="slug",
                    ),
                ),
                (
                    "file_version",
                    models.CharField(
                        choices=[("archive", "Archive"), ("original", "Original")],
                        default="archive",
                        max_length=50,
                    ),
                ),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("pending", "Pending"),
                            ("processing", "Processing"),
                            ("ready", "Ready"),
                            ("failed", "Failed"),
                        ],
                        default="pending",
                        max_length=50,
                    ),
                ),
                (
                    "size_bytes",
                    models.PositiveIntegerField(
                        blank=True,
                        null=True,
                        verbose_name="size (bytes)",
                    ),
                ),
                (
                    "last_error",
                    models.JSONField(
                        blank=True,
                        null=True,
                        default=None,
                        verbose_name="last error",
                    ),
                ),
                (
                    "file_path",
                    models.CharField(
                        blank=True,
                        max_length=512,
                        verbose_name="file path",
                    ),
                ),
                (
                    "built_at",
                    models.DateTimeField(
                        blank=True,
                        null=True,
                        verbose_name="built at",
                    ),
                ),
                (
                    "documents",
                    models.ManyToManyField(
                        related_name="share_link_bundles",
                        to="documents.document",
                        verbose_name="documents",
                    ),
                ),
                (
                    "owner",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name="share_link_bundles",
                        to=settings.AUTH_USER_MODEL,
                        verbose_name="owner",
                    ),
                ),
            ],
            options={
                "ordering": ("-created",),
                "verbose_name": "share link bundle",
                "verbose_name_plural": "share link bundles",
            },
        ),
        migrations.RunPython(
            grant_share_link_bundle_permissions,
            reverse_code=revoke_share_link_bundle_permissions,
        ),
    ]
@@ -20,7 +20,9 @@ if settings.AUDIT_LOG_ENABLED:
     from auditlog.registry import auditlog

 from django.db.models import Case
+from django.db.models import PositiveIntegerField
 from django.db.models.functions import Cast
+from django.db.models.functions import Length
 from django.db.models.functions import Substr
 from django_softdelete.models import SoftDeleteModel

@@ -192,6 +194,15 @@ class Document(SoftDeleteModel, ModelWithOwner):
         ),
     )

+    content_length = models.GeneratedField(
+        expression=Length("content"),
+        output_field=PositiveIntegerField(default=0),
+        db_persist=True,
+        null=False,
+        serialize=False,
+        help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
+    )
+
     mime_type = models.CharField(_("mime type"), max_length=256, editable=False)

     tags = models.ManyToManyField(
@@ -205,7 +216,6 @@ class Document(SoftDeleteModel, ModelWithOwner):
         _("checksum"),
         max_length=32,
         editable=False,
-        unique=True,
         help_text=_("The checksum of the original document."),
     )
@@ -756,6 +766,114 @@ class ShareLink(SoftDeleteModel):
         return f"Share Link for {self.document.title}"


+class ShareLinkBundle(models.Model):
+    class Status(models.TextChoices):
+        PENDING = ("pending", _("Pending"))
+        PROCESSING = ("processing", _("Processing"))
+        READY = ("ready", _("Ready"))
+        FAILED = ("failed", _("Failed"))
+
+    created = models.DateTimeField(
+        _("created"),
+        default=timezone.now,
+        db_index=True,
+        blank=True,
+        editable=False,
+    )
+
+    expiration = models.DateTimeField(
+        _("expiration"),
+        blank=True,
+        null=True,
+        db_index=True,
+    )
+
+    slug = models.SlugField(
+        _("slug"),
+        db_index=True,
+        unique=True,
+        blank=True,
+        editable=False,
+    )
+
+    owner = models.ForeignKey(
+        User,
+        blank=True,
+        null=True,
+        related_name="share_link_bundles",
+        on_delete=models.SET_NULL,
+        verbose_name=_("owner"),
+    )
+
+    file_version = models.CharField(
+        max_length=50,
+        choices=ShareLink.FileVersion.choices,
+        default=ShareLink.FileVersion.ARCHIVE,
+    )
+
+    status = models.CharField(
+        max_length=50,
+        choices=Status.choices,
+        default=Status.PENDING,
+    )
+
+    size_bytes = models.PositiveIntegerField(
+        _("size (bytes)"),
+        blank=True,
+        null=True,
+    )
+
+    last_error = models.JSONField(
+        _("last error"),
+        blank=True,
+        null=True,
+        default=None,
+    )
+
+    file_path = models.CharField(
+        _("file path"),
+        max_length=512,
+        blank=True,
+    )
+
+    built_at = models.DateTimeField(
+        _("built at"),
+        null=True,
+        blank=True,
+    )
+
+    documents = models.ManyToManyField(
+        "documents.Document",
+        related_name="share_link_bundles",
+        verbose_name=_("documents"),
+    )
+
+    class Meta:
+        ordering = ("-created",)
+        verbose_name = _("share link bundle")
+        verbose_name_plural = _("share link bundles")
+
+    def __str__(self):
+        return _("Share link bundle %(slug)s") % {"slug": self.slug}
+
+    @property
+    def absolute_file_path(self) -> Path | None:
+        if not self.file_path:
+            return None
+        return (settings.SHARE_LINK_BUNDLE_DIR / Path(self.file_path)).resolve()
+
+    def remove_file(self):
+        if self.absolute_file_path is not None and self.absolute_file_path.exists():
+            try:
+                self.absolute_file_path.unlink()
+            except OSError:
+                pass
+
+    def delete(self, using=None, *, keep_parents=False):
+        self.remove_file()
+        return super().delete(using=using, keep_parents=keep_parents)
+
+
 class CustomField(models.Model):
     """
     Defines the name and type of a custom field
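A short usage note (a sketch under assumptions, not code from this branch): absolute_file_path anchors the stored relative file_path under SHARE_LINK_BUNDLE_DIR (a setting introduced by this branch), so callers never handle raw paths directly.

from documents.models import ShareLinkBundle

bundle = ShareLinkBundle.objects.filter(status=ShareLinkBundle.Status.READY).first()
if bundle and bundle.absolute_file_path and bundle.absolute_file_path.exists():
    archive_size = bundle.absolute_file_path.stat().st_size  # matches size_bytes once built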
@@ -946,7 +1064,7 @@ if settings.AUDIT_LOG_ENABLED:
     auditlog.register(
         Document,
         m2m_fields={"tags"},
-        exclude_fields=["modified"],
+        exclude_fields=["content_length", "modified"],
     )
     auditlog.register(Correspondent)
     auditlog.register(Tag)
@@ -1066,6 +1184,13 @@ class WorkflowTrigger(models.Model):
         verbose_name=_("has this document type"),
     )

+    filter_has_any_document_types = models.ManyToManyField(
+        DocumentType,
+        blank=True,
+        related_name="workflowtriggers_has_any_document_type",
+        verbose_name=_("has one of these document types"),
+    )
+
     filter_has_not_document_types = models.ManyToManyField(
         DocumentType,
         blank=True,
@@ -1088,6 +1213,13 @@ class WorkflowTrigger(models.Model):
         verbose_name=_("does not have these correspondent(s)"),
     )

+    filter_has_any_correspondents = models.ManyToManyField(
+        Correspondent,
+        blank=True,
+        related_name="workflowtriggers_has_any_correspondent",
+        verbose_name=_("has one of these correspondents"),
+    )
+
     filter_has_storage_path = models.ForeignKey(
         StoragePath,
         null=True,
@@ -1096,6 +1228,13 @@ class WorkflowTrigger(models.Model):
         verbose_name=_("has this storage path"),
     )

+    filter_has_any_storage_paths = models.ManyToManyField(
+        StoragePath,
+        blank=True,
+        related_name="workflowtriggers_has_any_storage_path",
+        verbose_name=_("has one of these storage paths"),
+    )
+
     filter_has_not_storage_paths = models.ManyToManyField(
         StoragePath,
         blank=True,
@@ -148,13 +148,29 @@ def get_document_count_filter_for_user(user):
     )


-def get_objects_for_user_owner_aware(user, perms, Model) -> QuerySet:
-    objects_owned = Model.objects.filter(owner=user)
-    objects_unowned = Model.objects.filter(owner__isnull=True)
+def get_objects_for_user_owner_aware(
+    user,
+    perms,
+    Model,
+    *,
+    include_deleted=False,
+) -> QuerySet:
+    """
+    Return objects that the user owns, that are unowned, or that the user has explicit perms on.
+    When include_deleted is True, soft-deleted items are also included.
+    """
+    manager = (
+        Model.global_objects
+        if include_deleted and hasattr(Model, "global_objects")
+        else Model.objects
+    )
+
+    objects_owned = manager.filter(owner=user)
+    objects_unowned = manager.filter(owner__isnull=True)
     objects_with_perms = get_objects_for_user(
         user=user,
         perms=perms,
-        klass=Model,
+        klass=manager.all(),
         accept_global_perms=False,
     )
     return objects_owned | objects_unowned | objects_with_perms
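Usage sketch (illustrative, not part of the diff): duplicate lookups need to see trashed documents too, so they opt into the soft-delete-aware manager.

from django.contrib.auth.models import User
from documents.models import Document
from documents.permissions import get_objects_for_user_owner_aware

user = User.objects.first()  # hypothetical: any authenticated user
visible_docs = get_objects_for_user_owner_aware(
    user,
    "documents.view_document",
    Document,
    include_deleted=True,  # also surface documents sitting in the trash
)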
@@ -4,6 +4,7 @@ import logging
 import math
 import re
 from datetime import datetime
+from datetime import timedelta
 from decimal import Decimal
 from typing import TYPE_CHECKING
 from typing import Literal
@@ -23,7 +24,9 @@ from django.core.validators import MinValueValidator
 from django.core.validators import RegexValidator
 from django.core.validators import integer_validator
 from django.db.models import Count
+from django.db.models import Q
 from django.db.models.functions import Lower
 from django.utils import timezone
+from django.utils.crypto import get_random_string
 from django.utils.dateparse import parse_datetime
 from django.utils.text import slugify
@@ -61,6 +64,7 @@ from documents.models import PaperlessTask
 from documents.models import SavedView
 from documents.models import SavedViewFilterRule
 from documents.models import ShareLink
+from documents.models import ShareLinkBundle
 from documents.models import StoragePath
 from documents.models import Tag
 from documents.models import UiSettings
@@ -72,6 +76,7 @@ from documents.models import WorkflowTrigger
 from documents.parsers import is_mime_type_supported
 from documents.permissions import get_document_count_filter_for_user
 from documents.permissions import get_groups_with_only_permission
+from documents.permissions import get_objects_for_user_owner_aware
 from documents.permissions import set_permissions_for_object
 from documents.regex import validate_regex_pattern
 from documents.templating.filepath import validate_filepath_template_and_render
@@ -82,6 +87,9 @@ from documents.validators import url_validator
 if TYPE_CHECKING:
     from collections.abc import Iterable

+    from django.db.models.query import QuerySet
+

 logger = logging.getLogger("paperless.serializers")

@@ -1014,6 +1022,32 @@ class NotesSerializer(serializers.ModelSerializer):
         return ret


+def _get_viewable_duplicates(
+    document: Document,
+    user: User | None,
+) -> QuerySet[Document]:
+    checksums = {document.checksum}
+    if document.archive_checksum:
+        checksums.add(document.archive_checksum)
+    duplicates = Document.global_objects.filter(
+        Q(checksum__in=checksums) | Q(archive_checksum__in=checksums),
+    ).exclude(pk=document.pk)
+    duplicates = duplicates.order_by("-created")
+    allowed = get_objects_for_user_owner_aware(
+        user,
+        "documents.view_document",
+        Document,
+        include_deleted=True,
+    )
+    return duplicates.filter(id__in=allowed)
+
+
+class DuplicateDocumentSummarySerializer(serializers.Serializer):
+    id = serializers.IntegerField()
+    title = serializers.CharField()
+    deleted_at = serializers.DateTimeField(allow_null=True)
+
+
 @extend_schema_serializer(
     deprecate_fields=["created_date"],
 )
@@ -1031,6 +1065,7 @@ class DocumentSerializer(
     archived_file_name = SerializerMethodField()
     created_date = serializers.DateField(required=False)
     page_count = SerializerMethodField()
+    duplicate_documents = SerializerMethodField()

     notes = NotesSerializer(many=True, required=False, read_only=True)

@@ -1056,6 +1091,16 @@ class DocumentSerializer(
     def get_page_count(self, obj) -> int | None:
         return obj.page_count

+    @extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
+    def get_duplicate_documents(self, obj):
+        view = self.context.get("view")
+        if view and getattr(view, "action", None) != "retrieve":
+            return []
+        request = self.context.get("request")
+        user = request.user if request else None
+        duplicates = _get_viewable_duplicates(obj, user)
+        return list(duplicates.values("id", "title", "deleted_at"))
+
     def get_original_file_name(self, obj) -> str | None:
         return obj.original_filename

@@ -1233,6 +1278,7 @@ class DocumentSerializer(
             "archive_serial_number",
             "original_file_name",
             "archived_file_name",
+            "duplicate_documents",
             "owner",
             "permissions",
             "user_can_change",
@@ -2094,10 +2140,12 @@ class TasksViewSerializer(OwnedObjectSerializer):
             "result",
             "acknowledged",
             "related_document",
+            "duplicate_documents",
             "owner",
         )

     related_document = serializers.SerializerMethodField()
+    duplicate_documents = serializers.SerializerMethodField()
     created_doc_re = re.compile(r"New document id (\d+) created")
     duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")

@@ -2122,6 +2170,17 @@ class TasksViewSerializer(OwnedObjectSerializer):

         return result

+    @extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
+    def get_duplicate_documents(self, obj):
+        related_document = self.get_related_document(obj)
+        request = self.context.get("request")
+        user = request.user if request else None
+        document = Document.global_objects.filter(pk=related_document).first()
+        if not related_document or not user or not document:
+            return []
+        duplicates = _get_viewable_duplicates(document, user)
+        return list(duplicates.values("id", "title", "deleted_at"))
+

 class RunTaskViewSerializer(serializers.Serializer):
     task_name = serializers.ChoiceField(
@@ -2172,6 +2231,104 @@ class ShareLinkSerializer(OwnedObjectSerializer):
         return super().create(validated_data)


+class ShareLinkBundleSerializer(OwnedObjectSerializer):
+    document_ids = serializers.ListField(
+        child=serializers.IntegerField(min_value=1),
+        allow_empty=False,
+        write_only=True,
+    )
+    expiration_days = serializers.IntegerField(
+        required=False,
+        allow_null=True,
+        min_value=1,
+        write_only=True,
+    )
+    documents = serializers.PrimaryKeyRelatedField(
+        many=True,
+        read_only=True,
+    )
+    document_count = SerializerMethodField()
+
+    class Meta:
+        model = ShareLinkBundle
+        fields = (
+            "id",
+            "created",
+            "expiration",
+            "expiration_days",
+            "slug",
+            "file_version",
+            "status",
+            "size_bytes",
+            "last_error",
+            "built_at",
+            "documents",
+            "document_ids",
+            "document_count",
+        )
+        read_only_fields = (
+            "id",
+            "created",
+            "expiration",
+            "slug",
+            "status",
+            "size_bytes",
+            "last_error",
+            "built_at",
+            "documents",
+            "document_count",
+        )
+
+    def validate_document_ids(self, value):
+        unique_ids = set(value)
+        if len(unique_ids) != len(value):
+            raise serializers.ValidationError(
+                _("Duplicate document identifiers are not allowed."),
+            )
+        return value
+
+    def create(self, validated_data):
+        document_ids = validated_data.pop("document_ids")
+        expiration_days = validated_data.pop("expiration_days", None)
+        validated_data["slug"] = get_random_string(50)
+        if expiration_days:
+            validated_data["expiration"] = timezone.now() + timedelta(
+                days=expiration_days,
+            )
+        else:
+            validated_data["expiration"] = None
+
+        share_link_bundle = super().create(validated_data)
+
+        documents = list(
+            Document.objects.filter(pk__in=document_ids).only(
+                "pk",
+            ),
+        )
+        documents_by_id = {doc.pk: doc for doc in documents}
+        missing = [
+            str(doc_id) for doc_id in document_ids if doc_id not in documents_by_id
+        ]
+        if missing:
+            raise serializers.ValidationError(
+                {
+                    "document_ids": _(
+                        "Documents not found: %(ids)s",
+                    )
+                    % {"ids": ", ".join(missing)},
+                },
+            )
+
+        ordered_documents = [documents_by_id[doc_id] for doc_id in document_ids]
+        share_link_bundle.documents.set(ordered_documents)
+        share_link_bundle.document_total = len(ordered_documents)
+
+        return share_link_bundle
+
+    def get_document_count(self, obj: ShareLinkBundle) -> int:
+        return getattr(obj, "document_total", None) or obj.documents.count()
+
+
 class BulkEditObjectsSerializer(SerializerWithPerms, SetPermissionsMixin):
     objects = serializers.ListField(
         required=True,
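To make the write path concrete, a request sketch (illustrative; it assumes an authenticated DRF test client against the /api/share_link_bundles/ route exercised by the tests below). Note that expiration_days is write-only and converted to an absolute expiration timestamp:

from rest_framework.test import APIClient

client = APIClient()
# client.force_authenticate(user)  # hypothetical authenticated user

payload = {
    "document_ids": [1, 2, 3],   # order is preserved on the bundle
    "file_version": "original",
    "expiration_days": 7,        # becomes expiration = now() + 7 days
}
response = client.post("/api/share_link_bundles/", payload, format="json")
# response.data["status"] == "pending"; the archive is built asynchronously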
@@ -2299,8 +2456,11 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer):
             "filter_has_all_tags",
             "filter_has_not_tags",
             "filter_custom_field_query",
+            "filter_has_any_correspondents",
             "filter_has_not_correspondents",
+            "filter_has_any_document_types",
             "filter_has_not_document_types",
+            "filter_has_any_storage_paths",
             "filter_has_not_storage_paths",
             "filter_has_correspondent",
             "filter_has_document_type",
@@ -2538,14 +2698,26 @@ class WorkflowSerializer(serializers.ModelSerializer):
         filter_has_tags = trigger.pop("filter_has_tags", None)
         filter_has_all_tags = trigger.pop("filter_has_all_tags", None)
         filter_has_not_tags = trigger.pop("filter_has_not_tags", None)
+        filter_has_any_correspondents = trigger.pop(
+            "filter_has_any_correspondents",
+            None,
+        )
         filter_has_not_correspondents = trigger.pop(
             "filter_has_not_correspondents",
             None,
         )
+        filter_has_any_document_types = trigger.pop(
+            "filter_has_any_document_types",
+            None,
+        )
         filter_has_not_document_types = trigger.pop(
             "filter_has_not_document_types",
             None,
         )
+        filter_has_any_storage_paths = trigger.pop(
+            "filter_has_any_storage_paths",
+            None,
+        )
         filter_has_not_storage_paths = trigger.pop(
             "filter_has_not_storage_paths",
             None,
@@ -2562,14 +2734,26 @@ class WorkflowSerializer(serializers.ModelSerializer):
             trigger_instance.filter_has_all_tags.set(filter_has_all_tags)
         if filter_has_not_tags is not None:
             trigger_instance.filter_has_not_tags.set(filter_has_not_tags)
+        if filter_has_any_correspondents is not None:
+            trigger_instance.filter_has_any_correspondents.set(
+                filter_has_any_correspondents,
+            )
         if filter_has_not_correspondents is not None:
             trigger_instance.filter_has_not_correspondents.set(
                 filter_has_not_correspondents,
             )
+        if filter_has_any_document_types is not None:
+            trigger_instance.filter_has_any_document_types.set(
+                filter_has_any_document_types,
+            )
         if filter_has_not_document_types is not None:
             trigger_instance.filter_has_not_document_types.set(
                 filter_has_not_document_types,
             )
+        if filter_has_any_storage_paths is not None:
+            trigger_instance.filter_has_any_storage_paths.set(
+                filter_has_any_storage_paths,
+            )
         if filter_has_not_storage_paths is not None:
             trigger_instance.filter_has_not_storage_paths.set(
                 filter_has_not_storage_paths,
@@ -3,8 +3,10 @@ import hashlib
 import logging
 import shutil
 import uuid
+import zipfile
 from pathlib import Path
 from tempfile import TemporaryDirectory
+from tempfile import mkstemp

 import tqdm
 from celery import Task
@@ -22,6 +24,8 @@ from whoosh.writing import AsyncWriter
 from documents import index
 from documents import sanity_checker
 from documents.barcodes import BarcodePlugin
+from documents.bulk_download import ArchiveOnlyStrategy
+from documents.bulk_download import OriginalsOnlyStrategy
 from documents.caching import clear_document_caches
 from documents.classifier import DocumentClassifier
 from documents.classifier import load_classifier
@@ -39,6 +43,8 @@ from documents.models import CustomFieldInstance
 from documents.models import Document
 from documents.models import DocumentType
 from documents.models import PaperlessTask
+from documents.models import ShareLink
+from documents.models import ShareLinkBundle
 from documents.models import StoragePath
 from documents.models import Tag
 from documents.models import WorkflowRun
@@ -625,3 +631,117 @@ def update_document_in_llm_index(document):
 @shared_task
 def remove_document_from_llm_index(document):
     llm_index_remove_document(document)
+
+
+@shared_task
+def build_share_link_bundle(bundle_id: int):
+    try:
+        bundle = (
+            ShareLinkBundle.objects.filter(pk=bundle_id)
+            .prefetch_related("documents")
+            .get()
+        )
+    except ShareLinkBundle.DoesNotExist:
+        logger.warning("Share link bundle %s no longer exists.", bundle_id)
+        return
+
+    bundle.remove_file()
+    bundle.status = ShareLinkBundle.Status.PROCESSING
+    bundle.last_error = None
+    bundle.size_bytes = None
+    bundle.built_at = None
+    bundle.file_path = ""
+    bundle.save(
+        update_fields=[
+            "status",
+            "last_error",
+            "size_bytes",
+            "built_at",
+            "file_path",
+        ],
+    )
+
+    documents = list(bundle.documents.all().order_by("pk"))
+
+    _, temp_zip_path_str = mkstemp(suffix=".zip", dir=settings.SCRATCH_DIR)
+    temp_zip_path = Path(temp_zip_path_str)
+
+    try:
+        strategy_class = (
+            ArchiveOnlyStrategy
+            if bundle.file_version == ShareLink.FileVersion.ARCHIVE
+            else OriginalsOnlyStrategy
+        )
+        with zipfile.ZipFile(temp_zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
+            strategy = strategy_class(zipf)
+            for document in documents:
+                strategy.add_document(document)
+
+        output_dir = settings.SHARE_LINK_BUNDLE_DIR
+        output_dir.mkdir(parents=True, exist_ok=True)
+        final_path = (output_dir / f"{bundle.slug}.zip").resolve()
+        if final_path.exists():
+            final_path.unlink()
+        shutil.move(temp_zip_path, final_path)
+
+        bundle.file_path = f"{bundle.slug}.zip"
+        bundle.size_bytes = final_path.stat().st_size
+        bundle.status = ShareLinkBundle.Status.READY
+        bundle.built_at = timezone.now()
+        bundle.last_error = None
+        bundle.save(
+            update_fields=[
+                "file_path",
+                "size_bytes",
+                "status",
+                "built_at",
+                "last_error",
+            ],
+        )
+        logger.info("Built share link bundle %s", bundle.pk)
+    except Exception as exc:
+        logger.exception(
+            "Failed to build share link bundle %s: %s",
+            bundle_id,
+            exc,
+        )
+        bundle.status = ShareLinkBundle.Status.FAILED
+        bundle.last_error = {
+            "bundle_id": bundle_id,
+            "exception_type": exc.__class__.__name__,
+            "message": str(exc),
+            "timestamp": timezone.now().isoformat(),
+        }
+        bundle.save(update_fields=["status", "last_error"])
+        try:
+            temp_zip_path.unlink()
+        except OSError:
+            pass
+        raise
+    finally:
+        try:
+            temp_zip_path.unlink(missing_ok=True)
+        except OSError:
+            pass
+
+
+@shared_task
+def cleanup_expired_share_link_bundles():
+    now = timezone.now()
+    expired_qs = ShareLinkBundle.objects.filter(
+        expiration__isnull=False,
+        expiration__lt=now,
+    )
+    count = 0
+    for bundle in expired_qs.iterator():
+        count += 1
+        try:
+            bundle.delete()
+        except Exception as exc:
+            logger.warning(
+                "Failed to delete expired share link bundle %s: %s",
+                bundle.pk,
+                exc,
+            )
+    if count:
+        logger.info("Deleted %s expired share link bundle(s)", count)
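End-to-end sketch (illustrative wiring, not part of the diff; names are from this branch): the API view enqueues the build after creating the bundle, and cleanup is intended to run on a periodic Celery beat schedule.

from documents.models import Document, ShareLinkBundle
from documents.tasks import build_share_link_bundle, cleanup_expired_share_link_bundles

bundle = ShareLinkBundle.objects.create(slug="example-slug")  # slug is normally random
bundle.documents.set(Document.objects.filter(pk__in=[1, 2]))
build_share_link_bundle.delay(bundle.pk)    # worker drives status pending -> processing -> ready
cleanup_expired_share_link_bundles.delay()  # normally scheduled; shown ad hoc here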
@@ -131,6 +131,10 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
         self.assertIn("content", results_full[0])
         self.assertIn("id", results_full[0])

+        # Content length is used internally for performance reasons.
+        # No need to expose this field.
+        self.assertNotIn("content_length", results_full[0])
+
         response = self.client.get("/api/documents/?fields=id", format="json")
         self.assertEqual(response.status_code, status.HTTP_200_OK)
         results = response.data["results"]
@@ -7,6 +7,7 @@ from django.contrib.auth.models import User
 from rest_framework import status
 from rest_framework.test import APITestCase

+from documents.models import Document
 from documents.models import PaperlessTask
 from documents.tests.utils import DirectoriesMixin
 from documents.views import TasksViewSet
@@ -258,7 +259,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
             task_id=str(uuid.uuid4()),
             task_file_name="task_one.pdf",
             status=celery.states.FAILURE,
-            result="test.pdf: Not consuming test.pdf: It is a duplicate.",
+            result="test.pdf: Unexpected error during ingestion.",
         )

         response = self.client.get(self.ENDPOINT)
@@ -270,7 +271,7 @@ class TestTasks(DirectoriesMixin, APITestCase):

         self.assertEqual(
             returned_data["result"],
-            "test.pdf: Not consuming test.pdf: It is a duplicate.",
+            "test.pdf: Unexpected error during ingestion.",
         )

     def test_task_name_webui(self):
@@ -325,20 +326,34 @@ class TestTasks(DirectoriesMixin, APITestCase):

         self.assertEqual(returned_data["task_file_name"], "anothertest.pdf")

-    def test_task_result_failed_duplicate_includes_related_doc(self):
+    def test_task_result_duplicate_warning_includes_count(self):
         """
         GIVEN:
-            - A celery task failed with a duplicate error
+            - A celery task succeeds, but a duplicate exists
         WHEN:
             - API call is made to get tasks
         THEN:
-            - The returned data includes a related document link
+            - The returned data includes duplicate warning metadata
         """
+        checksum = "duplicate-checksum"
+        Document.objects.create(
+            title="Existing",
+            content="",
+            mime_type="application/pdf",
+            checksum=checksum,
+        )
+        created_doc = Document.objects.create(
+            title="Created",
+            content="",
+            mime_type="application/pdf",
+            checksum=checksum,
+            archive_checksum="another-checksum",
+        )
         PaperlessTask.objects.create(
             task_id=str(uuid.uuid4()),
             task_file_name="task_one.pdf",
-            status=celery.states.FAILURE,
-            result="Not consuming task_one.pdf: It is a duplicate of task_one_existing.pdf (#1234).",
+            status=celery.states.SUCCESS,
+            result=f"Success. New document id {created_doc.pk} created",
         )

         response = self.client.get(self.ENDPOINT)
@@ -348,7 +363,7 @@ class TestTasks(DirectoriesMixin, APITestCase):

         returned_data = response.data[0]

-        self.assertEqual(returned_data["related_document"], "1234")
+        self.assertEqual(returned_data["related_document"], str(created_doc.pk))

     def test_run_train_classifier_task(self):
         """
@@ -186,8 +186,11 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
             "filter_has_tags": [self.t1.id],
             "filter_has_all_tags": [self.t2.id],
             "filter_has_not_tags": [self.t3.id],
+            "filter_has_any_correspondents": [self.c.id],
             "filter_has_not_correspondents": [self.c2.id],
+            "filter_has_any_document_types": [self.dt.id],
             "filter_has_not_document_types": [self.dt2.id],
+            "filter_has_any_storage_paths": [self.sp.id],
             "filter_has_not_storage_paths": [self.sp2.id],
             "filter_custom_field_query": json.dumps(
                 [
@@ -248,14 +251,26 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
             set(trigger.filter_has_not_tags.values_list("id", flat=True)),
             {self.t3.id},
         )
+        self.assertSetEqual(
+            set(trigger.filter_has_any_correspondents.values_list("id", flat=True)),
+            {self.c.id},
+        )
         self.assertSetEqual(
             set(trigger.filter_has_not_correspondents.values_list("id", flat=True)),
             {self.c2.id},
         )
+        self.assertSetEqual(
+            set(trigger.filter_has_any_document_types.values_list("id", flat=True)),
+            {self.dt.id},
+        )
         self.assertSetEqual(
             set(trigger.filter_has_not_document_types.values_list("id", flat=True)),
             {self.dt2.id},
         )
+        self.assertSetEqual(
+            set(trigger.filter_has_any_storage_paths.values_list("id", flat=True)),
+            {self.sp.id},
+        )
         self.assertSetEqual(
             set(trigger.filter_has_not_storage_paths.values_list("id", flat=True)),
             {self.sp2.id},
@@ -419,8 +434,11 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
             "filter_has_tags": [self.t1.id],
             "filter_has_all_tags": [self.t2.id],
             "filter_has_not_tags": [self.t3.id],
+            "filter_has_any_correspondents": [self.c.id],
             "filter_has_not_correspondents": [self.c2.id],
+            "filter_has_any_document_types": [self.dt.id],
             "filter_has_not_document_types": [self.dt2.id],
+            "filter_has_any_storage_paths": [self.sp.id],
             "filter_has_not_storage_paths": [self.sp2.id],
             "filter_custom_field_query": json.dumps(
                 ["AND", [[self.cf1.id, "exact", "value"]]],
@@ -450,14 +468,26 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
             workflow.triggers.first().filter_has_not_tags.first(),
             self.t3,
         )
+        self.assertEqual(
+            workflow.triggers.first().filter_has_any_correspondents.first(),
+            self.c,
+        )
         self.assertEqual(
             workflow.triggers.first().filter_has_not_correspondents.first(),
             self.c2,
         )
+        self.assertEqual(
+            workflow.triggers.first().filter_has_any_document_types.first(),
+            self.dt,
+        )
         self.assertEqual(
             workflow.triggers.first().filter_has_not_document_types.first(),
             self.dt2,
         )
+        self.assertEqual(
+            workflow.triggers.first().filter_has_any_storage_paths.first(),
+            self.sp,
+        )
         self.assertEqual(
             workflow.triggers.first().filter_has_not_storage_paths.first(),
             self.sp2,
@@ -478,21 +478,21 @@ class TestConsumer(
         with self.get_consumer(self.get_test_file()) as consumer:
             consumer.run()

-        with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
-            with self.get_consumer(self.get_test_file()) as consumer:
-                consumer.run()
+        with self.get_consumer(self.get_test_file()) as consumer:
+            consumer.run()

-        self._assert_first_last_send_progress(last_status="FAILED")
+        self.assertEqual(Document.objects.count(), 2)
+        self._assert_first_last_send_progress()

     def testDuplicates2(self):
         with self.get_consumer(self.get_test_file()) as consumer:
             consumer.run()

-        with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
-            with self.get_consumer(self.get_test_archive_file()) as consumer:
-                consumer.run()
+        with self.get_consumer(self.get_test_archive_file()) as consumer:
+            consumer.run()

-        self._assert_first_last_send_progress(last_status="FAILED")
+        self.assertEqual(Document.objects.count(), 2)
+        self._assert_first_last_send_progress()

     def testDuplicates3(self):
         with self.get_consumer(self.get_test_archive_file()) as consumer:
@@ -506,9 +506,10 @@ class TestConsumer(

         Document.objects.all().delete()

-        with self.assertRaisesMessage(ConsumerError, "document is in the trash"):
-            with self.get_consumer(self.get_test_file()) as consumer:
-                consumer.run()
+        with self.get_consumer(self.get_test_file()) as consumer:
+            consumer.run()
+
+        self.assertEqual(Document.objects.count(), 1)

     def testAsnExists(self):
         with self.get_consumer(
@@ -711,12 +712,45 @@ class TestConsumer(
         dst = self.get_test_file()
         self.assertIsFile(dst)

-        with self.assertRaises(ConsumerError):
+        expected_message = (
+            f"{dst.name}: Not consuming {dst.name}: "
+            f"It is a duplicate of {document.title} (#{document.pk})"
+        )
+
+        with self.assertRaisesMessage(ConsumerError, expected_message):
             with self.get_consumer(dst) as consumer:
                 consumer.run()

         self.assertIsNotFile(dst)
-        self._assert_first_last_send_progress(last_status="FAILED")
+        self.assertEqual(Document.objects.count(), 1)
+        self._assert_first_last_send_progress(last_status=ProgressStatusOptions.FAILED)
+
+    @override_settings(CONSUMER_DELETE_DUPLICATES=True)
+    def test_delete_duplicate_in_trash(self):
+        dst = self.get_test_file()
+        with self.get_consumer(dst) as consumer:
+            consumer.run()
+
+        # Move the existing document to trash
+        document = Document.objects.first()
+        document.delete()
+
+        dst = self.get_test_file()
+        self.assertIsFile(dst)
+
+        expected_message = (
+            f"{dst.name}: Not consuming {dst.name}: "
+            f"It is a duplicate of {document.title} (#{document.pk})"
+            " Note: existing document is in the trash."
+        )
+
+        with self.assertRaisesMessage(ConsumerError, expected_message):
+            with self.get_consumer(dst) as consumer:
+                consumer.run()
+
+        self.assertIsNotFile(dst)
+        self.assertEqual(Document.global_objects.count(), 1)
+        self.assertEqual(Document.objects.count(), 0)

     @override_settings(CONSUMER_DELETE_DUPLICATES=False)
     def test_no_delete_duplicate(self):
@@ -736,15 +770,12 @@ class TestConsumer(
         dst = self.get_test_file()
         self.assertIsFile(dst)

-        with self.assertRaisesRegex(
-            ConsumerError,
-            r"sample\.pdf: Not consuming sample\.pdf: It is a duplicate of sample \(#\d+\)",
-        ):
-            with self.get_consumer(dst) as consumer:
-                consumer.run()
+        with self.get_consumer(dst) as consumer:
+            consumer.run()

-        self.assertIsFile(dst)
-        self._assert_first_last_send_progress(last_status="FAILED")
+        self.assertIsNotFile(dst)
+        self.assertEqual(Document.objects.count(), 2)
+        self._assert_first_last_send_progress()

     @override_settings(FILENAME_FORMAT="{title}")
     @mock.patch("documents.parsers.document_consumer_declaration.send")
@@ -224,17 +224,18 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
         THEN:
             - The collated file gets put into foo/bar
         """
+        # TODO: parameterize this instead
         for path in [
             Path("foo") / "bar" / "double-sided",
             Path("double-sided") / "foo" / "bar",
         ]:
-            with self.subTest(path=path):
+            with self.subTest(path=str(path)):
                 # Ensure we get fresh directories for each run
                 self.tearDown()
                 self.setUp()

                 self.create_staging_file()
-                self.consume_file("double-sided-odd.pdf", path / "foo.pdf")
+                self.consume_file("double-sided-odd.pdf", Path(path) / "foo.pdf")
                 self.assertIsFile(
                     self.dirs.consumption_dir / "foo" / "bar" / "foo-collated.pdf",
                 )
@@ -180,7 +180,7 @@ class TestRewriteNaturalDateKeywords(SimpleTestCase):
         (
             "added:this year",
             datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc),
-            ("added:[20250101", "TO 20250715"),
+            ("added:[20250101", "TO 20251231"),
         ),
         (
             "added:previous year",
@@ -241,6 +241,10 @@ class TestExportImport(
             checksum = hashlib.md5(f.read()).hexdigest()
             self.assertEqual(checksum, element["fields"]["checksum"])

+            # Generated field "content_length" should not be exported,
+            # it is automatically computed during import.
+            self.assertNotIn("content_length", element["fields"])
+
             if document_exporter.EXPORTER_ARCHIVE_NAME in element:
                 fname = (
                     self.target / element[document_exporter.EXPORTER_ARCHIVE_NAME]
src/documents/tests/test_migration_share_link_bundle.py (new file, 51 lines)
@@ -0,0 +1,51 @@
from documents.tests.utils import TestMigrations


class TestMigrateShareLinkBundlePermissions(TestMigrations):
    migrate_from = "0007_document_content_length"
    migrate_to = "0008_sharelinkbundle"

    def setUpBeforeMigration(self, apps):
        User = apps.get_model("auth", "User")
        Group = apps.get_model("auth", "Group")
        self.Permission = apps.get_model("auth", "Permission")
        self.user = User.objects.create(username="user1")
        self.group = Group.objects.create(name="group1")
        add_document = self.Permission.objects.get(codename="add_document")
        self.user.user_permissions.add(add_document.id)
        self.group.permissions.add(add_document.id)

    def test_share_link_permissions_granted_to_add_document_holders(self):
        share_perms = self.Permission.objects.filter(
            codename__contains="sharelinkbundle",
        )
        self.assertTrue(self.user.user_permissions.filter(pk__in=share_perms).exists())
        self.assertTrue(self.group.permissions.filter(pk__in=share_perms).exists())


class TestReverseMigrateShareLinkBundlePermissions(TestMigrations):
    migrate_from = "0008_sharelinkbundle"
    migrate_to = "0007_document_content_length"

    def setUpBeforeMigration(self, apps):
        User = apps.get_model("auth", "User")
        Group = apps.get_model("auth", "Group")
        self.Permission = apps.get_model("auth", "Permission")
        self.user = User.objects.create(username="user1")
        self.group = Group.objects.create(name="group1")
        add_document = self.Permission.objects.get(codename="add_document")
        share_perms = self.Permission.objects.filter(
            codename__contains="sharelinkbundle",
        )
        self.share_perm_ids = list(share_perms.values_list("id", flat=True))

        self.user.user_permissions.add(add_document.id, *self.share_perm_ids)
        self.group.permissions.add(add_document.id, *self.share_perm_ids)

    def test_share_link_permissions_revoked_on_reverse(self):
        self.assertFalse(
            self.user.user_permissions.filter(pk__in=self.share_perm_ids).exists(),
        )
        self.assertFalse(
            self.group.permissions.filter(pk__in=self.share_perm_ids).exists(),
        )
src/documents/tests/test_share_link_bundles.py (new file, 536 lines)
@@ -0,0 +1,536 @@
from __future__ import annotations

import zipfile
from datetime import timedelta
from pathlib import Path
from unittest import mock

from django.conf import settings
from django.contrib.auth.models import User
from django.utils import timezone
from rest_framework import serializers
from rest_framework import status
from rest_framework.test import APITestCase

from documents.filters import ShareLinkBundleFilterSet
from documents.models import ShareLink
from documents.models import ShareLinkBundle
from documents.serialisers import ShareLinkBundleSerializer
from documents.tasks import build_share_link_bundle
from documents.tasks import cleanup_expired_share_link_bundles
from documents.tests.factories import DocumentFactory
from documents.tests.utils import DirectoriesMixin


class ShareLinkBundleAPITests(DirectoriesMixin, APITestCase):
    ENDPOINT = "/api/share_link_bundles/"

    def setUp(self):
        super().setUp()
        self.user = User.objects.create_superuser(username="bundle_admin")
        self.client.force_authenticate(self.user)
        self.document = DocumentFactory.create()

    @mock.patch("documents.views.build_share_link_bundle.delay")
    def test_create_bundle_triggers_build_job(self, delay_mock):
        payload = {
            "document_ids": [self.document.pk],
            "file_version": ShareLink.FileVersion.ARCHIVE,
            "expiration_days": 7,
        }

        response = self.client.post(self.ENDPOINT, payload, format="json")

        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
        bundle = ShareLinkBundle.objects.get(pk=response.data["id"])
        self.assertEqual(bundle.documents.count(), 1)
        self.assertEqual(bundle.status, ShareLinkBundle.Status.PENDING)
        delay_mock.assert_called_once_with(bundle.pk)

    def test_create_bundle_rejects_missing_documents(self):
        payload = {
            "document_ids": [9999],
            "file_version": ShareLink.FileVersion.ARCHIVE,
            "expiration_days": 7,
        }

        response = self.client.post(self.ENDPOINT, payload, format="json")

        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertIn("document_ids", response.data)

    @mock.patch("documents.views.has_perms_owner_aware", return_value=False)
    def test_create_bundle_rejects_insufficient_permissions(self, perms_mock):
        payload = {
            "document_ids": [self.document.pk],
            "file_version": ShareLink.FileVersion.ARCHIVE,
            "expiration_days": 7,
        }

        response = self.client.post(self.ENDPOINT, payload, format="json")

        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertIn("document_ids", response.data)
        perms_mock.assert_called()

    @mock.patch("documents.views.build_share_link_bundle.delay")
    def test_rebuild_bundle_resets_state(self, delay_mock):
        bundle = ShareLinkBundle.objects.create(
            slug="rebuild-slug",
            file_version=ShareLink.FileVersion.ARCHIVE,
            status=ShareLinkBundle.Status.FAILED,
        )
        bundle.documents.set([self.document])
        bundle.last_error = {"message": "Something went wrong"}
        bundle.size_bytes = 100
        bundle.file_path = "path/to/file.zip"
        bundle.save()

        response = self.client.post(f"{self.ENDPOINT}{bundle.pk}/rebuild/")

        self.assertEqual(response.status_code, status.HTTP_200_OK)
        bundle.refresh_from_db()
        self.assertEqual(bundle.status, ShareLinkBundle.Status.PENDING)
        self.assertIsNone(bundle.last_error)
        self.assertIsNone(bundle.size_bytes)
        self.assertEqual(bundle.file_path, "")
        delay_mock.assert_called_once_with(bundle.pk)

    def test_rebuild_bundle_rejects_processing_status(self):
        bundle = ShareLinkBundle.objects.create(
            slug="processing-slug",
            file_version=ShareLink.FileVersion.ARCHIVE,
            status=ShareLinkBundle.Status.PROCESSING,
        )
        bundle.documents.set([self.document])

        response = self.client.post(f"{self.ENDPOINT}{bundle.pk}/rebuild/")

        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertIn("detail", response.data)

    def test_create_bundle_rejects_duplicate_documents(self):
|
||||
payload = {
|
||||
"document_ids": [self.document.pk, self.document.pk],
|
||||
"file_version": ShareLink.FileVersion.ARCHIVE,
|
||||
"expiration_days": 7,
|
||||
}
|
||||
|
||||
response = self.client.post(self.ENDPOINT, payload, format="json")
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn("document_ids", response.data)
|
||||
|
||||
def test_download_ready_bundle_streams_file(self):
|
||||
bundle_file = Path(self.dirs.media_dir) / "bundles" / "ready.zip"
|
||||
bundle_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
bundle_file.write_bytes(b"binary-zip-content")
|
||||
|
||||
bundle = ShareLinkBundle.objects.create(
|
||||
slug="readyslug",
|
||||
file_version=ShareLink.FileVersion.ARCHIVE,
|
||||
status=ShareLinkBundle.Status.READY,
|
||||
file_path=str(bundle_file),
|
||||
)
|
||||
bundle.documents.set([self.document])
|
||||
|
||||
self.client.logout()
|
||||
response = self.client.get(f"/share/{bundle.slug}/")
|
||||
content = b"".join(response.streaming_content)
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_200_OK)
|
||||
self.assertEqual(response["Content-Type"], "application/zip")
|
||||
self.assertEqual(content, b"binary-zip-content")
|
||||
self.assertIn("attachment;", response["Content-Disposition"])
|
||||
|
||||
def test_download_pending_bundle_returns_202(self):
|
||||
bundle = ShareLinkBundle.objects.create(
|
||||
slug="pendingslug",
|
||||
file_version=ShareLink.FileVersion.ARCHIVE,
|
||||
status=ShareLinkBundle.Status.PENDING,
|
||||
)
|
||||
bundle.documents.set([self.document])
|
||||
|
||||
self.client.logout()
|
||||
response = self.client.get(f"/share/{bundle.slug}/")
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED)
|
||||
|
||||
def test_download_failed_bundle_returns_503(self):
|
||||
bundle = ShareLinkBundle.objects.create(
|
||||
slug="failedslug",
|
||||
file_version=ShareLink.FileVersion.ARCHIVE,
|
||||
status=ShareLinkBundle.Status.FAILED,
|
||||
)
|
||||
bundle.documents.set([self.document])
|
||||
|
||||
self.client.logout()
|
||||
response = self.client.get(f"/share/{bundle.slug}/")
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_503_SERVICE_UNAVAILABLE)
|
||||
|
||||
def test_expired_share_link_redirects(self):
|
||||
share_link = ShareLink.objects.create(
|
||||
slug="expiredlink",
|
||||
document=self.document,
|
||||
file_version=ShareLink.FileVersion.ORIGINAL,
|
||||
expiration=timezone.now() - timedelta(hours=1),
|
||||
)
|
||||
|
||||
self.client.logout()
|
||||
response = self.client.get(f"/share/{share_link.slug}/")
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_302_FOUND)
|
||||
self.assertIn("sharelink_expired=1", response["Location"])
|
||||
|
||||
def test_unknown_share_link_redirects(self):
|
||||
self.client.logout()
|
||||
response = self.client.get("/share/unknownsharelink/")
|
||||
|
||||
self.assertEqual(response.status_code, status.HTTP_302_FOUND)
|
||||
self.assertIn("sharelink_notfound=1", response["Location"])
|
||||
|
||||
|
||||
class ShareLinkBundleTaskTests(DirectoriesMixin, APITestCase):
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
self.document = DocumentFactory.create()
|
||||
|
||||
def test_cleanup_expired_share_link_bundles(self):
|
||||
expired_path = Path(self.dirs.media_dir) / "expired.zip"
|
||||
expired_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
expired_path.write_bytes(b"expired")
|
||||
|
||||
active_path = Path(self.dirs.media_dir) / "active.zip"
|
||||
active_path.write_bytes(b"active")
|
||||
|
||||
expired_bundle = ShareLinkBundle.objects.create(
|
||||
slug="expired-bundle",
|
||||
file_version=ShareLink.FileVersion.ARCHIVE,
|
||||
status=ShareLinkBundle.Status.READY,
|
||||
expiration=timezone.now() - timedelta(days=1),
|
||||
file_path=str(expired_path),
|
||||
)
|
||||
expired_bundle.documents.set([self.document])
|
||||
|
||||
active_bundle = ShareLinkBundle.objects.create(
|
||||
slug="active-bundle",
|
||||
file_version=ShareLink.FileVersion.ARCHIVE,
|
||||
status=ShareLinkBundle.Status.READY,
|
||||
expiration=timezone.now() + timedelta(days=1),
|
||||
file_path=str(active_path),
|
||||
)
|
||||
active_bundle.documents.set([self.document])
|
||||
|
||||
cleanup_expired_share_link_bundles()
|
||||
|
||||
self.assertFalse(ShareLinkBundle.objects.filter(pk=expired_bundle.pk).exists())
|
||||
self.assertTrue(ShareLinkBundle.objects.filter(pk=active_bundle.pk).exists())
|
||||
self.assertFalse(expired_path.exists())
|
||||
self.assertTrue(active_path.exists())
|
||||
|
||||
def test_cleanup_expired_share_link_bundles_logs_on_failure(self):
|
||||
expired_bundle = ShareLinkBundle.objects.create(
|
||||
slug="expired-bundle",
|
||||
file_version=ShareLink.FileVersion.ARCHIVE,
|
||||
status=ShareLinkBundle.Status.READY,
|
||||
expiration=timezone.now() - timedelta(days=1),
|
||||
)
|
||||
expired_bundle.documents.set([self.document])
|
||||
|
||||
with mock.patch.object(
|
||||
ShareLinkBundle,
|
||||
"delete",
|
||||
side_effect=RuntimeError("fail"),
|
||||
):
|
||||
with self.assertLogs("paperless.tasks", level="WARNING") as logs:
|
||||
cleanup_expired_share_link_bundles()
|
||||
|
||||
self.assertTrue(
|
||||
any(
|
||||
"Failed to delete expired share link bundle" in msg
|
||||
for msg in logs.output
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class ShareLinkBundleBuildTaskTests(DirectoriesMixin, APITestCase):
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
self.document = DocumentFactory.create(
|
||||
mime_type="application/pdf",
|
||||
checksum="123",
|
||||
)
|
||||
self.document.archive_checksum = ""
|
||||
self.document.save()
|
||||
self.addCleanup(
|
||||
setattr,
|
||||
settings,
|
||||
"SHARE_LINK_BUNDLE_DIR",
|
||||
settings.SHARE_LINK_BUNDLE_DIR,
|
||||
)
|
||||
settings.SHARE_LINK_BUNDLE_DIR = (
|
||||
Path(settings.MEDIA_ROOT) / "documents" / "share_link_bundles"
|
||||
)
|
||||
|
||||
def _write_document_file(self, *, archive: bool, content: bytes) -> Path:
|
||||
if archive:
|
||||
self.document.archive_filename = f"{self.document.pk:07}.pdf"
|
||||
self.document.save()
|
||||
path = self.document.archive_path
|
||||
else:
|
||||
path = self.document.source_path
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_bytes(content)
|
||||
return path
|
||||
|
||||
def test_build_share_link_bundle_creates_zip_and_sets_metadata(self):
|
||||
self._write_document_file(archive=False, content=b"source")
|
||||
archive_path = self._write_document_file(archive=True, content=b"archive")
|
||||
bundle = ShareLinkBundle.objects.create(
|
||||
slug="build-archive",
|
||||
file_version=ShareLink.FileVersion.ARCHIVE,
|
||||
)
|
||||
bundle.documents.set([self.document])
|
||||
|
||||
build_share_link_bundle(bundle.pk)
|
||||
|
||||
bundle.refresh_from_db()
|
||||
self.assertEqual(bundle.status, ShareLinkBundle.Status.READY)
|
||||
self.assertIsNone(bundle.last_error)
|
||||
self.assertIsNotNone(bundle.built_at)
|
||||
self.assertGreater(bundle.size_bytes or 0, 0)
|
||||
final_path = bundle.absolute_file_path
|
||||
self.assertIsNotNone(final_path)
|
||||
self.assertTrue(final_path.exists())
|
||||
with zipfile.ZipFile(final_path) as zipf:
|
||||
names = zipf.namelist()
|
||||
self.assertEqual(len(names), 1)
|
||||
self.assertEqual(zipf.read(names[0]), archive_path.read_bytes())
|
||||
|
||||
def test_build_share_link_bundle_overwrites_existing_file(self):
|
||||
self._write_document_file(archive=False, content=b"source")
|
||||
bundle = ShareLinkBundle.objects.create(
|
||||
slug="overwrite",
|
||||
file_version=ShareLink.FileVersion.ORIGINAL,
|
||||
)
|
||||
bundle.documents.set([self.document])
|
||||
|
||||
existing = settings.SHARE_LINK_BUNDLE_DIR / "overwrite.zip"
|
||||
existing.parent.mkdir(parents=True, exist_ok=True)
|
||||
existing.write_bytes(b"old")
|
||||
|
||||
build_share_link_bundle(bundle.pk)
|
||||
|
||||
bundle.refresh_from_db()
|
||||
final_path = bundle.absolute_file_path
|
||||
self.assertIsNotNone(final_path)
|
||||
self.assertTrue(final_path.exists())
|
||||
self.assertNotEqual(final_path.read_bytes(), b"old")
|
||||
|
||||
def test_build_share_link_bundle_failure_marks_failed(self):
|
||||
self._write_document_file(archive=False, content=b"source")
|
||||
bundle = ShareLinkBundle.objects.create(
|
||||
slug="fail-bundle",
|
||||
file_version=ShareLink.FileVersion.ORIGINAL,
|
||||
)
|
||||
bundle.documents.set([self.document])
|
||||
|
||||
with (
|
||||
mock.patch(
|
||||
"documents.tasks.OriginalsOnlyStrategy.add_document",
|
||||
side_effect=RuntimeError("zip failure"),
|
||||
),
|
||||
mock.patch("pathlib.Path.unlink") as unlink_mock,
|
||||
):
|
||||
unlink_mock.side_effect = [OSError("unlink"), OSError("unlink-finally")] + [
|
||||
None,
|
||||
] * 5
|
||||
with self.assertRaises(RuntimeError):
|
||||
build_share_link_bundle(bundle.pk)
|
||||
|
||||
bundle.refresh_from_db()
|
||||
self.assertEqual(bundle.status, ShareLinkBundle.Status.FAILED)
|
||||
self.assertIsInstance(bundle.last_error, dict)
|
||||
self.assertEqual(bundle.last_error.get("message"), "zip failure")
|
||||
self.assertEqual(bundle.last_error.get("exception_type"), "RuntimeError")
|
||||
scratch_zips = list(Path(settings.SCRATCH_DIR).glob("*.zip"))
|
||||
self.assertTrue(scratch_zips)
|
||||
for path in scratch_zips:
|
||||
path.unlink(missing_ok=True)
|
||||
|
||||
def test_build_share_link_bundle_missing_bundle_noop(self):
|
||||
# Should not raise when bundle does not exist
|
||||
build_share_link_bundle(99999)
|
||||
|
||||
|
||||
class ShareLinkBundleFilterSetTests(DirectoriesMixin, APITestCase):
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
self.document = DocumentFactory.create()
|
||||
self.document.checksum = "doc1checksum"
|
||||
self.document.save()
|
||||
self.other_document = DocumentFactory.create()
|
||||
self.other_document.checksum = "doc2checksum"
|
||||
self.other_document.save()
|
||||
self.bundle_one = ShareLinkBundle.objects.create(
|
||||
slug="bundle-one",
|
||||
file_version=ShareLink.FileVersion.ORIGINAL,
|
||||
)
|
||||
self.bundle_one.documents.set([self.document])
|
||||
self.bundle_two = ShareLinkBundle.objects.create(
|
||||
slug="bundle-two",
|
||||
file_version=ShareLink.FileVersion.ORIGINAL,
|
||||
)
|
||||
self.bundle_two.documents.set([self.other_document])
|
||||
|
||||
def test_filter_documents_returns_all_for_empty_value(self):
|
||||
filterset = ShareLinkBundleFilterSet(
|
||||
data={"documents": ""},
|
||||
queryset=ShareLinkBundle.objects.all(),
|
||||
)
|
||||
|
||||
self.assertCountEqual(filterset.qs, [self.bundle_one, self.bundle_two])
|
||||
|
||||
def test_filter_documents_handles_invalid_input(self):
|
||||
filterset = ShareLinkBundleFilterSet(
|
||||
data={"documents": "invalid"},
|
||||
queryset=ShareLinkBundle.objects.all(),
|
||||
)
|
||||
|
||||
self.assertFalse(filterset.qs.exists())
|
||||
|
||||
def test_filter_documents_filters_by_multiple_ids(self):
|
||||
filterset = ShareLinkBundleFilterSet(
|
||||
data={"documents": f"{self.document.pk},{self.other_document.pk}"},
|
||||
queryset=ShareLinkBundle.objects.all(),
|
||||
)
|
||||
|
||||
self.assertCountEqual(filterset.qs, [self.bundle_one, self.bundle_two])
|
||||
|
||||
def test_filter_documents_returns_queryset_for_empty_ids(self):
|
||||
filterset = ShareLinkBundleFilterSet(
|
||||
data={"documents": ","},
|
||||
queryset=ShareLinkBundle.objects.all(),
|
||||
)
|
||||
|
||||
self.assertCountEqual(filterset.qs, [self.bundle_one, self.bundle_two])
|
||||
|
||||
|
||||
class ShareLinkBundleModelTests(DirectoriesMixin, APITestCase):
|
||||
def test_absolute_file_path_handles_relative_and_absolute(self):
|
||||
relative_path = Path("relative.zip")
|
||||
bundle = ShareLinkBundle.objects.create(
|
||||
slug="relative-bundle",
|
||||
file_version=ShareLink.FileVersion.ORIGINAL,
|
||||
file_path=str(relative_path),
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
bundle.absolute_file_path,
|
||||
(settings.SHARE_LINK_BUNDLE_DIR / relative_path).resolve(),
|
||||
)
|
||||
|
||||
absolute_path = Path(self.dirs.media_dir) / "absolute.zip"
|
||||
bundle.file_path = str(absolute_path)
|
||||
|
||||
self.assertEqual(bundle.absolute_file_path.resolve(), absolute_path.resolve())
|
||||
|
||||
def test_str_returns_translated_slug(self):
|
||||
bundle = ShareLinkBundle.objects.create(
|
||||
slug="string-slug",
|
||||
file_version=ShareLink.FileVersion.ORIGINAL,
|
||||
)
|
||||
|
||||
self.assertIn("string-slug", str(bundle))
|
||||
|
||||
def test_remove_file_deletes_existing_file(self):
|
||||
bundle_path = settings.SHARE_LINK_BUNDLE_DIR / "remove.zip"
|
||||
bundle_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
bundle_path.write_bytes(b"remove-me")
|
||||
bundle = ShareLinkBundle.objects.create(
|
||||
slug="remove-bundle",
|
||||
file_version=ShareLink.FileVersion.ORIGINAL,
|
||||
file_path=str(bundle_path.relative_to(settings.SHARE_LINK_BUNDLE_DIR)),
|
||||
)
|
||||
|
||||
bundle.remove_file()
|
||||
|
||||
self.assertFalse(bundle_path.exists())
|
||||
|
||||
def test_remove_file_handles_oserror(self):
|
||||
bundle_path = settings.SHARE_LINK_BUNDLE_DIR / "remove-error.zip"
|
||||
bundle_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
bundle_path.write_bytes(b"remove-me")
|
||||
bundle = ShareLinkBundle.objects.create(
|
||||
slug="remove-error",
|
||||
file_version=ShareLink.FileVersion.ORIGINAL,
|
||||
file_path=str(bundle_path.relative_to(settings.SHARE_LINK_BUNDLE_DIR)),
|
||||
)
|
||||
|
||||
with mock.patch("pathlib.Path.unlink", side_effect=OSError("fail")):
|
||||
bundle.remove_file()
|
||||
|
||||
self.assertTrue(bundle_path.exists())
|
||||
|
||||
def test_delete_calls_remove_file(self):
|
||||
bundle_path = settings.SHARE_LINK_BUNDLE_DIR / "delete.zip"
|
||||
bundle_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
bundle_path.write_bytes(b"remove-me")
|
||||
bundle = ShareLinkBundle.objects.create(
|
||||
slug="delete-bundle",
|
||||
file_version=ShareLink.FileVersion.ORIGINAL,
|
||||
file_path=str(bundle_path.relative_to(settings.SHARE_LINK_BUNDLE_DIR)),
|
||||
)
|
||||
|
||||
bundle.delete()
|
||||
self.assertFalse(bundle_path.exists())
|
||||
|
||||
|
||||
class ShareLinkBundleSerializerTests(DirectoriesMixin, APITestCase):
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
self.document = DocumentFactory.create()
|
||||
|
||||
def test_validate_document_ids_rejects_duplicates(self):
|
||||
serializer = ShareLinkBundleSerializer(
|
||||
data={
|
||||
"document_ids": [self.document.pk, self.document.pk],
|
||||
"file_version": ShareLink.FileVersion.ORIGINAL,
|
||||
},
|
||||
)
|
||||
|
||||
self.assertFalse(serializer.is_valid())
|
||||
self.assertIn("document_ids", serializer.errors)
|
||||
|
||||
def test_create_assigns_documents_and_expiration(self):
|
||||
serializer = ShareLinkBundleSerializer(
|
||||
data={
|
||||
"document_ids": [self.document.pk],
|
||||
"file_version": ShareLink.FileVersion.ORIGINAL,
|
||||
"expiration_days": 3,
|
||||
},
|
||||
)
|
||||
|
||||
self.assertTrue(serializer.is_valid(), serializer.errors)
|
||||
bundle = serializer.save()
|
||||
|
||||
self.assertEqual(list(bundle.documents.all()), [self.document])
|
||||
expected_expiration = timezone.now() + timedelta(days=3)
|
||||
self.assertAlmostEqual(
|
||||
bundle.expiration,
|
||||
expected_expiration,
|
||||
delta=timedelta(seconds=10),
|
||||
)
|
||||
|
||||
def test_create_raises_when_missing_documents(self):
|
||||
serializer = ShareLinkBundleSerializer(
|
||||
data={
|
||||
"document_ids": [self.document.pk, 9999],
|
||||
"file_version": ShareLink.FileVersion.ORIGINAL,
|
||||
},
|
||||
)
|
||||
|
||||
self.assertTrue(serializer.is_valid(), serializer.errors)
|
||||
with self.assertRaises(serializers.ValidationError):
|
||||
serializer.save(documents=[self.document])
|
||||
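Taken together, the tests above pin down the client-facing flow. A minimal sketch of driving it over HTTP with the requests library (hypothetical host and placeholder token; the endpoint and payload shape come from the tests):

import requests

API = "https://paperless.example.com"             # hypothetical instance
headers = {"Authorization": "Token <api-token>"}  # placeholder credentials

resp = requests.post(
    f"{API}/api/share_link_bundles/",
    json={"document_ids": [1, 2], "file_version": "archive", "expiration_days": 7},
    headers=headers,
)
resp.raise_for_status()
bundle = resp.json()  # status starts as PENDING; a Celery task builds the zip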
@@ -1276,6 +1276,76 @@ class TestWorkflows(
            )
        self.assertIn(expected_str, cm.output[1])

    def test_document_added_any_filters(self):
        trigger = WorkflowTrigger.objects.create(
            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
        )
        trigger.filter_has_any_correspondents.set([self.c])
        trigger.filter_has_any_document_types.set([self.dt])
        trigger.filter_has_any_storage_paths.set([self.sp])

        matching_doc = Document.objects.create(
            title="sample test",
            correspondent=self.c,
            document_type=self.dt,
            storage_path=self.sp,
            original_filename="sample.pdf",
            checksum="checksum-any-match",
        )

        matched, reason = existing_document_matches_workflow(matching_doc, trigger)
        self.assertTrue(matched)
        self.assertIsNone(reason)

        wrong_correspondent = Document.objects.create(
            title="wrong correspondent",
            correspondent=self.c2,
            document_type=self.dt,
            storage_path=self.sp,
            original_filename="sample2.pdf",
        )
        matched, reason = existing_document_matches_workflow(
            wrong_correspondent,
            trigger,
        )
        self.assertFalse(matched)
        self.assertIn("correspondent", reason)

        other_document_type = DocumentType.objects.create(name="Other")
        wrong_document_type = Document.objects.create(
            title="wrong doc type",
            correspondent=self.c,
            document_type=other_document_type,
            storage_path=self.sp,
            original_filename="sample3.pdf",
            checksum="checksum-wrong-doc-type",
        )
        matched, reason = existing_document_matches_workflow(
            wrong_document_type,
            trigger,
        )
        self.assertFalse(matched)
        self.assertIn("doc type", reason)

        other_storage_path = StoragePath.objects.create(
            name="Other path",
            path="/other/",
        )
        wrong_storage_path = Document.objects.create(
            title="wrong storage",
            correspondent=self.c,
            document_type=self.dt,
            storage_path=other_storage_path,
            original_filename="sample4.pdf",
            checksum="checksum-wrong-storage-path",
        )
        matched, reason = existing_document_matches_workflow(
            wrong_storage_path,
            trigger,
        )
        self.assertFalse(matched)
        self.assertIn("storage path", reason)

    def test_document_added_custom_field_query_no_match(self):
        trigger = WorkflowTrigger.objects.create(
            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,

@@ -1384,6 +1454,39 @@ class TestWorkflows(
        self.assertIn(doc1, filtered)
        self.assertNotIn(doc2, filtered)

    def test_prefilter_documents_any_filters(self):
        trigger = WorkflowTrigger.objects.create(
            type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
        )
        trigger.filter_has_any_correspondents.set([self.c])
        trigger.filter_has_any_document_types.set([self.dt])
        trigger.filter_has_any_storage_paths.set([self.sp])

        allowed_document = Document.objects.create(
            title="allowed",
            correspondent=self.c,
            document_type=self.dt,
            storage_path=self.sp,
            original_filename="doc-allowed.pdf",
            checksum="checksum-any-allowed",
        )
        blocked_document = Document.objects.create(
            title="blocked",
            correspondent=self.c2,
            document_type=self.dt,
            storage_path=self.sp,
            original_filename="doc-blocked.pdf",
            checksum="checksum-any-blocked",
        )

        filtered = prefilter_documents_by_workflowtrigger(
            Document.objects.all(),
            trigger,
        )

        self.assertIn(allowed_document, filtered)
        self.assertNotIn(blocked_document, filtered)

    def test_consumption_trigger_requires_filter_configuration(self):
        serializer = WorkflowTriggerSerializer(
            data={
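The "has any" semantics exercised above amount to OR within a filter field and AND across fields. A rough sketch of the equivalent queryset logic (assumed for illustration; not the project's actual implementation):

def prefilter_any(qs, trigger):
    # OR within one filter: documents matching any listed correspondent;
    # AND across filters: must also match a listed doc type and storage path.
    if trigger.filter_has_any_correspondents.exists():
        qs = qs.filter(correspondent__in=trigger.filter_has_any_correspondents.all())
    if trigger.filter_has_any_document_types.exists():
        qs = qs.filter(document_type__in=trigger.filter_has_any_document_types.all())
    if trigger.filter_has_any_storage_paths.exists():
        qs = qs.filter(storage_path__in=trigger.filter_has_any_storage_paths.all())
    return qs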
@@ -35,7 +35,6 @@ from django.db.models import Model
from django.db.models import Q
from django.db.models import Sum
from django.db.models import When
from django.db.models.functions import Length
from django.db.models.functions import Lower
from django.db.models.manager import Manager
from django.http import FileResponse

@@ -51,6 +50,7 @@ from django.utils import timezone
from django.utils.decorators import method_decorator
from django.utils.timezone import make_aware
from django.utils.translation import get_language
from django.utils.translation import gettext_lazy as _
from django.views import View
from django.views.decorators.cache import cache_control
from django.views.decorators.csrf import ensure_csrf_cookie

@@ -71,6 +71,7 @@ from packaging import version as packaging_version
from redis import Redis
from rest_framework import parsers
from rest_framework import serializers
from rest_framework import status
from rest_framework.decorators import action
from rest_framework.exceptions import NotFound
from rest_framework.exceptions import ValidationError

@@ -121,6 +122,7 @@ from documents.filters import DocumentTypeFilterSet
from documents.filters import ObjectOwnedOrGrantedPermissionsFilter
from documents.filters import ObjectOwnedPermissionsFilter
from documents.filters import PaperlessTaskFilterSet
from documents.filters import ShareLinkBundleFilterSet
from documents.filters import ShareLinkFilterSet
from documents.filters import StoragePathFilterSet
from documents.filters import TagFilterSet

@@ -138,6 +140,7 @@ from documents.models import Note
from documents.models import PaperlessTask
from documents.models import SavedView
from documents.models import ShareLink
from documents.models import ShareLinkBundle
from documents.models import StoragePath
from documents.models import Tag
from documents.models import UiSettings

@@ -171,6 +174,7 @@ from documents.serialisers import PostDocumentSerializer
from documents.serialisers import RunTaskViewSerializer
from documents.serialisers import SavedViewSerializer
from documents.serialisers import SearchResultSerializer
from documents.serialisers import ShareLinkBundleSerializer
from documents.serialisers import ShareLinkSerializer
from documents.serialisers import StoragePathSerializer
from documents.serialisers import StoragePathTestSerializer

@@ -183,6 +187,7 @@ from documents.serialisers import WorkflowActionSerializer
from documents.serialisers import WorkflowSerializer
from documents.serialisers import WorkflowTriggerSerializer
from documents.signals import document_updated
from documents.tasks import build_share_link_bundle
from documents.tasks import consume_file
from documents.tasks import empty_trash
from documents.tasks import index_optimize
@@ -479,11 +484,11 @@ class TagViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):

        if descendant_pks:
            filter_q = self.get_document_count_filter()
            children_source = (
            children_source = list(
                Tag.objects.filter(pk__in=descendant_pks | {t.pk for t in all_tags})
                .select_related("owner")
                .annotate(document_count=Count("documents", filter=filter_q))
                .order_by(*ordering)
                .order_by(*ordering),
            )
        else:
            children_source = all_tags

@@ -495,7 +500,11 @@ class TagViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):

        page = self.paginate_queryset(queryset)
        serializer = self.get_serializer(page, many=True)
        return self.get_paginated_response(serializer.data)
        response = self.get_paginated_response(serializer.data)
        if descendant_pks:
            # Include children in the "all" field, if needed
            response.data["all"] = [tag.pk for tag in children_source]
        return response

    def perform_update(self, serializer):
        old_parent = self.get_object().get_parent()
@@ -2322,23 +2331,19 @@ class StatisticsView(GenericAPIView):
        user = request.user if request.user is not None else None

        documents = (
            (
                Document.objects.all()
                if user is None
                else get_objects_for_user_owner_aware(
                    user,
                    "documents.view_document",
                    Document,
                )
            Document.objects.all()
            if user is None
            else get_objects_for_user_owner_aware(
                user,
                "documents.view_document",
                Document,
            )
            .only("mime_type", "content")
            .prefetch_related("tags")
        )
        tags = (
            Tag.objects.all()
            if user is None
            else get_objects_for_user_owner_aware(user, "documents.view_tag", Tag)
        )
        ).only("id", "is_inbox_tag")
        correspondent_count = (
            Correspondent.objects.count()
            if user is None

@@ -2367,31 +2372,33 @@ class StatisticsView(GenericAPIView):
            ).count()
        )

        documents_total = documents.count()

        inbox_tags = tags.filter(is_inbox_tag=True)
        inbox_tag_pks = list(
            tags.filter(is_inbox_tag=True).values_list("pk", flat=True),
        )

        documents_inbox = (
            documents.filter(tags__id__in=inbox_tags).distinct().count()
            if inbox_tags.exists()
            documents.filter(tags__id__in=inbox_tag_pks).values("id").distinct().count()
            if inbox_tag_pks
            else None
        )

        document_file_type_counts = (
        # Single SQL request for document stats and mime type counts
        mime_type_stats = list(
            documents.values("mime_type")
            .annotate(mime_type_count=Count("mime_type"))
            .order_by("-mime_type_count")
            if documents_total > 0
            else []
            .annotate(
                mime_type_count=Count("id"),
                mime_type_chars=Sum("content_length"),
            )
            .order_by("-mime_type_count"),
        )

        character_count = (
            documents.annotate(
                characters=Length("content"),
            )
            .aggregate(Sum("characters"))
            .get("characters__sum")
        )
        # Calculate totals from grouped results
        documents_total = sum(row["mime_type_count"] for row in mime_type_stats)
        character_count = sum(row["mime_type_chars"] or 0 for row in mime_type_stats)
        document_file_type_counts = [
            {"mime_type": row["mime_type"], "mime_type_count": row["mime_type_count"]}
            for row in mime_type_stats
        ]

        current_asn = Document.objects.aggregate(
            Max("archive_serial_number", default=0),

@@ -2404,11 +2411,9 @@ class StatisticsView(GenericAPIView):
            "documents_total": documents_total,
            "documents_inbox": documents_inbox,
            "inbox_tag": (
                inbox_tags.first().pk if inbox_tags.exists() else None
                inbox_tag_pks[0] if inbox_tag_pks else None
            ),  # backwards compatibility
            "inbox_tags": (
                [tag.pk for tag in inbox_tags] if inbox_tags.exists() else None
            ),
            "inbox_tags": (inbox_tag_pks if inbox_tag_pks else None),
            "document_file_type_counts": document_file_type_counts,
            "character_count": character_count,
            "tag_count": len(tags),
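The payoff of the grouped query: the totals fall out of the per-mime-type rows with plain Python arithmetic instead of extra COUNT and SUM queries. A worked example with made-up rows:

mime_type_stats = [
    {"mime_type": "application/pdf", "mime_type_count": 10, "mime_type_chars": 52340},
    {"mime_type": "image/png", "mime_type_count": 3, "mime_type_chars": None},
]
documents_total = sum(r["mime_type_count"] for r in mime_type_stats)       # 13
character_count = sum(r["mime_type_chars"] or 0 for r in mime_type_stats)  # 52340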
@@ -2436,7 +2441,7 @@ class BulkDownloadView(GenericAPIView):
        follow_filename_format = serializer.validated_data.get("follow_formatting")

        for document in documents:
            if not has_perms_owner_aware(request.user, "view_document", document):
            if not has_perms_owner_aware(request.user, "change_document", document):
                return HttpResponseForbidden("Insufficient permissions")

        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
@@ -2791,21 +2796,187 @@ class ShareLinkViewSet(ModelViewSet, PassUserMixin):
    ordering_fields = ("created", "expiration", "document")


class ShareLinkBundleViewSet(ModelViewSet, PassUserMixin):
    model = ShareLinkBundle

    queryset = ShareLinkBundle.objects.all()

    serializer_class = ShareLinkBundleSerializer
    pagination_class = StandardPagination
    permission_classes = (IsAuthenticated, PaperlessObjectPermissions)
    filter_backends = (
        DjangoFilterBackend,
        OrderingFilter,
        ObjectOwnedOrGrantedPermissionsFilter,
    )
    filterset_class = ShareLinkBundleFilterSet
    ordering_fields = ("created", "expiration", "status")

    def get_queryset(self):
        return (
            super()
            .get_queryset()
            .prefetch_related("documents")
            .annotate(document_total=Count("documents", distinct=True))
        )

    def create(self, request, *args, **kwargs):
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        document_ids = serializer.validated_data["document_ids"]
        documents_qs = Document.objects.filter(pk__in=document_ids).select_related(
            "owner",
        )
        found_ids = set(documents_qs.values_list("pk", flat=True))
        missing = sorted(set(document_ids) - found_ids)
        if missing:
            raise ValidationError(
                {
                    "document_ids": _(
                        "Documents not found: %(ids)s",
                    )
                    % {"ids": ", ".join(str(item) for item in missing)},
                },
            )

        documents = list(documents_qs)
        for document in documents:
            if not has_perms_owner_aware(request.user, "view_document", document):
                raise ValidationError(
                    {
                        "document_ids": _(
                            "Insufficient permissions to share document %(id)s.",
                        )
                        % {"id": document.pk},
                    },
                )

        document_map = {document.pk: document for document in documents}
        ordered_documents = [document_map[doc_id] for doc_id in document_ids]

        bundle = serializer.save(
            owner=request.user,
            documents=ordered_documents,
        )
        bundle.remove_file()
        bundle.status = ShareLinkBundle.Status.PENDING
        bundle.last_error = None
        bundle.size_bytes = None
        bundle.built_at = None
        bundle.file_path = ""
        bundle.save(
            update_fields=[
                "status",
                "last_error",
                "size_bytes",
                "built_at",
                "file_path",
            ],
        )
        build_share_link_bundle.delay(bundle.pk)
        bundle.document_total = len(ordered_documents)
        response_serializer = self.get_serializer(bundle)
        headers = self.get_success_headers(response_serializer.data)
        return Response(
            response_serializer.data,
            status=status.HTTP_201_CREATED,
            headers=headers,
        )

    @action(detail=True, methods=["post"])
    def rebuild(self, request, pk=None):
        bundle = self.get_object()
        if bundle.status == ShareLinkBundle.Status.PROCESSING:
            return Response(
                {"detail": _("Bundle is already being processed.")},
                status=status.HTTP_400_BAD_REQUEST,
            )
        bundle.remove_file()
        bundle.status = ShareLinkBundle.Status.PENDING
        bundle.last_error = None
        bundle.size_bytes = None
        bundle.built_at = None
        bundle.file_path = ""
        bundle.save(
            update_fields=[
                "status",
                "last_error",
                "size_bytes",
                "built_at",
                "file_path",
            ],
        )
        build_share_link_bundle.delay(bundle.pk)
        bundle.document_total = (
            getattr(bundle, "document_total", None) or bundle.documents.count()
        )
        serializer = self.get_serializer(bundle)
        return Response(serializer.data)

class SharedLinkView(View):
    authentication_classes = []
    permission_classes = []

    def get(self, request, slug):
        share_link = ShareLink.objects.filter(slug=slug).first()
        if share_link is None:
        if share_link is not None:
            if (
                share_link.expiration is not None
                and share_link.expiration < timezone.now()
            ):
                return HttpResponseRedirect("/accounts/login/?sharelink_expired=1")
            return serve_file(
                doc=share_link.document,
                use_archive=share_link.file_version == "archive",
                disposition="inline",
            )

        bundle = ShareLinkBundle.objects.filter(slug=slug).first()
        if bundle is None:
            return HttpResponseRedirect("/accounts/login/?sharelink_notfound=1")
        if share_link.expiration is not None and share_link.expiration < timezone.now():

        if bundle.expiration is not None and bundle.expiration < timezone.now():
            return HttpResponseRedirect("/accounts/login/?sharelink_expired=1")
        return serve_file(
            doc=share_link.document,
            use_archive=share_link.file_version == "archive",
            disposition="inline",

        if bundle.status in {
            ShareLinkBundle.Status.PENDING,
            ShareLinkBundle.Status.PROCESSING,
        }:
            return HttpResponse(
                _(
                    "The share link bundle is still being prepared. Please try again later.",
                ),
                status=status.HTTP_202_ACCEPTED,
            )

        file_path = bundle.absolute_file_path

        if bundle.status == ShareLinkBundle.Status.FAILED or file_path is None:
            return HttpResponse(
                _(
                    "The share link bundle is unavailable.",
                ),
                status=status.HTTP_503_SERVICE_UNAVAILABLE,
            )

        response = FileResponse(file_path.open("rb"), content_type="application/zip")
        short_slug = bundle.slug[:12]
        download_name = f"paperless-share-{short_slug}.zip"
        filename_normalized = (
            normalize("NFKD", download_name)
            .encode(
                "ascii",
                "ignore",
            )
            .decode("ascii")
        )
        filename_encoded = quote(download_name)
        response["Content-Disposition"] = (
            f"attachment; filename='{filename_normalized}'; "
            f"filename*=utf-8''{filename_encoded}"
        )
        return response


def serve_file(*, doc: Document, use_archive: bool, disposition: str):
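From a client's perspective the status codes above suggest a simple poll-then-download loop. A sketch (hypothetical URL and slug; allow_redirects=False keeps the expired/not-found redirects visible):

import time
import requests

url = "https://paperless.example.com/share/readyslug/"  # hypothetical
while True:
    resp = requests.get(url, allow_redirects=False)
    if resp.status_code == 202:  # bundle still PENDING/PROCESSING
        time.sleep(5)
        continue
    break

if resp.status_code == 200:
    with open("paperless-share.zip", "wb") as fh:
        fh.write(resp.content)
elif resp.status_code == 302:
    print("link expired or unknown:", resp.headers["Location"])
else:  # 503: build FAILED or file missing
    print("bundle unavailable")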
(File diff suppressed because it is too large.)
@@ -3,12 +3,15 @@ from urllib.parse import quote

from allauth.account.adapter import DefaultAccountAdapter
from allauth.core import context
from allauth.headless.tokens.strategies.sessions import SessionTokenStrategy
from allauth.socialaccount.adapter import DefaultSocialAccountAdapter
from django.conf import settings
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
from django.forms import ValidationError
from django.http import HttpRequest
from django.urls import reverse
from rest_framework.authtoken.models import Token

from documents.models import Document
from paperless.signals import handle_social_account_updated

@@ -159,3 +162,11 @@ class CustomSocialAccountAdapter(DefaultSocialAccountAdapter):
            exception,
            extra_context,
        )


class DrfTokenStrategy(SessionTokenStrategy):
    def create_access_token(self, request: HttpRequest) -> str | None:
        if not request.user.is_authenticated:
            return None
        token, _ = Token.objects.get_or_create(user=request.user)
        return token.key
@@ -241,6 +241,17 @@ def _parse_beat_schedule() -> dict:
                "expires": 23.0 * 60.0 * 60.0,
            },
        },
        {
            "name": "Cleanup expired share link bundles",
            "env_key": "PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON",
            # Default daily at 02:00
            "env_default": "0 2 * * *",
            "task": "documents.tasks.cleanup_expired_share_link_bundles",
            "options": {
                # 1 hour before default schedule sends again
                "expires": 23.0 * 60.0 * 60.0,
            },
        },
    ]
    for task in tasks:
        # Either get the environment setting or use the default

@@ -279,6 +290,7 @@ MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals"
ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive"
THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails"
SHARE_LINK_BUNDLE_DIR = MEDIA_ROOT / "documents" / "share_link_bundles"

DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
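The env_key above makes the schedule overridable per deployment. A sketch of the two override styles (the 6-hour cron value is just an example; the "disable" value is exercised by the schedule tests further down):

import os

# run the cleanup every 6 hours instead of the 02:00 default (example value)
os.environ["PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON"] = "0 */6 * * *"
# or turn the task off entirely
os.environ["PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON"] = "disable"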
@@ -345,6 +357,7 @@ INSTALLED_APPS = [
    "allauth.account",
    "allauth.socialaccount",
    "allauth.mfa",
    "allauth.headless",
    "drf_spectacular",
    "drf_spectacular_sidecar",
    "treenode",

@@ -539,6 +552,12 @@ SOCIALACCOUNT_PROVIDERS = json.loads(
)
SOCIAL_ACCOUNT_DEFAULT_GROUPS = __get_list("PAPERLESS_SOCIAL_ACCOUNT_DEFAULT_GROUPS")
SOCIAL_ACCOUNT_SYNC_GROUPS = __get_boolean("PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS")
SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM: Final[str] = os.getenv(
    "PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM",
    "groups",
)

HEADLESS_TOKEN_STRATEGY = "paperless.adapter.DrfTokenStrategy"

MFA_TOTP_ISSUER = "Paperless-ngx"
@@ -40,15 +40,19 @@ def handle_social_account_updated(sender, request, sociallogin, **kwargs):

    extra_data = sociallogin.account.extra_data or {}
    social_account_groups = extra_data.get(
        "groups",
        settings.SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM,
        [],
    )  # pre-allauth 65.11.0 structure

    if not social_account_groups:
        # allauth 65.11.0+ nests claims under `userinfo`/`id_token`
        social_account_groups = (
            extra_data.get("userinfo", {}).get("groups")
            or extra_data.get("id_token", {}).get("groups")
            extra_data.get("userinfo", {}).get(
                settings.SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM,
            )
            or extra_data.get("id_token", {}).get(
                settings.SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM,
            )
            or []
        )
    if settings.SOCIAL_ACCOUNT_SYNC_GROUPS and social_account_groups is not None:
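For clarity, the two extra_data shapes the lookup above has to handle (illustrative values; with the default claim name "groups" both resolve to the same list):

pre_65_11 = {"groups": ["admins"]}                 # flat, pre-allauth 65.11.0
post_65_11 = {"userinfo": {"groups": ["admins"]}}  # nested, allauth 65.11.0+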
@@ -4,6 +4,7 @@ from allauth.account.adapter import get_adapter
from allauth.core import context
from allauth.socialaccount.adapter import get_adapter as get_social_adapter
from django.conf import settings
from django.contrib.auth.models import AnonymousUser
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
from django.forms import ValidationError

@@ -11,6 +12,9 @@ from django.http import HttpRequest
from django.test import TestCase
from django.test import override_settings
from django.urls import reverse
from rest_framework.authtoken.models import Token

from paperless.adapter import DrfTokenStrategy


class TestCustomAccountAdapter(TestCase):
@@ -181,3 +185,74 @@ class TestCustomSocialAccountAdapter(TestCase):
        self.assertTrue(
            any("Test authentication error" in message for message in log_cm.output),
        )


class TestDrfTokenStrategy(TestCase):
    def test_create_access_token_creates_new_token(self):
        """
        GIVEN:
            - A user with no existing DRF token
        WHEN:
            - create_access_token is called
        THEN:
            - A new token is created and its key is returned
        """

        user = User.objects.create_user("testuser")
        request = HttpRequest()
        request.user = user

        strategy = DrfTokenStrategy()
        token_key = strategy.create_access_token(request)

        # Verify a token was created
        self.assertIsNotNone(token_key)
        self.assertTrue(Token.objects.filter(user=user).exists())

        # Verify the returned key matches the created token
        token = Token.objects.get(user=user)
        self.assertEqual(token_key, token.key)

    def test_create_access_token_returns_existing_token(self):
        """
        GIVEN:
            - A user with an existing DRF token
        WHEN:
            - create_access_token is called again
        THEN:
            - The same token key is returned (no new token created)
        """

        user = User.objects.create_user("testuser")
        existing_token = Token.objects.create(user=user)

        request = HttpRequest()
        request.user = user

        strategy = DrfTokenStrategy()
        token_key = strategy.create_access_token(request)

        # Verify the existing token key is returned
        self.assertEqual(token_key, existing_token.key)

        # Verify only one token exists (no duplicate created)
        self.assertEqual(Token.objects.filter(user=user).count(), 1)

    def test_create_access_token_returns_none_for_unauthenticated_user(self):
        """
        GIVEN:
            - An unauthenticated request
        WHEN:
            - create_access_token is called
        THEN:
            - None is returned and no token is created
        """

        request = HttpRequest()
        request.user = AnonymousUser()

        strategy = DrfTokenStrategy()
        token_key = strategy.create_access_token(request)

        self.assertIsNone(token_key)
        self.assertEqual(Token.objects.count(), 0)
@@ -161,6 +161,7 @@ class TestCeleryScheduleParsing(TestCase):
    EMPTY_TRASH_EXPIRE_TIME = 23.0 * 60.0 * 60.0
    RUN_SCHEDULED_WORKFLOWS_EXPIRE_TIME = 59.0 * 60.0
    LLM_INDEX_EXPIRE_TIME = 23.0 * 60.0 * 60.0
    CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME = 23.0 * 60.0 * 60.0

    def test_schedule_configuration_default(self):
        """

@@ -212,6 +213,13 @@ class TestCeleryScheduleParsing(TestCase):
                        "expires": self.LLM_INDEX_EXPIRE_TIME,
                    },
                },
                "Cleanup expired share link bundles": {
                    "task": "documents.tasks.cleanup_expired_share_link_bundles",
                    "schedule": crontab(minute=0, hour=2),
                    "options": {
                        "expires": self.CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME,
                    },
                },
            },
            schedule,
        )

@@ -271,6 +279,13 @@ class TestCeleryScheduleParsing(TestCase):
                        "expires": self.LLM_INDEX_EXPIRE_TIME,
                    },
                },
                "Cleanup expired share link bundles": {
                    "task": "documents.tasks.cleanup_expired_share_link_bundles",
                    "schedule": crontab(minute=0, hour=2),
                    "options": {
                        "expires": self.CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME,
                    },
                },
            },
            schedule,
        )

@@ -322,6 +337,13 @@ class TestCeleryScheduleParsing(TestCase):
                        "expires": self.LLM_INDEX_EXPIRE_TIME,
                    },
                },
                "Cleanup expired share link bundles": {
                    "task": "documents.tasks.cleanup_expired_share_link_bundles",
                    "schedule": crontab(minute=0, hour=2),
                    "options": {
                        "expires": self.CLEANUP_EXPIRED_SHARE_BUNDLES_EXPIRE_TIME,
                    },
                },
            },
            schedule,
        )

@@ -345,6 +367,7 @@ class TestCeleryScheduleParsing(TestCase):
                "PAPERLESS_EMPTY_TRASH_TASK_CRON": "disable",
                "PAPERLESS_WORKFLOW_SCHEDULED_TASK_CRON": "disable",
                "PAPERLESS_LLM_INDEX_TASK_CRON": "disable",
                "PAPERLESS_SHARE_LINK_BUNDLE_CLEANUP_CRON": "disable",
            },
        ):
            schedule = _parse_beat_schedule()
@@ -31,6 +31,7 @@ from documents.views import SavedViewViewSet
from documents.views import SearchAutoCompleteView
from documents.views import SelectionDataView
from documents.views import SharedLinkView
from documents.views import ShareLinkBundleViewSet
from documents.views import ShareLinkViewSet
from documents.views import StatisticsView
from documents.views import StoragePathViewSet

@@ -73,6 +74,7 @@ api_router.register(r"users", UserViewSet, basename="users")
api_router.register(r"groups", GroupViewSet, basename="groups")
api_router.register(r"mail_accounts", MailAccountViewSet)
api_router.register(r"mail_rules", MailRuleViewSet)
api_router.register(r"share_link_bundles", ShareLinkBundleViewSet)
api_router.register(r"share_links", ShareLinkViewSet)
api_router.register(r"workflow_triggers", WorkflowTriggerViewSet)
api_router.register(r"workflow_actions", WorkflowActionViewSet)

@@ -228,6 +230,7 @@ urlpatterns = [
            ],
        ),
    ),
    re_path("^auth/headless/", include("allauth.headless.urls")),
    re_path(
        "^$",  # Redirect to the API swagger view
        RedirectView.as_view(url="schema/view/"),
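As a reading aid, the router registration plus the @action on the viewset yield the usual DRF routes (sketch):

# GET/POST          /api/share_link_bundles/
# GET/PATCH/DELETE  /api/share_link_bundles/{id}/
# POST              /api/share_link_bundles/{id}/rebuild/   (the @action above)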
@@ -1,11 +1,14 @@
import logging
import shutil
from datetime import timedelta
from pathlib import Path

import faiss
import llama_index.core.settings as llama_settings
import tqdm
from celery import states
from django.conf import settings
from django.utils import timezone
from llama_index.core import Document as LlamaDocument
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex

@@ -21,6 +24,7 @@ from llama_index.core.text_splitter import TokenTextSplitter
from llama_index.vector_stores.faiss import FaissVectorStore

from documents.models import Document
from documents.models import PaperlessTask
from paperless_ai.embedding import build_llm_index_text
from paperless_ai.embedding import get_embedding_dim
from paperless_ai.embedding import get_embedding_model

@@ -28,6 +32,29 @@ from paperless_ai.embedding import get_embedding_model
logger = logging.getLogger("paperless_ai.indexing")


def queue_llm_index_update_if_needed(*, rebuild: bool, reason: str) -> bool:
    from documents.tasks import llmindex_index

    has_running = PaperlessTask.objects.filter(
        task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
        status__in=[states.PENDING, states.STARTED],
    ).exists()
    has_recent = PaperlessTask.objects.filter(
        task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
        date_created__gte=(timezone.now() - timedelta(minutes=5)),
    ).exists()
    if has_running or has_recent:
        return False

    llmindex_index.delay(rebuild=rebuild, scheduled=False, auto=True)
    logger.warning(
        "Queued LLM index update%s: %s",
        " (rebuild)" if rebuild else "",
        reason,
    )
    return True


def get_or_create_storage_context(*, rebuild=False):
    """
    Loads or creates the StorageContext (vector store, docstore, index store).

@@ -93,6 +120,10 @@ def load_or_build_index(nodes=None):
    except ValueError as e:
        logger.warning("Failed to load index from storage: %s", e)
        if not nodes:
            queue_llm_index_update_if_needed(
                rebuild=vector_store_file_exists(),
                reason="LLM index missing or invalid while loading.",
            )
            logger.info("No nodes provided for index creation.")
            raise
    return VectorStoreIndex(
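The guard in queue_llm_index_update_if_needed above acts as a debounce: a call is a no-op whenever an LLMINDEX_UPDATE task is pending or started, or one was created within the last five minutes. A usage sketch:

queued = queue_llm_index_update_if_needed(rebuild=False, reason="document updated")
if not queued:
    # another update is already running or was queued less than 5 minutes ago
    pass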
@@ -250,6 +281,13 @@ def query_similar_documents(
    """
    Runs a similarity query and returns top-k similar Document objects.
    """
    if not vector_store_file_exists():
        queue_llm_index_update_if_needed(
            rebuild=False,
            reason="LLM index not found for similarity query.",
        )
        return []

    index = load_or_build_index()

    # constrain only the node(s) that match the document IDs, if given
@@ -3,11 +3,13 @@ from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from celery import states
from django.test import override_settings
from django.utils import timezone
from llama_index.core.base.embeddings.base import BaseEmbedding

from documents.models import Document
from documents.models import PaperlessTask
from paperless_ai import indexing


@@ -288,6 +290,36 @@ def test_update_llm_index_no_documents(
    )


@pytest.mark.django_db
def test_queue_llm_index_update_if_needed_enqueues_when_idle_or_skips_recent():
    # No existing tasks
    with patch("documents.tasks.llmindex_index") as mock_task:
        result = indexing.queue_llm_index_update_if_needed(
            rebuild=True,
            reason="test enqueue",
        )

    assert result is True
    mock_task.delay.assert_called_once_with(rebuild=True, scheduled=False, auto=True)

    PaperlessTask.objects.create(
        task_id="task-1",
        task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
        status=states.STARTED,
        date_created=timezone.now(),
    )

    # Existing running task
    with patch("documents.tasks.llmindex_index") as mock_task:
        result = indexing.queue_llm_index_update_if_needed(
            rebuild=False,
            reason="should skip",
        )

    assert result is False
    mock_task.delay.assert_not_called()


@override_settings(
    LLM_EMBEDDING_BACKEND="huggingface",
    LLM_BACKEND="ollama",

@@ -299,11 +331,15 @@ def test_query_similar_documents(
    with (
        patch("paperless_ai.indexing.get_or_create_storage_context") as mock_storage,
        patch("paperless_ai.indexing.load_or_build_index") as mock_load_or_build_index,
        patch(
            "paperless_ai.indexing.vector_store_file_exists",
        ) as mock_vector_store_exists,
        patch("paperless_ai.indexing.VectorIndexRetriever") as mock_retriever_cls,
        patch("paperless_ai.indexing.Document.objects.filter") as mock_filter,
    ):
        mock_storage.return_value = MagicMock()
        mock_storage.return_value.persist_dir = temp_llm_index_dir
        mock_vector_store_exists.return_value = True

        mock_index = MagicMock()
        mock_load_or_build_index.return_value = mock_index

@@ -332,3 +368,31 @@ def test_query_similar_documents(
    mock_filter.assert_called_once_with(pk__in=[1, 2])

    assert result == mock_filtered_docs


@pytest.mark.django_db
def test_query_similar_documents_triggers_update_when_index_missing(
    temp_llm_index_dir,
    real_document,
):
    with (
        patch(
            "paperless_ai.indexing.vector_store_file_exists",
            return_value=False,
        ),
        patch(
            "paperless_ai.indexing.queue_llm_index_update_if_needed",
        ) as mock_queue,
        patch("paperless_ai.indexing.load_or_build_index") as mock_load,
    ):
        result = indexing.query_similar_documents(
            real_document,
            top_k=2,
        )

    mock_queue.assert_called_once_with(
        rebuild=False,
        reason="LLM index not found for similarity query.",
    )
    mock_load.assert_not_called()
    assert result == []
@@ -1,4 +1,3 @@
import os
from collections.abc import Generator
from pathlib import Path

@@ -70,18 +69,21 @@ def mail_parser() -> MailDocumentParser:


@pytest.fixture()
def live_mail_account() -> Generator[MailAccount, None, None]:
    try:
        account = MailAccount.objects.create(
            name="test",
            imap_server=os.environ["PAPERLESS_MAIL_TEST_HOST"],
            username=os.environ["PAPERLESS_MAIL_TEST_USER"],
            password=os.environ["PAPERLESS_MAIL_TEST_PASSWD"],
            imap_port=993,
        )
        yield account
    finally:
        account.delete()
def greenmail_mail_account(db: None) -> Generator[MailAccount, None, None]:
    """
    Create a mail account configured for local Greenmail server.
    """
    account = MailAccount.objects.create(
        name="Greenmail Test",
        imap_server="localhost",
        imap_port=3143,
        imap_security=MailAccount.ImapSecurity.NONE,
        username="test@localhost",
        password="test",
        character_set="UTF-8",
    )
    yield account
    account.delete()


@pytest.fixture()
@@ -1,6 +1,3 @@
import os
import warnings

import pytest

from paperless_mail.mail import MailAccountHandler

@@ -9,53 +6,51 @@ from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule


# Only run if the environment is setup
# And the environment is not empty (forks, I think)
@pytest.mark.skipif(
    "PAPERLESS_MAIL_TEST_HOST" not in os.environ
    or not len(os.environ["PAPERLESS_MAIL_TEST_HOST"]),
    reason="Live server testing not enabled",
)
@pytest.mark.django_db()
class TestMailLiveServer:
    def test_process_non_gmail_server_flag(
@pytest.mark.django_db
class TestMailGreenmail:
    """
    Mail tests using local Greenmail server
    """

    def test_process_flag(
        self,
        mail_account_handler: MailAccountHandler,
        live_mail_account: MailAccount,
    ):
        greenmail_mail_account: MailAccount,
    ) -> None:
        """
        Test processing mail with FLAG action.
        """
        rule = MailRule.objects.create(
            name="testrule",
            account=greenmail_mail_account,
            action=MailRule.MailAction.FLAG,
        )

        try:
            rule1 = MailRule.objects.create(
                name="testrule",
                account=live_mail_account,
                action=MailRule.MailAction.FLAG,
            )

            mail_account_handler.handle_mail_account(live_mail_account)

            rule1.delete()

            mail_account_handler.handle_mail_account(greenmail_mail_account)
        except MailError as e:
            pytest.fail(f"Failure: {e}")
        except Exception as e:
            warnings.warn(f"Unhandled exception: {e}")
        finally:
            rule.delete()

    def test_process_non_gmail_server_tag(
    def test_process_tag(
        self,
        mail_account_handler: MailAccountHandler,
        live_mail_account: MailAccount,
    ):
        greenmail_mail_account: MailAccount,
    ) -> None:
        """
        Test processing mail with TAG action.
        """
        rule = MailRule.objects.create(
            name="testrule",
            account=greenmail_mail_account,
            action=MailRule.MailAction.TAG,
            action_parameter="TestTag",
        )

        try:
            rule2 = MailRule.objects.create(
                name="testrule",
                account=live_mail_account,
                action=MailRule.MailAction.TAG,
            )

            mail_account_handler.handle_mail_account(live_mail_account)

            rule2.delete()

            mail_account_handler.handle_mail_account(greenmail_mail_account)
        except MailError as e:
            pytest.fail(f"Failure: {e}")
        except Exception as e:
            warnings.warn(f"Unhandled exception: {e}")
        finally:
            rule.delete()