Initial task for building

This commit is contained in:
shamoon
2025-11-04 20:55:12 -08:00
parent eba1c3e774
commit 6adeff1901
7 changed files with 312 additions and 11 deletions

View File

@@ -16,6 +16,7 @@ export interface ShareBundleSummary {
document_count: number
file_version: FileVersion
status: ShareBundleStatus
built_at?: string
size_bytes?: number
last_error?: string
}

View File

@@ -128,6 +128,22 @@ class Migration(migrations.Migration):
verbose_name="last error",
),
),
(
"file_path",
models.CharField(
blank=True,
max_length=512,
verbose_name="file path",
),
),
(
"built_at",
models.DateTimeField(
blank=True,
null=True,
verbose_name="built at",
),
),
(
"owner",
models.ForeignKey(

View File

@@ -844,6 +844,18 @@ class ShareBundle(SoftDeleteModel):
blank=True,
)
file_path = models.CharField(
_("file path"),
max_length=512,
blank=True,
)
built_at = models.DateTimeField(
_("built at"),
null=True,
blank=True,
)
documents = models.ManyToManyField(
"documents.Document",
related_name="share_bundles",
@@ -853,6 +865,31 @@ class ShareBundle(SoftDeleteModel):
def __str__(self):
    """Return the display label for this bundle, built from its slug."""
    label_template = _("Share bundle %(slug)s")
    return label_template % {"slug": self.slug}
@property
def absolute_file_path(self) -> Path | None:
if not self.file_path:
return None
file_path = Path(self.file_path)
if not file_path.is_absolute():
file_path = (settings.MEDIA_ROOT / file_path).resolve()
return file_path
def remove_file(self):
    """
    Delete the bundle's archive from disk if it is present.

    Does nothing when no path is stored or the file does not exist.
    Filesystem errors raised while deleting are ignored.
    """
    target = self.absolute_file_path
    if not target or not target.exists():
        return
    try:
        target.unlink()
    except OSError:
        # Best-effort cleanup: a leftover file must not block the caller.
        pass
def delete(self, using=None, *, keep_parents=False):
    """Remove the archive file, then delegate to the parent class's delete()."""
    self.remove_file()
    outcome = super().delete(using=using, keep_parents=keep_parents)
    return outcome
def hard_delete(self, using=None, *, keep_parents=False):
    """Remove the archive file, then delegate to the parent class's hard_delete()."""
    self.remove_file()
    outcome = super().hard_delete(using=using, keep_parents=keep_parents)
    return outcome
class CustomField(models.Model):
"""

View File

@@ -2160,6 +2160,7 @@ class ShareBundleSerializer(OwnedObjectSerializer):
"status",
"size_bytes",
"last_error",
"built_at",
"documents",
"document_ids",
"document_count",
@@ -2172,6 +2173,7 @@ class ShareBundleSerializer(OwnedObjectSerializer):
"status",
"size_bytes",
"last_error",
"built_at",
"documents",
"document_count",
)
@@ -2223,10 +2225,14 @@ class ShareBundleSerializer(OwnedObjectSerializer):
ordered_documents = [documents_by_id[doc_id] for doc_id in document_ids]
share_bundle.documents.set(ordered_documents)
share_bundle.document_total = len(ordered_documents)
return share_bundle
def get_document_count(self, obj: ShareBundle) -> int:
    """
    Return the number of documents in the bundle.

    Uses the ``document_total`` attribute when the object carries one
    (including a value of 0); otherwise counts the related documents.
    """
    cached_total = getattr(obj, "document_total", None)
    return obj.documents.count() if cached_total is None else cached_total

View File

@@ -3,7 +3,9 @@ import hashlib
import logging
import shutil
import uuid
import zipfile
from pathlib import Path
from tempfile import NamedTemporaryFile
from tempfile import TemporaryDirectory
import tqdm
@@ -22,6 +24,8 @@ from whoosh.writing import AsyncWriter
from documents import index
from documents import sanity_checker
from documents.barcodes import BarcodePlugin
from documents.bulk_download import ArchiveOnlyStrategy
from documents.bulk_download import OriginalsOnlyStrategy
from documents.caching import clear_document_caches
from documents.classifier import DocumentClassifier
from documents.classifier import load_classifier
@@ -39,6 +43,8 @@ from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import DocumentType
from documents.models import PaperlessTask
from documents.models import ShareBundle
from documents.models import ShareLink
from documents.models import StoragePath
from documents.models import Tag
from documents.models import Workflow
@@ -563,3 +569,92 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:
if affected:
bulk_update_documents.delay(document_ids=list(affected))
@shared_task
def build_share_bundle(bundle_id: int):
    """
    Build the ZIP archive for a share bundle and record the outcome on it.

    The bundle is first reset to PROCESSING (any previous archive is removed
    and the build metadata cleared). Its documents are then written, ordered
    by primary key, into a temporary ZIP inside settings.SCRATCH_DIR using
    the archive or originals strategy depending on ``bundle.file_version``.
    The finished archive is moved to ``settings.SHARE_BUNDLE_DIR/<slug>.zip``
    and the bundle is marked READY with its path, size and build time.

    Args:
        bundle_id: Primary key of the ShareBundle to build. If no such bundle
            exists, a warning is logged and the task returns without error.

    Raises:
        Exception: Any error raised while building is logged, stored on the
            bundle (status FAILED, ``last_error``) and then re-raised.
    """
    try:
        # No prefetch here: the documents are fetched below with an explicit
        # ordering, which issues its own query and would bypass any prefetch.
        bundle = ShareBundle.objects.get(pk=bundle_id)
    except ShareBundle.DoesNotExist:
        logger.warning("Share bundle %s no longer exists.", bundle_id)
        return

    # Discard the previous build so a stale archive is never served while
    # the new one is being produced.
    bundle.remove_file()
    bundle.status = ShareBundle.Status.PROCESSING
    bundle.last_error = ""
    bundle.size_bytes = None
    bundle.built_at = None
    bundle.file_path = ""
    bundle.save(
        update_fields=[
            "status",
            "last_error",
            "size_bytes",
            "built_at",
            "file_path",
        ],
    )

    documents = list(bundle.documents.all().order_by("pk"))

    # Only reserve a unique scratch file name here; the archive itself is
    # written through zipfile below, so the handle is closed right away.
    with NamedTemporaryFile(
        dir=settings.SCRATCH_DIR,
        suffix=".zip",
        delete=False,
    ) as temp_zip:
        temp_zip_path = Path(temp_zip.name)

    try:
        strategy_class = (
            ArchiveOnlyStrategy
            if bundle.file_version == ShareLink.FileVersion.ARCHIVE
            else OriginalsOnlyStrategy
        )
        with zipfile.ZipFile(temp_zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
            strategy = strategy_class(zipf)
            for document in documents:
                strategy.add_document(document)

        output_dir = settings.SHARE_BUNDLE_DIR
        output_dir.mkdir(parents=True, exist_ok=True)
        final_path = (output_dir / f"{bundle.slug}.zip").resolve()
        # missing_ok avoids the exists()/unlink() check-then-act race.
        final_path.unlink(missing_ok=True)
        shutil.move(str(temp_zip_path), final_path)

        # Store the path relative to MEDIA_ROOT when possible; fall back to
        # the absolute path if the bundle directory lives elsewhere.
        try:
            bundle.file_path = str(final_path.relative_to(settings.MEDIA_ROOT))
        except ValueError:
            bundle.file_path = str(final_path)
        bundle.size_bytes = final_path.stat().st_size
        bundle.status = ShareBundle.Status.READY
        bundle.built_at = timezone.now()
        bundle.last_error = ""
        bundle.save(
            update_fields=[
                "file_path",
                "size_bytes",
                "status",
                "built_at",
                "last_error",
            ],
        )
        logger.info("Built share bundle %s", bundle.pk)
    except Exception as exc:
        logger.exception("Failed to build share bundle %s: %s", bundle_id, exc)
        bundle.status = ShareBundle.Status.FAILED
        bundle.last_error = str(exc)
        bundle.save(update_fields=["status", "last_error"])
        raise
    finally:
        # Single cleanup point for the scratch file. After a successful move
        # the file is already gone, which missing_ok tolerates.
        try:
            temp_zip_path.unlink(missing_ok=True)
        except OSError:
            logger.warning(
                "Could not remove temporary share bundle archive %s",
                temp_zip_path,
            )

View File

@@ -183,6 +183,7 @@ from documents.serialisers import WorkflowActionSerializer
from documents.serialisers import WorkflowSerializer
from documents.serialisers import WorkflowTriggerSerializer
from documents.signals import document_updated
from documents.tasks import build_share_bundle
from documents.tasks import consume_file
from documents.tasks import empty_trash
from documents.tasks import index_optimize
@@ -2620,7 +2621,12 @@ class ShareBundleViewSet(ModelViewSet, PassUserMixin):
ordering_fields = ("created", "expiration", "status")
def get_queryset(self):
# NOTE(review): this span comes from a diff rendered without +/- markers.
# The single-line return directly below appears to be the pre-commit
# version; the parenthesized return after it appears to be its replacement.
return super().get_queryset().prefetch_related("documents")
# Replacement form: keeps the documents prefetch and additionally annotates
# each bundle with document_total, a distinct count of its documents.
return (
super()
.get_queryset()
.prefetch_related("documents")
.annotate(document_total=Count("documents", distinct=True))
)
def create(self, request, *args, **kwargs):
serializer = self.get_serializer(data=request.data)
@@ -2653,17 +2659,68 @@ class ShareBundleViewSet(ModelViewSet, PassUserMixin):
},
)
serializer.save(
document_map = {document.pk: document for document in documents}
ordered_documents = [document_map[doc_id] for doc_id in document_ids]
bundle = serializer.save(
owner=request.user,
documents=documents,
documents=ordered_documents,
)
headers = self.get_success_headers(serializer.data)
bundle.remove_file()
bundle.status = ShareBundle.Status.PENDING
bundle.last_error = ""
bundle.size_bytes = None
bundle.built_at = None
bundle.file_path = ""
bundle.save(
update_fields=[
"status",
"last_error",
"size_bytes",
"built_at",
"file_path",
],
)
build_share_bundle.delay(bundle.pk)
bundle.document_total = len(ordered_documents)
response_serializer = self.get_serializer(bundle)
headers = self.get_success_headers(response_serializer.data)
return Response(
serializer.data,
response_serializer.data,
status=status.HTTP_201_CREATED,
headers=headers,
)
@action(detail=True, methods=["post"])
def rebuild(self, request, pk=None):
    """
    Reset a share bundle and queue a fresh build of its archive.

    Responds with HTTP 400 when the bundle is currently PROCESSING.
    Otherwise the existing archive is removed, the build metadata is
    cleared, the status is set to PENDING, the build task is queued and
    the serialized bundle is returned.
    """
    bundle = self.get_object()
    already_running = bundle.status == ShareBundle.Status.PROCESSING
    if already_running:
        return Response(
            {"detail": _("Bundle is already being processed.")},
            status=status.HTTP_400_BAD_REQUEST,
        )

    bundle.remove_file()
    # Field name -> value to reset before the rebuild; the insertion order
    # is also the order passed to update_fields.
    reset_values = {
        "status": ShareBundle.Status.PENDING,
        "last_error": "",
        "size_bytes": None,
        "built_at": None,
        "file_path": "",
    }
    for field_name, value in reset_values.items():
        setattr(bundle, field_name, value)
    bundle.save(update_fields=list(reset_values))
    build_share_bundle.delay(bundle.pk)

    # Reuse an already-known total when there is one; otherwise count.
    known_total = getattr(bundle, "document_total", None)
    bundle.document_total = known_total or bundle.documents.count()
    return Response(self.get_serializer(bundle).data)
class SharedLinkView(View):
authentication_classes = []
@@ -2671,15 +2728,103 @@ class SharedLinkView(View):
def get(self, request, slug):
# Serve a shared item by slug: a ShareLink (single document) is tried first,
# then a ShareBundle (ZIP archive) with the same slug.
# NOTE(review): this span comes from a diff rendered without +/- markers, so
# removed and added lines are interleaved (e.g. the `is None` / `is not None`
# pair below, and a second serve_file(...) call further down that appears to
# be the removed original). Read it as a diff, not as runnable code.
share_link = ShareLink.objects.filter(slug=slug).first()
if share_link is None:
if share_link is not None:
if (
share_link.expiration is not None
and share_link.expiration < timezone.now()
):
return HttpResponseRedirect("/accounts/login/?sharelink_expired=1")
return serve_file(
doc=share_link.document,
use_archive=share_link.file_version == "archive",
disposition="inline",
)
# No share link matched: look for a share bundle with this slug.
share_bundle = ShareBundle.objects.filter(slug=slug).first()
if share_bundle is None:
return HttpResponseRedirect("/accounts/login/?sharelink_notfound=1")
if share_link.expiration is not None and share_link.expiration < timezone.now():
if (
share_bundle.expiration is not None
and share_bundle.expiration < timezone.now()
):
return HttpResponseRedirect("/accounts/login/?sharelink_expired=1")
return serve_file(
doc=share_link.document,
use_archive=share_link.file_version == "archive",
disposition="inline",
# Bundle exists but its archive is not built yet: answer 202 so the client
# can retry later.
if share_bundle.status in {
ShareBundle.Status.PENDING,
ShareBundle.Status.PROCESSING,
}:
return HttpResponse(
_(
"The shared bundle is still being prepared. Please try again later.",
),
status=status.HTTP_202_ACCEPTED,
)
# A failed build is reset to PENDING and re-queued; this request gets 503.
if share_bundle.status == ShareBundle.Status.FAILED:
share_bundle.remove_file()
share_bundle.status = ShareBundle.Status.PENDING
share_bundle.last_error = ""
share_bundle.size_bytes = None
share_bundle.built_at = None
share_bundle.file_path = ""
share_bundle.save(
update_fields=[
"status",
"last_error",
"size_bytes",
"built_at",
"file_path",
],
)
build_share_bundle.delay(share_bundle.pk)
return HttpResponse(
_(
"The shared bundle is temporarily unavailable. A rebuild has been scheduled. Please try again later.",
),
status=status.HTTP_503_SERVICE_UNAVAILABLE,
)
# Status passed the checks above but the archive is missing on disk (or no
# path is stored): reset to PENDING, queue a rebuild and answer 202.
file_path = share_bundle.absolute_file_path
if file_path is None or not file_path.exists():
share_bundle.status = ShareBundle.Status.PENDING
share_bundle.last_error = ""
share_bundle.size_bytes = None
share_bundle.built_at = None
share_bundle.file_path = ""
share_bundle.save(
update_fields=[
"status",
"last_error",
"size_bytes",
"built_at",
"file_path",
],
)
build_share_bundle.delay(share_bundle.pk)
return HttpResponse(
_(
"The shared bundle is being prepared. Please try again later.",
),
status=status.HTTP_202_ACCEPTED,
)
# Send the archive as a ZIP attachment named after the bundle slug.
response = FileResponse(file_path.open("rb"), content_type="application/zip")
download_name = f"paperless-share-{share_bundle.slug}.zip"
# ASCII-only fallback name (non-ASCII characters are dropped after NFKD
# normalization) plus a percent-encoded name for the filename* parameter.
filename_normalized = (
normalize("NFKD", download_name)
.encode(
"ascii",
"ignore",
)
.decode("ascii")
)
filename_encoded = quote(download_name)
# NOTE(review): the plain filename parameter is wrapped in single quotes.
# RFC 6266 quoted-strings use double quotes, so some clients may treat the
# single quotes as part of the file name - confirm and switch if so.
response["Content-Disposition"] = (
f"attachment; filename='{filename_normalized}'; "
f"filename*=utf-8''{filename_encoded}"
)
return response
def serve_file(*, doc: Document, use_archive: bool, disposition: str):

View File

@@ -268,6 +268,7 @@ MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals"
ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive"
THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails"
# Directory below MEDIA_ROOT where the share bundle build task places the
# finished "<slug>.zip" archives; the task creates it on demand.
SHARE_BUNDLE_DIR = MEDIA_ROOT / "documents" / "share_bundles"
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")