From a9b5463141cb27aaa423244b36d8992168860d97 Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue, 4 Nov 2025 20:55:12 -0800 Subject: [PATCH] Initial task for building --- src-ui/src/app/data/share-bundle.ts | 1 + src/documents/migrations/1075_sharebundle.py | 16 ++ src/documents/models.py | 37 ++++ src/documents/serialisers.py | 6 + src/documents/tasks.py | 95 +++++++++++ src/documents/views.py | 167 +++++++++++++++++-- src/paperless/settings.py | 1 + 7 files changed, 312 insertions(+), 11 deletions(-) diff --git a/src-ui/src/app/data/share-bundle.ts b/src-ui/src/app/data/share-bundle.ts index 4eb6f5744..3c6307730 100644 --- a/src-ui/src/app/data/share-bundle.ts +++ b/src-ui/src/app/data/share-bundle.ts @@ -16,6 +16,7 @@ export interface ShareBundleSummary { document_count: number file_version: FileVersion status: ShareBundleStatus + built_at?: string size_bytes?: number last_error?: string } diff --git a/src/documents/migrations/1075_sharebundle.py b/src/documents/migrations/1075_sharebundle.py index 9a6611205..a64ad29d3 100644 --- a/src/documents/migrations/1075_sharebundle.py +++ b/src/documents/migrations/1075_sharebundle.py @@ -128,6 +128,22 @@ class Migration(migrations.Migration): verbose_name="last error", ), ), + ( + "file_path", + models.CharField( + blank=True, + max_length=512, + verbose_name="file path", + ), + ), + ( + "built_at", + models.DateTimeField( + blank=True, + null=True, + verbose_name="built at", + ), + ), ( "owner", models.ForeignKey( diff --git a/src/documents/models.py b/src/documents/models.py index ec5fdc731..0bd4aedd8 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -844,6 +844,18 @@ class ShareBundle(SoftDeleteModel): blank=True, ) + file_path = models.CharField( + _("file path"), + max_length=512, + blank=True, + ) + + built_at = models.DateTimeField( + _("built at"), + null=True, + blank=True, + ) + documents = models.ManyToManyField( "documents.Document", related_name="share_bundles", @@ -853,6 +865,31 @@ class ShareBundle(SoftDeleteModel): def __str__(self): return _("Share bundle %(slug)s") % {"slug": self.slug} + @property + def absolute_file_path(self) -> Path | None: + if not self.file_path: + return None + file_path = Path(self.file_path) + if not file_path.is_absolute(): + file_path = (settings.MEDIA_ROOT / file_path).resolve() + return file_path + + def remove_file(self): + path = self.absolute_file_path + if path and path.exists(): + try: + path.unlink() + except OSError: + pass + + def delete(self, using=None, *, keep_parents=False): + self.remove_file() + return super().delete(using=using, keep_parents=keep_parents) + + def hard_delete(self, using=None, *, keep_parents=False): + self.remove_file() + return super().hard_delete(using=using, keep_parents=keep_parents) + class CustomField(models.Model): """ diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py index 6fa2c6817..5fd3dd0c5 100644 --- a/src/documents/serialisers.py +++ b/src/documents/serialisers.py @@ -2160,6 +2160,7 @@ class ShareBundleSerializer(OwnedObjectSerializer): "status", "size_bytes", "last_error", + "built_at", "documents", "document_ids", "document_count", @@ -2172,6 +2173,7 @@ class ShareBundleSerializer(OwnedObjectSerializer): "status", "size_bytes", "last_error", + "built_at", "documents", "document_count", ) @@ -2223,10 +2225,14 @@ class ShareBundleSerializer(OwnedObjectSerializer): ordered_documents = [documents_by_id[doc_id] for doc_id in document_ids] share_bundle.documents.set(ordered_documents) + share_bundle.document_total = len(ordered_documents) return share_bundle def get_document_count(self, obj: ShareBundle) -> int: + count = getattr(obj, "document_total", None) + if count is not None: + return count return obj.documents.count() diff --git a/src/documents/tasks.py b/src/documents/tasks.py index 17bfce3b0..2a942f4b9 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -3,7 +3,9 @@ import hashlib import logging import shutil import uuid +import zipfile from pathlib import Path +from tempfile import NamedTemporaryFile from tempfile import TemporaryDirectory import tqdm @@ -22,6 +24,8 @@ from whoosh.writing import AsyncWriter from documents import index from documents import sanity_checker from documents.barcodes import BarcodePlugin +from documents.bulk_download import ArchiveOnlyStrategy +from documents.bulk_download import OriginalsOnlyStrategy from documents.caching import clear_document_caches from documents.classifier import DocumentClassifier from documents.classifier import load_classifier @@ -39,6 +43,8 @@ from documents.models import CustomFieldInstance from documents.models import Document from documents.models import DocumentType from documents.models import PaperlessTask +from documents.models import ShareBundle +from documents.models import ShareLink from documents.models import StoragePath from documents.models import Tag from documents.models import Workflow @@ -563,3 +569,92 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None: if affected: bulk_update_documents.delay(document_ids=list(affected)) + + +@shared_task +def build_share_bundle(bundle_id: int): + try: + bundle = ( + ShareBundle.objects.filter(pk=bundle_id).prefetch_related("documents").get() + ) + except ShareBundle.DoesNotExist: + logger.warning("Share bundle %s no longer exists.", bundle_id) + return + + bundle.remove_file() + bundle.status = ShareBundle.Status.PROCESSING + bundle.last_error = "" + bundle.size_bytes = None + bundle.built_at = None + bundle.file_path = "" + bundle.save( + update_fields=[ + "status", + "last_error", + "size_bytes", + "built_at", + "file_path", + ], + ) + + documents = list(bundle.documents.all().order_by("pk")) + + with NamedTemporaryFile( + dir=settings.SCRATCH_DIR, + suffix=".zip", + delete=False, + ) as temp_zip: + temp_zip_path = Path(temp_zip.name) + + try: + strategy_class = ( + ArchiveOnlyStrategy + if bundle.file_version == ShareLink.FileVersion.ARCHIVE + else OriginalsOnlyStrategy + ) + with zipfile.ZipFile(temp_zip_path, "w", zipfile.ZIP_DEFLATED) as zipf: + strategy = strategy_class(zipf) + for document in documents: + strategy.add_document(document) + + output_dir = settings.SHARE_BUNDLE_DIR + output_dir.mkdir(parents=True, exist_ok=True) + final_path = (output_dir / f"{bundle.slug}.zip").resolve() + if final_path.exists(): + final_path.unlink() + shutil.move(str(temp_zip_path), final_path) + + try: + bundle.file_path = str(final_path.relative_to(settings.MEDIA_ROOT)) + except ValueError: + bundle.file_path = str(final_path) + bundle.size_bytes = final_path.stat().st_size + bundle.status = ShareBundle.Status.READY + bundle.built_at = timezone.now() + bundle.last_error = "" + bundle.save( + update_fields=[ + "file_path", + "size_bytes", + "status", + "built_at", + "last_error", + ], + ) + logger.info("Built share bundle %s", bundle.pk) + except Exception as exc: + logger.exception("Failed to build share bundle %s: %s", bundle_id, exc) + bundle.status = ShareBundle.Status.FAILED + bundle.last_error = str(exc) + bundle.save(update_fields=["status", "last_error"]) + try: + temp_zip_path.unlink() + except OSError: + pass + raise + finally: + if temp_zip_path.exists(): + try: + temp_zip_path.unlink() + except OSError: + pass diff --git a/src/documents/views.py b/src/documents/views.py index cdd070e1b..f687a759c 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -184,6 +184,7 @@ from documents.serialisers import WorkflowActionSerializer from documents.serialisers import WorkflowSerializer from documents.serialisers import WorkflowTriggerSerializer from documents.signals import document_updated +from documents.tasks import build_share_bundle from documents.tasks import consume_file from documents.tasks import empty_trash from documents.tasks import index_optimize @@ -2637,7 +2638,12 @@ class ShareBundleViewSet(ModelViewSet, PassUserMixin): ordering_fields = ("created", "expiration", "status") def get_queryset(self): - return super().get_queryset().prefetch_related("documents") + return ( + super() + .get_queryset() + .prefetch_related("documents") + .annotate(document_total=Count("documents", distinct=True)) + ) def create(self, request, *args, **kwargs): serializer = self.get_serializer(data=request.data) @@ -2670,17 +2676,68 @@ class ShareBundleViewSet(ModelViewSet, PassUserMixin): }, ) - serializer.save( + document_map = {document.pk: document for document in documents} + ordered_documents = [document_map[doc_id] for doc_id in document_ids] + + bundle = serializer.save( owner=request.user, - documents=documents, + documents=ordered_documents, ) - headers = self.get_success_headers(serializer.data) + bundle.remove_file() + bundle.status = ShareBundle.Status.PENDING + bundle.last_error = "" + bundle.size_bytes = None + bundle.built_at = None + bundle.file_path = "" + bundle.save( + update_fields=[ + "status", + "last_error", + "size_bytes", + "built_at", + "file_path", + ], + ) + build_share_bundle.delay(bundle.pk) + bundle.document_total = len(ordered_documents) + response_serializer = self.get_serializer(bundle) + headers = self.get_success_headers(response_serializer.data) return Response( - serializer.data, + response_serializer.data, status=status.HTTP_201_CREATED, headers=headers, ) + @action(detail=True, methods=["post"]) + def rebuild(self, request, pk=None): + bundle = self.get_object() + if bundle.status == ShareBundle.Status.PROCESSING: + return Response( + {"detail": _("Bundle is already being processed.")}, + status=status.HTTP_400_BAD_REQUEST, + ) + bundle.remove_file() + bundle.status = ShareBundle.Status.PENDING + bundle.last_error = "" + bundle.size_bytes = None + bundle.built_at = None + bundle.file_path = "" + bundle.save( + update_fields=[ + "status", + "last_error", + "size_bytes", + "built_at", + "file_path", + ], + ) + build_share_bundle.delay(bundle.pk) + bundle.document_total = ( + getattr(bundle, "document_total", None) or bundle.documents.count() + ) + serializer = self.get_serializer(bundle) + return Response(serializer.data) + class SharedLinkView(View): authentication_classes = [] @@ -2688,15 +2745,103 @@ class SharedLinkView(View): def get(self, request, slug): share_link = ShareLink.objects.filter(slug=slug).first() - if share_link is None: + if share_link is not None: + if ( + share_link.expiration is not None + and share_link.expiration < timezone.now() + ): + return HttpResponseRedirect("/accounts/login/?sharelink_expired=1") + return serve_file( + doc=share_link.document, + use_archive=share_link.file_version == "archive", + disposition="inline", + ) + + share_bundle = ShareBundle.objects.filter(slug=slug).first() + if share_bundle is None: return HttpResponseRedirect("/accounts/login/?sharelink_notfound=1") - if share_link.expiration is not None and share_link.expiration < timezone.now(): + + if ( + share_bundle.expiration is not None + and share_bundle.expiration < timezone.now() + ): return HttpResponseRedirect("/accounts/login/?sharelink_expired=1") - return serve_file( - doc=share_link.document, - use_archive=share_link.file_version == "archive", - disposition="inline", + + if share_bundle.status in { + ShareBundle.Status.PENDING, + ShareBundle.Status.PROCESSING, + }: + return HttpResponse( + _( + "The shared bundle is still being prepared. Please try again later.", + ), + status=status.HTTP_202_ACCEPTED, + ) + + if share_bundle.status == ShareBundle.Status.FAILED: + share_bundle.remove_file() + share_bundle.status = ShareBundle.Status.PENDING + share_bundle.last_error = "" + share_bundle.size_bytes = None + share_bundle.built_at = None + share_bundle.file_path = "" + share_bundle.save( + update_fields=[ + "status", + "last_error", + "size_bytes", + "built_at", + "file_path", + ], + ) + build_share_bundle.delay(share_bundle.pk) + return HttpResponse( + _( + "The shared bundle is temporarily unavailable. A rebuild has been scheduled. Please try again later.", + ), + status=status.HTTP_503_SERVICE_UNAVAILABLE, + ) + + file_path = share_bundle.absolute_file_path + if file_path is None or not file_path.exists(): + share_bundle.status = ShareBundle.Status.PENDING + share_bundle.last_error = "" + share_bundle.size_bytes = None + share_bundle.built_at = None + share_bundle.file_path = "" + share_bundle.save( + update_fields=[ + "status", + "last_error", + "size_bytes", + "built_at", + "file_path", + ], + ) + build_share_bundle.delay(share_bundle.pk) + return HttpResponse( + _( + "The shared bundle is being prepared. Please try again later.", + ), + status=status.HTTP_202_ACCEPTED, + ) + + response = FileResponse(file_path.open("rb"), content_type="application/zip") + download_name = f"paperless-share-{share_bundle.slug}.zip" + filename_normalized = ( + normalize("NFKD", download_name) + .encode( + "ascii", + "ignore", + ) + .decode("ascii") ) + filename_encoded = quote(download_name) + response["Content-Disposition"] = ( + f"attachment; filename='{filename_normalized}'; " + f"filename*=utf-8''{filename_encoded}" + ) + return response def serve_file(*, doc: Document, use_archive: bool, disposition: str): diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 97d0ca06f..da75508d2 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -268,6 +268,7 @@ MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media") ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals" ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive" THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails" +SHARE_BUNDLE_DIR = MEDIA_ROOT / "documents" / "share_bundles" DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")