Initial background task for building share bundles

This commit is contained in:
shamoon
2025-11-04 20:55:12 -08:00
parent 0af895ca0a
commit a9b5463141
7 changed files with 312 additions and 11 deletions

View File

@@ -16,6 +16,7 @@ export interface ShareBundleSummary {
document_count: number document_count: number
file_version: FileVersion file_version: FileVersion
status: ShareBundleStatus status: ShareBundleStatus
built_at?: string
size_bytes?: number size_bytes?: number
last_error?: string last_error?: string
} }

View File

@@ -128,6 +128,22 @@ class Migration(migrations.Migration):
verbose_name="last error", verbose_name="last error",
), ),
), ),
(
"file_path",
models.CharField(
blank=True,
max_length=512,
verbose_name="file path",
),
),
(
"built_at",
models.DateTimeField(
blank=True,
null=True,
verbose_name="built at",
),
),
( (
"owner", "owner",
models.ForeignKey( models.ForeignKey(

View File

@@ -844,6 +844,18 @@ class ShareBundle(SoftDeleteModel):
blank=True, blank=True,
) )
# Where the built zip lives on disk; stored relative to MEDIA_ROOT when
# possible (see absolute_file_path), empty until a build has produced a file.
file_path = models.CharField(
    _("file path"),
    max_length=512,
    blank=True,
)
# Timestamp of the last successful build; null until a build completes.
built_at = models.DateTimeField(
    _("built at"),
    null=True,
    blank=True,
)
documents = models.ManyToManyField( documents = models.ManyToManyField(
"documents.Document", "documents.Document",
related_name="share_bundles", related_name="share_bundles",
@@ -853,6 +865,31 @@ class ShareBundle(SoftDeleteModel):
def __str__(self): def __str__(self):
return _("Share bundle %(slug)s") % {"slug": self.slug} return _("Share bundle %(slug)s") % {"slug": self.slug}
@property
def absolute_file_path(self) -> Path | None:
if not self.file_path:
return None
file_path = Path(self.file_path)
if not file_path.is_absolute():
file_path = (settings.MEDIA_ROOT / file_path).resolve()
return file_path
def remove_file(self):
    """Best-effort removal of the built zip; never raises for a missing file."""
    target = self.absolute_file_path
    if not target or not target.exists():
        return
    try:
        target.unlink()
    except OSError:
        # Cleanup must not break the caller (delete/rebuild paths).
        pass
def delete(self, using=None, *, keep_parents=False):
    # Drop the built zip from disk first so a (soft) delete leaves no orphan file.
    self.remove_file()
    return super().delete(using=using, keep_parents=keep_parents)
def hard_delete(self, using=None, *, keep_parents=False):
    # Same file cleanup as delete(), but for the permanent (non-soft) removal.
    self.remove_file()
    return super().hard_delete(using=using, keep_parents=keep_parents)
class CustomField(models.Model): class CustomField(models.Model):
""" """

View File

@@ -2160,6 +2160,7 @@ class ShareBundleSerializer(OwnedObjectSerializer):
"status", "status",
"size_bytes", "size_bytes",
"last_error", "last_error",
"built_at",
"documents", "documents",
"document_ids", "document_ids",
"document_count", "document_count",
@@ -2172,6 +2173,7 @@ class ShareBundleSerializer(OwnedObjectSerializer):
"status", "status",
"size_bytes", "size_bytes",
"last_error", "last_error",
"built_at",
"documents", "documents",
"document_count", "document_count",
) )
@@ -2223,10 +2225,14 @@ class ShareBundleSerializer(OwnedObjectSerializer):
ordered_documents = [documents_by_id[doc_id] for doc_id in document_ids] ordered_documents = [documents_by_id[doc_id] for doc_id in document_ids]
share_bundle.documents.set(ordered_documents) share_bundle.documents.set(ordered_documents)
share_bundle.document_total = len(ordered_documents)
return share_bundle return share_bundle
def get_document_count(self, obj: ShareBundle) -> int:
    """Document count for the bundle, preferring the queryset annotation.

    Viewsets annotate ``document_total``; when absent, fall back to a
    ``COUNT`` query on the m2m relation.
    """
    annotated = getattr(obj, "document_total", None)
    return annotated if annotated is not None else obj.documents.count()

View File

@@ -3,7 +3,9 @@ import hashlib
import logging import logging
import shutil import shutil
import uuid import uuid
import zipfile
from pathlib import Path from pathlib import Path
from tempfile import NamedTemporaryFile
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory
import tqdm import tqdm
@@ -22,6 +24,8 @@ from whoosh.writing import AsyncWriter
from documents import index from documents import index
from documents import sanity_checker from documents import sanity_checker
from documents.barcodes import BarcodePlugin from documents.barcodes import BarcodePlugin
from documents.bulk_download import ArchiveOnlyStrategy
from documents.bulk_download import OriginalsOnlyStrategy
from documents.caching import clear_document_caches from documents.caching import clear_document_caches
from documents.classifier import DocumentClassifier from documents.classifier import DocumentClassifier
from documents.classifier import load_classifier from documents.classifier import load_classifier
@@ -39,6 +43,8 @@ from documents.models import CustomFieldInstance
from documents.models import Document from documents.models import Document
from documents.models import DocumentType from documents.models import DocumentType
from documents.models import PaperlessTask from documents.models import PaperlessTask
from documents.models import ShareBundle
from documents.models import ShareLink
from documents.models import StoragePath from documents.models import StoragePath
from documents.models import Tag from documents.models import Tag
from documents.models import Workflow from documents.models import Workflow
@@ -563,3 +569,92 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:
if affected: if affected:
bulk_update_documents.delay(document_ids=list(affected)) bulk_update_documents.delay(document_ids=list(affected))
@shared_task
def build_share_bundle(bundle_id: int):
    """Build the zip archive for a ShareBundle in the background.

    Resets the bundle to PROCESSING, writes every attached document into a
    scratch zip, then moves the finished archive into
    ``settings.SHARE_BUNDLE_DIR`` and marks the bundle READY with its size and
    build time.  On failure the bundle is marked FAILED with the error message
    and the exception is re-raised so the task itself is recorded as failed.
    """
    try:
        bundle = (
            ShareBundle.objects.filter(pk=bundle_id).prefetch_related("documents").get()
        )
    except ShareBundle.DoesNotExist:
        logger.warning("Share bundle %s no longer exists.", bundle_id)
        return

    # Clear any previous artifact and build-related state before starting over.
    bundle.remove_file()
    bundle.status = ShareBundle.Status.PROCESSING
    bundle.last_error = ""
    bundle.size_bytes = None
    bundle.built_at = None
    bundle.file_path = ""
    bundle.save(
        update_fields=[
            "status",
            "last_error",
            "size_bytes",
            "built_at",
            "file_path",
        ],
    )

    documents = list(bundle.documents.all().order_by("pk"))

    # Allocate a named scratch file (delete=False) and keep only its path;
    # the zip is written to it below and moved into place on success.
    with NamedTemporaryFile(
        dir=settings.SCRATCH_DIR,
        suffix=".zip",
        delete=False,
    ) as temp_zip:
        temp_zip_path = Path(temp_zip.name)
    try:
        # Archive file versions only when the bundle requests them; otherwise
        # the original files are bundled.
        strategy_class = (
            ArchiveOnlyStrategy
            if bundle.file_version == ShareLink.FileVersion.ARCHIVE
            else OriginalsOnlyStrategy
        )
        with zipfile.ZipFile(temp_zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
            strategy = strategy_class(zipf)
            for document in documents:
                strategy.add_document(document)

        output_dir = settings.SHARE_BUNDLE_DIR
        output_dir.mkdir(parents=True, exist_ok=True)
        final_path = (output_dir / f"{bundle.slug}.zip").resolve()
        if final_path.exists():
            final_path.unlink()
        shutil.move(str(temp_zip_path), final_path)
        # Store the path relative to MEDIA_ROOT when possible so the media
        # tree stays relocatable; fall back to the absolute path otherwise.
        try:
            bundle.file_path = str(final_path.relative_to(settings.MEDIA_ROOT))
        except ValueError:
            bundle.file_path = str(final_path)
        bundle.size_bytes = final_path.stat().st_size
        bundle.status = ShareBundle.Status.READY
        bundle.built_at = timezone.now()
        bundle.last_error = ""
        bundle.save(
            update_fields=[
                "file_path",
                "size_bytes",
                "status",
                "built_at",
                "last_error",
            ],
        )
        logger.info("Built share bundle %s", bundle.pk)
    except Exception as exc:
        logger.exception("Failed to build share bundle %s: %s", bundle_id, exc)
        bundle.status = ShareBundle.Status.FAILED
        bundle.last_error = str(exc)
        bundle.save(update_fields=["status", "last_error"])
        try:
            temp_zip_path.unlink()
        except OSError:
            pass
        raise
    finally:
        # The scratch file only still exists when the move did not happen.
        if temp_zip_path.exists():
            try:
                temp_zip_path.unlink()
            except OSError:
                pass

View File

@@ -184,6 +184,7 @@ from documents.serialisers import WorkflowActionSerializer
from documents.serialisers import WorkflowSerializer from documents.serialisers import WorkflowSerializer
from documents.serialisers import WorkflowTriggerSerializer from documents.serialisers import WorkflowTriggerSerializer
from documents.signals import document_updated from documents.signals import document_updated
from documents.tasks import build_share_bundle
from documents.tasks import consume_file from documents.tasks import consume_file
from documents.tasks import empty_trash from documents.tasks import empty_trash
from documents.tasks import index_optimize from documents.tasks import index_optimize
@@ -2637,7 +2638,12 @@ class ShareBundleViewSet(ModelViewSet, PassUserMixin):
ordering_fields = ("created", "expiration", "status") ordering_fields = ("created", "expiration", "status")
def get_queryset(self):
    """Bundle queryset with documents prefetched and a distinct-count annotation.

    ``document_total`` feeds the serializer's ``get_document_count`` so listing
    bundles does not issue one COUNT query per row.
    """
    queryset = super().get_queryset().prefetch_related("documents")
    return queryset.annotate(document_total=Count("documents", distinct=True))
def create(self, request, *args, **kwargs): def create(self, request, *args, **kwargs):
serializer = self.get_serializer(data=request.data) serializer = self.get_serializer(data=request.data)
@@ -2670,17 +2676,68 @@ class ShareBundleViewSet(ModelViewSet, PassUserMixin):
}, },
) )
serializer.save( document_map = {document.pk: document for document in documents}
ordered_documents = [document_map[doc_id] for doc_id in document_ids]
bundle = serializer.save(
owner=request.user, owner=request.user,
documents=documents, documents=ordered_documents,
) )
headers = self.get_success_headers(serializer.data) bundle.remove_file()
bundle.status = ShareBundle.Status.PENDING
bundle.last_error = ""
bundle.size_bytes = None
bundle.built_at = None
bundle.file_path = ""
bundle.save(
update_fields=[
"status",
"last_error",
"size_bytes",
"built_at",
"file_path",
],
)
build_share_bundle.delay(bundle.pk)
bundle.document_total = len(ordered_documents)
response_serializer = self.get_serializer(bundle)
headers = self.get_success_headers(response_serializer.data)
return Response( return Response(
serializer.data, response_serializer.data,
status=status.HTTP_201_CREATED, status=status.HTTP_201_CREATED,
headers=headers, headers=headers,
) )
@action(detail=True, methods=["post"])
def rebuild(self, request, pk=None):
    """Reset a bundle's build state and queue a background rebuild.

    Rejects the request with 400 when a build is already in progress.
    """
    bundle = self.get_object()
    if bundle.status == ShareBundle.Status.PROCESSING:
        return Response(
            {"detail": _("Bundle is already being processed.")},
            status=status.HTTP_400_BAD_REQUEST,
        )
    # Drop the stale zip and clear every build-related field before queueing.
    bundle.remove_file()
    bundle.status = ShareBundle.Status.PENDING
    bundle.last_error = ""
    bundle.size_bytes = None
    bundle.built_at = None
    bundle.file_path = ""
    bundle.save(
        update_fields=[
            "status",
            "last_error",
            "size_bytes",
            "built_at",
            "file_path",
        ],
    )
    build_share_bundle.delay(bundle.pk)
    # The serializer reads document_total (set by get_queryset's annotation);
    # ensure it is populated so serializing does not need another query path.
    bundle.document_total = (
        getattr(bundle, "document_total", None) or bundle.documents.count()
    )
    serializer = self.get_serializer(bundle)
    return Response(serializer.data)
class SharedLinkView(View): class SharedLinkView(View):
authentication_classes = [] authentication_classes = []
@@ -2688,15 +2745,103 @@ class SharedLinkView(View):
def get(self, request, slug):
    """Serve the shared object addressed by *slug*.

    A slug may name either a single-document ShareLink (served inline) or a
    ShareBundle zip (served as an attachment).  Expired or unknown slugs
    redirect to the login page with a hint parameter.  Bundles that are not
    READY — or whose zip is missing from disk — answer 202/503 and, where
    appropriate, schedule a (re)build.

    Fix: the Content-Disposition ``filename`` parameter was wrapped in single
    quotes; RFC 6266 requires a double-quoted string, otherwise browsers keep
    the quote characters in the saved filename.
    """
    share_link = ShareLink.objects.filter(slug=slug).first()
    if share_link is not None:
        if (
            share_link.expiration is not None
            and share_link.expiration < timezone.now()
        ):
            return HttpResponseRedirect("/accounts/login/?sharelink_expired=1")
        return serve_file(
            doc=share_link.document,
            use_archive=share_link.file_version == "archive",
            disposition="inline",
        )

    share_bundle = ShareBundle.objects.filter(slug=slug).first()
    if share_bundle is None:
        return HttpResponseRedirect("/accounts/login/?sharelink_notfound=1")
    if (
        share_bundle.expiration is not None
        and share_bundle.expiration < timezone.now()
    ):
        return HttpResponseRedirect("/accounts/login/?sharelink_expired=1")

    # A build is queued or running: ask the client to retry later.
    if share_bundle.status in {
        ShareBundle.Status.PENDING,
        ShareBundle.Status.PROCESSING,
    }:
        return HttpResponse(
            _(
                "The shared bundle is still being prepared. Please try again later.",
            ),
            status=status.HTTP_202_ACCEPTED,
        )

    # A failed build is reset and re-queued automatically on access.
    if share_bundle.status == ShareBundle.Status.FAILED:
        share_bundle.remove_file()
        share_bundle.status = ShareBundle.Status.PENDING
        share_bundle.last_error = ""
        share_bundle.size_bytes = None
        share_bundle.built_at = None
        share_bundle.file_path = ""
        share_bundle.save(
            update_fields=[
                "status",
                "last_error",
                "size_bytes",
                "built_at",
                "file_path",
            ],
        )
        build_share_bundle.delay(share_bundle.pk)
        return HttpResponse(
            _(
                "The shared bundle is temporarily unavailable. A rebuild has been scheduled. Please try again later.",
            ),
            status=status.HTTP_503_SERVICE_UNAVAILABLE,
        )

    file_path = share_bundle.absolute_file_path
    if file_path is None or not file_path.exists():
        # READY, but the zip vanished from disk — reset state and rebuild.
        share_bundle.status = ShareBundle.Status.PENDING
        share_bundle.last_error = ""
        share_bundle.size_bytes = None
        share_bundle.built_at = None
        share_bundle.file_path = ""
        share_bundle.save(
            update_fields=[
                "status",
                "last_error",
                "size_bytes",
                "built_at",
                "file_path",
            ],
        )
        build_share_bundle.delay(share_bundle.pk)
        return HttpResponse(
            _(
                "The shared bundle is being prepared. Please try again later.",
            ),
            status=status.HTTP_202_ACCEPTED,
        )

    response = FileResponse(file_path.open("rb"), content_type="application/zip")
    download_name = f"paperless-share-{share_bundle.slug}.zip"
    # ASCII fallback for the plain filename parameter …
    filename_normalized = (
        normalize("NFKD", download_name)
        .encode(
            "ascii",
            "ignore",
        )
        .decode("ascii")
    )
    # … and an RFC 5987 percent-encoded form for filename*.
    filename_encoded = quote(download_name)
    response["Content-Disposition"] = (
        f'attachment; filename="{filename_normalized}"; '
        f"filename*=utf-8''{filename_encoded}"
    )
    return response
def serve_file(*, doc: Document, use_archive: bool, disposition: str): def serve_file(*, doc: Document, use_archive: bool, disposition: str):

View File

@@ -268,6 +268,7 @@ MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals" ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals"
ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive" ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive"
THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails" THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails"
# Built share-bundle zip archives are stored under the media tree.
SHARE_BUNDLE_DIR = MEDIA_ROOT / "documents" / "share_bundles"
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data") DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")