mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-11-11 03:56:07 -06:00
Initial task for building
This commit is contained in:
@@ -16,6 +16,7 @@ export interface ShareBundleSummary {
|
||||
document_count: number
|
||||
file_version: FileVersion
|
||||
status: ShareBundleStatus
|
||||
built_at?: string
|
||||
size_bytes?: number
|
||||
last_error?: string
|
||||
}
|
||||
|
||||
@@ -128,6 +128,22 @@ class Migration(migrations.Migration):
|
||||
verbose_name="last error",
|
||||
),
|
||||
),
|
||||
(
|
||||
"file_path",
|
||||
models.CharField(
|
||||
blank=True,
|
||||
max_length=512,
|
||||
verbose_name="file path",
|
||||
),
|
||||
),
|
||||
(
|
||||
"built_at",
|
||||
models.DateTimeField(
|
||||
blank=True,
|
||||
null=True,
|
||||
verbose_name="built at",
|
||||
),
|
||||
),
|
||||
(
|
||||
"owner",
|
||||
models.ForeignKey(
|
||||
|
||||
@@ -844,6 +844,18 @@ class ShareBundle(SoftDeleteModel):
|
||||
blank=True,
|
||||
)
|
||||
|
||||
# Location of the built archive; the build task stores it relative to
# MEDIA_ROOT when possible and clears it to "" while no archive exists.
file_path = models.CharField(
    verbose_name=_("file path"),
    blank=True,
    max_length=512,
)

# Set by the build task once an archive has been produced; NULL until then.
built_at = models.DateTimeField(
    verbose_name=_("built at"),
    blank=True,
    null=True,
)
|
||||
|
||||
documents = models.ManyToManyField(
|
||||
"documents.Document",
|
||||
related_name="share_bundles",
|
||||
@@ -853,6 +865,31 @@ class ShareBundle(SoftDeleteModel):
|
||||
def __str__(self):
    """
    Return the bundle's label: the `_()`-wrapped template filled with its slug.
    """
    label_args = {"slug": self.slug}
    return _("Share bundle %(slug)s") % label_args
|
||||
|
||||
@property
|
||||
def absolute_file_path(self) -> Path | None:
|
||||
if not self.file_path:
|
||||
return None
|
||||
file_path = Path(self.file_path)
|
||||
if not file_path.is_absolute():
|
||||
file_path = (settings.MEDIA_ROOT / file_path).resolve()
|
||||
return file_path
|
||||
|
||||
def remove_file(self):
    """
    Delete the bundle archive from disk on a best-effort basis.

    Does nothing when no path is stored or the file is absent; an OSError
    raised while unlinking is deliberately ignored.
    """
    target = self.absolute_file_path
    if not target or not target.exists():
        return

    try:
        target.unlink()
    except OSError:
        # Best effort only: a leftover file must not block the caller.
        pass
|
||||
|
||||
def delete(self, using=None, *, keep_parents=False):
    """
    Remove the bundle archive from disk, then delegate to the parent delete().
    """
    self.remove_file()
    outcome = super().delete(using=using, keep_parents=keep_parents)
    return outcome
|
||||
|
||||
def hard_delete(self, using=None, *, keep_parents=False):
    """
    Remove the bundle archive from disk, then delegate to the parent
    hard_delete().
    """
    self.remove_file()
    outcome = super().hard_delete(using=using, keep_parents=keep_parents)
    return outcome
|
||||
|
||||
|
||||
class CustomField(models.Model):
|
||||
"""
|
||||
|
||||
@@ -2160,6 +2160,7 @@ class ShareBundleSerializer(OwnedObjectSerializer):
|
||||
"status",
|
||||
"size_bytes",
|
||||
"last_error",
|
||||
"built_at",
|
||||
"documents",
|
||||
"document_ids",
|
||||
"document_count",
|
||||
@@ -2172,6 +2173,7 @@ class ShareBundleSerializer(OwnedObjectSerializer):
|
||||
"status",
|
||||
"size_bytes",
|
||||
"last_error",
|
||||
"built_at",
|
||||
"documents",
|
||||
"document_count",
|
||||
)
|
||||
@@ -2223,10 +2225,14 @@ class ShareBundleSerializer(OwnedObjectSerializer):
|
||||
|
||||
ordered_documents = [documents_by_id[doc_id] for doc_id in document_ids]
|
||||
share_bundle.documents.set(ordered_documents)
|
||||
share_bundle.document_total = len(ordered_documents)
|
||||
|
||||
return share_bundle
|
||||
|
||||
def get_document_count(self, obj: ShareBundle) -> int:
    """
    Return how many documents the bundle contains.

    Uses the ``document_total`` attribute when it is present and not None
    (zero counts as a valid value); otherwise counts the related documents.
    """
    cached_total = getattr(obj, "document_total", None)
    return obj.documents.count() if cached_total is None else cached_total
|
||||
|
||||
|
||||
|
||||
@@ -3,7 +3,9 @@ import hashlib
|
||||
import logging
|
||||
import shutil
|
||||
import uuid
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from tempfile import NamedTemporaryFile
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
import tqdm
|
||||
@@ -22,6 +24,8 @@ from whoosh.writing import AsyncWriter
|
||||
from documents import index
|
||||
from documents import sanity_checker
|
||||
from documents.barcodes import BarcodePlugin
|
||||
from documents.bulk_download import ArchiveOnlyStrategy
|
||||
from documents.bulk_download import OriginalsOnlyStrategy
|
||||
from documents.caching import clear_document_caches
|
||||
from documents.classifier import DocumentClassifier
|
||||
from documents.classifier import load_classifier
|
||||
@@ -39,6 +43,8 @@ from documents.models import CustomFieldInstance
|
||||
from documents.models import Document
|
||||
from documents.models import DocumentType
|
||||
from documents.models import PaperlessTask
|
||||
from documents.models import ShareBundle
|
||||
from documents.models import ShareLink
|
||||
from documents.models import StoragePath
|
||||
from documents.models import Tag
|
||||
from documents.models import Workflow
|
||||
@@ -563,3 +569,92 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:
|
||||
|
||||
if affected:
|
||||
bulk_update_documents.delay(document_ids=list(affected))
|
||||
|
||||
|
||||
@shared_task
def build_share_bundle(bundle_id: int):
    """
    Build the zip archive for a share bundle and record the outcome on it.

    The bundle is reset to PROCESSING (previous file and build metadata
    cleared), its documents are written to a temporary zip in
    ``settings.SCRATCH_DIR``, and the result is moved to
    ``settings.SHARE_BUNDLE_DIR / "<slug>.zip"``. On success the bundle is
    marked READY with its path, size and build time. On any failure it is
    marked FAILED with the error text and the exception is re-raised.

    Args:
        bundle_id: Primary key of the ShareBundle to build. If no such bundle
            exists, a warning is logged and the task returns without error.
    """
    try:
        # No prefetch here: the documents are fetched below with an explicit
        # ordering, which always issues its own query.
        bundle = ShareBundle.objects.get(pk=bundle_id)
    except ShareBundle.DoesNotExist:
        logger.warning("Share bundle %s no longer exists.", bundle_id)
        return

    # Discard the output and metadata of any previous build before starting.
    bundle.remove_file()
    bundle.status = ShareBundle.Status.PROCESSING
    bundle.last_error = ""
    bundle.size_bytes = None
    bundle.built_at = None
    bundle.file_path = ""
    bundle.save(
        update_fields=[
            "status",
            "last_error",
            "size_bytes",
            "built_at",
            "file_path",
        ],
    )

    documents = list(bundle.documents.all().order_by("pk"))

    # Only reserve a unique scratch path; the archive itself is written by
    # ZipFile below, so the handle is closed straight away.
    with NamedTemporaryFile(
        dir=settings.SCRATCH_DIR,
        suffix=".zip",
        delete=False,
    ) as temp_zip:
        temp_zip_path = Path(temp_zip.name)

    try:
        strategy_class = (
            ArchiveOnlyStrategy
            if bundle.file_version == ShareLink.FileVersion.ARCHIVE
            else OriginalsOnlyStrategy
        )
        with zipfile.ZipFile(temp_zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
            strategy = strategy_class(zipf)
            for document in documents:
                strategy.add_document(document)

        output_dir = settings.SHARE_BUNDLE_DIR
        output_dir.mkdir(parents=True, exist_ok=True)
        final_path = (output_dir / f"{bundle.slug}.zip").resolve()
        # missing_ok avoids the check-then-delete race of exists() + unlink().
        final_path.unlink(missing_ok=True)
        shutil.move(str(temp_zip_path), final_path)

        try:
            bundle.file_path = str(final_path.relative_to(settings.MEDIA_ROOT))
        except ValueError:
            # The archive lives outside MEDIA_ROOT: keep the absolute path.
            bundle.file_path = str(final_path)
        bundle.size_bytes = final_path.stat().st_size
        bundle.status = ShareBundle.Status.READY
        bundle.built_at = timezone.now()
        bundle.last_error = ""
        bundle.save(
            update_fields=[
                "file_path",
                "size_bytes",
                "status",
                "built_at",
                "last_error",
            ],
        )
        logger.info("Built share bundle %s", bundle.pk)
    except Exception as exc:
        logger.exception("Failed to build share bundle %s: %s", bundle_id, exc)
        bundle.status = ShareBundle.Status.FAILED
        bundle.last_error = str(exc)
        bundle.save(update_fields=["status", "last_error"])
        raise
    finally:
        # Single cleanup point for both outcomes. After a successful move the
        # scratch file is already gone, which missing_ok tolerates.
        try:
            temp_zip_path.unlink(missing_ok=True)
        except OSError:
            logger.warning(
                "Could not remove temporary share bundle archive %s",
                temp_zip_path,
            )
|
||||
|
||||
@@ -183,6 +183,7 @@ from documents.serialisers import WorkflowActionSerializer
|
||||
from documents.serialisers import WorkflowSerializer
|
||||
from documents.serialisers import WorkflowTriggerSerializer
|
||||
from documents.signals import document_updated
|
||||
from documents.tasks import build_share_bundle
|
||||
from documents.tasks import consume_file
|
||||
from documents.tasks import empty_trash
|
||||
from documents.tasks import index_optimize
|
||||
@@ -2620,7 +2621,12 @@ class ShareBundleViewSet(ModelViewSet, PassUserMixin):
|
||||
ordering_fields = ("created", "expiration", "status")
|
||||
|
||||
def get_queryset(self):
    """
    Return the base queryset with documents prefetched and each bundle
    annotated with ``document_total``, the distinct count of its documents.
    """
    # The earlier un-annotated `return` that preceded this one made the
    # annotated queryset unreachable; only the annotated form is kept.
    return (
        super()
        .get_queryset()
        .prefetch_related("documents")
        .annotate(document_total=Count("documents", distinct=True))
    )
|
||||
|
||||
def create(self, request, *args, **kwargs):
|
||||
serializer = self.get_serializer(data=request.data)
|
||||
@@ -2653,17 +2659,68 @@ class ShareBundleViewSet(ModelViewSet, PassUserMixin):
|
||||
},
|
||||
)
|
||||
|
||||
serializer.save(
|
||||
document_map = {document.pk: document for document in documents}
|
||||
ordered_documents = [document_map[doc_id] for doc_id in document_ids]
|
||||
|
||||
bundle = serializer.save(
|
||||
owner=request.user,
|
||||
documents=documents,
|
||||
documents=ordered_documents,
|
||||
)
|
||||
headers = self.get_success_headers(serializer.data)
|
||||
bundle.remove_file()
|
||||
bundle.status = ShareBundle.Status.PENDING
|
||||
bundle.last_error = ""
|
||||
bundle.size_bytes = None
|
||||
bundle.built_at = None
|
||||
bundle.file_path = ""
|
||||
bundle.save(
|
||||
update_fields=[
|
||||
"status",
|
||||
"last_error",
|
||||
"size_bytes",
|
||||
"built_at",
|
||||
"file_path",
|
||||
],
|
||||
)
|
||||
build_share_bundle.delay(bundle.pk)
|
||||
bundle.document_total = len(ordered_documents)
|
||||
response_serializer = self.get_serializer(bundle)
|
||||
headers = self.get_success_headers(response_serializer.data)
|
||||
return Response(
|
||||
serializer.data,
|
||||
response_serializer.data,
|
||||
status=status.HTTP_201_CREATED,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
@action(detail=True, methods=["post"])
def rebuild(self, request, pk=None):
    """
    Reset a bundle to PENDING and queue a fresh build of its archive.

    Responds with HTTP 400 while the bundle is in the PROCESSING state;
    otherwise returns the serialized bundle after the build task is queued.
    """
    bundle = self.get_object()
    if bundle.status == ShareBundle.Status.PROCESSING:
        busy_payload = {"detail": _("Bundle is already being processed.")}
        return Response(busy_payload, status=status.HTTP_400_BAD_REQUEST)

    bundle.remove_file()
    # Field name -> fresh value; the keys double as the update_fields list.
    reset_state = {
        "status": ShareBundle.Status.PENDING,
        "last_error": "",
        "size_bytes": None,
        "built_at": None,
        "file_path": "",
    }
    for field_name, fresh_value in reset_state.items():
        setattr(bundle, field_name, fresh_value)
    bundle.save(update_fields=list(reset_state))
    build_share_bundle.delay(bundle.pk)

    # Reuse a count already attached to the instance, else query for it.
    known_total = getattr(bundle, "document_total", None)
    bundle.document_total = known_total or bundle.documents.count()
    return Response(self.get_serializer(bundle).data)
|
||||
|
||||
|
||||
class SharedLinkView(View):
|
||||
authentication_classes = []
|
||||
@@ -2671,15 +2728,103 @@ class SharedLinkView(View):
|
||||
|
||||
def get(self, request, slug):
    """
    Serve the shared document or bundle archive published under ``slug``.

    A matching ShareLink takes precedence and is served inline through
    ``serve_file``. Otherwise a ShareBundle with that slug is looked up:

    * unknown slug -> redirect to login with ``sharelink_notfound=1``
    * expired link or bundle -> redirect to login with ``sharelink_expired=1``
    * bundle PENDING/PROCESSING -> HTTP 202
    * bundle FAILED -> rebuild queued, HTTP 503
    * bundle archive missing on disk -> rebuild queued, HTTP 202
    * otherwise the zip archive is returned as an attachment
    """

    def requeue_build(bundle):
        # Drop stale output and build metadata, then schedule a new build.
        bundle.remove_file()
        bundle.status = ShareBundle.Status.PENDING
        bundle.last_error = ""
        bundle.size_bytes = None
        bundle.built_at = None
        bundle.file_path = ""
        bundle.save(
            update_fields=[
                "status",
                "last_error",
                "size_bytes",
                "built_at",
                "file_path",
            ],
        )
        build_share_bundle.delay(bundle.pk)

    share_link = ShareLink.objects.filter(slug=slug).first()
    if share_link is not None:
        if (
            share_link.expiration is not None
            and share_link.expiration < timezone.now()
        ):
            return HttpResponseRedirect("/accounts/login/?sharelink_expired=1")
        return serve_file(
            doc=share_link.document,
            use_archive=share_link.file_version == "archive",
            disposition="inline",
        )

    share_bundle = ShareBundle.objects.filter(slug=slug).first()
    if share_bundle is None:
        return HttpResponseRedirect("/accounts/login/?sharelink_notfound=1")

    if (
        share_bundle.expiration is not None
        and share_bundle.expiration < timezone.now()
    ):
        return HttpResponseRedirect("/accounts/login/?sharelink_expired=1")

    if share_bundle.status in {
        ShareBundle.Status.PENDING,
        ShareBundle.Status.PROCESSING,
    }:
        return HttpResponse(
            _(
                "The shared bundle is still being prepared. Please try again later.",
            ),
            status=status.HTTP_202_ACCEPTED,
        )

    if share_bundle.status == ShareBundle.Status.FAILED:
        requeue_build(share_bundle)
        return HttpResponse(
            _(
                "The shared bundle is temporarily unavailable. A rebuild has been scheduled. Please try again later.",
            ),
            status=status.HTTP_503_SERVICE_UNAVAILABLE,
        )

    # Open the archive directly instead of testing exists() first, so a file
    # that disappears in between triggers a rebuild rather than a 500.
    file_path = share_bundle.absolute_file_path
    archive = None
    if file_path is not None:
        try:
            archive = file_path.open("rb")
        except FileNotFoundError:
            archive = None

    if archive is None:
        requeue_build(share_bundle)
        return HttpResponse(
            _(
                "The shared bundle is being prepared. Please try again later.",
            ),
            status=status.HTTP_202_ACCEPTED,
        )

    response = FileResponse(archive, content_type="application/zip")
    download_name = f"paperless-share-{share_bundle.slug}.zip"
    # ASCII-only fallback name alongside the percent-encoded UTF-8 name.
    filename_normalized = (
        normalize("NFKD", download_name)
        .encode(
            "ascii",
            "ignore",
        )
        .decode("ascii")
    )
    filename_encoded = quote(download_name)
    response["Content-Disposition"] = (
        f"attachment; filename='{filename_normalized}'; "
        f"filename*=utf-8''{filename_encoded}"
    )
    return response
|
||||
|
||||
|
||||
def serve_file(*, doc: Document, use_archive: bool, disposition: str):
|
||||
|
||||
@@ -268,6 +268,7 @@ MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
|
||||
ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals"
|
||||
ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive"
|
||||
THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails"
|
||||
SHARE_BUNDLE_DIR = MEDIA_ROOT / "documents" / "share_bundles"
|
||||
|
||||
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user