Initial task for building share bundles
@@ -16,6 +16,7 @@ export interface ShareBundleSummary {
   document_count: number
   file_version: FileVersion
   status: ShareBundleStatus
+  built_at?: string
   size_bytes?: number
   last_error?: string
 }
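For orientation, a bundle summary as the frontend might receive it once a build has finished could look roughly like the sketch below. The field names come from the interface above, but every value, including the exact status and file_version strings, is an assumption.

# Sketch only: a plausible serialized ShareBundleSummary, written as a Python
# dict for illustration. All values are made up; the optional fields stay
# absent or empty until the build task has run.
bundle_summary = {
    "document_count": 3,
    "file_version": "archive",            # assumed enum string
    "status": "ready",                    # assumed enum string
    "built_at": "2024-01-01T12:00:00Z",   # set once the build completes
    "size_bytes": 1048576,
    "last_error": "",
}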
@@ -128,6 +128,22 @@ class Migration(migrations.Migration):
                     verbose_name="last error",
                 ),
             ),
+            (
+                "file_path",
+                models.CharField(
+                    blank=True,
+                    max_length=512,
+                    verbose_name="file path",
+                ),
+            ),
+            (
+                "built_at",
+                models.DateTimeField(
+                    blank=True,
+                    null=True,
+                    verbose_name="built at",
+                ),
+            ),
             (
                 "owner",
                 models.ForeignKey(
@@ -844,6 +844,18 @@ class ShareBundle(SoftDeleteModel):
         blank=True,
     )
+
+    file_path = models.CharField(
+        _("file path"),
+        max_length=512,
+        blank=True,
+    )
+
+    built_at = models.DateTimeField(
+        _("built at"),
+        null=True,
+        blank=True,
+    )

     documents = models.ManyToManyField(
         "documents.Document",
         related_name="share_bundles",
@@ -853,6 +865,31 @@ class ShareBundle(SoftDeleteModel):
     def __str__(self):
         return _("Share bundle %(slug)s") % {"slug": self.slug}
+
+    @property
+    def absolute_file_path(self) -> Path | None:
+        if not self.file_path:
+            return None
+        file_path = Path(self.file_path)
+        if not file_path.is_absolute():
+            file_path = (settings.MEDIA_ROOT / file_path).resolve()
+        return file_path
+
+    def remove_file(self):
+        path = self.absolute_file_path
+        if path and path.exists():
+            try:
+                path.unlink()
+            except OSError:
+                pass
+
+    def delete(self, using=None, *, keep_parents=False):
+        self.remove_file()
+        return super().delete(using=using, keep_parents=keep_parents)
+
+    def hard_delete(self, using=None, *, keep_parents=False):
+        self.remove_file()
+        return super().hard_delete(using=using, keep_parents=keep_parents)


 class CustomField(models.Model):
     """
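A minimal sketch of how absolute_file_path is expected to resolve, assuming a configured Django environment where settings.MEDIA_ROOT is a Path; the paths below are illustrative only.

# Sketch only: resolution rules for ShareBundle.absolute_file_path on an
# unsaved, in-memory instance.
from documents.models import ShareBundle

bundle = ShareBundle(file_path="documents/share_bundles/example.zip")
print(bundle.absolute_file_path)  # relative paths are resolved under MEDIA_ROOT

bundle.file_path = "/somewhere/else/example.zip"
print(bundle.absolute_file_path)  # absolute paths are returned unchanged

bundle.file_path = ""
print(bundle.absolute_file_path)  # empty file_path yields None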
@@ -2160,6 +2160,7 @@ class ShareBundleSerializer(OwnedObjectSerializer):
             "status",
             "size_bytes",
             "last_error",
+            "built_at",
             "documents",
             "document_ids",
             "document_count",
@@ -2172,6 +2173,7 @@ class ShareBundleSerializer(OwnedObjectSerializer):
             "status",
             "size_bytes",
             "last_error",
+            "built_at",
             "documents",
             "document_count",
         )
@@ -2223,10 +2225,14 @@ class ShareBundleSerializer(OwnedObjectSerializer):

         ordered_documents = [documents_by_id[doc_id] for doc_id in document_ids]
         share_bundle.documents.set(ordered_documents)
+        share_bundle.document_total = len(ordered_documents)

         return share_bundle

     def get_document_count(self, obj: ShareBundle) -> int:
+        count = getattr(obj, "document_total", None)
+        if count is not None:
+            return count
         return obj.documents.count()
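The get_document_count fallback pairs with the document_total annotation added to the viewset's get_queryset further down in this diff: annotated querysets answer from the annotation, everything else falls back to a COUNT per bundle. A small sketch, assuming the usual Django/DRF imports are available:

# Sketch only: the two code paths of get_document_count.
from django.db.models import Count
from documents.models import ShareBundle
from documents.serialisers import ShareBundleSerializer

annotated = ShareBundle.objects.annotate(
    document_total=Count("documents", distinct=True),
).first()
plain = ShareBundle.objects.first()

serializer = ShareBundleSerializer()
serializer.get_document_count(annotated)  # returns the annotated document_total
serializer.get_document_count(plain)      # falls back to plain.documents.count()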
@@ -3,7 +3,9 @@ import hashlib
 import logging
 import shutil
 import uuid
+import zipfile
 from pathlib import Path
+from tempfile import NamedTemporaryFile
 from tempfile import TemporaryDirectory

 import tqdm
@@ -22,6 +24,8 @@ from whoosh.writing import AsyncWriter
 from documents import index
 from documents import sanity_checker
 from documents.barcodes import BarcodePlugin
+from documents.bulk_download import ArchiveOnlyStrategy
+from documents.bulk_download import OriginalsOnlyStrategy
 from documents.caching import clear_document_caches
 from documents.classifier import DocumentClassifier
 from documents.classifier import load_classifier
@@ -39,6 +43,8 @@ from documents.models import CustomFieldInstance
 from documents.models import Document
 from documents.models import DocumentType
 from documents.models import PaperlessTask
+from documents.models import ShareBundle
+from documents.models import ShareLink
 from documents.models import StoragePath
 from documents.models import Tag
 from documents.models import Workflow
@@ -563,3 +569,92 @@ def update_document_parent_tags(tag: Tag, new_parent: Tag) -> None:

     if affected:
         bulk_update_documents.delay(document_ids=list(affected))
+
+
+@shared_task
+def build_share_bundle(bundle_id: int):
+    try:
+        bundle = (
+            ShareBundle.objects.filter(pk=bundle_id).prefetch_related("documents").get()
+        )
+    except ShareBundle.DoesNotExist:
+        logger.warning("Share bundle %s no longer exists.", bundle_id)
+        return
+
+    bundle.remove_file()
+    bundle.status = ShareBundle.Status.PROCESSING
+    bundle.last_error = ""
+    bundle.size_bytes = None
+    bundle.built_at = None
+    bundle.file_path = ""
+    bundle.save(
+        update_fields=[
+            "status",
+            "last_error",
+            "size_bytes",
+            "built_at",
+            "file_path",
+        ],
+    )
+
+    documents = list(bundle.documents.all().order_by("pk"))
+
+    with NamedTemporaryFile(
+        dir=settings.SCRATCH_DIR,
+        suffix=".zip",
+        delete=False,
+    ) as temp_zip:
+        temp_zip_path = Path(temp_zip.name)
+
+    try:
+        strategy_class = (
+            ArchiveOnlyStrategy
+            if bundle.file_version == ShareLink.FileVersion.ARCHIVE
+            else OriginalsOnlyStrategy
+        )
+        with zipfile.ZipFile(temp_zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
+            strategy = strategy_class(zipf)
+            for document in documents:
+                strategy.add_document(document)
+
+        output_dir = settings.SHARE_BUNDLE_DIR
+        output_dir.mkdir(parents=True, exist_ok=True)
+        final_path = (output_dir / f"{bundle.slug}.zip").resolve()
+        if final_path.exists():
+            final_path.unlink()
+        shutil.move(str(temp_zip_path), final_path)
+
+        try:
+            bundle.file_path = str(final_path.relative_to(settings.MEDIA_ROOT))
+        except ValueError:
+            bundle.file_path = str(final_path)
+        bundle.size_bytes = final_path.stat().st_size
+        bundle.status = ShareBundle.Status.READY
+        bundle.built_at = timezone.now()
+        bundle.last_error = ""
+        bundle.save(
+            update_fields=[
+                "file_path",
+                "size_bytes",
+                "status",
+                "built_at",
+                "last_error",
+            ],
+        )
+        logger.info("Built share bundle %s", bundle.pk)
+    except Exception as exc:
+        logger.exception("Failed to build share bundle %s: %s", bundle_id, exc)
+        bundle.status = ShareBundle.Status.FAILED
+        bundle.last_error = str(exc)
+        bundle.save(update_fields=["status", "last_error"])
+        try:
+            temp_zip_path.unlink()
+        except OSError:
+            pass
+        raise
+    finally:
+        if temp_zip_path.exists():
+            try:
+                temp_zip_path.unlink()
+            except OSError:
+                pass
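For manual verification the task can be called in-process instead of through a worker; a rough sketch, assuming a Django shell and a bundle that already has documents attached:

# Sketch only: run the build synchronously, bypassing Celery's .delay().
from documents.models import ShareBundle
from documents.tasks import build_share_bundle

bundle = ShareBundle.objects.filter(documents__isnull=False).first()
build_share_bundle(bundle.pk)  # Celery tasks remain plain callables
bundle.refresh_from_db()
print(bundle.status, bundle.built_at, bundle.size_bytes, bundle.file_path)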
@@ -184,6 +184,7 @@ from documents.serialisers import WorkflowActionSerializer
 from documents.serialisers import WorkflowSerializer
 from documents.serialisers import WorkflowTriggerSerializer
 from documents.signals import document_updated
+from documents.tasks import build_share_bundle
 from documents.tasks import consume_file
 from documents.tasks import empty_trash
 from documents.tasks import index_optimize
@@ -2637,7 +2638,12 @@ class ShareBundleViewSet(ModelViewSet, PassUserMixin):
     ordering_fields = ("created", "expiration", "status")

     def get_queryset(self):
-        return super().get_queryset().prefetch_related("documents")
+        return (
+            super()
+            .get_queryset()
+            .prefetch_related("documents")
+            .annotate(document_total=Count("documents", distinct=True))
+        )

     def create(self, request, *args, **kwargs):
         serializer = self.get_serializer(data=request.data)
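The annotation lets the list endpoint report document_count from the same query that fetches the bundles, instead of issuing one COUNT per row, while prefetch_related still covers callers that iterate the related documents. A quick way to confirm the query shape in a shell (a sketch, not part of the change set):

# Sketch only: inspect the SQL generated by the annotated queryset.
from django.db.models import Count
from documents.models import ShareBundle

qs = ShareBundle.objects.prefetch_related("documents").annotate(
    document_total=Count("documents", distinct=True),
)
print(qs.query)  # a single SELECT with COUNT(DISTINCT ...) per bundle row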
@@ -2670,17 +2676,68 @@ class ShareBundleViewSet(ModelViewSet, PassUserMixin):
                 },
             )

-        serializer.save(
+        document_map = {document.pk: document for document in documents}
+        ordered_documents = [document_map[doc_id] for doc_id in document_ids]
+
+        bundle = serializer.save(
             owner=request.user,
-            documents=documents,
+            documents=ordered_documents,
         )
-        headers = self.get_success_headers(serializer.data)
+        bundle.remove_file()
+        bundle.status = ShareBundle.Status.PENDING
+        bundle.last_error = ""
+        bundle.size_bytes = None
+        bundle.built_at = None
+        bundle.file_path = ""
+        bundle.save(
+            update_fields=[
+                "status",
+                "last_error",
+                "size_bytes",
+                "built_at",
+                "file_path",
+            ],
+        )
+        build_share_bundle.delay(bundle.pk)
+        bundle.document_total = len(ordered_documents)
+        response_serializer = self.get_serializer(bundle)
+        headers = self.get_success_headers(response_serializer.data)
         return Response(
-            serializer.data,
+            response_serializer.data,
             status=status.HTTP_201_CREATED,
             headers=headers,
         )

+    @action(detail=True, methods=["post"])
+    def rebuild(self, request, pk=None):
+        bundle = self.get_object()
+        if bundle.status == ShareBundle.Status.PROCESSING:
+            return Response(
+                {"detail": _("Bundle is already being processed.")},
+                status=status.HTTP_400_BAD_REQUEST,
+            )
+        bundle.remove_file()
+        bundle.status = ShareBundle.Status.PENDING
+        bundle.last_error = ""
+        bundle.size_bytes = None
+        bundle.built_at = None
+        bundle.file_path = ""
+        bundle.save(
+            update_fields=[
+                "status",
+                "last_error",
+                "size_bytes",
+                "built_at",
+                "file_path",
+            ],
+        )
+        build_share_bundle.delay(bundle.pk)
+        bundle.document_total = (
+            getattr(bundle, "document_total", None) or bundle.documents.count()
+        )
+        serializer = self.get_serializer(bundle)
+        return Response(serializer.data)


 class SharedLinkView(View):
     authentication_classes = []
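A rough sketch of exercising the new rebuild action from a test; the URL prefix is an assumption based on the usual router registration, and the user and bundle fixtures are presumed to exist:

# Sketch only: POST to the rebuild action and check the reset state.
from rest_framework.test import APIClient

client = APIClient()
client.force_authenticate(user=owner)                 # hypothetical fixture
response = client.post(f"/api/share_bundles/{bundle.pk}/rebuild/")
assert response.status_code == 200                    # 400 while PROCESSING
assert response.data["built_at"] is None              # cleared until rebuilt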
@@ -2688,15 +2745,103 @@ class SharedLinkView(View):

     def get(self, request, slug):
         share_link = ShareLink.objects.filter(slug=slug).first()
-        if share_link is None:
+        if share_link is not None:
+            if (
+                share_link.expiration is not None
+                and share_link.expiration < timezone.now()
+            ):
+                return HttpResponseRedirect("/accounts/login/?sharelink_expired=1")
+            return serve_file(
+                doc=share_link.document,
+                use_archive=share_link.file_version == "archive",
+                disposition="inline",
+            )
+
+        share_bundle = ShareBundle.objects.filter(slug=slug).first()
+        if share_bundle is None:
             return HttpResponseRedirect("/accounts/login/?sharelink_notfound=1")
-        if share_link.expiration is not None and share_link.expiration < timezone.now():
+
+        if (
+            share_bundle.expiration is not None
+            and share_bundle.expiration < timezone.now()
+        ):
             return HttpResponseRedirect("/accounts/login/?sharelink_expired=1")
-        return serve_file(
-            doc=share_link.document,
-            use_archive=share_link.file_version == "archive",
-            disposition="inline",
+
+        if share_bundle.status in {
+            ShareBundle.Status.PENDING,
+            ShareBundle.Status.PROCESSING,
+        }:
+            return HttpResponse(
+                _(
+                    "The shared bundle is still being prepared. Please try again later.",
+                ),
+                status=status.HTTP_202_ACCEPTED,
+            )
+
+        if share_bundle.status == ShareBundle.Status.FAILED:
+            share_bundle.remove_file()
+            share_bundle.status = ShareBundle.Status.PENDING
+            share_bundle.last_error = ""
+            share_bundle.size_bytes = None
+            share_bundle.built_at = None
+            share_bundle.file_path = ""
+            share_bundle.save(
+                update_fields=[
+                    "status",
+                    "last_error",
+                    "size_bytes",
+                    "built_at",
+                    "file_path",
+                ],
+            )
+            build_share_bundle.delay(share_bundle.pk)
+            return HttpResponse(
+                _(
+                    "The shared bundle is temporarily unavailable. A rebuild has been scheduled. Please try again later.",
+                ),
+                status=status.HTTP_503_SERVICE_UNAVAILABLE,
+            )
+
+        file_path = share_bundle.absolute_file_path
+        if file_path is None or not file_path.exists():
+            share_bundle.status = ShareBundle.Status.PENDING
+            share_bundle.last_error = ""
+            share_bundle.size_bytes = None
+            share_bundle.built_at = None
+            share_bundle.file_path = ""
+            share_bundle.save(
+                update_fields=[
+                    "status",
+                    "last_error",
+                    "size_bytes",
+                    "built_at",
+                    "file_path",
+                ],
+            )
+            build_share_bundle.delay(share_bundle.pk)
+            return HttpResponse(
+                _(
+                    "The shared bundle is being prepared. Please try again later.",
+                ),
+                status=status.HTTP_202_ACCEPTED,
+            )
+
+        response = FileResponse(file_path.open("rb"), content_type="application/zip")
+        download_name = f"paperless-share-{share_bundle.slug}.zip"
+        filename_normalized = (
+            normalize("NFKD", download_name)
+            .encode(
+                "ascii",
+                "ignore",
+            )
+            .decode("ascii")
         )
+        filename_encoded = quote(download_name)
+        response["Content-Disposition"] = (
+            f"attachment; filename='{filename_normalized}'; "
+            f"filename*=utf-8''{filename_encoded}"
+        )
+        return response


 def serve_file(*, doc: Document, use_archive: bool, disposition: str):
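From the client side the bundle link now behaves as a simple polling target: 202 while the zip is pending or processing, 503 after a failed build (which also queues a rebuild), and a zip attachment once the bundle is ready. A hedged sketch of consuming it; the public share URL pattern and variables below are assumptions, not taken from this diff:

# Sketch only: poll a shared bundle link until the archive is served.
import time

import requests

url = f"{base_url}/share/{slug}"              # hypothetical public share URL
while True:
    response = requests.get(url)
    if response.status_code in (202, 503):    # still building / rebuild queued
        time.sleep(10)
        continue
    break

if response.headers.get("Content-Type") == "application/zip":
    with open(f"paperless-share-{slug}.zip", "wb") as handle:
        handle.write(response.content)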
@@ -268,6 +268,7 @@ MEDIA_ROOT = __get_path("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
 ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals"
 ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive"
 THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails"
+SHARE_BUNDLE_DIR = MEDIA_ROOT / "documents" / "share_bundles"

 DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
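With the default MEDIA_ROOT this places built archives next to the other document directories; build_share_bundle creates the directory on demand. A quick sketch of the resulting location (the zip name is a placeholder):

# Sketch only: where a finished bundle lands, given SHARE_BUNDLE_DIR above.
from django.conf import settings

print(settings.SHARE_BUNDLE_DIR)                  # <MEDIA_ROOT>/documents/share_bundles
print(settings.SHARE_BUNDLE_DIR / "example.zip")  # final_path used by build_share_bundle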