Compare commits

..

15 Commits

Author SHA1 Message Date
shamoon
b436530e4f Fix tests 2025-09-20 16:08:36 -07:00
shamoon
0ab94ab130 Make head_version and versions read-only via API 2025-09-20 15:38:21 -07:00
shamoon
ce5f5140f9 Random cleanup 2025-09-20 10:47:56 -07:00
shamoon
d8cb07b4a6 Llint 2025-09-20 10:17:54 -07:00
shamoon
1e48f9f9a9 Fix migration 2025-09-20 10:11:03 -07:00
shamoon
dc20db39e7 Fix caching
[ci skip]
2025-09-20 10:10:09 -07:00
shamoon
065f501272 Fix frontend versions switching
[ci skip]
2025-09-20 10:10:09 -07:00
shamoon
339a4db893 Update views.py 2025-09-20 10:10:08 -07:00
shamoon
0cc5f12cbf version aware doc endpoints 2025-09-20 10:10:08 -07:00
shamoon
e099998b2f Fix archive filename clash 2025-09-20 10:10:07 -07:00
shamoon
521628c1c3 Super basic UI stuff
[ci skip]
2025-09-20 10:10:07 -07:00
shamoon
80ed84f538 Bulk editing to update version instead of replace 2025-09-20 10:10:06 -07:00
shamoon
2557c03463 Fix migration 2025-09-20 10:09:35 -07:00
shamoon
9ed75561e7 Basic start of update endpoint 2025-09-20 10:09:34 -07:00
shamoon
02a7500696 Add head_version 2025-09-20 10:09:31 -07:00
17 changed files with 665 additions and 256 deletions

View File

@@ -166,13 +166,10 @@
</div>
<div class="nav-group mt-3 mb-1">
<h6 class="sidebar-heading px-3 text-muted d-flex align-items-center">
<h6 class="sidebar-heading px-3 text-muted">
<span i18n>Manage</span>
<button class="btn btn-link p-2 py-0" (click)="manageCollapse.toggle()">
<i-bs width="0.9em" height="0.9em" [name]="isManageMenuCollapsed ? 'chevron-down' : 'chevron-up'"></i-bs>
</button>
</h6>
<ul class="nav flex-column mb-2" #manageCollapse="ngbCollapse" [(ngbCollapse)]="isManageMenuCollapsed">
<ul class="nav flex-column mb-2">
<li class="nav-item app-link"
*pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.Correspondent }">
<a class="nav-link" routerLink="correspondents" routerLinkActive="active" (click)="closeMenu()"
@@ -246,14 +243,10 @@
</div>
<div class="nav-group mt-auto mb-1">
<h6 class="sidebar-heading px-3 pt-4 text-muted d-flex align-items-center">
<h6 class="sidebar-heading px-3 pt-4 text-muted">
<span i18n>Administration</span>
<button class="btn btn-link p-2 py-0" (click)="adminCollapse.toggle()">
<i-bs width="0.9em" height="0.9em" [name]="isAdminMenuCollapsed ? 'chevron-down' : 'chevron-up'"></i-bs>
</button>
</h6>
<div class="mb-2">
<ul class="nav flex-column" #adminCollapse="ngbCollapse" [(ngbCollapse)]="isAdminMenuCollapsed">
<ul class="nav flex-column mb-2">
<li class="nav-item app-link" *pngxIfPermissions="{ action: PermissionAction.Change, type: PermissionType.UISettings }"
tourAnchor="tour.settings">
<a class="nav-link" routerLink="settings" routerLinkActive="active" (click)="closeMenu()"
@@ -299,8 +292,6 @@
</a>
</li>
}
</ul>
<ul class="nav flex-column">
<li class="nav-item mt-2" tourAnchor="tour.outro">
<a class="px-3 py-2 text-muted small d-flex align-items-center flex-wrap text-decoration-none"
target="_blank" rel="noopener noreferrer" href="https://docs.paperless-ngx.com" ngbPopover="Documentation"
@@ -365,7 +356,6 @@
</ul>
</div>
</div>
</div>
</nav>
<main role="main" class="ms-sm-auto px-md-4"

View File

@@ -89,8 +89,6 @@ export class AppFrameComponent
appRemoteVersion: AppRemoteVersion
isMenuCollapsed: boolean = true
isManageMenuCollapsed: boolean = false
isAdminMenuCollapsed: boolean = false
slimSidebarAnimating: boolean = false

View File

@@ -1,7 +1,30 @@
<pngx-page-header [(title)]="title">
@if (document?.versions?.length > 0) {
<div class="btn-group" ngbDropdown role="group">
<div class="btn-group" ngbDropdown role="group">
<button class="btn btn-sm btn-outline-secondary dropdown-toggle" ngbDropdownToggle [disabled]="!hasVersions">
<i-bs name="layers"></i-bs>
<span class="d-none d-lg-inline ps-1" i18n>Version</span>
</button>
<div class="dropdown-menu shadow" ngbDropdownMenu>
@for (vid of document.versions; track vid) {
<button ngbDropdownItem (click)="selectVersion(vid)">
<span i18n>Version</span> {{vid}}
@if (selectedVersionId === vid) { <span>&nbsp;</span> }
</button>
}
</div>
</div>
<input #versionFileInput type="file" class="visually-hidden" (change)="onVersionFileSelected($event)" />
<button class="btn btn-sm btn-outline-secondary" title="Upload new version" i18n-title (click)="versionFileInput.click()" [disabled]="!userIsOwner || !userCanEdit">
<i-bs name="file-earmark-plus"></i-bs><span class="visually-hidden" i18n>Upload new version</span>
</button>
</div>
}
@if (archiveContentRenderType === ContentRenderType.PDF && !useNativePdfViewer) {
@if (previewNumPages) {
<div class="input-group input-group-sm d-none d-md-flex">
<div class="input-group input-group-sm ms-2 d-none d-md-flex">
<div class="input-group-text" i18n>Page</div>
<input class="form-control flex-grow-0 w-auto" type="number" min="1" [max]="previewNumPages" [(ngModel)]="previewCurrentPage" />
<div class="input-group-text" i18n>of {{previewNumPages}}</div>

View File

@@ -222,6 +222,8 @@ export class DocumentDetailComponent
titleSubject: Subject<string> = new Subject()
previewUrl: string
thumbUrl: string
// Versioning: which document ID to use for file preview/download
selectedVersionId: number
previewText: string
previewLoaded: boolean = false
tiffURL: string
@@ -270,6 +272,7 @@ export class DocumentDetailComponent
public readonly DataType = DataType
@ViewChild('nav') nav: NgbNav
@ViewChild('versionFileInput') versionFileInput
@ViewChild('pdfPreview') set pdfPreview(element) {
// this gets called when component added or removed from DOM
if (
@@ -402,7 +405,10 @@ export class DocumentDetailComponent
}
private loadDocument(documentId: number): void {
this.previewUrl = this.documentsService.getPreviewUrl(documentId)
this.selectedVersionId = documentId
this.previewUrl = this.documentsService.getPreviewUrl(
this.selectedVersionId
)
this.http
.get(this.previewUrl, { responseType: 'text' })
.pipe(
@@ -417,7 +423,7 @@ export class DocumentDetailComponent
err.message ?? err.toString()
}`),
})
this.thumbUrl = this.documentsService.getThumbUrl(documentId)
this.thumbUrl = this.documentsService.getThumbUrl(this.selectedVersionId)
this.documentsService
.get(documentId)
.pipe(
@@ -638,6 +644,10 @@ export class DocumentDetailComponent
updateComponent(doc: Document) {
this.document = doc
// Default selected version is the newest version
this.selectedVersionId = doc.versions?.length
? Math.max(...doc.versions)
: doc.id
this.requiresPassword = false
this.updateFormForCustomFields()
if (this.archiveContentRenderType === ContentRenderType.TIFF) {
@@ -702,6 +712,36 @@ export class DocumentDetailComponent
this.prepareForm(doc)
}
/**
 * Whether the current document lists more than one version id. Evaluates to
 * false while no document is loaded or when it carries no `versions` array.
 */
get hasVersions(): boolean {
  const versionCount = this.document?.versions?.length ?? 0
  return versionCount > 1
}
/**
 * Switches the file preview and thumbnail to the given version.
 *
 * The preview URL is built from the current document id plus the chosen
 * version id, while the thumbnail URL is built from the version id alone.
 * The preview URL is then fetched as text to refresh `previewText`; a failed
 * fetch stores a localized error message there instead.
 *
 * @param versionId id of the version (itself a document id) to display
 */
selectVersion(versionId: number) {
  this.selectedVersionId = versionId
  const versionPreviewUrl = this.documentsService.getPreviewUrl(
    this.documentId,
    false,
    versionId
  )
  this.previewUrl = versionPreviewUrl
  this.thumbUrl = this.documentsService.getThumbUrl(versionId)

  // For text previews, refresh content
  const previewContent$ = this.http
    .get(versionPreviewUrl, { responseType: 'text' })
    .pipe(
      first(),
      takeUntil(this.unsubscribeNotifier),
      takeUntil(this.docChangeNotifier)
    )
  previewContent$.subscribe({
    next: (body) => {
      this.previewText = body.toString()
    },
    error: (err) => {
      this.previewText = $localize`An error occurred loading content: ${
        err.message ?? err.toString()
      }`
    },
  })
}
get customFieldFormFields(): FormArray {
return this.documentForm.get('custom_fields') as FormArray
}
@@ -1049,10 +1089,36 @@ export class DocumentDetailComponent
})
}
/**
 * Change handler for the hidden "upload new version" file input: uploads the
 * first selected file as a new version of the current document.
 *
 * The success toast and document refresh run from the `complete` callback,
 * i.e. once the upload request has finished. The upload observable is an
 * HTTP *event* stream whose first emission is only the "request sent"
 * notification, so taking just the first value (e.g. with `first()`) would
 * report success prematurely and unsubscribe, cancelling the request while
 * it is still in flight.
 *
 * @param event change event emitted by the version file input
 */
onVersionFileSelected(event: Event) {
  const input = event.target as HTMLInputElement
  const file = input?.files?.[0]
  if (!file) return
  // Reset input to allow re-selection of the same file later
  input.value = ''
  this.documentsService.uploadVersion(this.documentId, file).subscribe({
    // Intermediate progress events are intentionally ignored; only the end
    // of the request matters here.
    complete: () => {
      this.toastService.showInfo(
        $localize`Uploading new version. Processing will happen in the background.`
      )
      // Refresh metadata to reflect that versions changed (when ready)
      this.openDocumentService.refreshDocument(this.documentId)
    },
    error: (error) => {
      this.toastService.showError(
        $localize`Error uploading new version`,
        error
      )
    },
  })
}
download(original: boolean = false) {
this.downloading = true
const downloadUrl = this.documentsService.getDownloadUrl(
this.documentId,
this.selectedVersionId || this.documentId,
original
)
this.http

View File

@@ -159,6 +159,10 @@ export interface Document extends ObjectWithPermissions {
page_count?: number
// Versioning
head_version?: number
versions?: number[]
// Frontend only
__changedFields?: string[]
}

View File

@@ -163,12 +163,19 @@ export class DocumentService extends AbstractPaperlessService<Document> {
})
}
getPreviewUrl(id: number, original: boolean = false): string {
getPreviewUrl(
id: number,
original: boolean = false,
versionID: number = null
): string {
let url = new URL(this.getResourceUrl(id, 'preview'))
if (this._searchQuery) url.hash = `#search="${this.searchQuery}"`
if (original) {
url.searchParams.append('original', 'true')
}
if (versionID) {
url.searchParams.append('version', versionID.toString())
}
return url.toString()
}
@@ -184,6 +191,16 @@ export class DocumentService extends AbstractPaperlessService<Document> {
return url
}
/**
 * Posts a file to the document's `update_version` endpoint as multipart form
 * data, under the `document` field and with the file's own name.
 *
 * The request is issued with `reportProgress` enabled and `observe: 'events'`,
 * so subscribers receive the HTTP event stream rather than only the response
 * body.
 *
 * @param documentId id of the document receiving the new version
 * @param file file to upload
 * @returns the observable returned by `HttpClient.post`
 */
uploadVersion(documentId: number, file: File) {
  const endpoint = this.getResourceUrl(documentId, 'update_version')
  const payload = new FormData()
  payload.append('document', file, file.name)
  return this.http.post(endpoint, payload, {
    reportProgress: true,
    observe: 'events',
  })
}
getNextAsn(): Observable<number> {
return this.http.get<number>(this.getResourceUrl(null, 'next_asn'))
}

View File

@@ -55,9 +55,7 @@ import {
checkLg,
chevronDoubleLeft,
chevronDoubleRight,
chevronDown,
chevronRight,
chevronUp,
clipboard,
clipboardCheck,
clipboardCheckFill,
@@ -80,6 +78,7 @@ import {
fileEarmarkFill,
fileEarmarkLock,
fileEarmarkMinus,
fileEarmarkPlus,
fileEarmarkRichtext,
fileText,
files,
@@ -96,6 +95,7 @@ import {
house,
infoCircle,
journals,
layers,
link,
listNested,
listTask,
@@ -269,9 +269,7 @@ const icons = {
checkLg,
chevronDoubleLeft,
chevronDoubleRight,
chevronDown,
chevronRight,
chevronUp,
clipboard,
clipboardCheck,
clipboardCheckFill,
@@ -294,6 +292,7 @@ const icons = {
fileEarmarkFill,
fileEarmarkLock,
fileEarmarkMinus,
fileEarmarkPlus,
fileEarmarkRichtext,
files,
fileText,
@@ -310,6 +309,7 @@ const icons = {
house,
infoCircle,
journals,
layers,
link,
listNested,
listTask,

View File

@@ -1,6 +1,5 @@
from __future__ import annotations
import hashlib
import logging
import tempfile
from pathlib import Path
@@ -333,10 +332,8 @@ def rotate(doc_ids: list[int], degrees: int) -> Literal["OK"]:
f"Attempting to rotate {len(doc_ids)} documents by {degrees} degrees.",
)
qs = Document.objects.filter(id__in=doc_ids)
affected_docs: list[int] = []
import pikepdf
rotate_tasks = []
for doc in qs:
if doc.mime_type != "application/pdf":
logger.warning(
@@ -344,28 +341,34 @@ def rotate(doc_ids: list[int], degrees: int) -> Literal["OK"]:
)
continue
try:
with pikepdf.open(doc.source_path, allow_overwriting_input=True) as pdf:
# Write rotated output to a temp file and create a new version via consume pipeline
filepath: Path = (
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
/ f"{doc.id}_rotated.pdf"
)
with pikepdf.open(doc.source_path) as pdf:
for page in pdf.pages:
page.rotate(degrees, relative=True)
pdf.save()
doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
doc.save()
rotate_tasks.append(
update_document_content_maybe_archive_file.s(
document_id=doc.id,
pdf.remove_unreferenced_resources()
pdf.save(filepath)
# Preserve metadata/permissions via overrides; mark as new version
overrides = DocumentMetadataOverrides().from_document(doc)
consume_file.delay(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=filepath,
head_version_id=doc.id,
),
overrides,
)
logger.info(
f"Rotated document {doc.id} by {degrees} degrees",
f"Queued new rotated version for document {doc.id} by {degrees} degrees",
)
affected_docs.append(doc.id)
except Exception as e:
logger.exception(f"Error rotating document {doc.id}: {e}")
if len(affected_docs) > 0:
bulk_update_task = bulk_update_documents.si(document_ids=affected_docs)
chord(header=rotate_tasks, body=bulk_update_task).delay()
return "OK"
@@ -528,19 +531,31 @@ def delete_pages(doc_ids: list[int], pages: list[int]) -> Literal["OK"]:
import pikepdf
try:
with pikepdf.open(doc.source_path, allow_overwriting_input=True) as pdf:
# Produce edited PDF to a temp file and create a new version
filepath: Path = (
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
/ f"{doc.id}_pages_deleted.pdf"
)
with pikepdf.open(doc.source_path) as pdf:
offset = 1 # pages are 1-indexed
for page_num in pages:
pdf.pages.remove(pdf.pages[page_num - offset])
offset += 1 # remove() changes the index of the pages
pdf.remove_unreferenced_resources()
pdf.save()
doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
if doc.page_count is not None:
doc.page_count = doc.page_count - len(pages)
doc.save()
update_document_content_maybe_archive_file.delay(document_id=doc.id)
logger.info(f"Deleted pages {pages} from document {doc.id}")
pdf.save(filepath)
overrides = DocumentMetadataOverrides().from_document(doc)
consume_file.delay(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=filepath,
head_version_id=doc.id,
),
overrides,
)
logger.info(
f"Queued new version for document {doc.id} after deleting pages {pages}",
)
except Exception as e:
logger.exception(f"Error deleting pages from document {doc.id}: {e}")
@@ -592,17 +607,29 @@ def edit_pdf(
dst.pages[-1].rotate(op["rotate"], relative=True)
if update_document:
temp_path = doc.source_path.with_suffix(".tmp.pdf")
# Create a new version from the edited PDF rather than replacing in-place
pdf = pdf_docs[0]
pdf.remove_unreferenced_resources()
# save the edited PDF to a temporary file in case of errors
pdf.save(temp_path)
# replace the original document with the edited one
temp_path.replace(doc.source_path)
doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
doc.page_count = len(pdf.pages)
doc.save()
update_document_content_maybe_archive_file.delay(document_id=doc.id)
filepath: Path = (
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
/ f"{doc.id}_edited.pdf"
)
pdf.save(filepath)
overrides = (
DocumentMetadataOverrides().from_document(doc)
if include_metadata
else DocumentMetadataOverrides()
)
if user is not None:
overrides.owner_id = user.id
consume_file.delay(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=filepath,
head_version_id=doc.id,
),
overrides,
)
else:
consume_tasks = []
overrides = (

View File

@@ -14,6 +14,51 @@ from documents.classifier import DocumentClassifier
from documents.models import Document
def _resolve_effective_doc(pk: int, request) -> Document | None:
    """
    Resolve which Document row should be considered for caching keys.

    Resolution order:
    - If a ``version`` query parameter is given, use that document, but only
      when it is the head document itself or one of the head's versions.
    - Otherwise, if ``pk`` is a head document, use its newest version (highest
      id) if it has any, else the head itself.
    - Otherwise ``pk`` is itself a version, so use it as-is.

    Returns None if resolution fails for any reason (unknown ``pk``, head row
    not found, malformed or unrelated ``version``); callers treat None as
    "do not cache".
    """
    try:
        request_doc = Document.objects.only("id", "head_version_id").get(pk=pk)
        # The head lookup must be guarded as well: if the head row is not
        # visible through Document.objects (presumably e.g. when it has been
        # soft-deleted -- TODO confirm), this must yield "no cache" instead of
        # an unhandled DoesNotExist inside the conditional-request decorators.
        head_doc = (
            request_doc
            if request_doc.head_version_id is None
            else Document.objects.only("id").get(id=request_doc.head_version_id)
        )
    except Document.DoesNotExist:
        return None

    # Not every request object passed in is guaranteed to expose query_params.
    version_param = (
        request.query_params.get("version")
        if hasattr(request, "query_params")
        else None
    )
    if version_param:
        try:
            version_id = int(version_param)
            candidate = Document.objects.only("id", "head_version_id").get(
                id=version_id,
            )
        except Exception:
            # Deliberately broad best-effort: any parse or lookup failure simply
            # disables caching for this request.
            return None
        # Reject ids that belong to a different document's version chain.
        if candidate.id != head_doc.id and candidate.head_version_id != head_doc.id:
            return None
        return candidate

    # Default behavior: if pk is a head doc, prefer its newest child version
    if request_doc.head_version_id is None:
        latest = head_doc.versions.only("id").order_by("id").last()
        return latest or head_doc

    # pk is already a version
    return request_doc
def suggestions_etag(request, pk: int) -> str | None:
"""
Returns an optional string for the ETag, allowing browser caching of
@@ -71,11 +116,10 @@ def metadata_etag(request, pk: int) -> str | None:
Metadata is extracted from the original file, so use its checksum as the
ETag
"""
try:
doc = Document.objects.only("checksum").get(pk=pk)
return doc.checksum
except Document.DoesNotExist: # pragma: no cover
doc = _resolve_effective_doc(pk, request)
if doc is None:
return None
return doc.checksum
return None
@@ -85,11 +129,10 @@ def metadata_last_modified(request, pk: int) -> datetime | None:
not the modification of the original file, but of the database object, but might as well
err on the side of caution
"""
try:
doc = Document.objects.only("modified").get(pk=pk)
return doc.modified
except Document.DoesNotExist: # pragma: no cover
doc = _resolve_effective_doc(pk, request)
if doc is None:
return None
return doc.modified
return None
@@ -97,15 +140,15 @@ def preview_etag(request, pk: int) -> str | None:
"""
ETag for the document preview, using the original or archive checksum, depending on the request
"""
try:
doc = Document.objects.only("checksum", "archive_checksum").get(pk=pk)
doc = _resolve_effective_doc(pk, request)
if doc is None:
return None
use_original = (
"original" in request.query_params
hasattr(request, "query_params")
and "original" in request.query_params
and request.query_params["original"] == "true"
)
return doc.checksum if use_original else doc.archive_checksum
except Document.DoesNotExist: # pragma: no cover
return None
return None
@@ -114,11 +157,10 @@ def preview_last_modified(request, pk: int) -> datetime | None:
Uses the documents modified time to set the Last-Modified header. Not strictly
speaking correct, but close enough and quick
"""
try:
doc = Document.objects.only("modified").get(pk=pk)
return doc.modified
except Document.DoesNotExist: # pragma: no cover
doc = _resolve_effective_doc(pk, request)
if doc is None:
return None
return doc.modified
return None
@@ -128,10 +170,13 @@ def thumbnail_last_modified(request, pk: int) -> datetime | None:
Cache should be (slightly?) faster than filesystem
"""
try:
doc = Document.objects.only("storage_type").get(pk=pk)
doc = _resolve_effective_doc(pk, request)
if doc is None:
return None
if not doc.thumbnail_path.exists():
return None
doc_key = get_thumbnail_modified_key(pk)
# Use the effective document id for cache key
doc_key = get_thumbnail_modified_key(doc.id)
cache_hit = cache.get(doc_key)
if cache_hit is not None:

View File

@@ -113,6 +113,12 @@ class ConsumerPluginMixin:
self.filename = self.metadata.filename or self.input_doc.original_file.name
if input_doc.head_version_id:
self.log.debug(f"Document head version id: {input_doc.head_version_id}")
head_version = Document.objects.get(pk=input_doc.head_version_id)
version_index = head_version.versions.count()
self.filename += f"_v{version_index}"
def _send_progress(
self,
current_progress: int,
@@ -470,6 +476,38 @@ class ConsumerPlugin(
try:
with transaction.atomic():
# store the document.
if self.input_doc.head_version_id:
# If this is a new version of an existing document, we need
# to make sure we're not creating a new document, but updating
# the existing one.
original_document = Document.objects.get(
pk=self.input_doc.head_version_id,
)
self.log.debug("Saving record for updated version to database")
original_document.pk = None
original_document.head_version = Document.objects.get(
pk=self.input_doc.head_version_id,
)
file_for_checksum = (
self.unmodified_original
if self.unmodified_original is not None
else self.working_copy
)
original_document.checksum = hashlib.md5(
file_for_checksum.read_bytes(),
).hexdigest()
original_document.content = text
original_document.page_count = page_count
original_document.mime_type = mime_type
original_document.original_filename = self.filename
# Clear unique file path fields so they can be generated uniquely later
original_document.filename = None
original_document.archive_filename = None
original_document.archive_checksum = None
original_document.modified = timezone.now()
original_document.save()
document = original_document
else:
document = self._store(
text=text,
date=date,

View File

@@ -156,6 +156,7 @@ class ConsumableDocument:
source: DocumentSource
original_file: Path
head_version_id: int | None = None
mailrule_id: int | None = None
mime_type: str = dataclasses.field(init=False, default=None)

View File

@@ -0,0 +1,26 @@
# Generated by Django 5.1.6 on 2025-02-26 17:08
import django.db.models.deletion
from django.db import migrations
from django.db import models
# Schema migration: adds the nullable self-referential "head_version" foreign
# key to the document model.
class Migration(migrations.Migration):
# Must be applied after documents migration 1071.
dependencies = [
("documents", "1071_tag_tn_ancestors_count_tag_tn_ancestors_pks_and_more"),
]
operations = [
migrations.AddField(
model_name="document",
name="head_version",
# null/blank are allowed, so existing rows need no value for this column.
# CASCADE: deleting the referenced document also deletes the rows pointing
# at it. The reverse accessor on the referenced document is "versions".
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="versions",
to="documents.document",
verbose_name="head version of document",
),
),
]

View File

@@ -313,6 +313,15 @@ class Document(SoftDeleteModel, ModelWithOwner):
),
)
head_version = models.ForeignKey(
"self",
blank=True,
null=True,
related_name="versions",
on_delete=models.CASCADE,
verbose_name=_("head version of document"),
)
class Meta:
ordering = ("-created",)
verbose_name = _("document")

View File

@@ -974,6 +974,8 @@ class DocumentSerializer(
page_count = SerializerMethodField()
notes = NotesSerializer(many=True, required=False, read_only=True)
head_version = serializers.PrimaryKeyRelatedField(read_only=True)
versions = serializers.PrimaryKeyRelatedField(many=True, read_only=True)
custom_fields = CustomFieldInstanceSerializer(
many=True,
@@ -1016,6 +1018,10 @@ class DocumentSerializer(
request.version if request else settings.REST_FRAMEWORK["DEFAULT_VERSION"],
)
if doc.get("versions") is not None:
doc["versions"] = sorted(doc["versions"], reverse=True)
doc["versions"].append(doc["id"])
if api_version < 9:
# provide created as a datetime for backwards compatibility
from django.utils import timezone
@@ -1184,6 +1190,8 @@ class DocumentSerializer(
"remove_inbox_tags",
"page_count",
"mime_type",
"head_version",
"versions",
)
list_serializer_class = OwnedObjectListSerializer
@@ -1867,6 +1875,15 @@ class PostDocumentSerializer(serializers.Serializer):
return created.date()
# Serializer for uploading a file as a new version of an existing document;
# its only field is the write-only uploaded file.
class DocumentVersionSerializer(serializers.Serializer):
document = serializers.FileField(
label="Document",
write_only=True,
)
# Reuse PostDocumentSerializer's validation for the "document" field. Note this
# is a method already bound to a PostDocumentSerializer instance created once,
# when this class body is evaluated -- so inside it, "self" is that instance,
# not the DocumentVersionSerializer being validated.
validate_document = PostDocumentSerializer().validate_document
class BulkDownloadSerializer(DocumentListSerializer):
content = serializers.ChoiceField(
choices=["archive", "originals", "both"],

View File

@@ -145,13 +145,17 @@ def consume_file(
if overrides is None:
overrides = DocumentMetadataOverrides()
plugins: list[type[ConsumeTaskPlugin]] = [
plugins: list[type[ConsumeTaskPlugin]] = (
[ConsumerPreflightPlugin, ConsumerPlugin]
if input_doc.head_version_id is not None
else [
ConsumerPreflightPlugin,
CollatePlugin,
BarcodePlugin,
WorkflowTriggerPlugin,
ConsumerPlugin,
]
)
with (
ProgressManager(

View File

@@ -787,10 +787,8 @@ class TestPDFActions(DirectoriesMixin, TestCase):
mock_consume_file.assert_not_called()
@mock.patch("documents.tasks.bulk_update_documents.si")
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.s")
@mock.patch("celery.chord.delay")
def test_rotate(self, mock_chord, mock_update_document, mock_update_documents):
@mock.patch("documents.tasks.consume_file.delay")
def test_rotate(self, mock_consume_delay):
"""
GIVEN:
- Existing documents
@@ -801,19 +799,22 @@ class TestPDFActions(DirectoriesMixin, TestCase):
"""
doc_ids = [self.doc1.id, self.doc2.id]
result = bulk_edit.rotate(doc_ids, 90)
self.assertEqual(mock_update_document.call_count, 2)
mock_update_documents.assert_called_once()
mock_chord.assert_called_once()
self.assertEqual(mock_consume_delay.call_count, 2)
for call, expected_id in zip(
mock_consume_delay.call_args_list,
doc_ids,
):
consumable, overrides = call.args
self.assertEqual(consumable.head_version_id, expected_id)
self.assertIsNotNone(overrides)
self.assertEqual(result, "OK")
@mock.patch("documents.tasks.bulk_update_documents.si")
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.s")
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.Pdf.save")
def test_rotate_with_error(
self,
mock_pdf_save,
mock_update_archive_file,
mock_update_documents,
mock_consume_delay,
):
"""
GIVEN:
@@ -832,16 +833,12 @@ class TestPDFActions(DirectoriesMixin, TestCase):
error_str = cm.output[0]
expected_str = "Error rotating document"
self.assertIn(expected_str, error_str)
mock_update_archive_file.assert_not_called()
mock_consume_delay.assert_not_called()
@mock.patch("documents.tasks.bulk_update_documents.si")
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.s")
@mock.patch("celery.chord.delay")
@mock.patch("documents.tasks.consume_file.delay")
def test_rotate_non_pdf(
self,
mock_chord,
mock_update_document,
mock_update_documents,
mock_consume_delay,
):
"""
GIVEN:
@@ -856,14 +853,16 @@ class TestPDFActions(DirectoriesMixin, TestCase):
output_str = cm.output[1]
expected_str = "Document 4 is not a PDF, skipping rotation"
self.assertIn(expected_str, output_str)
self.assertEqual(mock_update_document.call_count, 1)
mock_update_documents.assert_called_once()
mock_chord.assert_called_once()
self.assertEqual(mock_consume_delay.call_count, 1)
consumable, overrides = mock_consume_delay.call_args[0]
self.assertEqual(consumable.head_version_id, self.doc2.id)
self.assertIsNotNone(overrides)
self.assertEqual(result, "OK")
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.Pdf.save")
def test_delete_pages(self, mock_pdf_save, mock_update_archive_file):
@mock.patch("documents.data_models.magic.from_file", return_value="application/pdf")
def test_delete_pages(self, mock_magic, mock_pdf_save, mock_consume_delay):
"""
GIVEN:
- Existing documents
@@ -871,24 +870,22 @@ class TestPDFActions(DirectoriesMixin, TestCase):
- Delete pages action is called with 1 document and 2 pages
THEN:
- Save should be called once
- Archive file should be updated once
- The document's page_count should be reduced by the number of deleted pages
- A new version should be enqueued via consume_file
"""
doc_ids = [self.doc2.id]
initial_page_count = self.doc2.page_count
pages = [1, 3]
result = bulk_edit.delete_pages(doc_ids, pages)
mock_pdf_save.assert_called_once()
mock_update_archive_file.assert_called_once()
mock_consume_delay.assert_called_once()
consumable, overrides = mock_consume_delay.call_args[0]
self.assertEqual(consumable.head_version_id, self.doc2.id)
self.assertTrue(str(consumable.original_file).endswith("_pages_deleted.pdf"))
self.assertIsNotNone(overrides)
self.assertEqual(result, "OK")
expected_page_count = initial_page_count - len(pages)
self.doc2.refresh_from_db()
self.assertEqual(self.doc2.page_count, expected_page_count)
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.Pdf.save")
def test_delete_pages_with_error(self, mock_pdf_save, mock_update_archive_file):
def test_delete_pages_with_error(self, mock_pdf_save, mock_consume_delay):
"""
GIVEN:
- Existing documents
@@ -897,7 +894,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
- PikePDF raises an error
THEN:
- Save should be called once
- Archive file should not be updated
- No new version should be enqueued
"""
mock_pdf_save.side_effect = Exception("Error saving PDF")
doc_ids = [self.doc2.id]
@@ -908,7 +905,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
error_str = cm.output[0]
expected_str = "Error deleting pages from document"
self.assertIn(expected_str, error_str)
mock_update_archive_file.assert_not_called()
mock_consume_delay.assert_not_called()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
@@ -968,21 +965,18 @@ class TestPDFActions(DirectoriesMixin, TestCase):
self.assertEqual(result, "OK")
mock_chord.assert_called_once()
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
def test_edit_pdf_with_update_document(self, mock_update_document):
@mock.patch("documents.tasks.consume_file.delay")
def test_edit_pdf_with_update_document(self, mock_consume_delay):
"""
GIVEN:
- A single existing PDF document
WHEN:
- edit_pdf is called with update_document=True and a single output
THEN:
- The original document is updated in-place
- The update_document_content_maybe_archive_file task is triggered
- A version update is enqueued targeting the existing document
"""
doc_ids = [self.doc2.id]
operations = [{"page": 1}, {"page": 2}]
original_checksum = self.doc2.checksum
original_page_count = self.doc2.page_count
result = bulk_edit.edit_pdf(
doc_ids,
@@ -992,10 +986,11 @@ class TestPDFActions(DirectoriesMixin, TestCase):
)
self.assertEqual(result, "OK")
self.doc2.refresh_from_db()
self.assertNotEqual(self.doc2.checksum, original_checksum)
self.assertNotEqual(self.doc2.page_count, original_page_count)
mock_update_document.assert_called_once_with(document_id=self.doc2.id)
mock_consume_delay.assert_called_once()
consumable, overrides = mock_consume_delay.call_args[0]
self.assertEqual(consumable.head_version_id, self.doc2.id)
self.assertTrue(str(consumable.original_file).endswith("_edited.pdf"))
self.assertIsNotNone(overrides)
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")

View File

@@ -147,6 +147,7 @@ from documents.serialisers import CustomFieldSerializer
from documents.serialisers import DocumentListSerializer
from documents.serialisers import DocumentSerializer
from documents.serialisers import DocumentTypeSerializer
from documents.serialisers import DocumentVersionSerializer
from documents.serialisers import NotesSerializer
from documents.serialisers import PostDocumentSerializer
from documents.serialisers import RunTaskViewSerializer
@@ -567,7 +568,7 @@ class DocumentViewSet(
GenericViewSet,
):
model = Document
queryset = Document.objects.annotate(num_notes=Count("notes"))
queryset = Document.objects.all()
serializer_class = DocumentSerializer
pagination_class = StandardPagination
permission_classes = (IsAuthenticated, PaperlessObjectPermissions)
@@ -596,7 +597,8 @@ class DocumentViewSet(
def get_queryset(self):
return (
Document.objects.distinct()
Document.objects.filter(head_version__isnull=True)
.distinct()
.order_by("-created")
.annotate(num_notes=Count("notes"))
.select_related("correspondent", "storage_path", "document_type", "owner")
@@ -658,18 +660,55 @@ class DocumentViewSet(
and request.query_params["original"] == "true"
)
def _resolve_file_doc(self, head_doc: Document, request):
    """
    Pick the document whose file should be served for ``head_doc``.

    Without a ``version`` query parameter this is the head's newest version
    (highest id), or the head itself when it has no versions. With one, the
    referenced document is returned, provided it is the head or one of the
    head's versions.

    Raises NotFound when ``version`` is not an integer, and Http404 when the
    referenced document does not exist or is unrelated to ``head_doc``.
    """
    requested = request.query_params.get("version")
    if not requested:
        newest = head_doc.versions.order_by("id").last()
        return newest if newest else head_doc

    try:
        requested_id = int(requested)
    except (TypeError, ValueError):
        raise NotFound("Invalid version parameter")

    version_lookup = Document.global_objects.select_related("owner")
    try:
        version_doc = version_lookup.get(id=requested_id)
    except Document.DoesNotExist:
        raise Http404

    # Only the head itself or one of its own versions may be served.
    is_head = version_doc.id == head_doc.id
    is_version_of_head = version_doc.head_version_id == head_doc.id
    if not (is_head or is_version_of_head):
        raise Http404
    return version_doc
def file_response(self, pk, request, disposition):
doc = Document.global_objects.select_related("owner").get(id=pk)
request_doc = Document.global_objects.select_related("owner").get(id=pk)
head_doc = (
request_doc
if request_doc.head_version_id is None
else Document.global_objects.select_related("owner").get(
id=request_doc.head_version_id,
)
)
if request.user is not None and not has_perms_owner_aware(
request.user,
"view_document",
doc,
head_doc,
):
return HttpResponseForbidden("Insufficient permissions")
# If a version is explicitly requested, use it. Otherwise:
# - if pk is a head document: serve newest version
# - if pk is a version: serve that version
if "version" in request.query_params:
file_doc = self._resolve_file_doc(head_doc, request)
else:
file_doc = (
self._resolve_file_doc(head_doc, request)
if request_doc.head_version_id is None
else request_doc
)
return serve_file(
doc=doc,
doc=file_doc,
use_archive=not self.original_requested(request)
and doc.has_archive_version,
and file_doc.has_archive_version,
disposition=disposition,
)
@@ -704,16 +743,33 @@ class DocumentViewSet(
)
def metadata(self, request, pk=None):
try:
doc = Document.objects.select_related("owner").get(pk=pk)
request_doc = Document.objects.select_related("owner").get(pk=pk)
head_doc = (
request_doc
if request_doc.head_version_id is None
else Document.objects.select_related("owner").get(
id=request_doc.head_version_id,
)
)
if request.user is not None and not has_perms_owner_aware(
request.user,
"view_document",
doc,
head_doc,
):
return HttpResponseForbidden("Insufficient permissions")
except Document.DoesNotExist:
raise Http404
# Choose the effective document (newest version by default, or explicit via ?version=)
if "version" in request.query_params:
doc = self._resolve_file_doc(head_doc, request)
else:
doc = (
self._resolve_file_doc(head_doc, request)
if request_doc.head_version_id is None
else request_doc
)
document_cached_metadata = get_metadata_cache(doc.pk)
archive_metadata = None
@@ -815,8 +871,36 @@ class DocumentViewSet(
)
def preview(self, request, pk=None):
    """
    Return an inline response with the file of document ``pk``.

    Permissions are checked against the head document. The file served is
    the explicitly requested ``?version=``, otherwise the newest version
    when ``pk`` is a head document, otherwise the version ``pk`` itself.
    The archive file is used when available unless the original is
    requested.

    Raises ``Http404`` when the document or its file cannot be found.
    """
    try:
        request_doc = Document.objects.select_related("owner").get(id=pk)
        if request_doc.head_version_id is None:
            head_doc = request_doc
        else:
            head_doc = Document.objects.select_related("owner").get(
                id=request_doc.head_version_id,
            )

        if request.user is not None and not has_perms_owner_aware(
            request.user,
            "view_document",
            head_doc,
        ):
            return HttpResponseForbidden("Insufficient permissions")

        # An explicit ?version= always goes through the resolver; so does a
        # request for a head document (which then yields its newest version).
        if (
            "version" in request.query_params
            or request_doc.head_version_id is None
        ):
            file_doc = self._resolve_file_doc(head_doc, request)
        else:
            file_doc = request_doc

        return serve_file(
            doc=file_doc,
            use_archive=not self.original_requested(request)
            and file_doc.has_archive_version,
            disposition="inline",
        )
    except (FileNotFoundError, Document.DoesNotExist):
        raise Http404
@@ -825,17 +909,32 @@ class DocumentViewSet(
@method_decorator(last_modified(thumbnail_last_modified))
def thumb(self, request, pk=None):
try:
doc = Document.objects.select_related("owner").get(id=pk)
request_doc = Document.objects.select_related("owner").get(id=pk)
head_doc = (
request_doc
if request_doc.head_version_id is None
else Document.objects.select_related("owner").get(
id=request_doc.head_version_id,
)
)
if request.user is not None and not has_perms_owner_aware(
request.user,
"view_document",
doc,
head_doc,
):
return HttpResponseForbidden("Insufficient permissions")
if doc.storage_type == Document.STORAGE_TYPE_GPG:
handle = GnuPG.decrypted(doc.thumbnail_file)
if "version" in request.query_params:
file_doc = self._resolve_file_doc(head_doc, request)
else:
handle = doc.thumbnail_file
file_doc = (
self._resolve_file_doc(head_doc, request)
if request_doc.head_version_id is None
else request_doc
)
if file_doc.storage_type == Document.STORAGE_TYPE_GPG:
handle = GnuPG.decrypted(file_doc.thumbnail_file)
else:
handle = file_doc.thumbnail_file
return HttpResponse(handle, content_type="image/webp")
except (FileNotFoundError, Document.DoesNotExist):
@@ -1103,6 +1202,56 @@ class DocumentViewSet(
"Error emailing document, check logs for more detail.",
)
@action(methods=["post"], detail=True)
def update_version(self, request, pk=None):
    """
    Queue an uploaded file for consumption as a new version of a document.

    The request body is validated with ``DocumentVersionSerializer``; its
    ``document`` field yields the uploaded file name and raw bytes. The
    bytes are written to a fresh directory under ``settings.SCRATCH_DIR``
    and handed to ``consume_file.delay``.

    ``pk`` may name either a head document or one of its versions. In both
    cases the ``change_document`` permission is checked on, and the new
    version is attached to, the head document, matching how the other
    version-aware endpoints treat permissions.

    Returns a ``Response`` carrying the id of the queued task, a
    ``HttpResponseForbidden`` on missing permissions, or a
    ``HttpResponseServerError`` if staging or queueing fails. Raises
    ``Http404`` when the document does not exist.
    """
    serializer = DocumentVersionSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    try:
        request_doc = Document.objects.select_related("owner").get(pk=pk)
        # Always work on the head document so that uploading against a
        # version id does not create a version-of-a-version chain.
        if request_doc.head_version_id is None:
            head_doc = request_doc
        else:
            head_doc = Document.objects.select_related("owner").get(
                id=request_doc.head_version_id,
            )
        if request.user is not None and not has_perms_owner_aware(
            request.user,
            "change_document",
            head_doc,
        ):
            return HttpResponseForbidden("Insufficient permissions")
    except Document.DoesNotExist:
        raise Http404

    try:
        doc_name, doc_data = serializer.validated_data.get("document")

        # Whole-second timestamp applied to the staged file below.
        t = int(mktime(datetime.now().timetuple()))

        settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)

        # A dedicated temp directory per upload keeps the sanitized
        # original file name without clashing with other uploads.
        temp_file_path = Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR)) / Path(
            pathvalidate.sanitize_filename(doc_name),
        )
        temp_file_path.write_bytes(doc_data)
        os.utime(temp_file_path, times=(t, t))

        input_doc = ConsumableDocument(
            source=DocumentSource.ApiUpload,
            original_file=temp_file_path,
            head_version_id=head_doc.pk,
        )

        async_task = consume_file.delay(
            input_doc,
        )
        logger.debug(
            f"Updated document {head_doc.id} with new version",
        )
        return Response(async_task.id)
    except Exception as e:
        logger.warning(f"An error occurred updating document: {e!s}")
        return HttpResponseServerError(
            "Error updating document, check logs for more detail.",
        )
@extend_schema_view(
list=extend_schema(