Compare commits

..

3 Commits

Author SHA1 Message Date
shamoon
11ecc03d71 A ChatGPT script for testing 2025-12-26 11:05:27 -08:00
shamoon
c167a7424c Optimize tag children retrieval 2025-12-26 11:05:27 -08:00
shamoon
5ed31c9657 Run Tag tree updates once per transaction 2025-12-26 11:05:27 -08:00
27 changed files with 270 additions and 933 deletions

View File

@@ -294,13 +294,6 @@ The following methods are supported:
- `"delete_original": true` to delete the original documents after editing.
- `"update_document": true` to update the existing document with the edited PDF.
- `"include_metadata": true` to copy metadata from the original document to the edited document.
- `remove_password`
- Requires `parameters`:
- `"password": "PASSWORD_STRING"` The password to remove from the PDF documents.
- Optional `parameters`:
- `"update_document": true` to replace the existing document with the password-less PDF.
- `"delete_original": true` to delete the original document after editing.
- `"include_metadata": true` to copy metadata from the original document to the new password-less document.
- `merge`
- No additional `parameters` required.
- The ordering of the merged document is determined by the list of IDs.

139
scripts/tag_perf_probe.py Normal file
View File

@@ -0,0 +1,139 @@
# noqa: INP001
"""
Ad-hoc script to gauge Tag + treenode performance locally.
It bootstraps a fresh SQLite DB in a temp folder (or PAPERLESS_DATA_DIR),
uses locmem cache/redis to avoid external services, creates synthetic tags,
and measures:
- creation time
- query count and wall time for the Tag list view
Usage:
PAPERLESS_DEBUG=1 PAPERLESS_REDIS=locmem:// PYTHONPATH=src \
PAPERLESS_DATA_DIR=/tmp/paperless-tags-probe \
.venv/bin/python scripts/tag_perf_probe.py
"""
import os
import sys
import time
from collections.abc import Iterable
from contextlib import contextmanager
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
os.environ.setdefault("PAPERLESS_DEBUG", "1")
os.environ.setdefault("PAPERLESS_REDIS", "locmem://")
os.environ.setdefault("PYTHONPATH", "src")
import django
django.setup()
from django.contrib.auth import get_user_model # noqa: E402
from django.core.management import call_command # noqa: E402
from django.db import connection # noqa: E402
from django.test.client import RequestFactory # noqa: E402
from rest_framework.test import force_authenticate # noqa: E402
from treenode.signals import no_signals # noqa: E402
from documents.models import Tag # noqa: E402
from documents.views import TagViewSet # noqa: E402
User = get_user_model()
@contextmanager
def count_queries():
total = 0
def wrapper(execute, sql, params, many, context):
nonlocal total
total += 1
return execute(sql, params, many, context)
with connection.execute_wrapper(wrapper):
yield lambda: total
def measure_list(tag_count: int, user) -> tuple[int, float]:
"""Render Tag list with page_size=tag_count and return (queries, seconds)."""
rf = RequestFactory()
view = TagViewSet.as_view({"get": "list"})
request = rf.get("/api/tags/", {"page_size": tag_count})
force_authenticate(request, user=user)
with count_queries() as get_count:
start = time.perf_counter()
response = view(request)
response.render()
elapsed = time.perf_counter() - start
total_queries = get_count()
return total_queries, elapsed
def bulk_create_tags(count: int, parents: Iterable[Tag] | None = None) -> None:
"""Create tags; when parents provided, create one child per parent."""
if parents is None:
Tag.objects.bulk_create([Tag(name=f"Flat {i}") for i in range(count)])
return
children = []
for p in parents:
children.append(Tag(name=f"Child {p.id}", tn_parent=p))
Tag.objects.bulk_create(children)
def run():
# Ensure tables exist when pointing at a fresh DATA_DIR.
call_command("migrate", interactive=False, verbosity=0)
user, _ = User.objects.get_or_create(
username="admin",
defaults={"is_superuser": True, "is_staff": True},
)
# Flat scenario
Tag.objects.all().delete()
start = time.perf_counter()
bulk_create_tags(200)
flat_create = time.perf_counter() - start
q, t = measure_list(tag_count=200, user=user)
print(f"Flat create 200 -> {flat_create:.2f}s; list -> {q} queries, {t:.2f}s") # noqa: T201
# Nested scenario (parents + 2 children each => 600 total)
Tag.objects.all().delete()
start = time.perf_counter()
with no_signals(): # avoid per-save tree rebuild; rebuild once
parents = Tag.objects.bulk_create([Tag(name=f"Parent {i}") for i in range(200)])
children = []
for p in parents:
children.extend(
Tag(name=f"Child {p.id}-{j}", tn_parent=p) for j in range(2)
)
Tag.objects.bulk_create(children)
Tag.update_tree()
nested_create = time.perf_counter() - start
q, t = measure_list(tag_count=600, user=user)
print(f"Nested create 600 -> {nested_create:.2f}s; list -> {q} queries, {t:.2f}s") # noqa: T201
# Larger nested scenario (1 child per parent, 3000 total)
Tag.objects.all().delete()
start = time.perf_counter()
with no_signals():
parents = Tag.objects.bulk_create(
[Tag(name=f"Parent {i}") for i in range(1500)],
)
bulk_create_tags(0, parents=parents)
Tag.update_tree()
big_create = time.perf_counter() - start
q, t = measure_list(tag_count=3000, user=user)
print(f"Nested create 3000 -> {big_create:.2f}s; list -> {q} queries, {t:.2f}s") # noqa: T201
if __name__ == "__main__":
if "runserver" in sys.argv:
print("Run directly: .venv/bin/python scripts/tag_perf_probe.py") # noqa: T201
sys.exit(1)
run()

View File

@@ -1,75 +0,0 @@
<div class="modal-header">
<h4 class="modal-title" id="modal-basic-title">{{title}}</h4>
<button type="button" class="btn-close" aria-label="Close" (click)="cancel()">
</button>
</div>
<div class="modal-body">
@if (message) {
<p class="mb-3" [innerHTML]="message"></p>
}
<div class="btn-group mb-3" role="group">
<input
type="radio"
class="btn-check"
name="passwordRemoveMode"
id="removeReplace"
[(ngModel)]="updateDocument"
[value]="true"
(ngModelChange)="onUpdateDocumentChange($event)"
/>
<label class="btn btn-outline-primary btn-sm" for="removeReplace">
<i-bs name="pencil"></i-bs>
<span class="ms-2" i18n>Replace current document</span>
</label>
<input
type="radio"
class="btn-check"
name="passwordRemoveMode"
id="removeCreate"
[(ngModel)]="updateDocument"
[value]="false"
(ngModelChange)="onUpdateDocumentChange($event)"
/>
<label class="btn btn-outline-primary btn-sm" for="removeCreate">
<i-bs name="plus"></i-bs>
<span class="ms-2" i18n>Create new document</span>
</label>
</div>
@if (!updateDocument) {
<div class="d-flex flex-column flex-md-row w-100 gap-3 align-items-center">
<div class="form-group d-flex">
<div class="form-check">
<input class="form-check-input" type="checkbox" id="copyMetaRemove" [(ngModel)]="includeMetadata" />
<label class="form-check-label" for="copyMetaRemove" i18n> Copy metadata
</label>
</div>
<div class="form-check ms-3">
<input class="form-check-input" type="checkbox" id="deleteOriginalRemove" [(ngModel)]="deleteOriginal" />
<label class="form-check-label" for="deleteOriginalRemove" i18n> Delete original</label>
</div>
</div>
</div>
}
</div>
<div class="modal-footer flex-nowrap gap-2">
<button
type="button"
class="btn"
[class]="cancelBtnClass"
(click)="cancel()"
[disabled]="!buttonsEnabled"
>
<span class="d-inline-block" style="padding-bottom: 1px;">
{{cancelBtnCaption}}
</span>
</button>
<button
type="button"
class="btn"
[class]="btnClass"
(click)="confirm()"
[disabled]="!confirmButtonEnabled || !buttonsEnabled"
>
{{btnCaption}}
</button>
</div>

View File

@@ -1,53 +0,0 @@
import { ComponentFixture, TestBed } from '@angular/core/testing'
import { By } from '@angular/platform-browser'
import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons'
import { PasswordRemovalConfirmDialogComponent } from './password-removal-confirm-dialog.component'
describe('PasswordRemovalConfirmDialogComponent', () => {
let component: PasswordRemovalConfirmDialogComponent
let fixture: ComponentFixture<PasswordRemovalConfirmDialogComponent>
beforeEach(async () => {
await TestBed.configureTestingModule({
providers: [NgbActiveModal],
imports: [
NgxBootstrapIconsModule.pick(allIcons),
PasswordRemovalConfirmDialogComponent,
],
}).compileComponents()
fixture = TestBed.createComponent(PasswordRemovalConfirmDialogComponent)
component = fixture.componentInstance
fixture.detectChanges()
})
it('should default to replacing the document', () => {
expect(component.updateDocument).toBe(true)
expect(
fixture.debugElement.query(By.css('#removeReplace')).nativeElement.checked
).toBe(true)
})
it('should allow creating a new document with metadata and delete toggle', () => {
component.onUpdateDocumentChange(false)
fixture.detectChanges()
expect(component.updateDocument).toBe(false)
expect(fixture.debugElement.query(By.css('#copyMetaRemove'))).not.toBeNull()
component.includeMetadata = false
component.deleteOriginal = true
component.onUpdateDocumentChange(true)
expect(component.updateDocument).toBe(true)
expect(component.includeMetadata).toBe(true)
expect(component.deleteOriginal).toBe(false)
})
it('should emit confirm when confirmed', () => {
let confirmed = false
component.confirmClicked.subscribe(() => (confirmed = true))
component.confirm()
expect(confirmed).toBe(true)
})
})

View File

@@ -1,38 +0,0 @@
import { Component, Input } from '@angular/core'
import { FormsModule } from '@angular/forms'
import { NgxBootstrapIconsModule } from 'ngx-bootstrap-icons'
import { ConfirmDialogComponent } from '../confirm-dialog.component'
@Component({
selector: 'pngx-password-removal-confirm-dialog',
templateUrl: './password-removal-confirm-dialog.component.html',
styleUrls: ['./password-removal-confirm-dialog.component.scss'],
imports: [FormsModule, NgxBootstrapIconsModule],
})
export class PasswordRemovalConfirmDialogComponent extends ConfirmDialogComponent {
updateDocument: boolean = true
includeMetadata: boolean = true
deleteOriginal: boolean = false
@Input()
override title = $localize`Remove password protection`
@Input()
override message =
$localize`Create an unprotected copy or replace the existing file.`
@Input()
override btnCaption = $localize`Start`
constructor() {
super()
}
onUpdateDocumentChange(updateDocument: boolean) {
this.updateDocument = updateDocument
if (this.updateDocument) {
this.deleteOriginal = false
this.includeMetadata = true
}
}
}

View File

@@ -430,22 +430,6 @@
</div>
</div>
}
@case (WorkflowActionType.PasswordRemoval) {
<div class="row">
<div class="col">
<p class="small" i18n>
One or more passwords separated by commas or new lines. The workflow will try them in order until one succeeds.
</p>
<pngx-input-textarea
i18n-title
title="Passwords"
formControlName="passwords"
rows="4"
[error]="error?.actions?.[i]?.passwords"
></pngx-input-textarea>
</div>
</div>
}
}
</div>
</ng-template>

View File

@@ -139,10 +139,6 @@ export const WORKFLOW_ACTION_OPTIONS = [
id: WorkflowActionType.Webhook,
name: $localize`Webhook`,
},
{
id: WorkflowActionType.PasswordRemoval,
name: $localize`Password removal`,
},
]
export enum TriggerFilterType {
@@ -1137,7 +1133,6 @@ export class WorkflowEditDialogComponent
headers: new FormControl(action.webhook?.headers),
include_document: new FormControl(!!action.webhook?.include_document),
}),
passwords: new FormControl(action.passwords),
}),
{ emitEvent }
)

View File

@@ -65,12 +65,6 @@
<button ngbDropdownItem (click)="editPdf()" [disabled]="!userIsOwner || !userCanEdit || originalContentRenderType !== ContentRenderType.PDF">
<i-bs name="pencil"></i-bs>&nbsp;<ng-container i18n>PDF Editor</ng-container>
</button>
@if (userIsOwner && (requiresPassword || password)) {
<button ngbDropdownItem (click)="removePassword()" [disabled]="!password">
<i-bs name="unlock"></i-bs>&nbsp;<ng-container i18n>Remove Password</ng-container>
</button>
}
</div>
</div>

View File

@@ -66,7 +66,6 @@ import { SettingsService } from 'src/app/services/settings.service'
import { ToastService } from 'src/app/services/toast.service'
import { environment } from 'src/environments/environment'
import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component'
import { PasswordRemovalConfirmDialogComponent } from '../common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component'
import { CustomFieldsDropdownComponent } from '../common/custom-fields-dropdown/custom-fields-dropdown.component'
import {
DocumentDetailComponent,
@@ -1210,88 +1209,6 @@ describe('DocumentDetailComponent', () => {
expect(closeSpy).toHaveBeenCalled()
})
it('should support removing password protection from pdfs', () => {
let modal: NgbModalRef
modalService.activeInstances.subscribe((m) => (modal = m[0]))
initNormally()
component.password = 'secret'
component.removePassword()
const dialog =
modal.componentInstance as PasswordRemovalConfirmDialogComponent
dialog.updateDocument = false
dialog.includeMetadata = false
dialog.deleteOriginal = true
dialog.confirm()
const req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/bulk_edit/`
)
expect(req.request.body).toEqual({
documents: [doc.id],
method: 'remove_password',
parameters: {
password: 'secret',
update_document: false,
include_metadata: false,
delete_original: true,
},
})
req.flush(true)
})
it('should require the current password before removing it', () => {
initNormally()
const errorSpy = jest.spyOn(toastService, 'showError')
component.requiresPassword = true
component.password = ''
component.removePassword()
expect(errorSpy).toHaveBeenCalled()
httpTestingController.expectNone(
`${environment.apiBaseUrl}documents/bulk_edit/`
)
})
it('should handle failures when removing password protection', () => {
let modal: NgbModalRef
modalService.activeInstances.subscribe((m) => (modal = m[0]))
initNormally()
const errorSpy = jest.spyOn(toastService, 'showError')
component.password = 'secret'
component.removePassword()
const dialog =
modal.componentInstance as PasswordRemovalConfirmDialogComponent
dialog.confirm()
const req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/bulk_edit/`
)
req.error(new ErrorEvent('failed'))
expect(errorSpy).toHaveBeenCalled()
expect(component.networkActive).toBe(false)
expect(dialog.buttonsEnabled).toBe(true)
})
it('should refresh the document when removing password in update mode', () => {
let modal: NgbModalRef
modalService.activeInstances.subscribe((m) => (modal = m[0]))
const refreshSpy = jest.spyOn(openDocumentsService, 'refreshDocument')
initNormally()
component.password = 'secret'
component.removePassword()
const dialog =
modal.componentInstance as PasswordRemovalConfirmDialogComponent
dialog.confirm()
const req = httpTestingController.expectOne(
`${environment.apiBaseUrl}documents/bulk_edit/`
)
req.flush(true)
expect(refreshSpy).toHaveBeenCalledWith(doc.id)
})
it('should support keyboard shortcuts', () => {
initNormally()

View File

@@ -83,7 +83,6 @@ import { getFilenameFromContentDisposition } from 'src/app/utils/http'
import { ISODateAdapter } from 'src/app/utils/ngb-iso-date-adapter'
import * as UTIF from 'utif'
import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component'
import { PasswordRemovalConfirmDialogComponent } from '../common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component'
import { CustomFieldsDropdownComponent } from '../common/custom-fields-dropdown/custom-fields-dropdown.component'
import { CorrespondentEditDialogComponent } from '../common/edit-dialog/correspondent-edit-dialog/correspondent-edit-dialog.component'
import { DocumentTypeEditDialogComponent } from '../common/edit-dialog/document-type-edit-dialog/document-type-edit-dialog.component'
@@ -176,7 +175,6 @@ export enum ZoomSetting {
NgxBootstrapIconsModule,
PdfViewerModule,
TextAreaComponent,
PasswordRemovalConfirmDialogComponent,
],
})
export class DocumentDetailComponent
@@ -1430,63 +1428,6 @@ export class DocumentDetailComponent
})
}
removePassword() {
if (this.requiresPassword || !this.password) {
this.toastService.showError(
$localize`Please enter the current password before attempting to remove it.`
)
return
}
const modal = this.modalService.open(
PasswordRemovalConfirmDialogComponent,
{
backdrop: 'static',
}
)
modal.componentInstance.title = $localize`Remove password protection`
modal.componentInstance.message = $localize`Create an unprotected copy or replace the existing file.`
modal.componentInstance.btnCaption = $localize`Start`
modal.componentInstance.confirmClicked
.pipe(takeUntil(this.unsubscribeNotifier))
.subscribe(() => {
const dialog =
modal.componentInstance as PasswordRemovalConfirmDialogComponent
dialog.buttonsEnabled = false
this.networkActive = true
this.documentsService
.bulkEdit([this.document.id], 'remove_password', {
password: this.password,
update_document: dialog.updateDocument,
include_metadata: dialog.includeMetadata,
delete_original: dialog.deleteOriginal,
})
.pipe(first(), takeUntil(this.unsubscribeNotifier))
.subscribe({
next: () => {
this.toastService.showInfo(
$localize`Password removal operation for "${this.document.title}" will begin in the background.`
)
this.networkActive = false
modal.close()
if (!dialog.updateDocument && dialog.deleteOriginal) {
this.openDocumentService.closeDocument(this.document)
} else if (dialog.updateDocument) {
this.openDocumentService.refreshDocument(this.documentId)
}
},
error: (error) => {
dialog.buttonsEnabled = true
this.networkActive = false
this.toastService.showError(
$localize`Error executing password removal operation`,
error
)
},
})
})
}
printDocument() {
const printUrl = this.documentsService.getDownloadUrl(
this.document.id,

View File

@@ -5,7 +5,6 @@ export enum WorkflowActionType {
Removal = 2,
Email = 3,
Webhook = 4,
PasswordRemoval = 5,
}
export interface WorkflowActionEmail extends ObjectWithId {
@@ -98,6 +97,4 @@ export interface WorkflowAction extends ObjectWithId {
email?: WorkflowActionEmail
webhook?: WorkflowActionWebhook
passwords?: string
}

View File

@@ -132,7 +132,6 @@ import {
threeDotsVertical,
trash,
uiRadios,
unlock,
upcScan,
windowStack,
x,
@@ -349,7 +348,6 @@ const icons = {
threeDotsVertical,
trash,
uiRadios,
unlock,
upcScan,
windowStack,
x,

View File

@@ -1,5 +1,9 @@
from django.apps import AppConfig
from django.db.models.signals import post_delete
from django.db.models.signals import post_save
from django.utils.translation import gettext_lazy as _
from treenode.signals import post_delete_treenode
from treenode.signals import post_save_treenode
class DocumentsConfig(AppConfig):
@@ -8,12 +12,14 @@ class DocumentsConfig(AppConfig):
verbose_name = _("Documents")
def ready(self):
from documents.models import Tag
from documents.signals import document_consumption_finished
from documents.signals import document_updated
from documents.signals.handlers import add_inbox_tags
from documents.signals.handlers import add_to_index
from documents.signals.handlers import run_workflows_added
from documents.signals.handlers import run_workflows_updated
from documents.signals.handlers import schedule_tag_tree_update
from documents.signals.handlers import set_correspondent
from documents.signals.handlers import set_document_type
from documents.signals.handlers import set_storage_path
@@ -28,6 +34,29 @@ class DocumentsConfig(AppConfig):
document_consumption_finished.connect(run_workflows_added)
document_updated.connect(run_workflows_updated)
# treenode updates the entire tree on every save/delete via hooks
# so disconnect for Tags and run once-per-transaction.
post_save.disconnect(
post_save_treenode,
sender=Tag,
dispatch_uid="post_save_treenode",
)
post_delete.disconnect(
post_delete_treenode,
sender=Tag,
dispatch_uid="post_delete_treenode",
)
post_save.connect(
schedule_tag_tree_update,
sender=Tag,
dispatch_uid="paperless_tag_mark_dirty_save",
)
post_delete.connect(
schedule_tag_tree_update,
sender=Tag,
dispatch_uid="paperless_tag_mark_dirty_delete",
)
import documents.schema # noqa: F401
AppConfig.ready(self)

View File

@@ -646,77 +646,6 @@ def edit_pdf(
return "OK"
def remove_password(
doc_ids: list[int],
password: str,
*,
update_document: bool = False,
delete_original: bool = False,
include_metadata: bool = True,
user: User | None = None,
) -> Literal["OK"]:
"""
Remove password protection from PDF documents.
"""
import pikepdf
for doc_id in doc_ids:
doc = Document.objects.get(id=doc_id)
try:
logger.info(
f"Attempting password removal from document {doc_ids[0]}",
)
with pikepdf.open(doc.source_path, password=password) as pdf:
temp_path = doc.source_path.with_suffix(".tmp.pdf")
pdf.remove_unreferenced_resources()
pdf.save(temp_path)
if update_document:
# replace the original document with the unprotected one
temp_path.replace(doc.source_path)
doc.checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
doc.page_count = len(pdf.pages)
doc.save()
update_document_content_maybe_archive_file.delay(document_id=doc.id)
else:
consume_tasks = []
overrides = (
DocumentMetadataOverrides().from_document(doc)
if include_metadata
else DocumentMetadataOverrides()
)
if user is not None:
overrides.owner_id = user.id
filepath: Path = (
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
/ f"{doc.id}_unprotected.pdf"
)
temp_path.replace(filepath)
consume_tasks.append(
consume_file.s(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=filepath,
),
overrides,
),
)
if delete_original:
chord(header=consume_tasks, body=delete.si([doc.id])).delay()
else:
group(consume_tasks).delay()
except Exception as e:
logger.exception(f"Error removing password from document {doc.id}: {e}")
raise ValueError(
f"An error occurred while removing the password: {e}",
) from e
return "OK"
def reflect_doclinks(
document: Document,
field: CustomField,

View File

@@ -22,7 +22,7 @@ class DocumentMetadataOverrides:
document_type_id: int | None = None
tag_ids: list[int] | None = None
storage_path_id: int | None = None
created: datetime.date | None = None
created: datetime.datetime | None = None
asn: int | None = None
owner_id: int | None = None
view_users: list[int] | None = None
@@ -103,7 +103,6 @@ class DocumentMetadataOverrides:
overrides.storage_path_id = doc.storage_path.id if doc.storage_path else None
overrides.owner_id = doc.owner.id if doc.owner else None
overrides.tag_ids = list(doc.tags.values_list("id", flat=True))
overrides.created = doc.created
overrides.view_users = list(
get_users_with_perms(

View File

@@ -1,38 +0,0 @@
# Generated by Django 5.2.7 on 2025-12-29 03:56
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1074_workflowrun_deleted_at_workflowrun_restored_at_and_more"),
]
operations = [
migrations.AddField(
model_name="workflowaction",
name="passwords",
field=models.TextField(
blank=True,
help_text="Passwords to try when removing PDF protection. Separate with commas or new lines.",
null=True,
verbose_name="passwords",
),
),
migrations.AlterField(
model_name="workflowaction",
name="type",
field=models.PositiveIntegerField(
choices=[
(1, "Assignment"),
(2, "Removal"),
(3, "Email"),
(4, "Webhook"),
(5, "Password removal"),
],
default=1,
verbose_name="Workflow Action Type",
),
),
]

View File

@@ -1287,10 +1287,6 @@ class WorkflowAction(models.Model):
4,
_("Webhook"),
)
PASSWORD_REMOVAL = (
5,
_("Password removal"),
)
type = models.PositiveIntegerField(
_("Workflow Action Type"),
@@ -1518,15 +1514,6 @@ class WorkflowAction(models.Model):
verbose_name=_("webhook"),
)
passwords = models.TextField(
_("passwords"),
null=True,
blank=True,
help_text=_(
"Passwords to try when removing PDF protection. Separate with commas or new lines.",
),
)
class Meta:
verbose_name = _("workflow action")
verbose_name_plural = _("workflow actions")

View File

@@ -578,30 +578,34 @@ class TagSerializer(MatchingModelSerializer, OwnedObjectSerializer):
),
)
def get_children(self, obj):
filter_q = self.context.get("document_count_filter")
request = self.context.get("request")
if filter_q is None:
user = getattr(request, "user", None) if request else None
filter_q = get_document_count_filter_for_user(user)
self.context["document_count_filter"] = filter_q
children_map = self.context.get("children_map")
if children_map is not None:
children = children_map.get(obj.pk, [])
else:
filter_q = self.context.get("document_count_filter")
request = self.context.get("request")
if filter_q is None:
user = getattr(request, "user", None) if request else None
filter_q = get_document_count_filter_for_user(user)
self.context["document_count_filter"] = filter_q
children_queryset = (
obj.get_children_queryset()
.select_related("owner")
.annotate(document_count=Count("documents", filter=filter_q))
)
children = (
obj.get_children_queryset()
.select_related("owner")
.annotate(document_count=Count("documents", filter=filter_q))
)
view = self.context.get("view")
ordering = (
OrderingFilter().get_ordering(request, children_queryset, view)
if request and view
else None
)
ordering = ordering or (Lower("name"),)
children_queryset = children_queryset.order_by(*ordering)
view = self.context.get("view")
ordering = (
OrderingFilter().get_ordering(request, children, view)
if request and view
else None
)
ordering = ordering or (Lower("name"),)
children = children.order_by(*ordering)
serializer = TagSerializer(
children_queryset,
children,
many=True,
user=self.user,
full_perms=self.full_perms,
@@ -1421,7 +1425,6 @@ class BulkEditSerializer(
"split",
"delete_pages",
"edit_pdf",
"remove_password",
],
label="Method",
write_only=True,
@@ -1497,8 +1500,6 @@ class BulkEditSerializer(
return bulk_edit.delete_pages
elif method == "edit_pdf":
return bulk_edit.edit_pdf
elif method == "remove_password":
return bulk_edit.remove_password
else: # pragma: no cover
# This will never happen as it is handled by the ChoiceField
raise serializers.ValidationError("Unsupported method.")
@@ -1695,12 +1696,6 @@ class BulkEditSerializer(
f"Page {op['page']} is out of bounds for document with {doc.page_count} pages.",
)
def validate_parameters_remove_password(self, parameters):
if "password" not in parameters:
raise serializers.ValidationError("password not specified")
if not isinstance(parameters["password"], str):
raise serializers.ValidationError("password must be a string")
def validate(self, attrs):
method = attrs["method"]
parameters = attrs["parameters"]
@@ -1741,8 +1736,6 @@ class BulkEditSerializer(
"Edit PDF method only supports one document",
)
self._validate_parameters_edit_pdf(parameters, attrs["documents"][0])
elif method == bulk_edit.remove_password:
self.validate_parameters_remove_password(parameters)
return attrs
@@ -2440,7 +2433,6 @@ class WorkflowActionSerializer(serializers.ModelSerializer):
"remove_change_groups",
"email",
"webhook",
"passwords",
]
def validate(self, attrs):
@@ -2497,20 +2489,6 @@ class WorkflowActionSerializer(serializers.ModelSerializer):
"Webhook data is required for webhook actions",
)
if (
"type" in attrs
and attrs["type"] == WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL
):
passwords = attrs.get("passwords")
if passwords is None or not isinstance(passwords, str):
raise serializers.ValidationError(
"Passwords are required for password removal actions",
)
if not passwords.strip():
raise serializers.ValidationError(
"Passwords are required for password removal actions",
)
return attrs

View File

@@ -19,6 +19,7 @@ from django.db import DatabaseError
from django.db import close_old_connections
from django.db import connections
from django.db import models
from django.db import transaction
from django.db.models import Q
from django.dispatch import receiver
from django.utils import timezone
@@ -46,7 +47,6 @@ from documents.permissions import get_objects_for_user_owner_aware
from documents.templating.utils import convert_format_str_to_template_format
from documents.workflows.actions import build_workflow_action_context
from documents.workflows.actions import execute_email_action
from documents.workflows.actions import execute_password_removal_action
from documents.workflows.actions import execute_webhook_action
from documents.workflows.mutations import apply_assignment_to_document
from documents.workflows.mutations import apply_assignment_to_overrides
@@ -61,6 +61,8 @@ if TYPE_CHECKING:
logger = logging.getLogger("paperless.handlers")
_tag_tree_update_scheduled = False
def add_inbox_tags(sender, document: Document, logging_group=None, **kwargs):
if document.owner is not None:
@@ -793,12 +795,6 @@ def run_workflows(
logging_group,
original_file,
)
elif (
action.type == WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL
and not use_overrides
):
# Password removal only makes sense on actual documents
execute_password_removal_action(action, document, logging_group)
if not use_overrides:
# limit title to 128 characters
@@ -951,3 +947,26 @@ def close_connection_pool_on_worker_init(**kwargs):
for conn in connections.all(initialized_only=True):
if conn.alias == "default" and hasattr(conn, "pool") and conn.pool:
conn.close_pool()
def schedule_tag_tree_update(**_kwargs):
"""
Schedule a single Tag.update_tree() at transaction commit.
Treenode's default post_save hooks rebuild the entire tree on every save,
which is very slow for large tag sets so collapse to one update per
transaction.
"""
global _tag_tree_update_scheduled
if _tag_tree_update_scheduled:
return
_tag_tree_update_scheduled = True
def _run():
global _tag_tree_update_scheduled
try:
Tag.update_tree()
finally:
_tag_tree_update_scheduled = False
transaction.on_commit(_run)

View File

@@ -1582,58 +1582,6 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"out of bounds", response.content)
@mock.patch("documents.serialisers.bulk_edit.remove_password")
def test_remove_password(self, m):
self.setup_mock(m, "remove_password")
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "remove_password",
"parameters": {"password": "secret", "update_document": True},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertCountEqual(args[0], [self.doc2.id])
self.assertEqual(kwargs["password"], "secret")
self.assertTrue(kwargs["update_document"])
self.assertEqual(kwargs["user"], self.user)
def test_remove_password_invalid_params(self):
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "remove_password",
"parameters": {},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"password not specified", response.content)
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "remove_password",
"parameters": {"password": 123},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"password must be a string", response.content)
@override_settings(AUDIT_LOG_ENABLED=True)
def test_bulk_edit_audit_log_enabled_simple_field(self):
"""

View File

@@ -808,57 +808,3 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.action.refresh_from_db()
self.assertEqual(self.action.assign_title, "Patched Title")
def test_password_action_passwords_field(self):
"""
GIVEN:
- Nothing
WHEN:
- A workflow password removal action is created with passwords set
THEN:
- The passwords field is correctly stored and retrieved
"""
passwords = "password1,password2\npassword3"
response = self.client.post(
"/api/workflow_actions/",
{
"type": WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL,
"passwords": passwords,
},
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
self.assertEqual(response.data["passwords"], passwords)
def test_password_action_no_passwords_field(self):
"""
GIVEN:
- Nothing
WHEN:
- A workflow password removal action is created with no passwords set
- A workflow password removal action is created with passwords set to empty string
THEN:
- The required validation error is raised
"""
response = self.client.post(
"/api/workflow_actions/",
{
"type": WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL,
},
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(
"Passwords are required",
str(response.data["non_field_errors"][0]),
)
response = self.client.post(
"/api/workflow_actions/",
{
"type": WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL,
"passwords": "",
},
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(
"Passwords are required",
str(response.data["non_field_errors"][0]),
)

View File

@@ -1,4 +1,3 @@
import hashlib
import shutil
from datetime import date
from pathlib import Path
@@ -582,7 +581,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
- Consume file should be called
"""
doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
metadata_document_id = self.doc2.id
metadata_document_id = self.doc1.id
user = User.objects.create(username="test_user")
result = bulk_edit.merge(
@@ -608,8 +607,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
# With metadata_document_id overrides
result = bulk_edit.merge(doc_ids, metadata_document_id=metadata_document_id)
consume_file_args, _ = mock_consume_file.call_args
self.assertEqual(consume_file_args[1].title, "B (merged)")
self.assertEqual(consume_file_args[1].created, self.doc2.created)
self.assertEqual(consume_file_args[1].title, "A (merged)")
self.assertTrue(consume_file_args[1].skip_asn)
self.assertEqual(result, "OK")
@@ -1067,147 +1065,3 @@ class TestPDFActions(DirectoriesMixin, TestCase):
bulk_edit.edit_pdf(doc_ids, operations, update_document=True)
mock_group.assert_not_called()
mock_consume_file.assert_not_called()
@mock.patch("documents.bulk_edit.update_document_content_maybe_archive_file.delay")
@mock.patch("pikepdf.open")
def test_remove_password_update_document(self, mock_open, mock_update_document):
doc = self.doc1
original_checksum = doc.checksum
fake_pdf = mock.MagicMock()
fake_pdf.pages = [mock.Mock(), mock.Mock(), mock.Mock()]
def save_side_effect(target_path):
Path(target_path).write_bytes(b"new pdf content")
fake_pdf.save.side_effect = save_side_effect
mock_open.return_value.__enter__.return_value = fake_pdf
result = bulk_edit.remove_password(
[doc.id],
password="secret",
update_document=True,
)
self.assertEqual(result, "OK")
mock_open.assert_called_once_with(doc.source_path, password="secret")
fake_pdf.remove_unreferenced_resources.assert_called_once()
doc.refresh_from_db()
self.assertNotEqual(doc.checksum, original_checksum)
expected_checksum = hashlib.md5(doc.source_path.read_bytes()).hexdigest()
self.assertEqual(doc.checksum, expected_checksum)
self.assertEqual(doc.page_count, len(fake_pdf.pages))
mock_update_document.assert_called_once_with(document_id=doc.id)
@mock.patch("documents.bulk_edit.chord")
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
@mock.patch("documents.bulk_edit.tempfile.mkdtemp")
@mock.patch("pikepdf.open")
def test_remove_password_creates_consumable_document(
self,
mock_open,
mock_mkdtemp,
mock_consume_file,
mock_group,
mock_chord,
):
doc = self.doc2
temp_dir = self.dirs.scratch_dir / "remove-password"
temp_dir.mkdir(parents=True, exist_ok=True)
mock_mkdtemp.return_value = str(temp_dir)
fake_pdf = mock.MagicMock()
fake_pdf.pages = [mock.Mock(), mock.Mock()]
def save_side_effect(target_path):
Path(target_path).write_bytes(b"password removed")
fake_pdf.save.side_effect = save_side_effect
mock_open.return_value.__enter__.return_value = fake_pdf
mock_group.return_value.delay.return_value = None
user = User.objects.create(username="owner")
result = bulk_edit.remove_password(
[doc.id],
password="secret",
include_metadata=False,
update_document=False,
delete_original=False,
user=user,
)
self.assertEqual(result, "OK")
mock_open.assert_called_once_with(doc.source_path, password="secret")
mock_consume_file.assert_called_once()
consume_args, _ = mock_consume_file.call_args
consumable_document = consume_args[0]
overrides = consume_args[1]
expected_path = temp_dir / f"{doc.id}_unprotected.pdf"
self.assertTrue(expected_path.exists())
self.assertEqual(
Path(consumable_document.original_file).resolve(),
expected_path.resolve(),
)
self.assertEqual(overrides.owner_id, user.id)
mock_group.assert_called_once_with([mock_consume_file.return_value])
mock_group.return_value.delay.assert_called_once()
mock_chord.assert_not_called()
@mock.patch("documents.bulk_edit.delete")
@mock.patch("documents.bulk_edit.chord")
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
@mock.patch("documents.bulk_edit.tempfile.mkdtemp")
@mock.patch("pikepdf.open")
def test_remove_password_deletes_original(
self,
mock_open,
mock_mkdtemp,
mock_consume_file,
mock_group,
mock_chord,
mock_delete,
):
doc = self.doc2
temp_dir = self.dirs.scratch_dir / "remove-password-delete"
temp_dir.mkdir(parents=True, exist_ok=True)
mock_mkdtemp.return_value = str(temp_dir)
fake_pdf = mock.MagicMock()
fake_pdf.pages = [mock.Mock(), mock.Mock()]
def save_side_effect(target_path):
Path(target_path).write_bytes(b"password removed")
fake_pdf.save.side_effect = save_side_effect
mock_open.return_value.__enter__.return_value = fake_pdf
mock_chord.return_value.delay.return_value = None
result = bulk_edit.remove_password(
[doc.id],
password="secret",
include_metadata=False,
update_document=False,
delete_original=True,
)
self.assertEqual(result, "OK")
mock_open.assert_called_once_with(doc.source_path, password="secret")
mock_consume_file.assert_called_once()
mock_group.assert_not_called()
mock_chord.assert_called_once()
mock_chord.return_value.delay.assert_called_once()
mock_delete.si.assert_called_once_with([doc.id])
@mock.patch("pikepdf.open")
def test_remove_password_open_failure(self, mock_open):
mock_open.side_effect = RuntimeError("wrong password")
with self.assertLogs("paperless.bulk_edit", level="ERROR") as cm:
with self.assertRaises(ValueError) as exc:
bulk_edit.remove_password([self.doc1.id], password="secret")
self.assertIn("wrong password", str(exc.exception))
self.assertIn("Error removing password from document", cm.output[0])

View File

@@ -250,3 +250,16 @@ class TestTagHierarchy(APITestCase):
row for row in response.data["results"] if row["id"] == self.parent.pk
)
assert any(child["id"] == self.child.pk for child in parent_entry["children"])
def test_tag_tree_deferred_update_runs_on_commit(self):
from django.db import transaction
# Create tags inside an explicit transaction and commit.
with transaction.atomic():
parent = Tag.objects.create(name="Parent 2")
child = Tag.objects.create(name="Child 2", tn_parent=parent)
# After commit, tn_* fields should be populated.
parent.refresh_from_db()
child.refresh_from_db()
assert parent.tn_children_count == 1
assert child.tn_ancestors_count == 1

View File

@@ -3548,99 +3548,6 @@ class TestWorkflows(
mock_post.assert_called_once()
@mock.patch("documents.bulk_edit.remove_password")
def test_password_removal_action_attempts_multiple_passwords(
self,
mock_remove_password,
):
doc = Document.objects.create(
title="Protected",
checksum="pw-checksum",
)
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
)
action = WorkflowAction.objects.create(
type=WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL,
passwords="wrong, right\n extra ",
)
workflow = Workflow.objects.create(name="Password workflow")
workflow.triggers.add(trigger)
workflow.actions.add(action)
mock_remove_password.side_effect = [
ValueError("wrong password"),
"OK",
]
run_workflows(trigger.type, doc)
assert mock_remove_password.call_count == 2
mock_remove_password.assert_has_calls(
[
mock.call(
[doc.id],
password="wrong",
update_document=True,
user=doc.owner,
),
mock.call(
[doc.id],
password="right",
update_document=True,
user=doc.owner,
),
],
)
@mock.patch("documents.bulk_edit.remove_password")
def test_password_removal_action_fails_without_correct_password(
self,
mock_remove_password,
):
doc = Document.objects.create(
title="Protected",
checksum="pw-checksum-2",
)
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
)
action = WorkflowAction.objects.create(
type=WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL,
passwords=" \n , ",
)
workflow = Workflow.objects.create(name="Password workflow missing passwords")
workflow.triggers.add(trigger)
workflow.actions.add(action)
run_workflows(trigger.type, doc)
mock_remove_password.assert_not_called()
@mock.patch("documents.bulk_edit.remove_password")
def test_password_removal_action_skips_without_passwords(
self,
mock_remove_password,
):
doc = Document.objects.create(
title="Protected",
checksum="pw-checksum-2",
)
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
)
action = WorkflowAction.objects.create(
type=WorkflowAction.WorkflowActionType.PASSWORD_REMOVAL,
passwords="",
)
workflow = Workflow.objects.create(name="Password workflow missing passwords")
workflow.triggers.add(trigger)
workflow.actions.add(action)
run_workflows(trigger.type, doc)
mock_remove_password.assert_not_called()
class TestWebhookSend:
def test_send_webhook_data_or_json(

View File

@@ -448,8 +448,43 @@ class TagViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
def get_serializer_context(self):
context = super().get_serializer_context()
context["document_count_filter"] = self.get_document_count_filter()
if hasattr(self, "_children_map"):
context["children_map"] = self._children_map
return context
def list(self, request, *args, **kwargs):
"""
Build a children map once to avoid per-parent queries in the serializer.
"""
queryset = self.filter_queryset(self.get_queryset())
ordering = OrderingFilter().get_ordering(request, queryset, self) or (
Lower("name"),
)
queryset = queryset.order_by(*ordering)
all_tags = list(queryset)
descendant_pks = {pk for tag in all_tags for pk in tag.get_descendants_pks()}
if descendant_pks:
filter_q = self.get_document_count_filter()
children_source = (
Tag.objects.filter(pk__in=descendant_pks | {t.pk for t in all_tags})
.select_related("owner")
.annotate(document_count=Count("documents", filter=filter_q))
.order_by(*ordering)
)
else:
children_source = all_tags
children_map = {}
for tag in children_source:
children_map.setdefault(tag.tn_parent_id, []).append(tag)
self._children_map = children_map
page = self.paginate_queryset(queryset)
serializer = self.get_serializer(page, many=True)
return self.get_paginated_response(serializer.data)
def perform_update(self, serializer):
old_parent = self.get_object().get_parent()
tag = serializer.save()
@@ -708,7 +743,6 @@ class DocumentViewSet(
"title",
"correspondent__name",
"document_type__name",
"storage_path__name",
"created",
"modified",
"added",
@@ -1504,7 +1538,6 @@ class BulkEditView(PassUserMixin):
"merge": None,
"edit_pdf": "checksum",
"reprocess": "checksum",
"remove_password": "checksum",
}
permission_classes = (IsAuthenticated,)
@@ -1523,7 +1556,6 @@ class BulkEditView(PassUserMixin):
bulk_edit.split,
bulk_edit.merge,
bulk_edit.edit_pdf,
bulk_edit.remove_password,
]:
parameters["user"] = user
@@ -1552,7 +1584,6 @@ class BulkEditView(PassUserMixin):
bulk_edit.rotate,
bulk_edit.delete_pages,
bulk_edit.edit_pdf,
bulk_edit.remove_password,
]
)
or (
@@ -1569,7 +1600,7 @@ class BulkEditView(PassUserMixin):
and (
method in [bulk_edit.split, bulk_edit.merge]
or (
method in [bulk_edit.edit_pdf, bulk_edit.remove_password]
method == bulk_edit.edit_pdf
and not parameters["update_document"]
)
)

View File

@@ -1,5 +1,4 @@
import logging
import re
from pathlib import Path
from django.conf import settings
@@ -260,59 +259,3 @@ def execute_webhook_action(
f"Error occurred sending webhook: {e}",
extra={"group": logging_group},
)
def execute_password_removal_action(
action: WorkflowAction,
document: Document,
logging_group,
) -> None:
"""
Try to remove a password from a document using the configured list.
"""
passwords = action.passwords
if not passwords:
logger.warning(
"Password removal action %s has no passwords configured",
action.pk,
extra={"group": logging_group},
)
return
passwords = [
password.strip()
for password in re.split(r"[,\n]", passwords)
if password.strip()
]
# import here to avoid circular dependency
from documents.bulk_edit import remove_password
for password in passwords:
try:
remove_password(
[document.id],
password=password,
update_document=True,
user=document.owner,
)
logger.info(
"Removed password from document %s using workflow action %s",
document.pk,
action.pk,
extra={"group": logging_group},
)
return
except ValueError as e:
logger.warning(
"Password removal failed for document %s with supplied password: %s",
document.pk,
e,
extra={"group": logging_group},
)
logger.error(
"Password removal failed for document %s after trying all provided passwords",
document.pk,
extra={"group": logging_group},
)