Compare commits

...

24 Commits

Author     SHA1        Message  Date
shamoon    8c01926be5  Performance script, available as management command [ci skip]  2026-01-28 11:58:08 -08:00
shamoon    d9091aa42b  Fix user checks in management scripts  2026-01-28 09:56:21 -08:00
shamoon    891f4a2faf  Fix: correctly extract all ids for nested tags (#11888)  2026-01-26 09:12:03 -08:00
shamoon    2312314aa7  Performance: improve treenode inefficiencies (#11606)  2026-01-25 21:47:08 -08:00
shamoon    72e8b73108  Fix test  2026-01-25 17:08:15 -08:00
shamoon    5c9ff367e3  Fixhancement: change date calculation for 'this year' to include future documents (#11884)  2026-01-25 16:56:51 -08:00
Trenton H  94f6b8d36d  Fixes the management scripts under a non-root install where the user ID is something besides 1000 (#11870)  2026-01-23 16:08:28 -08:00
shamoon    32d04e1fd3  Fix: use correct field id for overrides (#11869)  2026-01-23 15:49:22 -08:00
Trenton H  56c744fd56  Fixes the spelling of the commitish argument to the action  2026-01-23 15:49:00 -08:00
shamoon    d1aa76e4ce  Narrow scope of these css rules  2026-01-20 12:30:06 -08:00
shamoon    5381bc5907  Fix: fix tag list horizontal scroll, again (#11839)  2026-01-20 12:30:06 -08:00
shamoon    771f3f150a  Bump version to 2.20.5  2026-01-19 09:18:23 -08:00
shamoon    ecfeff5054  Chore: reverse migration order (#11813)  2026-01-18 11:21:35 -08:00
shamoon    37477d391e  Fix: ensure horizontal scroll for long tag names in list, wrap tags without parent (#11811)  2026-01-18 08:22:01 -08:00
Trenton H  2f1cd31e31  Adds the release-drafter commitish filtering to perhaps generate the release notes better  2026-01-16 07:42:54 -08:00
shamoon    742c136773  Fix: use explicit order field for workflow actions (#11781)  2026-01-16 07:39:00 -08:00
shamoon    3618c50b62  Bump version to 2.20.4  2026-01-13 10:01:42 -08:00
shamoon    6f4497185e  Fix merge conflict  2026-01-13 10:01:41 -08:00
shamoon    e816269db5  Fix: recurring workflow to respect latest run time (#11735)  2026-01-13 09:36:53 -08:00
shamoon    d4e60e13bf  Fixhancement: add error handling and retry when opening index (#11731)  2026-01-13 09:36:44 -08:00
shamoon    cb091665e2  Fix: validate cf integer values within PostgreSQL range (#11666)  2026-01-13 09:36:29 -08:00
shamoon    00bb92e3e1  Fix: support ordering by storage path name (#11661)  2026-01-13 09:36:14 -08:00
shamoon    11ec676909  Fix: propagate metadata override created value (#11659)  2026-01-13 09:36:07 -08:00
shamoon    7c457466b7  Security: prevent path traversal in storage paths  2026-01-13 09:29:48 -08:00
48 changed files with 1235 additions and 80 deletions

View File

@@ -44,6 +44,7 @@ include-labels:
  - 'notable'
exclude-labels:
  - 'skip-changelog'
filter-by-commitish: true
category-template: '### $TITLE'
change-template: '- $TITLE @$AUTHOR ([#$NUMBER]($URL))'
change-title-escapes: '\<*_&#@'

View File

@@ -617,6 +617,7 @@ jobs:
          version: ${{ steps.get_version.outputs.version }}
          prerelease: ${{ steps.get_version.outputs.prerelease }}
          publish: true # ensures release is not marked as draft
          commitish: ${{ github.sha }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - name: Upload release archive

View File

@@ -19,7 +19,8 @@ for command in decrypt_documents \
 	manage_superuser \
 	convert_mariadb_uuid \
 	prune_audit_logs \
-	createsuperuser;
+	createsuperuser \
+	document_perf_benchmark;
 do
 	echo "installing $command..."
 	sed "s/management_command/$command/g" management_script.sh >"$PWD/rootfs/usr/local/bin/$command"

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py management_command "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py management_command "$@" s6-setuidgid paperless python3 manage.py management_command "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py management_command "$@" python3 manage.py management_command "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi
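
Note: every wrapper below applies the same dispatch order: run directly when USER_IS_NON_ROOT is set, drop privileges via s6-setuidgid when running as root, run directly as the paperless user, otherwise abort. A minimal Python sketch of that decision logic, for illustration only (the real wrappers are bash templates generated from management_script.sh):

    import os
    import pwd

    def build_command(argv: list[str]) -> list[str]:
        # Mirrors the wrapper scripts' user dispatch (sketch, not the shipped code).
        manage = ["python3", "manage.py", *argv]
        if os.environ.get("USER_IS_NON_ROOT"):
            return manage  # non-root install: the current user owns the data dirs
        if os.getuid() == 0:
            return ["s6-setuidgid", "paperless", *manage]  # root: drop privileges
        if pwd.getpwuid(os.getuid()).pw_name == "paperless":
            return manage
        raise SystemExit("Unknown user.")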

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py convert_mariadb_uuid "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py convert_mariadb_uuid "$@" s6-setuidgid paperless python3 manage.py convert_mariadb_uuid "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py convert_mariadb_uuid "$@" python3 manage.py convert_mariadb_uuid "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py createsuperuser "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py createsuperuser "$@" s6-setuidgid paperless python3 manage.py createsuperuser "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py createsuperuser "$@" python3 manage.py createsuperuser "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py decrypt_documents "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py decrypt_documents "$@" s6-setuidgid paperless python3 manage.py decrypt_documents "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py decrypt_documents "$@" python3 manage.py decrypt_documents "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_archiver "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_archiver "$@" s6-setuidgid paperless python3 manage.py document_archiver "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_archiver "$@" python3 manage.py document_archiver "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi

View File

@@ -5,10 +5,13 @@ set -e
 cd "${PAPERLESS_SRC_DIR}"
-if [[ $(id -u) == 0 ]]; then
+if [[ -n "${USER_IS_NON_ROOT}" ]]; then
+    python3 manage.py document_create_classifier "$@"
+elif [[ $(id -u) == 0 ]]; then
     s6-setuidgid paperless python3 manage.py document_create_classifier "$@"
 elif [[ $(id -un) == "paperless" ]]; then
     python3 manage.py document_create_classifier "$@"
 else
     echo "Unknown user."
+    exit 1
 fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_exporter "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_exporter "$@" s6-setuidgid paperless python3 manage.py document_exporter "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_exporter "$@" python3 manage.py document_exporter "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_fuzzy_match "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_fuzzy_match "$@" s6-setuidgid paperless python3 manage.py document_fuzzy_match "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_fuzzy_match "$@" python3 manage.py document_fuzzy_match "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_importer "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_importer "$@" s6-setuidgid paperless python3 manage.py document_importer "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_importer "$@" python3 manage.py document_importer "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_index "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_index "$@" s6-setuidgid paperless python3 manage.py document_index "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_index "$@" python3 manage.py document_index "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi

View File

@@ -0,0 +1,17 @@
#!/command/with-contenv /usr/bin/bash
# shellcheck shell=bash
set -e
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_perf_benchmark "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_perf_benchmark "$@"
elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_perf_benchmark "$@"
else
echo "Unknown user."
exit 1
fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_renamer "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_renamer "$@" s6-setuidgid paperless python3 manage.py document_renamer "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_renamer "$@" python3 manage.py document_renamer "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_retagger "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_retagger "$@" s6-setuidgid paperless python3 manage.py document_retagger "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_retagger "$@" python3 manage.py document_retagger "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_sanity_checker "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_sanity_checker "$@" s6-setuidgid paperless python3 manage.py document_sanity_checker "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_sanity_checker "$@" python3 manage.py document_sanity_checker "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py document_thumbnails "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py document_thumbnails "$@" s6-setuidgid paperless python3 manage.py document_thumbnails "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py document_thumbnails "$@" python3 manage.py document_thumbnails "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py mail_fetcher "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py mail_fetcher "$@" s6-setuidgid paperless python3 manage.py mail_fetcher "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py mail_fetcher "$@" python3 manage.py mail_fetcher "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py manage_superuser "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py manage_superuser "$@" s6-setuidgid paperless python3 manage.py manage_superuser "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py manage_superuser "$@" python3 manage.py manage_superuser "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi

View File

@@ -5,10 +5,13 @@ set -e
cd "${PAPERLESS_SRC_DIR}" cd "${PAPERLESS_SRC_DIR}"
if [[ $(id -u) == 0 ]]; then if [[ -n "${USER_IS_NON_ROOT}" ]]; then
python3 manage.py prune_audit_logs "$@"
elif [[ $(id -u) == 0 ]]; then
s6-setuidgid paperless python3 manage.py prune_audit_logs "$@" s6-setuidgid paperless python3 manage.py prune_audit_logs "$@"
elif [[ $(id -un) == "paperless" ]]; then elif [[ $(id -un) == "paperless" ]]; then
python3 manage.py prune_audit_logs "$@" python3 manage.py prune_audit_logs "$@"
else else
echo "Unknown user." echo "Unknown user."
exit 1
fi fi

View File

@@ -1,6 +1,6 @@
 [project]
 name = "paperless-ngx"
-version = "2.20.3"
+version = "2.20.5"
 description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -238,7 +238,7 @@ lint.isort.force-single-line = true
 [tool.codespell]
 write-changes = true
-ignore-words-list = "criterias,afterall,valeu,ureue,equest,ure,assertIn,Oktober"
+ignore-words-list = "criterias,afterall,valeu,ureue,equest,ure,assertIn,Oktober,commitish"
 skip = "src-ui/src/locale/*,src-ui/pnpm-lock.yaml,src-ui/e2e/*,src/paperless_mail/tests/samples/*,src/documents/tests/samples/*,*.po,*.json"

 [tool.pytest.ini_options]

View File

@@ -1,6 +1,6 @@
 {
   "name": "paperless-ngx-ui",
-  "version": "2.20.3",
+  "version": "2.20.5",
   "scripts": {
     "preinstall": "npx only-allow pnpm",
     "ng": "ng",

View File

@@ -252,7 +252,7 @@ describe('WorkflowEditDialogComponent', () => {
     expect(component.object.actions.length).toEqual(2)
   })
-  it('should update order and remove ids from actions on drag n drop', () => {
+  it('should update order on drag n drop', () => {
     const action1 = workflow.actions[0]
     const action2 = workflow.actions[1]
     component.object = workflow
@@ -261,8 +261,6 @@ describe('WorkflowEditDialogComponent', () => {
       WorkflowAction[]
     >)
     expect(component.object.actions).toEqual([action2, action1])
-    expect(action1.id).toBeNull()
-    expect(action2.id).toBeNull()
   })
   it('should not include auto matching in algorithms', () => {

View File

@@ -1283,11 +1283,6 @@ export class WorkflowEditDialogComponent
     const actionField = this.actionFields.at(event.previousIndex)
     this.actionFields.removeAt(event.previousIndex)
     this.actionFields.insert(event.currentIndex, actionField)
-    // removing id will effectively re-create the actions in this order
-    this.object.actions.forEach((a) => (a.id = null))
-    this.actionFields.controls.forEach((c) =>
-      c.get('id').setValue(null, { emitEvent: false })
-    )
   }

   save(): void {

View File

@@ -28,7 +28,7 @@
         </button>
       </ng-template>
       <ng-template ng-option-tmp let-item="item" let-index="index" let-search="searchTerm">
-        <div class="tag-option-row d-flex align-items-center">
+        <div class="tag-option-row d-flex align-items-center" [class.w-auto]="!getTag(item.id)?.parent">
           @if (item.id && tags) {
             @if (getTag(item.id)?.parent) {
               <i-bs name="list-nested" class="me-1"></i-bs>

View File

@@ -22,8 +22,8 @@
 }

 // Dropdown hierarchy reveal for ng-select options
-::ng-deep .ng-dropdown-panel .ng-option {
-  overflow-x: scroll;
+:host ::ng-deep .ng-dropdown-panel .ng-option {
+  overflow-x: auto !important;

   .tag-option-row {
     font-size: 1rem;
@@ -41,12 +41,12 @@
   }
 }

-::ng-deep .ng-dropdown-panel .ng-option:hover .hierarchy-reveal,
-::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-reveal {
+:host ::ng-deep .ng-dropdown-panel .ng-option:hover .hierarchy-reveal,
+:host ::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-reveal {
   max-width: 1000px;
 }

 ::ng-deep .ng-dropdown-panel .ng-option:hover .hierarchy-indicator,
-::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-indicator {
+:host ::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-indicator {
   background: transparent;
 }

View File

@@ -229,6 +229,21 @@ describe('ManagementListComponent', () => {
    expect(reloadSpy).toHaveBeenCalled()
  })
it('should use the all list length for collection size when provided', fakeAsync(() => {
jest.spyOn(tagService, 'listFiltered').mockReturnValueOnce(
of({
count: 1,
all: [1, 2, 3],
results: tags.slice(0, 1),
})
)
component.reloadData()
tick(100)
expect(component.collectionSize).toBe(3)
}))
  it('should support quick filter for objects', () => {
    const qfSpy = jest.spyOn(documentListViewService, 'quickFilter')
    const filterButton = fixture.debugElement.queryAll(By.css('button'))[9]

View File

@@ -171,7 +171,7 @@ export abstract class ManagementListComponent<T extends MatchingModel>
         tap((c) => {
           this.unfilteredData = c.results
           this.data = this.filterData(c.results)
-          this.collectionSize = c.count
+          this.collectionSize = c.all?.length ?? c.count
         }),
         delay(100)
       )

View File

@@ -6,7 +6,7 @@ export const environment = {
   apiVersion: '9', // match src/paperless/settings.py
   appTitle: 'Paperless-ngx',
   tag: 'prod',
-  version: '2.20.3',
+  version: '2.20.5',
   webSocketHost: window.location.host,
   webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
   webSocketBaseUrl: base_url.pathname + 'ws/',

View File

@@ -22,7 +22,7 @@ class DocumentMetadataOverrides:
     document_type_id: int | None = None
     tag_ids: list[int] | None = None
     storage_path_id: int | None = None
-    created: datetime.datetime | None = None
+    created: datetime.date | None = None
     asn: int | None = None
     owner_id: int | None = None
     view_users: list[int] | None = None
@@ -100,6 +100,7 @@ class DocumentMetadataOverrides:
        overrides.storage_path_id = doc.storage_path.id if doc.storage_path else None
        overrides.owner_id = doc.owner.id if doc.owner else None
        overrides.tag_ids = list(doc.tags.values_list("id", flat=True))
        overrides.created = doc.created
        overrides.view_users = list(
            get_users_with_perms(
@@ -114,7 +115,7 @@ class DocumentMetadataOverrides:
             ).values_list("id", flat=True),
         )
         overrides.custom_fields = {
-            custom_field.id: custom_field.value
+            custom_field.field.id: custom_field.value
             for custom_field in doc.custom_fields.all()
         }

View File

@@ -10,6 +10,7 @@ from datetime import time
from datetime import timedelta
from datetime import timezone
from shutil import rmtree
from time import sleep
from typing import TYPE_CHECKING
from typing import Literal
@@ -32,6 +33,7 @@ from whoosh.highlight import HtmlFormatter
from whoosh.idsets import BitSet
from whoosh.idsets import DocIdSet
from whoosh.index import FileIndex
from whoosh.index import LockError
from whoosh.index import create_in
from whoosh.index import exists_in
from whoosh.index import open_dir
@@ -97,11 +99,33 @@ def get_schema() -> Schema:
def open_index(*, recreate=False) -> FileIndex:
    transient_exceptions = (FileNotFoundError, LockError)
    max_retries = 3
    retry_delay = 0.1

    for attempt in range(max_retries + 1):
        try:
            if exists_in(settings.INDEX_DIR) and not recreate:
                return open_dir(settings.INDEX_DIR, schema=get_schema())
            break
        except transient_exceptions as exc:
            is_last_attempt = attempt == max_retries or recreate
            if is_last_attempt:
                logger.exception(
                    "Error while opening the index after retries, recreating.",
                )
                break
            logger.warning(
                "Transient error while opening the index (attempt %s/%s): %s. Retrying.",
                attempt + 1,
                max_retries + 1,
                exc,
            )
            sleep(retry_delay)
        except Exception:
            logger.exception("Error while opening the index, recreating.")
            break

    # create_in doesn't handle corrupted indexes very well, remove the directory entirely first
    if settings.INDEX_DIR.is_dir():
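
Stripped of the Whoosh specifics, the control flow above is a bounded retry that falls through to index recreation. A standalone sketch under that reading (open_fn stands in for the Whoosh open call):

    import time

    def open_with_retry(open_fn, *, transient=(FileNotFoundError,), max_retries=3, delay=0.1):
        # Returns open_fn()'s result; None signals the caller to recreate.
        for attempt in range(max_retries + 1):
            try:
                return open_fn()
            except transient:
                if attempt == max_retries:
                    return None  # retries exhausted: recreate
                time.sleep(delay)
            except Exception:
                return None  # non-transient error: recreate immediately
        return None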
@@ -578,7 +602,7 @@ def rewrite_natural_date_keywords(query_string: str) -> str:
case "this year": case "this year":
start = datetime(local_now.year, 1, 1, 0, 0, 0, tzinfo=tz) start = datetime(local_now.year, 1, 1, 0, 0, 0, tzinfo=tz)
end = datetime.combine(today, time.max, tzinfo=tz) end = datetime(local_now.year, 12, 31, 23, 59, 59, tzinfo=tz)
case "previous week": case "previous week":
days_since_monday = local_now.weekday() days_since_monday = local_now.weekday()
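
The behavioral change in numbers: with a local "now" of 2025-07-15, the old code capped the range at the current day, while the new code spans the whole calendar year, so future-dated documents still match. Illustrative values (old_end approximates the previous datetime.combine(today, time.max) cap):

    from datetime import datetime, timezone

    tz = timezone.utc
    local_now = datetime(2025, 7, 15, 12, 0, tzinfo=tz)

    start = datetime(local_now.year, 1, 1, 0, 0, 0, tzinfo=tz)         # 2025-01-01
    old_end = datetime(2025, 7, 15, 23, 59, 59, tzinfo=tz)             # old cap: today
    new_end = datetime(local_now.year, 12, 31, 23, 59, 59, tzinfo=tz)  # new cap: year end

    assert new_end > old_end  # documents dated Aug-Dec 2025 now fall inside the range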

View File

@@ -0,0 +1,663 @@
import contextlib
import io
import logging
import math
import signal
import uuid
from time import perf_counter
from django.contrib.auth import get_user_model
from django.contrib.auth.models import Permission
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.db import connections
from django.db import reset_queries
from django.db.models import Count
from django.db.models import Q
from django.db.models import Subquery
from guardian.shortcuts import assign_perm
from rest_framework.test import APIClient
from documents.models import CustomField
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import Tag
from documents.permissions import get_objects_for_user_owner_aware
from documents.permissions import permitted_document_ids
class Command(BaseCommand):
    # e.g. docker compose exec webserver python3 manage.py \
    #   document_perf_benchmark --reuse-existing --documents 500000 --chunk-size 5000 --tags 40 --tags-per-doc 3 --custom-fields 6 --custom-fields-per-doc 2
help = (
"Seed a synthetic dataset and benchmark permission-filtered document queries "
"for superusers vs non-superusers."
)
def add_arguments(self, parser):
parser.add_argument(
"--documents",
type=int,
default=10000,
help="Total documents to generate (default: 10,000)",
)
parser.add_argument(
"--owner-ratio",
type=float,
default=0.6,
help="Fraction owned by the benchmarked user (default: 0.6)",
)
parser.add_argument(
"--unowned-ratio",
type=float,
default=0.1,
help="Fraction of unowned documents (default: 0.1)",
)
parser.add_argument(
"--shared-ratio",
type=float,
default=0.25,
help=(
"Fraction of other-user documents that are shared via object perms "
"with the benchmarked user (default: 0.25)"
),
)
parser.add_argument(
"--chunk-size",
type=int,
default=2000,
help="Bulk create size for documents (default: 2000)",
)
parser.add_argument(
"--iterations",
type=int,
default=3,
help="Number of timing runs per query shape (default: 3)",
)
parser.add_argument(
"--prefix",
default="perf-benchmark",
help="Title prefix used to mark generated documents (default: perf-benchmark)",
)
parser.add_argument(
"--tags",
type=int,
default=0,
help="Number of tags to create and assign (default: 0)",
)
parser.add_argument(
"--tags-per-doc",
type=int,
default=1,
help="How many tags to attach to each document (default: 1)",
)
parser.add_argument(
"--custom-fields",
type=int,
default=0,
help="Number of string custom fields to create (default: 0)",
)
parser.add_argument(
"--custom-fields-per-doc",
type=int,
default=1,
help="How many custom field instances per document (default: 1)",
)
parser.add_argument(
"--skip-tags",
action="store_true",
help="Skip tag document_count benchmarks (useful for large datasets on Postgres)",
)
parser.add_argument(
"--skip-custom-fields",
action="store_true",
help="Skip custom field document_count benchmarks",
)
parser.add_argument(
"--reuse-existing",
action="store_true",
help="Keep previously generated documents with the given prefix instead of recreating",
)
parser.add_argument(
"--cleanup",
action="store_true",
help="Delete previously generated documents with the given prefix and exit",
)
parser.add_argument(
"--api-timeout",
type=float,
default=30.0,
help="Per-request timeout (seconds) for API timings (default: 30s)",
)
def handle(self, *args, **options):
# keep options for downstream checks
self.options = options
document_total = options["documents"]
owner_ratio = options["owner_ratio"]
unowned_ratio = options["unowned_ratio"]
shared_ratio = options["shared_ratio"]
chunk_size = options["chunk_size"]
iterations = options["iterations"]
prefix = options["prefix"]
tags = options["tags"]
tags_per_doc = options["tags_per_doc"]
custom_fields = options["custom_fields"]
custom_fields_per_doc = options["custom_fields_per_doc"]
self._validate_ratios(owner_ratio, unowned_ratio)
if tags_per_doc < 0 or custom_fields_per_doc < 0:
raise CommandError("Per-document counts must be non-negative")
target_user, other_user, superuser = self._ensure_users()
skip_seed = False
if options["cleanup"]:
removed = self._cleanup(prefix)
self.stdout.write(
self.style.SUCCESS(f"Removed {removed} generated documents"),
)
return
if not options["reuse_existing"]:
removed = self._cleanup(prefix)
if removed:
self.stdout.write(f"Removed existing generated documents: {removed}")
else:
existing = Document.objects.filter(title__startswith=prefix).count()
if existing:
skip_seed = True
self.stdout.write(
f"Reusing existing dataset with prefix '{prefix}': {existing} docs",
)
if skip_seed:
dataset_size = Document.objects.filter(title__startswith=prefix).count()
self.stdout.write(
self.style.SUCCESS(
f"Dataset ready (reused): {dataset_size} docs | prefix={prefix}",
),
)
else:
self.stdout.write(
f"Seeding {document_total} documents (owner_ratio={owner_ratio}, "
f"unowned_ratio={unowned_ratio}, shared_ratio={shared_ratio})",
)
created_counts = self._seed_documents(
total=document_total,
owner_ratio=owner_ratio,
unowned_ratio=unowned_ratio,
shared_ratio=shared_ratio,
chunk_size=chunk_size,
prefix=prefix,
target_user=target_user,
other_user=other_user,
)
created_tags = []
if tags:
created_tags = self._seed_tags(prefix=prefix, count=tags)
if tags_per_doc and created_tags:
self._assign_tags_to_documents(
prefix=prefix,
tags=created_tags,
tags_per_doc=tags_per_doc,
chunk_size=chunk_size,
)
created_custom_fields = []
if custom_fields:
created_custom_fields = self._seed_custom_fields(prefix, custom_fields)
if custom_fields_per_doc and created_custom_fields:
self._seed_custom_field_instances(
prefix=prefix,
custom_fields=created_custom_fields,
per_doc=custom_fields_per_doc,
chunk_size=chunk_size,
)
dataset_size = Document.objects.filter(title__startswith=prefix).count()
self.stdout.write(
self.style.SUCCESS(
f"Dataset ready: {dataset_size} docs | owned by target {created_counts['owned']} | "
f"owned by other {created_counts['other_owned']} | unowned {created_counts['unowned']} | "
f"shared-perms {created_counts['shared']} | tags {len(created_tags)} | "
f"custom fields {len(created_custom_fields)}",
),
)
self.stdout.write("\nRunning benchmarks...\n")
self._run_benchmarks(
iterations=iterations,
target_user=target_user,
superuser=superuser,
prefix=prefix,
)
def _validate_ratios(self, owner_ratio: float, unowned_ratio: float):
if owner_ratio < 0 or unowned_ratio < 0:
raise CommandError("Ratios must be non-negative")
if owner_ratio + unowned_ratio > 1:
raise CommandError("owner-ratio + unowned-ratio cannot exceed 1.0")
def _ensure_users(self):
User = get_user_model()
target_user, _ = User.objects.get_or_create(
username="perf_user",
defaults={"email": "perf_user@example.com"},
)
target_user.set_password("perf_user")
target_user.is_staff = True
target_user.save()
other_user, _ = User.objects.get_or_create(
username="perf_owner",
defaults={"email": "perf_owner@example.com"},
)
other_user.set_password("perf_owner")
other_user.is_staff = True
other_user.save()
superuser, _ = User.objects.get_or_create(
username="perf_admin",
defaults={
"email": "perf_admin@example.com",
"is_staff": True,
"is_superuser": True,
},
)
superuser.set_password("perf_admin")
superuser.save()
perms = Permission.objects.all()
target_user.user_permissions.set(perms)
other_user.user_permissions.set(perms)
return target_user, other_user, superuser
def _cleanup(self, prefix: str) -> int:
docs_qs = Document.global_objects.filter(title__startswith=prefix)
doc_count = docs_qs.count()
if doc_count:
docs_qs.hard_delete()
tag_count = Tag.objects.filter(name__startswith=prefix).count()
if tag_count:
Tag.objects.filter(name__startswith=prefix).delete()
cf_qs = CustomField.objects.filter(name__startswith=prefix)
cf_count = cf_qs.count()
if cf_count:
cf_qs.delete()
cfi_qs = CustomFieldInstance.global_objects.filter(
document__title__startswith=prefix,
)
cfi_count = cfi_qs.count()
if cfi_count:
cfi_qs.hard_delete()
return doc_count + tag_count + cf_count + cfi_count
def _seed_documents(
self,
*,
total: int,
owner_ratio: float,
unowned_ratio: float,
shared_ratio: float,
chunk_size: int,
prefix: str,
target_user,
other_user,
) -> dict[str, int]:
target_count = math.floor(total * owner_ratio)
unowned_count = math.floor(total * unowned_ratio)
other_count = total - target_count - unowned_count
documents: list[Document] = []
other_docs: list[Document] = []
for idx in range(total):
if idx < target_count:
owner = target_user
elif idx < target_count + other_count:
owner = other_user
else:
owner = None
doc = Document(
owner=owner,
title=f"{prefix}-{idx:07d}",
mime_type="application/pdf",
checksum=self._unique_checksum(idx),
page_count=1,
)
if owner is other_user:
other_docs.append(doc)
documents.append(doc)
if len(documents) >= chunk_size:
Document.objects.bulk_create(documents, batch_size=chunk_size)
documents.clear()
if documents:
Document.objects.bulk_create(documents, batch_size=chunk_size)
shared_target = math.floor(len(other_docs) * shared_ratio)
for doc in other_docs[:shared_target]:
assign_perm("documents.view_document", target_user, doc)
return {
"owned": target_count,
"other_owned": other_count,
"unowned": unowned_count,
"shared": shared_target,
}
def _seed_tags(self, *, prefix: str, count: int) -> list[Tag]:
tags = [
Tag(
name=f"{prefix}-tag-{idx:03d}",
)
for idx in range(count)
]
Tag.objects.bulk_create(tags, ignore_conflicts=True)
return list(Tag.objects.filter(name__startswith=prefix))
def _assign_tags_to_documents(
self,
*,
prefix: str,
tags: list[Tag],
tags_per_doc: int,
chunk_size: int,
):
if not tags or tags_per_doc < 1:
return
rels = []
through = Document.tags.through
tag_ids = [t.id for t in tags]
tag_count = len(tag_ids)
iterator = (
Document.objects.filter(title__startswith=prefix)
.values_list(
"id",
flat=True,
)
.iterator()
)
for idx, doc_id in enumerate(iterator):
start = idx % tag_count
chosen = set()
for offset in range(tags_per_doc):
tag_id = tag_ids[(start + offset) % tag_count]
if tag_id in chosen:
continue
chosen.add(tag_id)
rels.append(through(document_id=doc_id, tag_id=tag_id))
if len(rels) >= chunk_size:
through.objects.bulk_create(rels, ignore_conflicts=True)
rels.clear()
if rels:
through.objects.bulk_create(rels, ignore_conflicts=True)
def _seed_custom_fields(self, prefix: str, count: int) -> list[CustomField]:
fields = [
CustomField(
name=f"{prefix}-cf-{idx:03d}",
data_type=CustomField.FieldDataType.STRING,
)
for idx in range(count)
]
CustomField.objects.bulk_create(fields, ignore_conflicts=True)
return list(CustomField.objects.filter(name__startswith=prefix))
def _seed_custom_field_instances(
self,
*,
prefix: str,
custom_fields: list[CustomField],
per_doc: int,
chunk_size: int,
):
if not custom_fields or per_doc < 1:
return
instances = []
cf_ids = [cf.id for cf in custom_fields]
cf_count = len(cf_ids)
iterator = (
Document.objects.filter(title__startswith=prefix)
.values_list(
"id",
flat=True,
)
.iterator()
)
for idx, doc_id in enumerate(iterator):
start = idx % cf_count
for offset in range(per_doc):
cf_id = cf_ids[(start + offset) % cf_count]
instances.append(
CustomFieldInstance(
document_id=doc_id,
field_id=cf_id,
value_text=f"val-{doc_id}-{cf_id}",
),
)
if len(instances) >= chunk_size:
CustomFieldInstance.objects.bulk_create(
instances,
batch_size=chunk_size,
ignore_conflicts=True,
)
instances.clear()
if instances:
CustomFieldInstance.objects.bulk_create(
instances,
batch_size=chunk_size,
ignore_conflicts=True,
)
def _run_benchmarks(self, *, iterations: int, target_user, superuser, prefix: str):
self.stdout.write("-> API benchmarks")
for user in ("perf_admin", "perf_user"):
pwd = user
self.stdout.write(f"-> API documents ({user})")
self._time_api_get(
label=f"{user} /api/documents/",
username=user,
password=pwd,
path="/api/documents/",
)
self.stdout.write(f"-> API tags ({user})")
self._time_api_get(
label=f"{user} /api/tags/",
username=user,
password=pwd,
path="/api/tags/",
)
self.stdout.write(f"-> API custom fields ({user})")
self._time_api_get(
label=f"{user} /api/custom_fields/",
username=user,
password=pwd,
path="/api/custom_fields/",
)
def _count_with_values_list(self, user) -> int:
qs = get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
)
return Document.objects.filter(id__in=qs.values_list("id", flat=True)).count()
def _count_with_subquery(self, user) -> int:
qs = get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
)
subquery = Subquery(qs.values_list("id"))
return Document.objects.filter(id__in=subquery).count()
def _document_filter(self, user, *, use_subquery: bool):
if user is None or getattr(user, "is_superuser", False):
return Q(documents__deleted_at__isnull=True)
qs = get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
)
ids = (
Subquery(qs.values_list("id"))
if use_subquery
else qs.values_list("id", flat=True)
)
return Q(documents__deleted_at__isnull=True, documents__id__in=ids)
def _tag_queryset(self, *, prefix: str, filter_q: Q):
return Tag.objects.filter(name__startswith=prefix).annotate(
document_count=Count("documents", filter=filter_q),
)
def _time_tag_counts(self, *, iterations: int, prefix: str, user):
if not Tag.objects.filter(name__startswith=prefix).exists():
return
self._time_query(
label="tag document_count (grouped)",
iterations=iterations,
fn=lambda: list(
Tag.documents.through.objects.filter(
document_id__in=Subquery(permitted_document_ids(user)),
)
.values("tag_id")
.annotate(c=Count("document_id"))
.values_list("tag_id", "c"),
),
)
def _time_custom_field_counts(
self,
*,
iterations: int,
prefix: str,
user,
superuser,
):
if not CustomField.objects.filter(name__startswith=prefix).exists():
return
permitted = Subquery(permitted_document_ids(user))
super_permitted = CustomFieldInstance.objects.filter(
document__deleted_at__isnull=True,
).values_list("document_id")
def _run(ids_subquery):
return list(
CustomFieldInstance.objects.filter(
document_id__in=ids_subquery,
field__name__startswith=prefix,
)
.values("field_id")
.annotate(c=Count("document_id"))
.values_list("field_id", "c"),
)
self._time_query(
label="custom fields document_count (grouped permitted)",
iterations=iterations,
fn=lambda: _run(permitted),
)
self._time_query(
label="custom fields document_count superuser baseline",
iterations=iterations,
fn=lambda: _run(super_permitted),
)
def _time_api_get(self, *, label: str, username: str, password: str, path: str):
client = APIClient()
client.raise_request_exception = False
if not client.login(username=username, password=password):
self.stdout.write(f"{label}: login failed")
return
timeout_s = float(self.options.get("api_timeout", 30.0))
logger = logging.getLogger("django.request")
prev_level = logger.level
logger.setLevel(logging.CRITICAL)
def _timeout_handler(signum, frame): # pragma: no cover
raise TimeoutError
old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
signal.alarm(max(1, int(timeout_s)))
try:
reset_queries()
start = perf_counter()
with contextlib.redirect_stderr(io.StringIO()):
resp = client.get(path, format="json")
duration = perf_counter() - start
size = None
if resp.headers.get("Content-Type", "").startswith("application/json"):
data = resp.json()
if isinstance(data, dict) and "results" in data:
size = len(data.get("results", []))
elif isinstance(data, list):
size = len(data)
if resp.status_code == 500 and duration >= timeout_s - 0.1:
self.stdout.write(
self.style.ERROR(f"{label}: TIMEOUT after {timeout_s:.1f}s"),
)
elif resp.status_code == 200:
self.stdout.write(
self.style.SUCCESS(
f"{label}: status={resp.status_code} time={duration:.4f}s items={size}",
),
)
else:
self.stdout.write(
self.style.WARNING(
f"{label}: status={resp.status_code} time={duration:.4f}s",
),
)
except TimeoutError:
self.stdout.write(f"{label}: TIMEOUT after {timeout_s:.1f}s")
except Exception as e:
text = str(e)
self.stdout.write(f"{label}: ERROR {text}")
finally:
signal.alarm(0)
signal.signal(signal.SIGALRM, old_handler)
logger.setLevel(prev_level)
connections.close_all()
def _time_query(self, *, label: str, iterations: int, fn):
durations = []
for _ in range(iterations):
reset_queries()
start = perf_counter()
fn()
durations.append(perf_counter() - start)
avg = sum(durations) / len(durations)
self.stdout.write(
f"{label}: min={min(durations):.4f}s avg={avg:.4f}s max={max(durations):.4f}s",
)
def _unique_checksum(self, idx: int) -> str:
return f"{uuid.uuid4().hex}{idx:08d}"[:32]

View File

@@ -0,0 +1,28 @@
# Generated by Django 5.2.7 on 2026-01-14 16:53
from django.db import migrations
from django.db import models
from django.db.models import F
def populate_action_order(apps, schema_editor):
WorkflowAction = apps.get_model("documents", "WorkflowAction")
WorkflowAction.objects.all().update(order=F("id"))
class Migration(migrations.Migration):
dependencies = [
("documents", "1074_workflowrun_deleted_at_workflowrun_restored_at_and_more"),
]
operations = [
migrations.AddField(
model_name="workflowaction",
name="order",
field=models.PositiveIntegerField(default=0, verbose_name="order"),
),
migrations.RunPython(
populate_action_order,
reverse_code=migrations.RunPython.noop,
),
]

View File

@@ -1294,6 +1294,8 @@ class WorkflowAction(models.Model):
        default=WorkflowActionType.ASSIGNMENT,
    )

    order = models.PositiveIntegerField(_("order"), default=0)

    assign_title = models.TextField(
        _("assign title"),
        null=True,

View File

@@ -18,6 +18,8 @@ from django.core.exceptions import ValidationError
from django.core.validators import DecimalValidator
from django.core.validators import EmailValidator
from django.core.validators import MaxLengthValidator
from django.core.validators import MaxValueValidator
from django.core.validators import MinValueValidator
from django.core.validators import RegexValidator
from django.core.validators import integer_validator
from django.db.models import Count
@@ -578,6 +580,10 @@ class TagSerializer(MatchingModelSerializer, OwnedObjectSerializer):
        ),
    )

    def get_children(self, obj):
        children_map = self.context.get("children_map")
        if children_map is not None:
            children = children_map.get(obj.pk, [])
        else:
            filter_q = self.context.get("document_count_filter")
            request = self.context.get("request")
            if filter_q is None:
@@ -585,7 +591,7 @@ class TagSerializer(MatchingModelSerializer, OwnedObjectSerializer):
                 filter_q = get_document_count_filter_for_user(user)
                 self.context["document_count_filter"] = filter_q
-            children_queryset = (
+            children = (
                 obj.get_children_queryset()
                 .select_related("owner")
                 .annotate(document_count=Count("documents", filter=filter_q))
@@ -593,15 +599,15 @@ class TagSerializer(MatchingModelSerializer, OwnedObjectSerializer):
         view = self.context.get("view")
         ordering = (
-            OrderingFilter().get_ordering(request, children_queryset, view)
+            OrderingFilter().get_ordering(request, children, view)
             if request and view
             else None
         )
         ordering = ordering or (Lower("name"),)
-        children_queryset = children_queryset.order_by(*ordering)
+        children = children.order_by(*ordering)

         serializer = TagSerializer(
-            children_queryset,
+            children,
             many=True,
             user=self.user,
             full_perms=self.full_perms,
@@ -875,6 +881,13 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer):
            uri_validator(data["value"])
        elif field.data_type == CustomField.FieldDataType.INT:
            integer_validator(data["value"])
            try:
                value_int = int(data["value"])
            except (TypeError, ValueError):
                raise serializers.ValidationError("Enter a valid integer.")
            # Keep values within the PostgreSQL integer range
            MinValueValidator(-2147483648)(value_int)
            MaxValueValidator(2147483647)(value_int)
        elif (
            field.data_type == CustomField.FieldDataType.MONETARY
            and data["value"] != ""
@@ -2553,7 +2566,8 @@ class WorkflowSerializer(serializers.ModelSerializer):
                 set_triggers.append(trigger_instance)

         if actions is not None and actions is not serializers.empty:
-            for action in actions:
+            for index, action in enumerate(actions):
+                action["order"] = index
                 assign_tags = action.pop("assign_tags", None)
                 assign_view_users = action.pop("assign_view_users", None)
                 assign_view_groups = action.pop("assign_view_groups", None)
@@ -2680,6 +2694,16 @@ class WorkflowSerializer(serializers.ModelSerializer):
        return instance

    def to_representation(self, instance):
        data = super().to_representation(instance)
        actions = instance.actions.order_by("order", "pk")
        data["actions"] = WorkflowActionSerializer(
            actions,
            many=True,
            context=self.context,
        ).data
        return data


class TrashSerializer(SerializerWithPerms):
    documents = serializers.ListField(
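
The paired validators pin accepted values to PostgreSQL's 4-byte integer bounds, -2**31 through 2**31 - 1. The same check in isolation (a sketch, not the serializer itself):

    from django.core.validators import MaxValueValidator, MinValueValidator

    PG_INT_MIN, PG_INT_MAX = -2_147_483_648, 2_147_483_647

    def validate_pg_int(value) -> int:
        value_int = int(value)                    # mirrors the try/except above
        MinValueValidator(PG_INT_MIN)(value_int)  # raises ValidationError below range
        MaxValueValidator(PG_INT_MAX)(value_int)  # raises ValidationError above range
        return value_int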

View File

@@ -418,7 +418,15 @@ def update_filename_and_move_files(
             return
         instance = instance.document

-def validate_move(instance, old_path: Path, new_path: Path):
+def validate_move(instance, old_path: Path, new_path: Path, root: Path):
+    if not new_path.is_relative_to(root):
+        msg = (
+            f"Document {instance!s}: Refusing to move file outside root {root}: "
+            f"{new_path}."
+        )
+        logger.warning(msg)
+        raise CannotMoveFilesException(msg)
+
     if not old_path.is_file():
         # Can't do anything if the old file does not exist anymore.
         msg = f"Document {instance!s}: File {old_path} doesn't exist."
@@ -507,12 +515,22 @@ def update_filename_and_move_files(
             return

         if move_original:
-            validate_move(instance, old_source_path, instance.source_path)
+            validate_move(
+                instance,
+                old_source_path,
+                instance.source_path,
+                settings.ORIGINALS_DIR,
+            )
             create_source_path_directory(instance.source_path)
             shutil.move(old_source_path, instance.source_path)

         if move_archive:
-            validate_move(instance, old_archive_path, instance.archive_path)
+            validate_move(
+                instance,
+                old_archive_path,
+                instance.archive_path,
+                settings.ARCHIVE_DIR,
+            )
             create_source_path_directory(instance.archive_path)
             shutil.move(old_archive_path, instance.archive_path)
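
The new root argument to validate_move leans on Path.is_relative_to, a purely lexical containment check against the configured originals or archive directory. A quick illustration with made-up paths:

    from pathlib import Path

    root = Path("/usr/src/paperless/media/documents/originals")  # hypothetical root

    assert (root / "2026" / "0001.pdf").is_relative_to(root)  # inside the root: allowed
    assert not Path("/tmp/proof").is_relative_to(root)        # outside: CannotMoveFilesException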
@@ -751,7 +769,7 @@ def run_workflows(
         if matching.document_matches_workflow(document, workflow, trigger_type):
             action: WorkflowAction
-            for action in workflow.actions.all():
+            for action in workflow.actions.order_by("order", "pk"):
                 message = f"Applying {action} from {workflow}"
                 if not use_overrides:
                     logger.info(message, extra={"group": logging_group})

View File

@@ -493,7 +493,7 @@ def check_scheduled_workflows():
                 trigger.schedule_is_recurring
                 and workflow_runs.exists()
                 and (
-                    workflow_runs.last().run_at
+                    workflow_runs.first().run_at
                     > now
                     - datetime.timedelta(
                         days=trigger.schedule_recurring_interval_days,
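
Reading the condition: the workflow is skipped while the most recent run still falls inside the recurring interval, and first() on the runs queryset is what now picks that most recent run (assuming the queryset is ordered newest-first, which the fix relies on). The guard in isolation:

    import datetime

    def still_inside_interval(latest_run_at, now, interval_days: int) -> bool:
        # True means skip this workflow: the latest run is too recent.
        return latest_run_at > now - datetime.timedelta(days=interval_days)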

View File

@@ -262,6 +262,17 @@ def get_custom_fields_context(
    return field_data


def _is_safe_relative_path(value: str) -> bool:
    if value == "":
        return True

    path = PurePath(value)
    if path.is_absolute() or path.drive:
        return False

    return ".." not in path.parts


def validate_filepath_template_and_render(
    template_string: str,
    document: Document | None = None,
@@ -309,6 +320,12 @@ def validate_filepath_template_and_render(
        )
        rendered_template = template.render(context)

        if not _is_safe_relative_path(rendered_template):
            logger.warning(
                "Template rendered an unsafe path (absolute or containing traversal).",
            )
            return None

        # We're good!
        return rendered_template
    except UndefinedError:
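
Sample inputs for the helper above (restated verbatim so the checks run standalone); the traversal string matches the one exercised by the storage path API test further down:

    from pathlib import PurePath

    def _is_safe_relative_path(value: str) -> bool:
        if value == "":
            return True
        path = PurePath(value)
        if path.is_absolute() or path.drive:
            return False
        return ".." not in path.parts

    assert _is_safe_relative_path("invoices/2026/acme.pdf")
    assert not _is_safe_relative_path("../../../../../tmp/proof")  # traversal attempt
    assert not _is_safe_relative_path("/etc/passwd")               # absolute path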

View File

@@ -1664,6 +1664,44 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
        self.consume_file_mock.assert_not_called()
def test_patch_document_integer_custom_field_out_of_range(self):
"""
GIVEN:
- An integer custom field
- A document
WHEN:
- Patching the document with an integer value exceeding PostgreSQL's range
THEN:
- HTTP 400 is returned (validation catches the overflow)
- No custom field instance is created
"""
cf_int = CustomField.objects.create(
name="intfield",
data_type=CustomField.FieldDataType.INT,
)
doc = Document.objects.create(
title="Doc",
checksum="123",
mime_type="application/pdf",
)
response = self.client.patch(
f"/api/documents/{doc.pk}/",
{
"custom_fields": [
{
"field": cf_int.pk,
"value": 2**31, # overflow for PostgreSQL integer fields
},
],
},
format="json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn("custom_fields", response.data)
self.assertEqual(CustomFieldInstance.objects.count(), 0)
    def test_upload_with_webui_source(self):
        """
        GIVEN: A document with a source file

View File

@@ -219,6 +219,30 @@ class TestApiStoragePaths(DirectoriesMixin, APITestCase):
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertEqual(StoragePath.objects.count(), 1)
def test_api_create_storage_path_rejects_traversal(self):
"""
GIVEN:
            - API request to create a storage path
- Storage path attempts directory traversal
WHEN:
- API is called
THEN:
- Correct HTTP 400 response
- No storage path is created
"""
response = self.client.post(
self.ENDPOINT,
json.dumps(
{
"name": "Traversal path",
"path": "../../../../../tmp/proof",
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(StoragePath.objects.count(), 1)
    def test_api_storage_path_placeholders(self):
        """
        GIVEN:

View File

@@ -581,7 +581,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         - Consume file should be called
         """
         doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
-        metadata_document_id = self.doc1.id
+        metadata_document_id = self.doc2.id

         user = User.objects.create(username="test_user")
         result = bulk_edit.merge(
@@ -606,7 +606,8 @@ class TestPDFActions(DirectoriesMixin, TestCase):
         # With metadata_document_id overrides
         result = bulk_edit.merge(doc_ids, metadata_document_id=metadata_document_id)
         consume_file_args, _ = mock_consume_file.call_args
-        self.assertEqual(consume_file_args[1].title, "A (merged)")
+        self.assertEqual(consume_file_args[1].title, "B (merged)")
+        self.assertEqual(consume_file_args[1].created, self.doc2.created)

         self.assertEqual(result, "OK")

View File

@@ -1,6 +1,7 @@
from datetime import datetime
from unittest import mock

from django.conf import settings
from django.contrib.auth.models import User
from django.test import SimpleTestCase
from django.test import TestCase
@@ -179,7 +180,7 @@ class TestRewriteNaturalDateKeywords(SimpleTestCase):
         (
             "added:this year",
             datetime(2025, 7, 15, 12, 0, 0, tzinfo=timezone.utc),
-            ("added:[20250101", "TO 20250715"),
+            ("added:[20250101", "TO 20251231"),
         ),
         (
             "added:previous year",
@@ -251,3 +252,120 @@ class TestRewriteNaturalDateKeywords(SimpleTestCase):
        result = self._rewrite_with_now("added:today", fixed_now)
        # Should convert to UTC properly
        self.assertIn("added:[20250719", result)
class TestIndexResilience(DirectoriesMixin, SimpleTestCase):
def _assert_recreate_called(self, mock_create_in):
mock_create_in.assert_called_once()
path_arg, schema_arg = mock_create_in.call_args.args
self.assertEqual(path_arg, settings.INDEX_DIR)
self.assertEqual(schema_arg.__class__.__name__, "Schema")
def test_transient_missing_segment_does_not_force_recreate(self):
"""
GIVEN:
- Index directory exists
WHEN:
- open_index is called
- Opening the index raises FileNotFoundError once due to a
transient missing segment
THEN:
- Index is opened successfully on retry
- Index is not recreated
"""
file_marker = settings.INDEX_DIR / "file_marker.txt"
file_marker.write_text("keep")
expected_index = object()
with (
mock.patch("documents.index.exists_in", return_value=True),
mock.patch(
"documents.index.open_dir",
side_effect=[FileNotFoundError("missing"), expected_index],
) as mock_open_dir,
mock.patch(
"documents.index.create_in",
) as mock_create_in,
mock.patch(
"documents.index.rmtree",
) as mock_rmtree,
):
ix = index.open_index()
self.assertIs(ix, expected_index)
self.assertGreaterEqual(mock_open_dir.call_count, 2)
mock_rmtree.assert_not_called()
mock_create_in.assert_not_called()
self.assertEqual(file_marker.read_text(), "keep")
def test_transient_errors_exhaust_retries_and_recreate(self):
"""
GIVEN:
- Index directory exists
WHEN:
- open_index is called
- Opening the index raises FileNotFoundError multiple times due to
transient missing segments
THEN:
- Index is recreated after retries are exhausted
"""
recreated_index = object()
with (
self.assertLogs("paperless.index", level="ERROR") as cm,
mock.patch("documents.index.exists_in", return_value=True),
mock.patch(
"documents.index.open_dir",
side_effect=FileNotFoundError("missing"),
) as mock_open_dir,
mock.patch("documents.index.rmtree") as mock_rmtree,
mock.patch(
"documents.index.create_in",
return_value=recreated_index,
) as mock_create_in,
):
ix = index.open_index()
self.assertIs(ix, recreated_index)
self.assertEqual(mock_open_dir.call_count, 4)
mock_rmtree.assert_called_once_with(settings.INDEX_DIR)
self._assert_recreate_called(mock_create_in)
self.assertIn(
"Error while opening the index after retries, recreating.",
cm.output[0],
)
def test_non_transient_error_recreates_index(self):
"""
GIVEN:
- Index directory exists
WHEN:
- open_index is called
- Opening the index raises a "non-transient" error
THEN:
- Index is recreated
"""
recreated_index = object()
with (
self.assertLogs("paperless.index", level="ERROR") as cm,
mock.patch("documents.index.exists_in", return_value=True),
mock.patch(
"documents.index.open_dir",
side_effect=RuntimeError("boom"),
),
mock.patch("documents.index.rmtree") as mock_rmtree,
mock.patch(
"documents.index.create_in",
return_value=recreated_index,
) as mock_create_in,
):
ix = index.open_index()
self.assertIs(ix, recreated_index)
mock_rmtree.assert_called_once_with(settings.INDEX_DIR)
self._assert_recreate_called(mock_create_in)
self.assertIn(
"Error while opening the index, recreating.",
cm.output[0],
)

View File

@@ -2094,6 +2094,68 @@ class TestWorkflows(
        doc.refresh_from_db()
        self.assertIsNone(doc.owner)
def test_workflow_scheduled_recurring_respects_latest_run(self):
"""
GIVEN:
- Scheduled workflow marked as recurring with a 1-day interval
- Document that matches the trigger
- Two prior runs exist: one 2 days ago and one 1 hour ago
WHEN:
- Scheduled workflows are checked again
THEN:
- Workflow does not run because the most recent run is inside the interval
"""
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
schedule_date_field=WorkflowTrigger.ScheduleDateField.CREATED,
schedule_is_recurring=True,
schedule_recurring_interval_days=1,
)
action = WorkflowAction.objects.create(
assign_title="Doc assign owner",
assign_owner=self.user2,
)
w = Workflow.objects.create(
name="Workflow 1",
order=0,
)
w.triggers.add(trigger)
w.actions.add(action)
w.save()
doc = Document.objects.create(
title="sample test",
correspondent=self.c,
original_filename="sample.pdf",
created=timezone.now().date() - timedelta(days=3),
)
WorkflowRun.objects.create(
workflow=w,
document=doc,
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
run_at=timezone.now() - timedelta(days=2),
)
WorkflowRun.objects.create(
workflow=w,
document=doc,
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
run_at=timezone.now() - timedelta(hours=1),
)
tasks.check_scheduled_workflows()
doc.refresh_from_db()
self.assertIsNone(doc.owner)
self.assertEqual(
WorkflowRun.objects.filter(
workflow=w,
document=doc,
type=WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
).count(),
2,
)
    def test_workflow_scheduled_trigger_negative_offset_customfield(self):
        """
        GIVEN:

View File

@@ -448,8 +448,47 @@ class TagViewSet(ModelViewSet, PermissionsAwareDocumentCountMixin):
    def get_serializer_context(self):
        context = super().get_serializer_context()
        context["document_count_filter"] = self.get_document_count_filter()
        if hasattr(self, "_children_map"):
            context["children_map"] = self._children_map
        return context
def list(self, request, *args, **kwargs):
"""
Build a children map once to avoid per-parent queries in the serializer.
"""
queryset = self.filter_queryset(self.get_queryset())
ordering = OrderingFilter().get_ordering(request, queryset, self) or (
Lower("name"),
)
queryset = queryset.order_by(*ordering)
all_tags = list(queryset)
descendant_pks = {pk for tag in all_tags for pk in tag.get_descendants_pks()}
if descendant_pks:
filter_q = self.get_document_count_filter()
children_source = list(
Tag.objects.filter(pk__in=descendant_pks | {t.pk for t in all_tags})
.select_related("owner")
.annotate(document_count=Count("documents", filter=filter_q))
.order_by(*ordering),
)
else:
children_source = all_tags
children_map = {}
for tag in children_source:
children_map.setdefault(tag.tn_parent_id, []).append(tag)
self._children_map = children_map
page = self.paginate_queryset(queryset)
serializer = self.get_serializer(page, many=True)
response = self.get_paginated_response(serializer.data)
if descendant_pks:
# Include children in the "all" field, if needed
response.data["all"] = [tag.pk for tag in children_source]
return response
    def perform_update(self, serializer):
        old_parent = self.get_object().get_parent()
        tag = serializer.save()
@@ -708,6 +747,7 @@ class DocumentViewSet(
"title", "title",
"correspondent__name", "correspondent__name",
"document_type__name", "document_type__name",
"storage_path__name",
"created", "created",
"modified", "modified",
"added", "added",

View File

@@ -20,9 +20,6 @@ def get_workflows_for_trigger(
     wrap it in a list; otherwise fetch enabled workflows for the trigger with
     the prefetches used by the runner.
     """
-    if workflow_to_run is not None:
-        return [workflow_to_run]
     annotated_actions = (
         WorkflowAction.objects.select_related(
             "assign_correspondent",
@@ -105,10 +102,25 @@ def get_workflows_for_trigger(
         )
     )

+    action_prefetch = Prefetch(
+        "actions",
+        queryset=annotated_actions.order_by("order", "pk"),
+    )
+
+    if workflow_to_run is not None:
+        return (
+            Workflow.objects.filter(pk=workflow_to_run.pk)
+            .prefetch_related(
+                action_prefetch,
+                "triggers",
+            )
+            .distinct()
+        )
+
     return (
         Workflow.objects.filter(enabled=True, triggers__type=trigger_type)
         .prefetch_related(
-            Prefetch("actions", queryset=annotated_actions),
+            action_prefetch,
             "triggers",
         )
         .order_by("order")

View File

@@ -1,6 +1,6 @@
 from typing import Final

-__version__: Final[tuple[int, int, int]] = (2, 20, 3)
+__version__: Final[tuple[int, int, int]] = (2, 20, 5)

 # Version string like X.Y.Z
 __full_version_str__: Final[str] = ".".join(map(str, __version__))

 # Version string like X.Y

uv.lock generated
View File

@@ -2115,7 +2115,7 @@ wheels = [
 [[package]]
 name = "paperless-ngx"
-version = "2.20.3"
+version = "2.20.5"
 source = { virtual = "." }
 dependencies = [
     { name = "babel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },