Compare commits

..

1 Commits

34 changed files with 794 additions and 1935 deletions

View File

@@ -23,7 +23,7 @@ env:
jobs:
build:
name: Build Documentation
runs-on: ubuntu-24.04
runs-on: ubuntu-slim
steps:
- name: Checkout
uses: actions/checkout@v6
@@ -58,7 +58,7 @@ jobs:
name: Deploy Documentation
needs: build
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
runs-on: ubuntu-24.04
runs-on: ubuntu-slim
steps:
- name: Checkout
uses: actions/checkout@v6

View File

@@ -12,7 +12,7 @@ concurrency:
jobs:
pre-commit:
name: Pre-commit Checks
runs-on: ubuntu-24.04
runs-on: ubuntu-slim
steps:
- name: Checkout
uses: actions/checkout@v6

View File

@@ -10,7 +10,7 @@ jobs:
synchronize-with-crowdin:
name: Crowdin Sync
if: github.repository_owner == 'paperless-ngx'
runs-on: ubuntu-24.04
runs-on: ubuntu-slim
steps:
- name: Checkout
uses: actions/checkout@v6

View File

@@ -8,7 +8,7 @@ permissions:
jobs:
pr-bot:
name: Automated PR Bot
runs-on: ubuntu-latest
runs-on: ubuntu-slim
steps:
- name: Label PR by file path or branch name
# see .github/labeler.yml for the labeler config

View File

@@ -12,7 +12,7 @@ permissions:
jobs:
pr_opened_or_reopened:
name: pr_opened_or_reopened
runs-on: ubuntu-24.04
runs-on: ubuntu-slim
permissions:
# write permission is required for autolabeler
pull-requests: write

View File

@@ -13,7 +13,7 @@ jobs:
stale:
name: 'Stale'
if: github.repository_owner == 'paperless-ngx'
runs-on: ubuntu-24.04
runs-on: ubuntu-slim
steps:
- uses: actions/stale@v10
with:
@@ -35,7 +35,7 @@ jobs:
lock-threads:
name: 'Lock Old Threads'
if: github.repository_owner == 'paperless-ngx'
runs-on: ubuntu-24.04
runs-on: ubuntu-slim
steps:
- uses: dessant/lock-threads@v6
with:
@@ -55,7 +55,7 @@ jobs:
close-answered-discussions:
name: 'Close Answered Discussions'
if: github.repository_owner == 'paperless-ngx'
runs-on: ubuntu-24.04
runs-on: ubuntu-slim
steps:
- uses: actions/github-script@v8
with:
@@ -112,7 +112,7 @@ jobs:
close-outdated-discussions:
name: 'Close Outdated Discussions'
if: github.repository_owner == 'paperless-ngx'
runs-on: ubuntu-24.04
runs-on: ubuntu-slim
steps:
- uses: actions/github-script@v8
with:
@@ -204,7 +204,7 @@ jobs:
close-unsupported-feature-requests:
name: 'Close Unsupported Feature Requests'
if: github.repository_owner == 'paperless-ngx'
runs-on: ubuntu-24.04
runs-on: ubuntu-slim
steps:
- uses: actions/github-script@v8
with:

View File

@@ -6,7 +6,7 @@ on:
jobs:
generate-translate-strings:
name: Generate Translation Strings
runs-on: ubuntu-latest
runs-on: ubuntu-slim
permissions:
contents: write
steps:

View File

@@ -1152,9 +1152,8 @@ via the consumption directory, you can disable the consumer to save resources.
#### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES}
: As of version 3.0 Paperless-ngx allows duplicate documents to be consumed by default, _except_ when
this setting is enabled. When enabled, Paperless will check if a document with the same hash already
exists in the system and delete the duplicate file from the consumption directory without consuming it.
: When the consumer detects a duplicate document, it will not touch
the original document. This default behavior can be changed here.
Defaults to false.

File diff suppressed because it is too large Load Diff

View File

@@ -103,6 +103,22 @@
</div>
<div class="row mb-3">
<div class="col-md-3 col-form-label pt-0">
<span i18n>Items per page</span>
</div>
<div class="col">
<select class="form-select" formControlName="documentListItemPerPage">
<option [ngValue]="10">10</option>
<option [ngValue]="25">25</option>
<option [ngValue]="50">50</option>
<option [ngValue]="100">100</option>
</select>
</div>
</div>
<div class="row">
<div class="col-md-3 col-form-label pt-0">
<span i18n>Sidebar</span>
</div>
@@ -137,28 +153,8 @@
</button>
</div>
</div>
</div>
<div class="col-xl-6 ps-xl-5">
<h5 class="mt-3 mt-md-0" i18n>Global search</h5>
<div class="row">
<div class="col">
<pngx-input-check i18n-title title="Do not include advanced search results" formControlName="searchDbOnly"></pngx-input-check>
</div>
</div>
<div class="row mb-3">
<div class="col-md-3 col-form-label pt-0">
<span i18n>Full search links to</span>
</div>
<div class="col mb-3">
<select class="form-select" formControlName="searchLink">
<option [ngValue]="GlobalSearchType.TITLE_CONTENT" i18n>Title and content search</option>
<option [ngValue]="GlobalSearchType.ADVANCED" i18n>Advanced search</option>
</select>
</div>
</div>
<h5 class="mt-3 mt-md-0" id="update-checking" i18n>Update checking</h5>
<h5 class="mt-3" id="update-checking" i18n>Update checking</h5>
<div class="row mb-3">
<div class="col d-flex flex-row align-items-start">
<pngx-input-check i18n-title title="Enable update checking" formControlName="updateCheckingEnabled"></pngx-input-check>
@@ -183,33 +179,11 @@
<pngx-input-check i18n-title title="Show document counts in sidebar saved views" formControlName="sidebarViewsShowCount"></pngx-input-check>
</div>
</div>
</div>
</div>
<div class="col-xl-6 ps-xl-5">
<h5 class="mt-3 mt-md-0" i18n>Document editing</h5>
</ng-template>
</li>
<li [ngbNavItem]="SettingsNavIDs.Documents">
<a ngbNavLink i18n>Documents</a>
<ng-template ngbNavContent>
<div class="row">
<div class="col-xl-6 pe-xl-5">
<h5 i18n>Documents</h5>
<div class="row mb-3">
<div class="col-md-3 col-form-label pt-0">
<span i18n>Items per page</span>
</div>
<div class="col">
<select class="form-select" formControlName="documentListItemPerPage">
<option [ngValue]="10">10</option>
<option [ngValue]="25">25</option>
<option [ngValue]="50">50</option>
<option [ngValue]="100">100</option>
</select>
</div>
</div>
<h5 class="mt-3" i18n>Document editing</h5>
<div class="row">
<div class="col">
<pngx-input-check i18n-title title="Use PDF viewer provided by the browser" i18n-hint hint="This is usually faster for displaying large PDF documents, but it might not work on some browsers." formControlName="useNativePdfViewer"></pngx-input-check>
@@ -235,31 +209,31 @@
</div>
</div>
<div class="row">
<div class="row mb-3">
<div class="col">
<pngx-input-check i18n-title title="Show document thumbnail during loading" formControlName="documentEditingOverlayThumbnail"></pngx-input-check>
</div>
</div>
<div class="row mb-3">
<h5 class="mt-3" i18n>Global search</h5>
<div class="row">
<div class="col">
<p class="mb-2" i18n>Built-in fields to show:</p>
@for (option of documentDetailFieldOptions; track option.id) {
<div class="form-check ms-3">
<input class="form-check-input" type="checkbox"
[id]="'documentDetailField-' + option.id"
[checked]="isDocumentDetailFieldShown(option.id)"
(change)="toggleDocumentDetailField(option.id, $event.target.checked)" />
<label class="form-check-label" [for]="'documentDetailField-' + option.id">
{{ option.label }}
</label>
</div>
}
<p class="small text-muted mt-1" i18n>Uncheck fields to hide them on the document details page.</p>
<pngx-input-check i18n-title title="Do not include advanced search results" formControlName="searchDbOnly"></pngx-input-check>
</div>
</div>
</div>
<div class="col-xl-6 ps-xl-5">
<div class="row mb-3">
<div class="col-md-3 col-form-label pt-0">
<span i18n>Full search links to</span>
</div>
<div class="col mb-3">
<select class="form-select" formControlName="searchLink">
<option [ngValue]="GlobalSearchType.TITLE_CONTENT" i18n>Title and content search</option>
<option [ngValue]="GlobalSearchType.ADVANCED" i18n>Advanced search</option>
</select>
</div>
</div>
<h5 class="mt-3" i18n>Bulk editing</h5>
<div class="row mb-3">
<div class="col">
@@ -274,8 +248,10 @@
<pngx-input-check i18n-title title="Enable notes" formControlName="notesEnabled"></pngx-input-check>
</div>
</div>
</div>
</div>
</ng-template>
</li>

View File

@@ -201,9 +201,9 @@ describe('SettingsComponent', () => {
const navigateSpy = jest.spyOn(router, 'navigate')
const tabButtons = fixture.debugElement.queryAll(By.directive(NgbNavLink))
tabButtons[1].nativeElement.dispatchEvent(new MouseEvent('click'))
expect(navigateSpy).toHaveBeenCalledWith(['settings', 'documents'])
tabButtons[2].nativeElement.dispatchEvent(new MouseEvent('click'))
expect(navigateSpy).toHaveBeenCalledWith(['settings', 'permissions'])
tabButtons[2].nativeElement.dispatchEvent(new MouseEvent('click'))
expect(navigateSpy).toHaveBeenCalledWith(['settings', 'notifications'])
const initSpy = jest.spyOn(component, 'initialize')
component.isDirty = true // mock dirty
@@ -213,8 +213,8 @@ describe('SettingsComponent', () => {
expect(initSpy).not.toHaveBeenCalled()
navigateSpy.mockResolvedValueOnce(true) // nav accepted even though dirty
tabButtons[2].nativeElement.dispatchEvent(new MouseEvent('click'))
expect(navigateSpy).toHaveBeenCalledWith(['settings', 'permissions'])
tabButtons[1].nativeElement.dispatchEvent(new MouseEvent('click'))
expect(navigateSpy).toHaveBeenCalledWith(['settings', 'notifications'])
expect(initSpy).toHaveBeenCalled()
})
@@ -226,7 +226,7 @@ describe('SettingsComponent', () => {
activatedRoute.snapshot.fragment = '#notifications'
const scrollSpy = jest.spyOn(viewportScroller, 'scrollToAnchor')
component.ngOnInit()
expect(component.activeNavID).toEqual(4) // Notifications
expect(component.activeNavID).toEqual(3) // Notifications
component.ngAfterViewInit()
expect(scrollSpy).toHaveBeenCalledWith('#notifications')
})
@@ -251,7 +251,7 @@ describe('SettingsComponent', () => {
expect(toastErrorSpy).toHaveBeenCalled()
expect(storeSpy).toHaveBeenCalled()
expect(appearanceSettingsSpy).not.toHaveBeenCalled()
expect(setSpy).toHaveBeenCalledTimes(31)
expect(setSpy).toHaveBeenCalledTimes(30)
// succeed
storeSpy.mockReturnValueOnce(of(true))
@@ -366,22 +366,4 @@ describe('SettingsComponent', () => {
settingsService.settingsSaved.emit(true)
expect(maybeRefreshSpy).toHaveBeenCalled()
})
it('should support toggling document detail fields', () => {
completeSetup()
const field = 'storage_path'
expect(
component.settingsForm.get('documentDetailsHiddenFields').value.length
).toEqual(0)
component.toggleDocumentDetailField(field, false)
expect(
component.settingsForm.get('documentDetailsHiddenFields').value.length
).toEqual(1)
expect(component.isDocumentDetailFieldShown(field)).toBeFalsy()
component.toggleDocumentDetailField(field, true)
expect(
component.settingsForm.get('documentDetailsHiddenFields').value.length
).toEqual(0)
expect(component.isDocumentDetailFieldShown(field)).toBeTruthy()
})
})

View File

@@ -70,9 +70,9 @@ import { ComponentWithPermissions } from '../../with-permissions/with-permission
enum SettingsNavIDs {
General = 1,
Documents = 2,
Permissions = 3,
Notifications = 4,
Permissions = 2,
Notifications = 3,
SavedViews = 4,
}
const systemLanguage = { code: '', name: $localize`Use system language` }
@@ -81,25 +81,6 @@ const systemDateFormat = {
name: $localize`Use date format of display language`,
}
export enum DocumentDetailFieldID {
ArchiveSerialNumber = 'archive_serial_number',
Correspondent = 'correspondent',
DocumentType = 'document_type',
StoragePath = 'storage_path',
Tags = 'tags',
}
const documentDetailFieldOptions = [
{
id: DocumentDetailFieldID.ArchiveSerialNumber,
label: $localize`Archive serial number`,
},
{ id: DocumentDetailFieldID.Correspondent, label: $localize`Correspondent` },
{ id: DocumentDetailFieldID.DocumentType, label: $localize`Document type` },
{ id: DocumentDetailFieldID.StoragePath, label: $localize`Storage path` },
{ id: DocumentDetailFieldID.Tags, label: $localize`Tags` },
]
@Component({
selector: 'pngx-settings',
templateUrl: './settings.component.html',
@@ -165,7 +146,6 @@ export class SettingsComponent
pdfViewerDefaultZoom: new FormControl(null),
documentEditingRemoveInboxTags: new FormControl(null),
documentEditingOverlayThumbnail: new FormControl(null),
documentDetailsHiddenFields: new FormControl([]),
searchDbOnly: new FormControl(null),
searchLink: new FormControl(null),
@@ -196,8 +176,6 @@ export class SettingsComponent
public readonly ZoomSetting = ZoomSetting
public readonly documentDetailFieldOptions = documentDetailFieldOptions
get systemStatusHasErrors(): boolean {
return (
this.systemStatus.database.status === SystemStatusItemStatus.ERROR ||
@@ -358,9 +336,6 @@ export class SettingsComponent
documentEditingOverlayThumbnail: this.settings.get(
SETTINGS_KEYS.DOCUMENT_EDITING_OVERLAY_THUMBNAIL
),
documentDetailsHiddenFields: this.settings.get(
SETTINGS_KEYS.DOCUMENT_DETAILS_HIDDEN_FIELDS
),
searchDbOnly: this.settings.get(SETTINGS_KEYS.SEARCH_DB_ONLY),
searchLink: this.settings.get(SETTINGS_KEYS.SEARCH_FULL_TYPE),
}
@@ -551,10 +526,6 @@ export class SettingsComponent
SETTINGS_KEYS.DOCUMENT_EDITING_OVERLAY_THUMBNAIL,
this.settingsForm.value.documentEditingOverlayThumbnail
)
this.settings.set(
SETTINGS_KEYS.DOCUMENT_DETAILS_HIDDEN_FIELDS,
this.settingsForm.value.documentDetailsHiddenFields
)
this.settings.set(
SETTINGS_KEYS.SEARCH_DB_ONLY,
this.settingsForm.value.searchDbOnly
@@ -616,26 +587,6 @@ export class SettingsComponent
this.settingsForm.get('themeColor').patchValue('')
}
isDocumentDetailFieldShown(fieldId: string): boolean {
const hiddenFields =
this.settingsForm.value.documentDetailsHiddenFields || []
return !hiddenFields.includes(fieldId)
}
toggleDocumentDetailField(fieldId: string, checked: boolean) {
const hiddenFields = new Set(
this.settingsForm.value.documentDetailsHiddenFields || []
)
if (checked) {
hiddenFields.delete(fieldId)
} else {
hiddenFields.add(fieldId)
}
this.settingsForm
.get('documentDetailsHiddenFields')
.setValue(Array.from(hiddenFields))
}
showSystemStatus() {
const modal: NgbModalRef = this.modalService.open(
SystemStatusDialogComponent,

View File

@@ -97,12 +97,6 @@
<br/><em>(<ng-container i18n>click for full output</ng-container>)</em>
}
</ng-template>
@if (task.duplicate_documents?.length > 0) {
<div class="small text-warning-emphasis d-flex align-items-center gap-1">
<i-bs class="lh-1" width="1em" height="1em" name="exclamation-triangle"></i-bs>
<span i18n>Duplicate(s) detected</span>
</div>
}
</td>
}
<td class="d-lg-none">

View File

@@ -164,11 +164,9 @@
{{ item.name }}
<span class="ms-auto text-muted small">
@if (item.dateEnd) {
{{ item.date | customDate:'mediumDate' }} &ndash; {{ item.dateEnd | customDate:'mediumDate' }}
} @else if (item.dateTilNow) {
{{ item.dateTilNow | customDate:'mediumDate' }} &ndash; <ng-container i18n>now</ng-container>
{{ item.date | customDate:'MMM d' }} &ndash; {{ item.dateEnd | customDate:'mediumDate' }}
} @else {
{{ item.date | customDate:'mediumDate' }}
{{ item.date | customDate:'mediumDate' }} &ndash; <ng-container i18n>now</ng-container>
}
</span>
</div>

View File

@@ -79,34 +79,32 @@ export class DatesDropdownComponent implements OnInit, OnDestroy {
{
id: RelativeDate.WITHIN_1_WEEK,
name: $localize`Within 1 week`,
dateTilNow: new Date().setDate(new Date().getDate() - 7),
date: new Date().setDate(new Date().getDate() - 7),
},
{
id: RelativeDate.WITHIN_1_MONTH,
name: $localize`Within 1 month`,
dateTilNow: new Date().setMonth(new Date().getMonth() - 1),
date: new Date().setMonth(new Date().getMonth() - 1),
},
{
id: RelativeDate.WITHIN_3_MONTHS,
name: $localize`Within 3 months`,
dateTilNow: new Date().setMonth(new Date().getMonth() - 3),
date: new Date().setMonth(new Date().getMonth() - 3),
},
{
id: RelativeDate.WITHIN_1_YEAR,
name: $localize`Within 1 year`,
dateTilNow: new Date().setFullYear(new Date().getFullYear() - 1),
date: new Date().setFullYear(new Date().getFullYear() - 1),
},
{
id: RelativeDate.THIS_YEAR,
name: $localize`This year`,
date: new Date('1/1/' + new Date().getFullYear()),
dateEnd: new Date('12/31/' + new Date().getFullYear()),
},
{
id: RelativeDate.THIS_MONTH,
name: $localize`This month`,
date: new Date().setDate(1),
dateEnd: new Date(new Date().getFullYear(), new Date().getMonth() + 1, 0),
},
{
id: RelativeDate.TODAY,

View File

@@ -146,26 +146,16 @@
<ng-template ngbNavContent>
<div>
<pngx-input-text #inputTitle i18n-title title="Title" formControlName="title" [horizontal]="true" [suggestion]="suggestions?.title" (keyup)="titleKeyUp($event)" [error]="error?.title"></pngx-input-text>
@if (!isFieldHidden(DocumentDetailFieldID.ArchiveSerialNumber)) {
<pngx-input-number i18n-title title="Archive serial number" [error]="error?.archive_serial_number" [horizontal]="true" formControlName='archive_serial_number'></pngx-input-number>
}
<pngx-input-number i18n-title title="Archive serial number" [error]="error?.archive_serial_number" [horizontal]="true" formControlName='archive_serial_number'></pngx-input-number>
<pngx-input-date i18n-title title="Date created" formControlName="created" [suggestions]="suggestions?.dates" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event)"
[error]="error?.created"></pngx-input-date>
@if (!isFieldHidden(DocumentDetailFieldID.Correspondent)) {
<pngx-input-select [items]="correspondents" i18n-title title="Correspondent" formControlName="correspondent" [allowNull]="true" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.Correspondent)"
(createNew)="createCorrespondent($event)" [hideAddButton]="createDisabled(DataType.Correspondent)" [suggestions]="suggestions?.correspondents" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.Correspondent }"></pngx-input-select>
}
@if (!isFieldHidden(DocumentDetailFieldID.DocumentType)) {
<pngx-input-select [items]="documentTypes" i18n-title title="Document type" formControlName="document_type" [allowNull]="true" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.DocumentType)"
(createNew)="createDocumentType($event)" [hideAddButton]="createDisabled(DataType.DocumentType)" [suggestions]="suggestions?.document_types" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.DocumentType }"></pngx-input-select>
}
@if (!isFieldHidden(DocumentDetailFieldID.StoragePath)) {
<pngx-input-select [items]="storagePaths" i18n-title title="Storage path" formControlName="storage_path" [allowNull]="true" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.StoragePath)"
(createNew)="createStoragePath($event)" [hideAddButton]="createDisabled(DataType.StoragePath)" [suggestions]="suggestions?.storage_paths" i18n-placeholder placeholder="Default" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.StoragePath }"></pngx-input-select>
}
@if (!isFieldHidden(DocumentDetailFieldID.Tags)) {
<pngx-input-tags #tagsInput formControlName="tags" [suggestions]="suggestions?.tags" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.Tag)" [hideAddButton]="createDisabled(DataType.Tag)" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.Tag }"></pngx-input-tags>
}
<pngx-input-select [items]="correspondents" i18n-title title="Correspondent" formControlName="correspondent" [allowNull]="true" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.Correspondent)"
(createNew)="createCorrespondent($event)" [hideAddButton]="createDisabled(DataType.Correspondent)" [suggestions]="suggestions?.correspondents" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.Correspondent }"></pngx-input-select>
<pngx-input-select [items]="documentTypes" i18n-title title="Document type" formControlName="document_type" [allowNull]="true" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.DocumentType)"
(createNew)="createDocumentType($event)" [hideAddButton]="createDisabled(DataType.DocumentType)" [suggestions]="suggestions?.document_types" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.DocumentType }"></pngx-input-select>
<pngx-input-select [items]="storagePaths" i18n-title title="Storage path" formControlName="storage_path" [allowNull]="true" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.StoragePath)"
(createNew)="createStoragePath($event)" [hideAddButton]="createDisabled(DataType.StoragePath)" [suggestions]="suggestions?.storage_paths" i18n-placeholder placeholder="Default" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.StoragePath }"></pngx-input-select>
<pngx-input-tags #tagsInput formControlName="tags" [suggestions]="suggestions?.tags" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.Tag)" [hideAddButton]="createDisabled(DataType.Tag)" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.Tag }"></pngx-input-tags>
@for (fieldInstance of document?.custom_fields; track fieldInstance.field; let i = $index) {
<div [formGroup]="customFieldFormFields.controls[i]">
@switch (getCustomFieldFromInstance(fieldInstance)?.data_type) {
@@ -380,37 +370,6 @@
</ng-template>
</li>
}
@if (document?.duplicate_documents?.length) {
<li [ngbNavItem]="DocumentDetailNavIDs.Duplicates">
<a class="text-nowrap" ngbNavLink i18n>
Duplicates
<span class="badge text-bg-secondary ms-1">{{ document.duplicate_documents.length }}</span>
</a>
<ng-template ngbNavContent>
<div class="d-flex flex-column gap-2">
<div class="fst-italic" i18n>Duplicate documents detected:</div>
<div class="list-group">
@for (duplicate of document.duplicate_documents; track duplicate.id) {
<a
class="list-group-item list-group-item-action d-flex justify-content-between align-items-center"
[routerLink]="['/documents', duplicate.id, 'details']"
[class.disabled]="duplicate.deleted_at"
>
<span class="d-flex align-items-center gap-2">
<span>{{ duplicate.title || ('#' + duplicate.id) }}</span>
@if (duplicate.deleted_at) {
<span class="badge text-bg-secondary" i18n>In trash</span>
}
</span>
<span class="text-secondary">#{{ duplicate.id }}</span>
</a>
}
</div>
</div>
</ng-template>
</li>
}
</ul>
<div [ngbNavOutlet]="nav" class="mt-3"></div>

View File

@@ -48,7 +48,6 @@ import {
} from 'src/app/data/filter-rule-type'
import { StoragePath } from 'src/app/data/storage-path'
import { Tag } from 'src/app/data/tag'
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
import { PermissionsGuard } from 'src/app/guards/permissions.guard'
import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe'
import { DocumentTitlePipe } from 'src/app/pipes/document-title.pipe'
@@ -302,16 +301,16 @@ describe('DocumentDetailComponent', () => {
.spyOn(openDocumentsService, 'openDocument')
.mockReturnValueOnce(of(true))
fixture.detectChanges()
expect(component.activeNavID).toEqual(component.DocumentDetailNavIDs.Notes)
expect(component.activeNavID).toEqual(5) // DocumentDetailNavIDs.Notes
})
it('should change url on tab switch', () => {
initNormally()
const navigateSpy = jest.spyOn(router, 'navigate')
component.nav.select(component.DocumentDetailNavIDs.Notes)
component.nav.select(5)
component.nav.navChange.next({
activeId: 1,
nextId: component.DocumentDetailNavIDs.Notes,
nextId: 5,
preventDefault: () => {},
})
fixture.detectChanges()
@@ -353,18 +352,6 @@ describe('DocumentDetailComponent', () => {
expect(component.document).toEqual(doc)
})
it('should fall back to details tab when duplicates tab is active but no duplicates', () => {
initNormally()
component.activeNavID = component.DocumentDetailNavIDs.Duplicates
const noDupDoc = { ...doc, duplicate_documents: [] }
component.updateComponent(noDupDoc)
expect(component.activeNavID).toEqual(
component.DocumentDetailNavIDs.Details
)
})
it('should load already-opened document via param', () => {
initNormally()
jest.spyOn(documentService, 'get').mockReturnValueOnce(of(doc))
@@ -380,38 +367,6 @@ describe('DocumentDetailComponent', () => {
expect(component.document).toEqual(doc)
})
it('should update cached open document duplicates when reloading an open doc', () => {
const openDoc = { ...doc, duplicate_documents: [{ id: 1, title: 'Old' }] }
const updatedDuplicates = [
{ id: 2, title: 'Newer duplicate', deleted_at: null },
]
jest
.spyOn(activatedRoute, 'paramMap', 'get')
.mockReturnValue(of(convertToParamMap({ id: 3, section: 'details' })))
jest.spyOn(documentService, 'get').mockReturnValue(
of({
...doc,
modified: new Date('2024-01-02T00:00:00Z'),
duplicate_documents: updatedDuplicates,
})
)
jest.spyOn(openDocumentsService, 'getOpenDocument').mockReturnValue(openDoc)
const saveSpy = jest.spyOn(openDocumentsService, 'save')
jest.spyOn(openDocumentsService, 'openDocument').mockReturnValue(of(true))
jest.spyOn(customFieldsService, 'listAll').mockReturnValue(
of({
count: customFields.length,
all: customFields.map((f) => f.id),
results: customFields,
})
)
fixture.detectChanges()
expect(openDoc.duplicate_documents).toEqual(updatedDuplicates)
expect(saveSpy).toHaveBeenCalled()
})
it('should disable form if user cannot edit', () => {
currentUserHasObjectPermissions = false
initNormally()
@@ -1016,7 +971,7 @@ describe('DocumentDetailComponent', () => {
it('should display built-in pdf viewer if not disabled', () => {
initNormally()
component.document.archived_file_name = 'file.pdf'
settingsService.set(SETTINGS_KEYS.USE_NATIVE_PDF_VIEWER, false)
jest.spyOn(settingsService, 'get').mockReturnValue(false)
expect(component.useNativePdfViewer).toBeFalsy()
fixture.detectChanges()
expect(fixture.debugElement.query(By.css('pdf-viewer'))).not.toBeNull()
@@ -1025,7 +980,7 @@ describe('DocumentDetailComponent', () => {
it('should display native pdf viewer if enabled', () => {
initNormally()
component.document.archived_file_name = 'file.pdf'
settingsService.set(SETTINGS_KEYS.USE_NATIVE_PDF_VIEWER, true)
jest.spyOn(settingsService, 'get').mockReturnValue(true)
expect(component.useNativePdfViewer).toBeTruthy()
fixture.detectChanges()
expect(fixture.debugElement.query(By.css('object'))).not.toBeNull()

View File

@@ -8,7 +8,7 @@ import {
FormsModule,
ReactiveFormsModule,
} from '@angular/forms'
import { ActivatedRoute, Router, RouterModule } from '@angular/router'
import { ActivatedRoute, Router } from '@angular/router'
import {
NgbDateStruct,
NgbDropdownModule,
@@ -84,7 +84,6 @@ import { ToastService } from 'src/app/services/toast.service'
import { getFilenameFromContentDisposition } from 'src/app/utils/http'
import { ISODateAdapter } from 'src/app/utils/ngb-iso-date-adapter'
import * as UTIF from 'utif'
import { DocumentDetailFieldID } from '../admin/settings/settings.component'
import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component'
import { PasswordRemovalConfirmDialogComponent } from '../common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component'
import { CustomFieldsDropdownComponent } from '../common/custom-fields-dropdown/custom-fields-dropdown.component'
@@ -125,7 +124,6 @@ enum DocumentDetailNavIDs {
Notes = 5,
Permissions = 6,
History = 7,
Duplicates = 8,
}
enum ContentRenderType {
@@ -183,7 +181,6 @@ export enum ZoomSetting {
NgxBootstrapIconsModule,
PdfViewerModule,
TextAreaComponent,
RouterModule,
],
})
export class DocumentDetailComponent
@@ -282,8 +279,6 @@ export class DocumentDetailComponent
public readonly DataType = DataType
public readonly DocumentDetailFieldID = DocumentDetailFieldID
@ViewChild('nav') nav: NgbNav
@ViewChild('pdfPreview') set pdfPreview(element) {
// this gets called when component added or removed from DOM
@@ -330,12 +325,6 @@ export class DocumentDetailComponent
return this.settings.get(SETTINGS_KEYS.DOCUMENT_EDITING_OVERLAY_THUMBNAIL)
}
isFieldHidden(fieldId: DocumentDetailFieldID): boolean {
return this.settings
.get(SETTINGS_KEYS.DOCUMENT_DETAILS_HIDDEN_FIELDS)
.includes(fieldId)
}
private getRenderType(mimeType: string): ContentRenderType {
if (!mimeType) return ContentRenderType.Unknown
if (mimeType === 'application/pdf') {
@@ -465,11 +454,6 @@ export class DocumentDetailComponent
const openDocument = this.openDocumentService.getOpenDocument(
this.documentId
)
// update duplicate documents if present
if (openDocument && doc?.duplicate_documents) {
openDocument.duplicate_documents = doc.duplicate_documents
this.openDocumentService.save()
}
const useDoc = openDocument || doc
if (openDocument) {
if (
@@ -720,13 +704,6 @@ export class DocumentDetailComponent
}
this.title = this.documentTitlePipe.transform(doc.title)
this.prepareForm(doc)
if (
this.activeNavID === DocumentDetailNavIDs.Duplicates &&
!doc?.duplicate_documents?.length
) {
this.activeNavID = DocumentDetailNavIDs.Details
}
}
get customFieldFormFields(): FormArray {

View File

@@ -159,8 +159,6 @@ export interface Document extends ObjectWithPermissions {
page_count?: number
duplicate_documents?: Document[]
// Frontend only
__changedFields?: string[]
}

View File

@@ -1,4 +1,3 @@
import { Document } from './document'
import { ObjectWithId } from './object-with-id'
export enum PaperlessTaskType {
@@ -43,7 +42,5 @@ export interface PaperlessTask extends ObjectWithId {
related_document?: number
duplicate_documents?: Document[]
owner?: number
}

View File

@@ -70,8 +70,6 @@ export const SETTINGS_KEYS = {
'general-settings:document-editing:remove-inbox-tags',
DOCUMENT_EDITING_OVERLAY_THUMBNAIL:
'general-settings:document-editing:overlay-thumbnail',
DOCUMENT_DETAILS_HIDDEN_FIELDS:
'general-settings:document-details:hidden-fields',
SEARCH_DB_ONLY: 'general-settings:search:db-only',
SEARCH_FULL_TYPE: 'general-settings:search:more-link',
EMPTY_TRASH_DELAY: 'trash_delay',
@@ -257,11 +255,6 @@ export const SETTINGS: UiSetting[] = [
type: 'boolean',
default: true,
},
{
key: SETTINGS_KEYS.DOCUMENT_DETAILS_HIDDEN_FIELDS,
type: 'array',
default: [],
},
{
key: SETTINGS_KEYS.SEARCH_DB_ONLY,
type: 'boolean',

View File

@@ -779,45 +779,19 @@ class ConsumerPreflightPlugin(
Q(checksum=checksum) | Q(archive_checksum=checksum),
)
if existing_doc.exists():
existing_doc = existing_doc.order_by("-created")
duplicates_in_trash = existing_doc.filter(deleted_at__isnull=False)
log_msg = (
f"Consuming duplicate {self.filename}: "
f"{existing_doc.count()} existing document(s) share the same content."
)
msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
log_msg = f"Not consuming {self.filename}: It is a duplicate of {existing_doc.get().title} (#{existing_doc.get().pk})."
if duplicates_in_trash.exists():
log_msg += " Note: at least one existing document is in the trash."
self.log.warning(log_msg)
if existing_doc.first().deleted_at is not None:
msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
log_msg += " Note: existing document is in the trash."
if settings.CONSUMER_DELETE_DUPLICATES:
duplicate = existing_doc.first()
duplicate_label = (
duplicate.title
or duplicate.original_filename
or (Path(duplicate.filename).name if duplicate.filename else None)
or str(duplicate.pk)
)
Path(self.input_doc.original_file).unlink()
failure_msg = (
f"Not consuming {self.filename}: "
f"It is a duplicate of {duplicate_label} (#{duplicate.pk})"
)
status_msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
if duplicates_in_trash.exists():
status_msg = (
ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
)
failure_msg += " Note: existing document is in the trash."
self._fail(
status_msg,
failure_msg,
)
self._fail(
msg,
log_msg,
)
def pre_check_directories(self):
"""

View File

@@ -1,598 +0,0 @@
import math
import uuid
from time import perf_counter
from django.contrib.auth import get_user_model
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.db import reset_queries
from django.db.models import Count
from django.db.models import Q
from django.db.models import Subquery
from guardian.shortcuts import assign_perm
from documents.models import CustomField
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import Tag
from documents.permissions import get_objects_for_user_owner_aware
from documents.permissions import permitted_document_ids
class Command(BaseCommand):
# e.g. manage.py document_perf_benchmark --documents 500000 --chunk-size 5000 --tags 40 --tags-per-doc 3 --custom-fields 6 --custom-fields-per-doc 2
help = (
"Seed a synthetic dataset and benchmark permission-filtered document queries "
"for superusers vs non-superusers."
)
def add_arguments(self, parser):
    """
    Register every command-line option of the benchmark command.

    The options are described in a table of ``(flag, keyword arguments)``
    pairs and registered on ``parser`` in the order they are listed.
    """
    option_table = (
        (
            "--documents",
            {
                "type": int,
                "default": 10000,
                "help": "Total documents to generate (default: 10,000)",
            },
        ),
        (
            "--owner-ratio",
            {
                "type": float,
                "default": 0.6,
                "help": "Fraction owned by the benchmarked user (default: 0.6)",
            },
        ),
        (
            "--unowned-ratio",
            {
                "type": float,
                "default": 0.1,
                "help": "Fraction of unowned documents (default: 0.1)",
            },
        ),
        (
            "--shared-ratio",
            {
                "type": float,
                "default": 0.25,
                "help": (
                    "Fraction of other-user documents that are shared via object perms "
                    "with the benchmarked user (default: 0.25)"
                ),
            },
        ),
        (
            "--chunk-size",
            {
                "type": int,
                "default": 2000,
                "help": "Bulk create size for documents (default: 2000)",
            },
        ),
        (
            "--iterations",
            {
                "type": int,
                "default": 3,
                "help": "Number of timing runs per query shape (default: 3)",
            },
        ),
        (
            "--prefix",
            {
                "default": "perf-benchmark",
                "help": "Title prefix used to mark generated documents (default: perf-benchmark)",
            },
        ),
        (
            "--username",
            {
                "default": "perf_user",
                "help": "Username of the non-superuser to benchmark (default: perf_user)",
            },
        ),
        (
            "--other-username",
            {
                "default": "perf_owner",
                "help": "Username used for documents not owned by the benchmarked user (default: perf_owner)",
            },
        ),
        (
            "--super-username",
            {
                "default": "perf_admin",
                "help": "Username of the superuser baseline (default: perf_admin)",
            },
        ),
        (
            "--tags",
            {
                "type": int,
                "default": 0,
                "help": "Number of tags to create and assign (default: 0)",
            },
        ),
        (
            "--tags-per-doc",
            {
                "type": int,
                "default": 1,
                "help": "How many tags to attach to each document (default: 1)",
            },
        ),
        (
            "--custom-fields",
            {
                "type": int,
                "default": 0,
                "help": "Number of string custom fields to create (default: 0)",
            },
        ),
        (
            "--custom-fields-per-doc",
            {
                "type": int,
                "default": 1,
                "help": "How many custom field instances per document (default: 1)",
            },
        ),
        (
            "--skip-tags",
            {
                "action": "store_true",
                "help": "Skip tag document_count benchmarks (useful for large datasets on Postgres)",
            },
        ),
        (
            "--skip-custom-fields",
            {
                "action": "store_true",
                "help": "Skip custom field document_count benchmarks",
            },
        ),
        (
            "--reuse-existing",
            {
                "action": "store_true",
                "help": "Keep previously generated documents with the given prefix instead of recreating",
            },
        ),
        (
            "--cleanup",
            {
                "action": "store_true",
                "help": "Delete previously generated documents with the given prefix and exit",
            },
        ),
    )
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)
def handle(self, *args, **options):
    """
    Seed (or reuse) the synthetic dataset, then run the timing benchmarks.

    With ``--cleanup`` the previously generated objects for the prefix are
    removed and the command returns without creating users or seeding.
    With ``--reuse-existing`` an existing dataset with the prefix is kept
    and seeding is skipped; if none exists, a fresh dataset is seeded.

    Raises:
        CommandError: if a ratio, a count, the chunk size or the iteration
            count is outside its valid range.
    """
    # keep options for downstream checks
    self.options = options
    document_total = options["documents"]
    owner_ratio = options["owner_ratio"]
    unowned_ratio = options["unowned_ratio"]
    shared_ratio = options["shared_ratio"]
    chunk_size = options["chunk_size"]
    iterations = options["iterations"]
    prefix = options["prefix"]
    tags = options["tags"]
    tags_per_doc = options["tags_per_doc"]
    custom_fields = options["custom_fields"]
    custom_fields_per_doc = options["custom_fields_per_doc"]

    # Validate everything up front so bad input fails with a clear message
    # instead of a ZeroDivisionError / bulk_create error deep in the run.
    self._validate_ratios(owner_ratio, unowned_ratio)
    if not 0 <= shared_ratio <= 1:
        # A negative or >1 ratio would produce a wrong slice of shared docs.
        raise CommandError("shared-ratio must be between 0.0 and 1.0")
    if document_total < 0:
        raise CommandError("documents must be non-negative")
    if chunk_size < 1:
        raise CommandError("chunk-size must be at least 1")
    if iterations < 1:
        # The timing helper averages over the runs, so zero runs is invalid.
        raise CommandError("iterations must be at least 1")
    if tags_per_doc < 0 or custom_fields_per_doc < 0:
        raise CommandError("Per-document counts must be non-negative")

    if options["cleanup"]:
        # Cleanup needs no benchmark users, so none are created here.
        removed = self._cleanup(prefix)
        self.stdout.write(
            self.style.SUCCESS(f"Removed {removed} generated documents"),
        )
        return

    target_user, other_user, superuser = self._ensure_users(options)

    existing = 0
    if not options["reuse_existing"]:
        removed = self._cleanup(prefix)
        if removed:
            self.stdout.write(f"Removed existing generated documents: {removed}")
    else:
        existing = Document.objects.filter(title__startswith=prefix).count()
        if existing:
            self.stdout.write(
                f"Reusing existing dataset with prefix '{prefix}': {existing} docs",
            )

    if existing:
        # Nothing was created or deleted since the count above; reuse it.
        self.stdout.write(
            self.style.SUCCESS(
                f"Dataset ready (reused): {existing} docs | prefix={prefix}",
            ),
        )
    else:
        self.stdout.write(
            f"Seeding {document_total} documents (owner_ratio={owner_ratio}, "
            f"unowned_ratio={unowned_ratio}, shared_ratio={shared_ratio})",
        )
        created_counts = self._seed_documents(
            total=document_total,
            owner_ratio=owner_ratio,
            unowned_ratio=unowned_ratio,
            shared_ratio=shared_ratio,
            chunk_size=chunk_size,
            prefix=prefix,
            target_user=target_user,
            other_user=other_user,
        )

        created_tags = []
        if tags:
            created_tags = self._seed_tags(prefix=prefix, count=tags)
            if tags_per_doc and created_tags:
                self._assign_tags_to_documents(
                    prefix=prefix,
                    tags=created_tags,
                    tags_per_doc=tags_per_doc,
                    chunk_size=chunk_size,
                )

        created_custom_fields = []
        if custom_fields:
            created_custom_fields = self._seed_custom_fields(prefix, custom_fields)
            if custom_fields_per_doc and created_custom_fields:
                self._seed_custom_field_instances(
                    prefix=prefix,
                    custom_fields=created_custom_fields,
                    per_doc=custom_fields_per_doc,
                    chunk_size=chunk_size,
                )

        dataset_size = Document.objects.filter(title__startswith=prefix).count()
        self.stdout.write(
            self.style.SUCCESS(
                f"Dataset ready: {dataset_size} docs | owned by target {created_counts['owned']} | "
                f"owned by other {created_counts['other_owned']} | unowned {created_counts['unowned']} | "
                f"shared-perms {created_counts['shared']} | tags {len(created_tags)} | "
                f"custom fields {len(created_custom_fields)}",
            ),
        )

    self.stdout.write("\nRunning benchmarks...\n")
    self._run_benchmarks(
        iterations=iterations,
        target_user=target_user,
        superuser=superuser,
        prefix=prefix,
    )
def _validate_ratios(self, owner_ratio: float, unowned_ratio: float):
if owner_ratio < 0 or unowned_ratio < 0:
raise CommandError("Ratios must be non-negative")
if owner_ratio + unowned_ratio > 1:
raise CommandError("owner-ratio + unowned-ratio cannot exceed 1.0")
def _ensure_users(self, options):
    """
    Fetch or create the three accounts used by the benchmark.

    Returns a ``(target_user, other_user, superuser)`` tuple. The defaults
    below are only applied when an account has to be created.
    """
    user_model = get_user_model()
    # (option key holding the username, defaults used on creation)
    account_specs = (
        ("username", {"email": "perf_user@example.com"}),
        ("other_username", {"email": "perf_owner@example.com"}),
        (
            "super_username",
            {
                "email": "perf_admin@example.com",
                "is_staff": True,
                "is_superuser": True,
            },
        ),
    )
    accounts = []
    for option_key, creation_defaults in account_specs:
        account, _ = user_model.objects.get_or_create(
            username=options[option_key],
            defaults=creation_defaults,
        )
        accounts.append(account)
    return tuple(accounts)
def _cleanup(self, prefix: str) -> int:
    """
    Remove every object previously generated for ``prefix``.

    Custom field instances are handled first: they are located through
    their document's title, so once the documents have been hard-deleted
    that lookup can no longer match anything and the instances would
    never be counted.

    Returns:
        Total number of custom field instances, documents, tags and custom
        fields that matched the prefix before deletion.
    """
    cfi_qs = CustomFieldInstance.global_objects.filter(
        document__title__startswith=prefix,
    )
    cfi_count = cfi_qs.count()
    if cfi_count:
        cfi_qs.hard_delete()

    # global_objects is used so soft-deleted rows are presumably included
    # too — TODO confirm against the soft-delete manager.
    docs_qs = Document.global_objects.filter(title__startswith=prefix)
    doc_count = docs_qs.count()
    if doc_count:
        docs_qs.hard_delete()

    tag_qs = Tag.objects.filter(name__startswith=prefix)
    tag_count = tag_qs.count()
    if tag_count:
        tag_qs.delete()

    cf_qs = CustomField.objects.filter(name__startswith=prefix)
    cf_count = cf_qs.count()
    if cf_count:
        cf_qs.delete()

    return doc_count + tag_count + cf_count + cfi_count
def _seed_documents(
    self,
    *,
    total: int,
    owner_ratio: float,
    unowned_ratio: float,
    shared_ratio: float,
    chunk_size: int,
    prefix: str,
    target_user,
    other_user,
) -> dict[str, int]:
    """
    Bulk-create ``total`` documents and share part of them with the target user.

    Documents are created in index order: first those owned by
    ``target_user``, then those owned by ``other_user``, then unowned ones.
    A ``shared_ratio`` fraction of the other-user documents receives an
    object-level view permission for ``target_user``.

    Returns:
        Counts keyed by ``owned``, ``other_owned``, ``unowned`` and ``shared``.
    """
    owned_total = math.floor(total * owner_ratio)
    unowned_total = math.floor(total * unowned_ratio)
    other_total = total - owned_total - unowned_total
    other_boundary = owned_total + other_total

    pending: list[Document] = []
    owned_by_other: list[Document] = []

    def flush_pending():
        Document.objects.bulk_create(pending, batch_size=chunk_size)
        pending.clear()

    for position in range(total):
        if position >= other_boundary:
            owner = None
        elif position >= owned_total:
            owner = other_user
        else:
            owner = target_user

        document = Document(
            owner=owner,
            title=f"{prefix}-{position:07d}",
            mime_type="application/pdf",
            checksum=self._unique_checksum(position),
            page_count=1,
        )
        if owner is other_user:
            # Kept so object permissions can be assigned after creation.
            owned_by_other.append(document)
        pending.append(document)

        if len(pending) >= chunk_size:
            flush_pending()

    if pending:
        Document.objects.bulk_create(pending, batch_size=chunk_size)

    shared_total = math.floor(len(owned_by_other) * shared_ratio)
    for document in owned_by_other[:shared_total]:
        assign_perm("documents.view_document", target_user, document)

    return {
        "owned": owned_total,
        "other_owned": other_total,
        "unowned": unowned_total,
        "shared": shared_total,
    }
def _seed_tags(self, *, prefix: str, count: int) -> list[Tag]:
    """
    Create ``count`` tags named ``<prefix>-tag-NNN`` and return all prefixed tags.

    Conflicting rows are ignored on insert, and the result is re-read from
    the database, so it contains every tag whose name starts with ``prefix``.
    """
    pending = []
    for number in range(count):
        pending.append(Tag(name=f"{prefix}-tag-{number:03d}"))
    Tag.objects.bulk_create(pending, ignore_conflicts=True)

    matching = Tag.objects.filter(name__startswith=prefix)
    return list(matching)
def _assign_tags_to_documents(
    self,
    *,
    prefix: str,
    tags: list[Tag],
    tags_per_doc: int,
    chunk_size: int,
):
    """
    Attach up to ``tags_per_doc`` tags to every generated document.

    Tags are picked round-robin, starting at an offset derived from the
    document's position, and each tag is attached at most once per document.
    Rows are written to the many-to-many through table in chunks.
    """
    if tags_per_doc < 1 or not tags:
        return

    link_model = Document.tags.through
    available_ids = [tag.id for tag in tags]
    available = len(available_ids)
    document_ids = (
        Document.objects.filter(title__startswith=prefix)
        .values_list("id", flat=True)
        .iterator()
    )

    pending = []
    for position, doc_id in enumerate(document_ids):
        first = position % available
        seen = set()
        for step in range(tags_per_doc):
            candidate = available_ids[(first + step) % available]
            if candidate not in seen:
                seen.add(candidate)
                pending.append(link_model(document_id=doc_id, tag_id=candidate))
                if len(pending) >= chunk_size:
                    link_model.objects.bulk_create(pending, ignore_conflicts=True)
                    pending.clear()

    if pending:
        link_model.objects.bulk_create(pending, ignore_conflicts=True)
def _seed_custom_fields(self, prefix: str, count: int) -> list[CustomField]:
    """
    Create ``count`` string custom fields named ``<prefix>-cf-NNN``.

    Conflicting rows are ignored on insert; the returned list is re-read
    from the database and holds every custom field whose name starts with
    ``prefix``.
    """
    pending = []
    for number in range(count):
        pending.append(
            CustomField(
                name=f"{prefix}-cf-{number:03d}",
                data_type=CustomField.FieldDataType.STRING,
            ),
        )
    CustomField.objects.bulk_create(pending, ignore_conflicts=True)

    matching = CustomField.objects.filter(name__startswith=prefix)
    return list(matching)
def _seed_custom_field_instances(
    self,
    *,
    prefix: str,
    custom_fields: list[CustomField],
    per_doc: int,
    chunk_size: int,
):
    """
    Create ``per_doc`` custom field instances for every generated document.

    Fields are picked round-robin, starting at an offset derived from the
    document's position. Instances are inserted in chunks with conflicts
    ignored.
    """
    if per_doc < 1 or not custom_fields:
        return

    field_ids = [field.id for field in custom_fields]
    field_total = len(field_ids)
    document_ids = (
        Document.objects.filter(title__startswith=prefix)
        .values_list("id", flat=True)
        .iterator()
    )

    pending = []

    def flush_pending():
        CustomFieldInstance.objects.bulk_create(
            pending,
            batch_size=chunk_size,
            ignore_conflicts=True,
        )

    for position, doc_id in enumerate(document_ids):
        first = position % field_total
        for step in range(per_doc):
            field_id = field_ids[(first + step) % field_total]
            pending.append(
                CustomFieldInstance(
                    document_id=doc_id,
                    field_id=field_id,
                    value_text=f"val-{doc_id}-{field_id}",
                ),
            )
            if len(pending) >= chunk_size:
                flush_pending()
                pending.clear()

    if pending:
        flush_pending()
def _run_benchmarks(self, *, iterations: int, target_user, superuser, prefix: str):
    """
    Time the document-count queries, then the optional tag and custom field counts.

    The tag and custom field sections are skipped when the ``skip_tags`` /
    ``skip_custom_fields`` options stored on ``self.options`` are set.
    """
    self.stdout.write("-> doc counts")
    document_count_cases = (
        (
            "non-superuser: id__in(values_list flat=True)",
            lambda: self._count_with_values_list(target_user),
        ),
        (
            "non-superuser: id__in(Subquery(values_list))",
            lambda: self._count_with_subquery(target_user),
        ),
        (
            "superuser baseline",
            lambda: Document.objects.count(),
        ),
    )
    for case_label, case_fn in document_count_cases:
        self._time_query(label=case_label, iterations=iterations, fn=case_fn)

    skip_tags = self.options.get("skip_tags")
    if not skip_tags:
        self.stdout.write("-> tag counts")
        self._time_tag_counts(
            iterations=iterations,
            prefix=prefix,
            user=target_user,
        )

    skip_custom_fields = self.options.get("skip_custom_fields")
    if not skip_custom_fields:
        self.stdout.write("-> custom field counts")
        self._time_custom_field_counts(
            iterations=iterations,
            prefix=prefix,
            user=target_user,
            superuser=superuser,
        )
def _count_with_values_list(self, user) -> int:
    """
    Count documents viewable by ``user`` via ``id__in`` on a flat values_list.
    """
    viewable = get_objects_for_user_owner_aware(
        user,
        "documents.view_document",
        Document,
    )
    viewable_ids = viewable.values_list("id", flat=True)
    matching = Document.objects.filter(id__in=viewable_ids)
    return matching.count()
def _count_with_subquery(self, user) -> int:
    """
    Count documents viewable by ``user`` via ``id__in`` on an explicit Subquery.
    """
    viewable = get_objects_for_user_owner_aware(
        user,
        "documents.view_document",
        Document,
    )
    return Document.objects.filter(
        id__in=Subquery(viewable.values_list("id")),
    ).count()
def _document_filter(self, user, *, use_subquery: bool):
    """
    Build the ``Q`` filter used when counting documents related to an object.

    For no user or a superuser only soft-deleted documents are excluded.
    Otherwise the filter is additionally restricted to the documents the
    user may view, expressed either as a ``Subquery`` or as a flat
    values_list depending on ``use_subquery``.
    """
    not_deleted = {"documents__deleted_at__isnull": True}
    if user is None or getattr(user, "is_superuser", False):
        return Q(**not_deleted)

    viewable = get_objects_for_user_owner_aware(
        user,
        "documents.view_document",
        Document,
    )
    if use_subquery:
        ids = Subquery(viewable.values_list("id"))
    else:
        ids = viewable.values_list("id", flat=True)
    return Q(**not_deleted, documents__id__in=ids)
def _tag_queryset(self, *, prefix: str, filter_q: Q):
    """
    Return the prefixed tags annotated with a filtered ``document_count``.
    """
    prefixed_tags = Tag.objects.filter(name__startswith=prefix)
    counted = Count("documents", filter=filter_q)
    return prefixed_tags.annotate(document_count=counted)
def _time_tag_counts(self, *, iterations: int, prefix: str, user):
    """
    Time the grouped per-tag document count for documents ``user`` may see.

    Does nothing when no tag with the given prefix exists.
    """
    has_prefixed_tags = Tag.objects.filter(name__startswith=prefix).exists()
    if not has_prefixed_tags:
        return

    def grouped_counts():
        # One row per tag: (tag_id, number of permitted documents).
        links = Tag.documents.through.objects.filter(
            document_id__in=Subquery(permitted_document_ids(user)),
        )
        per_tag = links.values("tag_id").annotate(c=Count("document_id"))
        return list(per_tag.values_list("tag_id", "c"))

    self._time_query(
        label="tag document_count (grouped)",
        iterations=iterations,
        fn=grouped_counts,
    )
def _time_custom_field_counts(
    self,
    *,
    iterations: int,
    prefix: str,
    user,
    superuser,
):
    """
    Time grouped per-custom-field document counts.

    Two variants are measured: one restricted to the documents ``user`` may
    see, and a baseline over all non-deleted documents. ``superuser`` is
    accepted but not read by this method. Does nothing when no custom field
    with the given prefix exists.
    """
    has_prefixed_fields = CustomField.objects.filter(
        name__startswith=prefix,
    ).exists()
    if not has_prefixed_fields:
        return

    restricted_ids = Subquery(permitted_document_ids(user))
    baseline_ids = CustomFieldInstance.objects.filter(
        document__deleted_at__isnull=True,
    ).values_list("document_id")

    def grouped_counts(document_ids):
        # One row per custom field: (field_id, number of matching documents).
        instances = CustomFieldInstance.objects.filter(
            document_id__in=document_ids,
            field__name__startswith=prefix,
        )
        per_field = instances.values("field_id").annotate(c=Count("document_id"))
        return list(per_field.values_list("field_id", "c"))

    cases = (
        ("custom fields document_count (grouped permitted)", restricted_ids),
        ("custom fields document_count superuser baseline", baseline_ids),
    )
    for case_label, case_ids in cases:
        self._time_query(
            label=case_label,
            iterations=iterations,
            fn=lambda ids=case_ids: grouped_counts(ids),
        )
def _time_query(self, *, label: str, iterations: int, fn):
    """
    Call ``fn`` ``iterations`` times and print min/avg/max wall-clock durations.

    Args:
        label: Text printed in front of the timings.
        iterations: Number of timed calls; must be at least 1.
        fn: Zero-argument callable to time; its return value is discarded.

    Raises:
        CommandError: if ``iterations`` is less than 1. Without this guard
            the average below would divide by zero.
    """
    if iterations < 1:
        raise CommandError("iterations must be at least 1")

    durations = []
    for _ in range(iterations):
        # Clear Django's recorded query log before each timed run.
        reset_queries()
        start = perf_counter()
        fn()
        durations.append(perf_counter() - start)

    avg = sum(durations) / len(durations)
    self.stdout.write(
        f"{label}: min={min(durations):.4f}s avg={avg:.4f}s max={max(durations):.4f}s",
    )
def _unique_checksum(self, idx: int) -> str:
return f"{uuid.uuid4().hex}{idx:08d}"[:32]

View File

@@ -1,23 +0,0 @@
# Generated by Django 5.2.7 on 2026-01-14 17:45
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """
    Redefine ``Document.checksum`` as a non-editable 32-character CharField.

    The field definition here carries no ``unique`` option.
    """

    dependencies = [
        ("documents", "0005_workflowtrigger_filter_has_any_correspondents_and_more"),
    ]

    operations = [
        migrations.AlterField(
            model_name="document",
            name="checksum",
            field=models.CharField(
                verbose_name="checksum",
                help_text="The checksum of the original document.",
                max_length=32,
                editable=False,
            ),
        ),
    ]

View File

@@ -1,25 +0,0 @@
# Generated by Django 5.2.6 on 2026-01-24 07:33
import django.db.models.functions.text
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    """
    Add the persisted generated column ``Document.content_length``.

    The column is computed by the database as ``Length("content")``.
    """

    dependencies = [
        ("documents", "0006_alter_document_checksum_unique"),
    ]

    operations = [
        migrations.AddField(
            model_name="document",
            name="content_length",
            field=models.GeneratedField(
                expression=django.db.models.functions.text.Length("content"),
                output_field=models.PositiveIntegerField(default=0),
                db_persist=True,
                null=False,
                help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
            ),
        ),
    ]

View File

@@ -20,9 +20,7 @@ if settings.AUDIT_LOG_ENABLED:
from auditlog.registry import auditlog
from django.db.models import Case
from django.db.models import PositiveIntegerField
from django.db.models.functions import Cast
from django.db.models.functions import Length
from django.db.models.functions import Substr
from django_softdelete.models import SoftDeleteModel
@@ -194,15 +192,6 @@ class Document(SoftDeleteModel, ModelWithOwner):
),
)
content_length = models.GeneratedField(
expression=Length("content"),
output_field=PositiveIntegerField(default=0),
db_persist=True,
null=False,
serialize=False,
help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
)
mime_type = models.CharField(_("mime type"), max_length=256, editable=False)
tags = models.ManyToManyField(
@@ -216,6 +205,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
_("checksum"),
max_length=32,
editable=False,
unique=True,
help_text=_("The checksum of the original document."),
)
@@ -956,7 +946,7 @@ if settings.AUDIT_LOG_ENABLED:
auditlog.register(
Document,
m2m_fields={"tags"},
exclude_fields=["content_length", "modified"],
exclude_fields=["modified"],
)
auditlog.register(Correspondent)
auditlog.register(Tag)

View File

@@ -139,48 +139,22 @@ def get_document_count_filter_for_user(user):
if getattr(user, "is_superuser", False):
return Q(documents__deleted_at__isnull=True)
return Q(
documents__id__in=permitted_document_ids(user),
documents__deleted_at__isnull=True,
documents__id__in=get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
).values_list("id", flat=True),
)
def permitted_document_ids(user):
    """
    Return a queryset of the IDs of non-deleted documents the user may view.

    The result is a ``values_list("id")`` queryset intended to be used as
    the right-hand side of an ``__in`` lookup. For a missing or
    unauthenticated user it is empty.
    """
    authenticated = user is not None and getattr(user, "is_authenticated", False)
    if not authenticated:
        return Document.objects.none().values_list("id")

    viewable = get_objects_for_user_owner_aware(
        user,
        "documents.view_document",
        Document,
    )
    return viewable.filter(deleted_at__isnull=True).values_list("id")
def get_objects_for_user_owner_aware(
user,
perms,
Model,
*,
include_deleted=False,
) -> QuerySet:
"""
Returns objects the user owns, are unowned, or has explicit perms.
When include_deleted is True, soft-deleted items are also included.
"""
manager = (
Model.global_objects
if include_deleted and hasattr(Model, "global_objects")
else Model.objects
)
objects_owned = manager.filter(owner=user)
objects_unowned = manager.filter(owner__isnull=True)
def get_objects_for_user_owner_aware(user, perms, Model) -> QuerySet:
objects_owned = Model.objects.filter(owner=user)
objects_unowned = Model.objects.filter(owner__isnull=True)
objects_with_perms = get_objects_for_user(
user=user,
perms=perms,
klass=manager.all(),
klass=Model,
accept_global_perms=False,
)
return objects_owned | objects_unowned | objects_with_perms

View File

@@ -23,8 +23,6 @@ from django.core.validators import MinValueValidator
from django.core.validators import RegexValidator
from django.core.validators import integer_validator
from django.db.models import Count
from django.db.models import Q
from django.db.models import Subquery
from django.db.models.functions import Lower
from django.utils.crypto import get_random_string
from django.utils.dateparse import parse_datetime
@@ -72,9 +70,8 @@ from documents.models import WorkflowActionEmail
from documents.models import WorkflowActionWebhook
from documents.models import WorkflowTrigger
from documents.parsers import is_mime_type_supported
from documents.permissions import get_document_count_filter_for_user
from documents.permissions import get_groups_with_only_permission
from documents.permissions import get_objects_for_user_owner_aware
from documents.permissions import permitted_document_ids
from documents.permissions import set_permissions_for_object
from documents.regex import validate_regex_pattern
from documents.templating.filepath import validate_filepath_template_and_render
@@ -85,9 +82,6 @@ from documents.validators import url_validator
if TYPE_CHECKING:
from collections.abc import Iterable
from django.db.models.query import QuerySet
logger = logging.getLogger("paperless.serializers")
@@ -590,41 +584,18 @@ class TagSerializer(MatchingModelSerializer, OwnedObjectSerializer):
if children_map is not None:
children = children_map.get(obj.pk, [])
else:
filter_q = self.context.get("document_count_filter")
request = self.context.get("request")
user = getattr(request, "user", None) if request else None
if filter_q is None:
user = getattr(request, "user", None) if request else None
filter_q = get_document_count_filter_for_user(user)
self.context["document_count_filter"] = filter_q
filter_kind = self.context.get("document_count_filter")
if filter_kind is None:
filter_kind = (
"superuser"
if user and getattr(user, "is_superuser", False)
else "restricted"
)
self.context["document_count_filter"] = filter_kind
queryset = obj.get_children_queryset().select_related("owner")
if filter_kind == "superuser":
children = queryset.annotate(
document_count=Count(
"documents",
filter=Q(documents__deleted_at__isnull=True),
distinct=True,
),
)
else:
permitted_ids = Subquery(permitted_document_ids(user))
counts = dict(
Tag.documents.through.objects.filter(
document_id__in=permitted_ids,
)
.values("tag_id")
.annotate(c=Count("document_id"))
.values_list("tag_id", "c"),
)
children = list(queryset)
for child in children:
child.document_count = counts.get(child.id, 0)
children = (
obj.get_children_queryset()
.select_related("owner")
.annotate(document_count=Count("documents", filter=filter_q))
)
view = self.context.get("view")
ordering = (
@@ -633,11 +604,7 @@ class TagSerializer(MatchingModelSerializer, OwnedObjectSerializer):
else None
)
ordering = ordering or (Lower("name"),)
if hasattr(children, "order_by"):
children = children.order_by(*ordering)
else:
# children is a list (pre-fetched); apply basic ordering on name
children = sorted(children, key=lambda c: (c.name or "").lower())
children = children.order_by(*ordering)
serializer = TagSerializer(
children,
@@ -1047,32 +1014,6 @@ class NotesSerializer(serializers.ModelSerializer):
return ret
def _get_viewable_duplicates(
    document: Document,
    user: User | None,
) -> QuerySet[Document]:
    """
    Return other documents sharing a checksum with ``document`` that ``user`` may view.

    A document counts as a duplicate when its checksum or archive checksum
    equals the given document's checksum or (if set) archive checksum.
    Soft-deleted documents are included; results are ordered newest first.
    """
    known_checksums = {document.checksum}
    if document.archive_checksum:
        known_checksums.add(document.archive_checksum)

    same_content = Q(checksum__in=known_checksums) | Q(
        archive_checksum__in=known_checksums,
    )
    candidates = (
        Document.global_objects.filter(same_content)
        .exclude(pk=document.pk)
        .order_by("-created")
    )

    viewable = get_objects_for_user_owner_aware(
        user,
        "documents.view_document",
        Document,
        include_deleted=True,
    )
    return candidates.filter(id__in=viewable)
class DuplicateDocumentSummarySerializer(serializers.Serializer):
    """
    Minimal description of a duplicate document: id, title and deletion time.
    """

    # Primary key of the duplicate document.
    id = serializers.IntegerField()
    # Title of the duplicate document.
    title = serializers.CharField()
    # Deletion timestamp; may be null.
    deleted_at = serializers.DateTimeField(allow_null=True)
@extend_schema_serializer(
deprecate_fields=["created_date"],
)
@@ -1090,7 +1031,6 @@ class DocumentSerializer(
archived_file_name = SerializerMethodField()
created_date = serializers.DateField(required=False)
page_count = SerializerMethodField()
duplicate_documents = SerializerMethodField()
notes = NotesSerializer(many=True, required=False, read_only=True)
@@ -1116,16 +1056,6 @@ class DocumentSerializer(
def get_page_count(self, obj) -> int | None:
return obj.page_count
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
def get_duplicate_documents(self, obj):
    """
    List duplicates of ``obj`` the requesting user may view.

    An empty list is returned whenever a view is present in the serializer
    context and its action is anything other than ``retrieve``.
    """
    current_view = self.context.get("view")
    is_detail_request = (
        not current_view or getattr(current_view, "action", None) == "retrieve"
    )
    if not is_detail_request:
        return []

    request = self.context.get("request")
    viewer = request.user if request else None
    summaries = _get_viewable_duplicates(obj, viewer).values(
        "id",
        "title",
        "deleted_at",
    )
    return list(summaries)
def get_original_file_name(self, obj) -> str | None:
return obj.original_filename
@@ -1303,7 +1233,6 @@ class DocumentSerializer(
"archive_serial_number",
"original_file_name",
"archived_file_name",
"duplicate_documents",
"owner",
"permissions",
"user_can_change",
@@ -2165,12 +2094,10 @@ class TasksViewSerializer(OwnedObjectSerializer):
"result",
"acknowledged",
"related_document",
"duplicate_documents",
"owner",
)
related_document = serializers.SerializerMethodField()
duplicate_documents = serializers.SerializerMethodField()
created_doc_re = re.compile(r"New document id (\d+) created")
duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")
@@ -2195,17 +2122,6 @@ class TasksViewSerializer(OwnedObjectSerializer):
return result
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
def get_duplicate_documents(self, obj):
    """
    List duplicates of the task's related document that the user may view.

    Returns an empty list when the task has no related document, when no
    user is available, or when the related document cannot be found.
    """
    related_document = self.get_related_document(obj)
    request = self.context.get("request")
    user = request.user if request else None
    # Bail out before touching the database: without a related document or
    # a user the result is always empty, so the lookup below would be wasted.
    if not related_document or not user:
        return []

    document = Document.global_objects.filter(pk=related_document).first()
    if not document:
        return []

    duplicates = _get_viewable_duplicates(document, user)
    return list(duplicates.values("id", "title", "deleted_at"))
class RunTaskViewSerializer(serializers.Serializer):
task_name = serializers.ChoiceField(

View File

@@ -131,10 +131,6 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
self.assertIn("content", results_full[0])
self.assertIn("id", results_full[0])
# Content length is used internally for performance reasons.
# No need to expose this field.
self.assertNotIn("content_length", results_full[0])
response = self.client.get("/api/documents/?fields=id", format="json")
self.assertEqual(response.status_code, status.HTTP_200_OK)
results = response.data["results"]

View File

@@ -7,7 +7,6 @@ from django.contrib.auth.models import User
from rest_framework import status
from rest_framework.test import APITestCase
from documents.models import Document
from documents.models import PaperlessTask
from documents.tests.utils import DirectoriesMixin
from documents.views import TasksViewSet
@@ -259,7 +258,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
task_id=str(uuid.uuid4()),
task_file_name="task_one.pdf",
status=celery.states.FAILURE,
result="test.pdf: Unexpected error during ingestion.",
result="test.pdf: Not consuming test.pdf: It is a duplicate.",
)
response = self.client.get(self.ENDPOINT)
@@ -271,7 +270,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
self.assertEqual(
returned_data["result"],
"test.pdf: Unexpected error during ingestion.",
"test.pdf: Not consuming test.pdf: It is a duplicate.",
)
def test_task_name_webui(self):
@@ -326,34 +325,20 @@ class TestTasks(DirectoriesMixin, APITestCase):
self.assertEqual(returned_data["task_file_name"], "anothertest.pdf")
def test_task_result_duplicate_warning_includes_count(self):
def test_task_result_failed_duplicate_includes_related_doc(self):
"""
GIVEN:
- A celery task succeeds, but a duplicate exists
- A celery task failed with a duplicate error
WHEN:
- API call is made to get tasks
THEN:
- The returned data includes duplicate warning metadata
- The returned data includes a related document link
"""
checksum = "duplicate-checksum"
Document.objects.create(
title="Existing",
content="",
mime_type="application/pdf",
checksum=checksum,
)
created_doc = Document.objects.create(
title="Created",
content="",
mime_type="application/pdf",
checksum=checksum,
archive_checksum="another-checksum",
)
PaperlessTask.objects.create(
task_id=str(uuid.uuid4()),
task_file_name="task_one.pdf",
status=celery.states.SUCCESS,
result=f"Success. New document id {created_doc.pk} created",
status=celery.states.FAILURE,
result="Not consuming task_one.pdf: It is a duplicate of task_one_existing.pdf (#1234).",
)
response = self.client.get(self.ENDPOINT)
@@ -363,7 +348,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
returned_data = response.data[0]
self.assertEqual(returned_data["related_document"], str(created_doc.pk))
self.assertEqual(returned_data["related_document"], "1234")
def test_run_train_classifier_task(self):
"""

View File

@@ -485,21 +485,21 @@ class TestConsumer(
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
self.assertEqual(Document.objects.count(), 2)
self._assert_first_last_send_progress()
self._assert_first_last_send_progress(last_status="FAILED")
def testDuplicates2(self):
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
with self.get_consumer(self.get_test_archive_file()) as consumer:
consumer.run()
with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
with self.get_consumer(self.get_test_archive_file()) as consumer:
consumer.run()
self.assertEqual(Document.objects.count(), 2)
self._assert_first_last_send_progress()
self._assert_first_last_send_progress(last_status="FAILED")
def testDuplicates3(self):
with self.get_consumer(self.get_test_archive_file()) as consumer:
@@ -513,10 +513,9 @@ class TestConsumer(
Document.objects.all().delete()
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
self.assertEqual(Document.objects.count(), 1)
with self.assertRaisesMessage(ConsumerError, "document is in the trash"):
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
def testAsnExists(self):
with self.get_consumer(
@@ -719,45 +718,12 @@ class TestConsumer(
dst = self.get_test_file()
self.assertIsFile(dst)
expected_message = (
f"{dst.name}: Not consuming {dst.name}: "
f"It is a duplicate of {document.title} (#{document.pk})"
)
with self.assertRaisesMessage(ConsumerError, expected_message):
with self.assertRaises(ConsumerError):
with self.get_consumer(dst) as consumer:
consumer.run()
self.assertIsNotFile(dst)
self.assertEqual(Document.objects.count(), 1)
self._assert_first_last_send_progress(last_status=ProgressStatusOptions.FAILED)
@override_settings(CONSUMER_DELETE_DUPLICATES=True)
def test_delete_duplicate_in_trash(self):
    """
    Consuming a file whose only existing copy is in the trash must fail,
    delete the incoming file and report that the duplicate is trashed.
    """
    first_copy = self.get_test_file()
    with self.get_consumer(first_copy) as consumer:
        consumer.run()

    # Move the existing document to trash
    trashed = Document.objects.first()
    trashed.delete()

    second_copy = self.get_test_file()
    self.assertIsFile(second_copy)

    file_name = second_copy.name
    expected_message = (
        f"{file_name}: Not consuming {file_name}: "
        f"It is a duplicate of {trashed.title} (#{trashed.pk})"
        " Note: existing document is in the trash."
    )

    with self.assertRaisesMessage(ConsumerError, expected_message):
        with self.get_consumer(second_copy) as consumer:
            consumer.run()

    self.assertIsNotFile(second_copy)
    # The trashed document still exists globally but not in the default manager.
    self.assertEqual(Document.global_objects.count(), 1)
    self.assertEqual(Document.objects.count(), 0)
    self._assert_first_last_send_progress(last_status="FAILED")
@override_settings(CONSUMER_DELETE_DUPLICATES=False)
def test_no_delete_duplicate(self):
@@ -777,12 +743,15 @@ class TestConsumer(
dst = self.get_test_file()
self.assertIsFile(dst)
with self.get_consumer(dst) as consumer:
consumer.run()
with self.assertRaisesRegex(
ConsumerError,
r"sample\.pdf: Not consuming sample\.pdf: It is a duplicate of sample \(#\d+\)",
):
with self.get_consumer(dst) as consumer:
consumer.run()
self.assertIsNotFile(dst)
self.assertEqual(Document.objects.count(), 2)
self._assert_first_last_send_progress()
self.assertIsFile(dst)
self._assert_first_last_send_progress(last_status="FAILED")
@override_settings(FILENAME_FORMAT="{title}")
@mock.patch("documents.parsers.document_consumer_declaration.send")

View File

@@ -241,10 +241,6 @@ class TestExportImport(
checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(checksum, element["fields"]["checksum"])
# Generated field "content_length" should not be exported,
# it is automatically computed during import.
self.assertNotIn("content_length", element["fields"])
if document_exporter.EXPORTER_ARCHIVE_NAME in element:
fname = (
self.target / element[document_exporter.EXPORTER_ARCHIVE_NAME]

View File

@@ -33,9 +33,9 @@ from django.db.models import IntegerField
from django.db.models import Max
from django.db.models import Model
from django.db.models import Q
from django.db.models import Subquery
from django.db.models import Sum
from django.db.models import When
from django.db.models.functions import Length
from django.db.models.functions import Lower
from django.db.models.manager import Manager
from django.http import FileResponse
@@ -154,7 +154,6 @@ from documents.permissions import ViewDocumentsPermissions
from documents.permissions import get_document_count_filter_for_user
from documents.permissions import get_objects_for_user_owner_aware
from documents.permissions import has_perms_owner_aware
from documents.permissions import permitted_document_ids
from documents.permissions import set_permissions_for_object
from documents.schema import generate_object_with_permissions_schema
from documents.serialisers import AcknowledgeTasksViewSerializer
@@ -2327,19 +2326,23 @@ class StatisticsView(GenericAPIView):
user = request.user if request.user is not None else None
documents = (
Document.objects.all()
if user is None
else get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
(
Document.objects.all()
if user is None
else get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
)
)
.only("mime_type", "content")
.prefetch_related("tags")
)
tags = (
Tag.objects.all()
if user is None
else get_objects_for_user_owner_aware(user, "documents.view_tag", Tag)
).only("id", "is_inbox_tag")
)
correspondent_count = (
Correspondent.objects.count()
if user is None
@@ -2368,33 +2371,31 @@ class StatisticsView(GenericAPIView):
).count()
)
inbox_tag_pks = list(
tags.filter(is_inbox_tag=True).values_list("pk", flat=True),
)
documents_total = documents.count()
inbox_tags = tags.filter(is_inbox_tag=True)
documents_inbox = (
documents.filter(tags__id__in=inbox_tag_pks).values("id").distinct().count()
if inbox_tag_pks
documents.filter(tags__id__in=inbox_tags).distinct().count()
if inbox_tags.exists()
else None
)
# Single SQL request for document stats and mime type counts
mime_type_stats = list(
document_file_type_counts = (
documents.values("mime_type")
.annotate(
mime_type_count=Count("id"),
mime_type_chars=Sum("content_length"),
)
.order_by("-mime_type_count"),
.annotate(mime_type_count=Count("mime_type"))
.order_by("-mime_type_count")
if documents_total > 0
else []
)
# Calculate totals from grouped results
documents_total = sum(row["mime_type_count"] for row in mime_type_stats)
character_count = sum(row["mime_type_chars"] or 0 for row in mime_type_stats)
document_file_type_counts = [
{"mime_type": row["mime_type"], "mime_type_count": row["mime_type_count"]}
for row in mime_type_stats
]
character_count = (
documents.annotate(
characters=Length("content"),
)
.aggregate(Sum("characters"))
.get("characters__sum")
)
current_asn = Document.objects.aggregate(
Max("archive_serial_number", default=0),
@@ -2407,9 +2408,11 @@ class StatisticsView(GenericAPIView):
"documents_total": documents_total,
"documents_inbox": documents_inbox,
"inbox_tag": (
inbox_tag_pks[0] if inbox_tag_pks else None
inbox_tags.first().pk if inbox_tags.exists() else None
), # backwards compatibility
"inbox_tags": (inbox_tag_pks if inbox_tag_pks else None),
"inbox_tags": (
[tag.pk for tag in inbox_tags] if inbox_tags.exists() else None
),
"document_file_type_counts": document_file_type_counts,
"character_count": character_count,
"tag_count": len(tags),
@@ -3009,32 +3012,27 @@ class CustomFieldViewSet(ModelViewSet):
queryset = CustomField.objects.all().order_by("-created")
def get_queryset(self):
user = self.request.user
if user is None or user.is_superuser:
return (
super()
.get_queryset()
.annotate(
document_count=Count(
"fields",
filter=Q(fields__document__deleted_at__isnull=True),
distinct=True,
),
filter = (
Q(fields__document__deleted_at__isnull=True)
if self.request.user is None or self.request.user.is_superuser
else (
Q(
fields__document__deleted_at__isnull=True,
fields__document__id__in=get_objects_for_user_owner_aware(
self.request.user,
"documents.view_document",
Document,
).values_list("id", flat=True),
)
)
permitted_ids = Subquery(permitted_document_ids(user))
)
return (
super()
.get_queryset()
.annotate(
document_count=Count(
"fields",
filter=Q(
fields__document__deleted_at__isnull=True,
fields__document_id__in=permitted_ids,
),
distinct=True,
filter=filter,
),
)
)

File diff suppressed because it is too large Load Diff