Compare commits

..

1 Commits

34 changed files with 794 additions and 1935 deletions

View File

@@ -23,7 +23,7 @@ env:
jobs: jobs:
build: build:
name: Build Documentation name: Build Documentation
runs-on: ubuntu-24.04 runs-on: ubuntu-slim
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v6 uses: actions/checkout@v6
@@ -58,7 +58,7 @@ jobs:
name: Deploy Documentation name: Deploy Documentation
needs: build needs: build
if: github.event_name == 'push' && github.ref == 'refs/heads/main' if: github.event_name == 'push' && github.ref == 'refs/heads/main'
runs-on: ubuntu-24.04 runs-on: ubuntu-slim
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v6 uses: actions/checkout@v6

View File

@@ -12,7 +12,7 @@ concurrency:
jobs: jobs:
pre-commit: pre-commit:
name: Pre-commit Checks name: Pre-commit Checks
runs-on: ubuntu-24.04 runs-on: ubuntu-slim
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v6 uses: actions/checkout@v6

View File

@@ -10,7 +10,7 @@ jobs:
synchronize-with-crowdin: synchronize-with-crowdin:
name: Crowdin Sync name: Crowdin Sync
if: github.repository_owner == 'paperless-ngx' if: github.repository_owner == 'paperless-ngx'
runs-on: ubuntu-24.04 runs-on: ubuntu-slim
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v6 uses: actions/checkout@v6

View File

@@ -8,7 +8,7 @@ permissions:
jobs: jobs:
pr-bot: pr-bot:
name: Automated PR Bot name: Automated PR Bot
runs-on: ubuntu-latest runs-on: ubuntu-slim
steps: steps:
- name: Label PR by file path or branch name - name: Label PR by file path or branch name
# see .github/labeler.yml for the labeler config # see .github/labeler.yml for the labeler config

View File

@@ -12,7 +12,7 @@ permissions:
jobs: jobs:
pr_opened_or_reopened: pr_opened_or_reopened:
name: pr_opened_or_reopened name: pr_opened_or_reopened
runs-on: ubuntu-24.04 runs-on: ubuntu-slim
permissions: permissions:
# write permission is required for autolabeler # write permission is required for autolabeler
pull-requests: write pull-requests: write

View File

@@ -13,7 +13,7 @@ jobs:
stale: stale:
name: 'Stale' name: 'Stale'
if: github.repository_owner == 'paperless-ngx' if: github.repository_owner == 'paperless-ngx'
runs-on: ubuntu-24.04 runs-on: ubuntu-slim
steps: steps:
- uses: actions/stale@v10 - uses: actions/stale@v10
with: with:
@@ -35,7 +35,7 @@ jobs:
lock-threads: lock-threads:
name: 'Lock Old Threads' name: 'Lock Old Threads'
if: github.repository_owner == 'paperless-ngx' if: github.repository_owner == 'paperless-ngx'
runs-on: ubuntu-24.04 runs-on: ubuntu-slim
steps: steps:
- uses: dessant/lock-threads@v6 - uses: dessant/lock-threads@v6
with: with:
@@ -55,7 +55,7 @@ jobs:
close-answered-discussions: close-answered-discussions:
name: 'Close Answered Discussions' name: 'Close Answered Discussions'
if: github.repository_owner == 'paperless-ngx' if: github.repository_owner == 'paperless-ngx'
runs-on: ubuntu-24.04 runs-on: ubuntu-slim
steps: steps:
- uses: actions/github-script@v8 - uses: actions/github-script@v8
with: with:
@@ -112,7 +112,7 @@ jobs:
close-outdated-discussions: close-outdated-discussions:
name: 'Close Outdated Discussions' name: 'Close Outdated Discussions'
if: github.repository_owner == 'paperless-ngx' if: github.repository_owner == 'paperless-ngx'
runs-on: ubuntu-24.04 runs-on: ubuntu-slim
steps: steps:
- uses: actions/github-script@v8 - uses: actions/github-script@v8
with: with:
@@ -204,7 +204,7 @@ jobs:
close-unsupported-feature-requests: close-unsupported-feature-requests:
name: 'Close Unsupported Feature Requests' name: 'Close Unsupported Feature Requests'
if: github.repository_owner == 'paperless-ngx' if: github.repository_owner == 'paperless-ngx'
runs-on: ubuntu-24.04 runs-on: ubuntu-slim
steps: steps:
- uses: actions/github-script@v8 - uses: actions/github-script@v8
with: with:

View File

@@ -6,7 +6,7 @@ on:
jobs: jobs:
generate-translate-strings: generate-translate-strings:
name: Generate Translation Strings name: Generate Translation Strings
runs-on: ubuntu-latest runs-on: ubuntu-slim
permissions: permissions:
contents: write contents: write
steps: steps:

View File

@@ -1152,9 +1152,8 @@ via the consumption directory, you can disable the consumer to save resources.
#### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES} #### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES}
: As of version 3.0 Paperless-ngx allows duplicate documents to be consumed by default, _except_ when : When the consumer detects a duplicate document, it will not touch
this setting is enabled. When enabled, Paperless will check if a document with the same hash already the original document. This default behavior can be changed here.
exists in the system and delete the duplicate file from the consumption directory without consuming it.
Defaults to false. Defaults to false.

File diff suppressed because it is too large Load Diff

View File

@@ -103,6 +103,22 @@
</div> </div>
<div class="row mb-3"> <div class="row mb-3">
<div class="col-md-3 col-form-label pt-0">
<span i18n>Items per page</span>
</div>
<div class="col">
<select class="form-select" formControlName="documentListItemPerPage">
<option [ngValue]="10">10</option>
<option [ngValue]="25">25</option>
<option [ngValue]="50">50</option>
<option [ngValue]="100">100</option>
</select>
</div>
</div>
<div class="row">
<div class="col-md-3 col-form-label pt-0"> <div class="col-md-3 col-form-label pt-0">
<span i18n>Sidebar</span> <span i18n>Sidebar</span>
</div> </div>
@@ -137,28 +153,8 @@
</button> </button>
</div> </div>
</div> </div>
</div>
<div class="col-xl-6 ps-xl-5">
<h5 class="mt-3 mt-md-0" i18n>Global search</h5>
<div class="row">
<div class="col">
<pngx-input-check i18n-title title="Do not include advanced search results" formControlName="searchDbOnly"></pngx-input-check>
</div>
</div>
<div class="row mb-3"> <h5 class="mt-3" id="update-checking" i18n>Update checking</h5>
<div class="col-md-3 col-form-label pt-0">
<span i18n>Full search links to</span>
</div>
<div class="col mb-3">
<select class="form-select" formControlName="searchLink">
<option [ngValue]="GlobalSearchType.TITLE_CONTENT" i18n>Title and content search</option>
<option [ngValue]="GlobalSearchType.ADVANCED" i18n>Advanced search</option>
</select>
</div>
</div>
<h5 class="mt-3 mt-md-0" id="update-checking" i18n>Update checking</h5>
<div class="row mb-3"> <div class="row mb-3">
<div class="col d-flex flex-row align-items-start"> <div class="col d-flex flex-row align-items-start">
<pngx-input-check i18n-title title="Enable update checking" formControlName="updateCheckingEnabled"></pngx-input-check> <pngx-input-check i18n-title title="Enable update checking" formControlName="updateCheckingEnabled"></pngx-input-check>
@@ -183,33 +179,11 @@
<pngx-input-check i18n-title title="Show document counts in sidebar saved views" formControlName="sidebarViewsShowCount"></pngx-input-check> <pngx-input-check i18n-title title="Show document counts in sidebar saved views" formControlName="sidebarViewsShowCount"></pngx-input-check>
</div> </div>
</div> </div>
</div> </div>
</div> <div class="col-xl-6 ps-xl-5">
<h5 class="mt-3 mt-md-0" i18n>Document editing</h5>
</ng-template>
</li>
<li [ngbNavItem]="SettingsNavIDs.Documents">
<a ngbNavLink i18n>Documents</a>
<ng-template ngbNavContent>
<div class="row">
<div class="col-xl-6 pe-xl-5">
<h5 i18n>Documents</h5>
<div class="row mb-3">
<div class="col-md-3 col-form-label pt-0">
<span i18n>Items per page</span>
</div>
<div class="col">
<select class="form-select" formControlName="documentListItemPerPage">
<option [ngValue]="10">10</option>
<option [ngValue]="25">25</option>
<option [ngValue]="50">50</option>
<option [ngValue]="100">100</option>
</select>
</div>
</div>
<h5 class="mt-3" i18n>Document editing</h5>
<div class="row"> <div class="row">
<div class="col"> <div class="col">
<pngx-input-check i18n-title title="Use PDF viewer provided by the browser" i18n-hint hint="This is usually faster for displaying large PDF documents, but it might not work on some browsers." formControlName="useNativePdfViewer"></pngx-input-check> <pngx-input-check i18n-title title="Use PDF viewer provided by the browser" i18n-hint hint="This is usually faster for displaying large PDF documents, but it might not work on some browsers." formControlName="useNativePdfViewer"></pngx-input-check>
@@ -235,31 +209,31 @@
</div> </div>
</div> </div>
<div class="row"> <div class="row mb-3">
<div class="col"> <div class="col">
<pngx-input-check i18n-title title="Show document thumbnail during loading" formControlName="documentEditingOverlayThumbnail"></pngx-input-check> <pngx-input-check i18n-title title="Show document thumbnail during loading" formControlName="documentEditingOverlayThumbnail"></pngx-input-check>
</div> </div>
</div> </div>
<div class="row mb-3"> <h5 class="mt-3" i18n>Global search</h5>
<div class="row">
<div class="col"> <div class="col">
<p class="mb-2" i18n>Built-in fields to show:</p> <pngx-input-check i18n-title title="Do not include advanced search results" formControlName="searchDbOnly"></pngx-input-check>
@for (option of documentDetailFieldOptions; track option.id) {
<div class="form-check ms-3">
<input class="form-check-input" type="checkbox"
[id]="'documentDetailField-' + option.id"
[checked]="isDocumentDetailFieldShown(option.id)"
(change)="toggleDocumentDetailField(option.id, $event.target.checked)" />
<label class="form-check-label" [for]="'documentDetailField-' + option.id">
{{ option.label }}
</label>
</div>
}
<p class="small text-muted mt-1" i18n>Uncheck fields to hide them on the document details page.</p>
</div> </div>
</div> </div>
</div>
<div class="col-xl-6 ps-xl-5"> <div class="row mb-3">
<div class="col-md-3 col-form-label pt-0">
<span i18n>Full search links to</span>
</div>
<div class="col mb-3">
<select class="form-select" formControlName="searchLink">
<option [ngValue]="GlobalSearchType.TITLE_CONTENT" i18n>Title and content search</option>
<option [ngValue]="GlobalSearchType.ADVANCED" i18n>Advanced search</option>
</select>
</div>
</div>
<h5 class="mt-3" i18n>Bulk editing</h5> <h5 class="mt-3" i18n>Bulk editing</h5>
<div class="row mb-3"> <div class="row mb-3">
<div class="col"> <div class="col">
@@ -274,8 +248,10 @@
<pngx-input-check i18n-title title="Enable notes" formControlName="notesEnabled"></pngx-input-check> <pngx-input-check i18n-title title="Enable notes" formControlName="notesEnabled"></pngx-input-check>
</div> </div>
</div> </div>
</div> </div>
</div> </div>
</ng-template> </ng-template>
</li> </li>

View File

@@ -201,9 +201,9 @@ describe('SettingsComponent', () => {
const navigateSpy = jest.spyOn(router, 'navigate') const navigateSpy = jest.spyOn(router, 'navigate')
const tabButtons = fixture.debugElement.queryAll(By.directive(NgbNavLink)) const tabButtons = fixture.debugElement.queryAll(By.directive(NgbNavLink))
tabButtons[1].nativeElement.dispatchEvent(new MouseEvent('click')) tabButtons[1].nativeElement.dispatchEvent(new MouseEvent('click'))
expect(navigateSpy).toHaveBeenCalledWith(['settings', 'documents'])
tabButtons[2].nativeElement.dispatchEvent(new MouseEvent('click'))
expect(navigateSpy).toHaveBeenCalledWith(['settings', 'permissions']) expect(navigateSpy).toHaveBeenCalledWith(['settings', 'permissions'])
tabButtons[2].nativeElement.dispatchEvent(new MouseEvent('click'))
expect(navigateSpy).toHaveBeenCalledWith(['settings', 'notifications'])
const initSpy = jest.spyOn(component, 'initialize') const initSpy = jest.spyOn(component, 'initialize')
component.isDirty = true // mock dirty component.isDirty = true // mock dirty
@@ -213,8 +213,8 @@ describe('SettingsComponent', () => {
expect(initSpy).not.toHaveBeenCalled() expect(initSpy).not.toHaveBeenCalled()
navigateSpy.mockResolvedValueOnce(true) // nav accepted even though dirty navigateSpy.mockResolvedValueOnce(true) // nav accepted even though dirty
tabButtons[2].nativeElement.dispatchEvent(new MouseEvent('click')) tabButtons[1].nativeElement.dispatchEvent(new MouseEvent('click'))
expect(navigateSpy).toHaveBeenCalledWith(['settings', 'permissions']) expect(navigateSpy).toHaveBeenCalledWith(['settings', 'notifications'])
expect(initSpy).toHaveBeenCalled() expect(initSpy).toHaveBeenCalled()
}) })
@@ -226,7 +226,7 @@ describe('SettingsComponent', () => {
activatedRoute.snapshot.fragment = '#notifications' activatedRoute.snapshot.fragment = '#notifications'
const scrollSpy = jest.spyOn(viewportScroller, 'scrollToAnchor') const scrollSpy = jest.spyOn(viewportScroller, 'scrollToAnchor')
component.ngOnInit() component.ngOnInit()
expect(component.activeNavID).toEqual(4) // Notifications expect(component.activeNavID).toEqual(3) // Notifications
component.ngAfterViewInit() component.ngAfterViewInit()
expect(scrollSpy).toHaveBeenCalledWith('#notifications') expect(scrollSpy).toHaveBeenCalledWith('#notifications')
}) })
@@ -251,7 +251,7 @@ describe('SettingsComponent', () => {
expect(toastErrorSpy).toHaveBeenCalled() expect(toastErrorSpy).toHaveBeenCalled()
expect(storeSpy).toHaveBeenCalled() expect(storeSpy).toHaveBeenCalled()
expect(appearanceSettingsSpy).not.toHaveBeenCalled() expect(appearanceSettingsSpy).not.toHaveBeenCalled()
expect(setSpy).toHaveBeenCalledTimes(31) expect(setSpy).toHaveBeenCalledTimes(30)
// succeed // succeed
storeSpy.mockReturnValueOnce(of(true)) storeSpy.mockReturnValueOnce(of(true))
@@ -366,22 +366,4 @@ describe('SettingsComponent', () => {
settingsService.settingsSaved.emit(true) settingsService.settingsSaved.emit(true)
expect(maybeRefreshSpy).toHaveBeenCalled() expect(maybeRefreshSpy).toHaveBeenCalled()
}) })
it('should support toggling document detail fields', () => {
completeSetup()
const field = 'storage_path'
expect(
component.settingsForm.get('documentDetailsHiddenFields').value.length
).toEqual(0)
component.toggleDocumentDetailField(field, false)
expect(
component.settingsForm.get('documentDetailsHiddenFields').value.length
).toEqual(1)
expect(component.isDocumentDetailFieldShown(field)).toBeFalsy()
component.toggleDocumentDetailField(field, true)
expect(
component.settingsForm.get('documentDetailsHiddenFields').value.length
).toEqual(0)
expect(component.isDocumentDetailFieldShown(field)).toBeTruthy()
})
}) })

View File

@@ -70,9 +70,9 @@ import { ComponentWithPermissions } from '../../with-permissions/with-permission
enum SettingsNavIDs { enum SettingsNavIDs {
General = 1, General = 1,
Documents = 2, Permissions = 2,
Permissions = 3, Notifications = 3,
Notifications = 4, SavedViews = 4,
} }
const systemLanguage = { code: '', name: $localize`Use system language` } const systemLanguage = { code: '', name: $localize`Use system language` }
@@ -81,25 +81,6 @@ const systemDateFormat = {
name: $localize`Use date format of display language`, name: $localize`Use date format of display language`,
} }
export enum DocumentDetailFieldID {
ArchiveSerialNumber = 'archive_serial_number',
Correspondent = 'correspondent',
DocumentType = 'document_type',
StoragePath = 'storage_path',
Tags = 'tags',
}
const documentDetailFieldOptions = [
{
id: DocumentDetailFieldID.ArchiveSerialNumber,
label: $localize`Archive serial number`,
},
{ id: DocumentDetailFieldID.Correspondent, label: $localize`Correspondent` },
{ id: DocumentDetailFieldID.DocumentType, label: $localize`Document type` },
{ id: DocumentDetailFieldID.StoragePath, label: $localize`Storage path` },
{ id: DocumentDetailFieldID.Tags, label: $localize`Tags` },
]
@Component({ @Component({
selector: 'pngx-settings', selector: 'pngx-settings',
templateUrl: './settings.component.html', templateUrl: './settings.component.html',
@@ -165,7 +146,6 @@ export class SettingsComponent
pdfViewerDefaultZoom: new FormControl(null), pdfViewerDefaultZoom: new FormControl(null),
documentEditingRemoveInboxTags: new FormControl(null), documentEditingRemoveInboxTags: new FormControl(null),
documentEditingOverlayThumbnail: new FormControl(null), documentEditingOverlayThumbnail: new FormControl(null),
documentDetailsHiddenFields: new FormControl([]),
searchDbOnly: new FormControl(null), searchDbOnly: new FormControl(null),
searchLink: new FormControl(null), searchLink: new FormControl(null),
@@ -196,8 +176,6 @@ export class SettingsComponent
public readonly ZoomSetting = ZoomSetting public readonly ZoomSetting = ZoomSetting
public readonly documentDetailFieldOptions = documentDetailFieldOptions
get systemStatusHasErrors(): boolean { get systemStatusHasErrors(): boolean {
return ( return (
this.systemStatus.database.status === SystemStatusItemStatus.ERROR || this.systemStatus.database.status === SystemStatusItemStatus.ERROR ||
@@ -358,9 +336,6 @@ export class SettingsComponent
documentEditingOverlayThumbnail: this.settings.get( documentEditingOverlayThumbnail: this.settings.get(
SETTINGS_KEYS.DOCUMENT_EDITING_OVERLAY_THUMBNAIL SETTINGS_KEYS.DOCUMENT_EDITING_OVERLAY_THUMBNAIL
), ),
documentDetailsHiddenFields: this.settings.get(
SETTINGS_KEYS.DOCUMENT_DETAILS_HIDDEN_FIELDS
),
searchDbOnly: this.settings.get(SETTINGS_KEYS.SEARCH_DB_ONLY), searchDbOnly: this.settings.get(SETTINGS_KEYS.SEARCH_DB_ONLY),
searchLink: this.settings.get(SETTINGS_KEYS.SEARCH_FULL_TYPE), searchLink: this.settings.get(SETTINGS_KEYS.SEARCH_FULL_TYPE),
} }
@@ -551,10 +526,6 @@ export class SettingsComponent
SETTINGS_KEYS.DOCUMENT_EDITING_OVERLAY_THUMBNAIL, SETTINGS_KEYS.DOCUMENT_EDITING_OVERLAY_THUMBNAIL,
this.settingsForm.value.documentEditingOverlayThumbnail this.settingsForm.value.documentEditingOverlayThumbnail
) )
this.settings.set(
SETTINGS_KEYS.DOCUMENT_DETAILS_HIDDEN_FIELDS,
this.settingsForm.value.documentDetailsHiddenFields
)
this.settings.set( this.settings.set(
SETTINGS_KEYS.SEARCH_DB_ONLY, SETTINGS_KEYS.SEARCH_DB_ONLY,
this.settingsForm.value.searchDbOnly this.settingsForm.value.searchDbOnly
@@ -616,26 +587,6 @@ export class SettingsComponent
this.settingsForm.get('themeColor').patchValue('') this.settingsForm.get('themeColor').patchValue('')
} }
isDocumentDetailFieldShown(fieldId: string): boolean {
const hiddenFields =
this.settingsForm.value.documentDetailsHiddenFields || []
return !hiddenFields.includes(fieldId)
}
toggleDocumentDetailField(fieldId: string, checked: boolean) {
const hiddenFields = new Set(
this.settingsForm.value.documentDetailsHiddenFields || []
)
if (checked) {
hiddenFields.delete(fieldId)
} else {
hiddenFields.add(fieldId)
}
this.settingsForm
.get('documentDetailsHiddenFields')
.setValue(Array.from(hiddenFields))
}
showSystemStatus() { showSystemStatus() {
const modal: NgbModalRef = this.modalService.open( const modal: NgbModalRef = this.modalService.open(
SystemStatusDialogComponent, SystemStatusDialogComponent,

View File

@@ -97,12 +97,6 @@
<br/><em>(<ng-container i18n>click for full output</ng-container>)</em> <br/><em>(<ng-container i18n>click for full output</ng-container>)</em>
} }
</ng-template> </ng-template>
@if (task.duplicate_documents?.length > 0) {
<div class="small text-warning-emphasis d-flex align-items-center gap-1">
<i-bs class="lh-1" width="1em" height="1em" name="exclamation-triangle"></i-bs>
<span i18n>Duplicate(s) detected</span>
</div>
}
</td> </td>
} }
<td class="d-lg-none"> <td class="d-lg-none">

View File

@@ -164,11 +164,9 @@
{{ item.name }} {{ item.name }}
<span class="ms-auto text-muted small"> <span class="ms-auto text-muted small">
@if (item.dateEnd) { @if (item.dateEnd) {
{{ item.date | customDate:'mediumDate' }} &ndash; {{ item.dateEnd | customDate:'mediumDate' }} {{ item.date | customDate:'MMM d' }} &ndash; {{ item.dateEnd | customDate:'mediumDate' }}
} @else if (item.dateTilNow) {
{{ item.dateTilNow | customDate:'mediumDate' }} &ndash; <ng-container i18n>now</ng-container>
} @else { } @else {
{{ item.date | customDate:'mediumDate' }} {{ item.date | customDate:'mediumDate' }} &ndash; <ng-container i18n>now</ng-container>
} }
</span> </span>
</div> </div>

View File

@@ -79,34 +79,32 @@ export class DatesDropdownComponent implements OnInit, OnDestroy {
{ {
id: RelativeDate.WITHIN_1_WEEK, id: RelativeDate.WITHIN_1_WEEK,
name: $localize`Within 1 week`, name: $localize`Within 1 week`,
dateTilNow: new Date().setDate(new Date().getDate() - 7), date: new Date().setDate(new Date().getDate() - 7),
}, },
{ {
id: RelativeDate.WITHIN_1_MONTH, id: RelativeDate.WITHIN_1_MONTH,
name: $localize`Within 1 month`, name: $localize`Within 1 month`,
dateTilNow: new Date().setMonth(new Date().getMonth() - 1), date: new Date().setMonth(new Date().getMonth() - 1),
}, },
{ {
id: RelativeDate.WITHIN_3_MONTHS, id: RelativeDate.WITHIN_3_MONTHS,
name: $localize`Within 3 months`, name: $localize`Within 3 months`,
dateTilNow: new Date().setMonth(new Date().getMonth() - 3), date: new Date().setMonth(new Date().getMonth() - 3),
}, },
{ {
id: RelativeDate.WITHIN_1_YEAR, id: RelativeDate.WITHIN_1_YEAR,
name: $localize`Within 1 year`, name: $localize`Within 1 year`,
dateTilNow: new Date().setFullYear(new Date().getFullYear() - 1), date: new Date().setFullYear(new Date().getFullYear() - 1),
}, },
{ {
id: RelativeDate.THIS_YEAR, id: RelativeDate.THIS_YEAR,
name: $localize`This year`, name: $localize`This year`,
date: new Date('1/1/' + new Date().getFullYear()), date: new Date('1/1/' + new Date().getFullYear()),
dateEnd: new Date('12/31/' + new Date().getFullYear()),
}, },
{ {
id: RelativeDate.THIS_MONTH, id: RelativeDate.THIS_MONTH,
name: $localize`This month`, name: $localize`This month`,
date: new Date().setDate(1), date: new Date().setDate(1),
dateEnd: new Date(new Date().getFullYear(), new Date().getMonth() + 1, 0),
}, },
{ {
id: RelativeDate.TODAY, id: RelativeDate.TODAY,

View File

@@ -146,26 +146,16 @@
<ng-template ngbNavContent> <ng-template ngbNavContent>
<div> <div>
<pngx-input-text #inputTitle i18n-title title="Title" formControlName="title" [horizontal]="true" [suggestion]="suggestions?.title" (keyup)="titleKeyUp($event)" [error]="error?.title"></pngx-input-text> <pngx-input-text #inputTitle i18n-title title="Title" formControlName="title" [horizontal]="true" [suggestion]="suggestions?.title" (keyup)="titleKeyUp($event)" [error]="error?.title"></pngx-input-text>
@if (!isFieldHidden(DocumentDetailFieldID.ArchiveSerialNumber)) { <pngx-input-number i18n-title title="Archive serial number" [error]="error?.archive_serial_number" [horizontal]="true" formControlName='archive_serial_number'></pngx-input-number>
<pngx-input-number i18n-title title="Archive serial number" [error]="error?.archive_serial_number" [horizontal]="true" formControlName='archive_serial_number'></pngx-input-number>
}
<pngx-input-date i18n-title title="Date created" formControlName="created" [suggestions]="suggestions?.dates" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event)" <pngx-input-date i18n-title title="Date created" formControlName="created" [suggestions]="suggestions?.dates" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event)"
[error]="error?.created"></pngx-input-date> [error]="error?.created"></pngx-input-date>
@if (!isFieldHidden(DocumentDetailFieldID.Correspondent)) { <pngx-input-select [items]="correspondents" i18n-title title="Correspondent" formControlName="correspondent" [allowNull]="true" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.Correspondent)"
<pngx-input-select [items]="correspondents" i18n-title title="Correspondent" formControlName="correspondent" [allowNull]="true" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.Correspondent)" (createNew)="createCorrespondent($event)" [hideAddButton]="createDisabled(DataType.Correspondent)" [suggestions]="suggestions?.correspondents" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.Correspondent }"></pngx-input-select>
(createNew)="createCorrespondent($event)" [hideAddButton]="createDisabled(DataType.Correspondent)" [suggestions]="suggestions?.correspondents" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.Correspondent }"></pngx-input-select> <pngx-input-select [items]="documentTypes" i18n-title title="Document type" formControlName="document_type" [allowNull]="true" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.DocumentType)"
} (createNew)="createDocumentType($event)" [hideAddButton]="createDisabled(DataType.DocumentType)" [suggestions]="suggestions?.document_types" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.DocumentType }"></pngx-input-select>
@if (!isFieldHidden(DocumentDetailFieldID.DocumentType)) { <pngx-input-select [items]="storagePaths" i18n-title title="Storage path" formControlName="storage_path" [allowNull]="true" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.StoragePath)"
<pngx-input-select [items]="documentTypes" i18n-title title="Document type" formControlName="document_type" [allowNull]="true" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.DocumentType)" (createNew)="createStoragePath($event)" [hideAddButton]="createDisabled(DataType.StoragePath)" [suggestions]="suggestions?.storage_paths" i18n-placeholder placeholder="Default" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.StoragePath }"></pngx-input-select>
(createNew)="createDocumentType($event)" [hideAddButton]="createDisabled(DataType.DocumentType)" [suggestions]="suggestions?.document_types" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.DocumentType }"></pngx-input-select> <pngx-input-tags #tagsInput formControlName="tags" [suggestions]="suggestions?.tags" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.Tag)" [hideAddButton]="createDisabled(DataType.Tag)" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.Tag }"></pngx-input-tags>
}
@if (!isFieldHidden(DocumentDetailFieldID.StoragePath)) {
<pngx-input-select [items]="storagePaths" i18n-title title="Storage path" formControlName="storage_path" [allowNull]="true" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.StoragePath)"
(createNew)="createStoragePath($event)" [hideAddButton]="createDisabled(DataType.StoragePath)" [suggestions]="suggestions?.storage_paths" i18n-placeholder placeholder="Default" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.StoragePath }"></pngx-input-select>
}
@if (!isFieldHidden(DocumentDetailFieldID.Tags)) {
<pngx-input-tags #tagsInput formControlName="tags" [suggestions]="suggestions?.tags" [showFilter]="true" [horizontal]="true" (filterDocuments)="filterDocuments($event, DataType.Tag)" [hideAddButton]="createDisabled(DataType.Tag)" *pngxIfPermissions="{ action: PermissionAction.View, type: PermissionType.Tag }"></pngx-input-tags>
}
@for (fieldInstance of document?.custom_fields; track fieldInstance.field; let i = $index) { @for (fieldInstance of document?.custom_fields; track fieldInstance.field; let i = $index) {
<div [formGroup]="customFieldFormFields.controls[i]"> <div [formGroup]="customFieldFormFields.controls[i]">
@switch (getCustomFieldFromInstance(fieldInstance)?.data_type) { @switch (getCustomFieldFromInstance(fieldInstance)?.data_type) {
@@ -380,37 +370,6 @@
</ng-template> </ng-template>
</li> </li>
} }
@if (document?.duplicate_documents?.length) {
<li [ngbNavItem]="DocumentDetailNavIDs.Duplicates">
<a class="text-nowrap" ngbNavLink i18n>
Duplicates
<span class="badge text-bg-secondary ms-1">{{ document.duplicate_documents.length }}</span>
</a>
<ng-template ngbNavContent>
<div class="d-flex flex-column gap-2">
<div class="fst-italic" i18n>Duplicate documents detected:</div>
<div class="list-group">
@for (duplicate of document.duplicate_documents; track duplicate.id) {
<a
class="list-group-item list-group-item-action d-flex justify-content-between align-items-center"
[routerLink]="['/documents', duplicate.id, 'details']"
[class.disabled]="duplicate.deleted_at"
>
<span class="d-flex align-items-center gap-2">
<span>{{ duplicate.title || ('#' + duplicate.id) }}</span>
@if (duplicate.deleted_at) {
<span class="badge text-bg-secondary" i18n>In trash</span>
}
</span>
<span class="text-secondary">#{{ duplicate.id }}</span>
</a>
}
</div>
</div>
</ng-template>
</li>
}
</ul> </ul>
<div [ngbNavOutlet]="nav" class="mt-3"></div> <div [ngbNavOutlet]="nav" class="mt-3"></div>

View File

@@ -48,7 +48,6 @@ import {
} from 'src/app/data/filter-rule-type' } from 'src/app/data/filter-rule-type'
import { StoragePath } from 'src/app/data/storage-path' import { StoragePath } from 'src/app/data/storage-path'
import { Tag } from 'src/app/data/tag' import { Tag } from 'src/app/data/tag'
import { SETTINGS_KEYS } from 'src/app/data/ui-settings'
import { PermissionsGuard } from 'src/app/guards/permissions.guard' import { PermissionsGuard } from 'src/app/guards/permissions.guard'
import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe' import { CustomDatePipe } from 'src/app/pipes/custom-date.pipe'
import { DocumentTitlePipe } from 'src/app/pipes/document-title.pipe' import { DocumentTitlePipe } from 'src/app/pipes/document-title.pipe'
@@ -302,16 +301,16 @@ describe('DocumentDetailComponent', () => {
.spyOn(openDocumentsService, 'openDocument') .spyOn(openDocumentsService, 'openDocument')
.mockReturnValueOnce(of(true)) .mockReturnValueOnce(of(true))
fixture.detectChanges() fixture.detectChanges()
expect(component.activeNavID).toEqual(component.DocumentDetailNavIDs.Notes) expect(component.activeNavID).toEqual(5) // DocumentDetailNavIDs.Notes
}) })
it('should change url on tab switch', () => { it('should change url on tab switch', () => {
initNormally() initNormally()
const navigateSpy = jest.spyOn(router, 'navigate') const navigateSpy = jest.spyOn(router, 'navigate')
component.nav.select(component.DocumentDetailNavIDs.Notes) component.nav.select(5)
component.nav.navChange.next({ component.nav.navChange.next({
activeId: 1, activeId: 1,
nextId: component.DocumentDetailNavIDs.Notes, nextId: 5,
preventDefault: () => {}, preventDefault: () => {},
}) })
fixture.detectChanges() fixture.detectChanges()
@@ -353,18 +352,6 @@ describe('DocumentDetailComponent', () => {
expect(component.document).toEqual(doc) expect(component.document).toEqual(doc)
}) })
it('should fall back to details tab when duplicates tab is active but no duplicates', () => {
initNormally()
component.activeNavID = component.DocumentDetailNavIDs.Duplicates
const noDupDoc = { ...doc, duplicate_documents: [] }
component.updateComponent(noDupDoc)
expect(component.activeNavID).toEqual(
component.DocumentDetailNavIDs.Details
)
})
it('should load already-opened document via param', () => { it('should load already-opened document via param', () => {
initNormally() initNormally()
jest.spyOn(documentService, 'get').mockReturnValueOnce(of(doc)) jest.spyOn(documentService, 'get').mockReturnValueOnce(of(doc))
@@ -380,38 +367,6 @@ describe('DocumentDetailComponent', () => {
expect(component.document).toEqual(doc) expect(component.document).toEqual(doc)
}) })
it('should update cached open document duplicates when reloading an open doc', () => {
const openDoc = { ...doc, duplicate_documents: [{ id: 1, title: 'Old' }] }
const updatedDuplicates = [
{ id: 2, title: 'Newer duplicate', deleted_at: null },
]
jest
.spyOn(activatedRoute, 'paramMap', 'get')
.mockReturnValue(of(convertToParamMap({ id: 3, section: 'details' })))
jest.spyOn(documentService, 'get').mockReturnValue(
of({
...doc,
modified: new Date('2024-01-02T00:00:00Z'),
duplicate_documents: updatedDuplicates,
})
)
jest.spyOn(openDocumentsService, 'getOpenDocument').mockReturnValue(openDoc)
const saveSpy = jest.spyOn(openDocumentsService, 'save')
jest.spyOn(openDocumentsService, 'openDocument').mockReturnValue(of(true))
jest.spyOn(customFieldsService, 'listAll').mockReturnValue(
of({
count: customFields.length,
all: customFields.map((f) => f.id),
results: customFields,
})
)
fixture.detectChanges()
expect(openDoc.duplicate_documents).toEqual(updatedDuplicates)
expect(saveSpy).toHaveBeenCalled()
})
it('should disable form if user cannot edit', () => { it('should disable form if user cannot edit', () => {
currentUserHasObjectPermissions = false currentUserHasObjectPermissions = false
initNormally() initNormally()
@@ -1016,7 +971,7 @@ describe('DocumentDetailComponent', () => {
it('should display built-in pdf viewer if not disabled', () => { it('should display built-in pdf viewer if not disabled', () => {
initNormally() initNormally()
component.document.archived_file_name = 'file.pdf' component.document.archived_file_name = 'file.pdf'
settingsService.set(SETTINGS_KEYS.USE_NATIVE_PDF_VIEWER, false) jest.spyOn(settingsService, 'get').mockReturnValue(false)
expect(component.useNativePdfViewer).toBeFalsy() expect(component.useNativePdfViewer).toBeFalsy()
fixture.detectChanges() fixture.detectChanges()
expect(fixture.debugElement.query(By.css('pdf-viewer'))).not.toBeNull() expect(fixture.debugElement.query(By.css('pdf-viewer'))).not.toBeNull()
@@ -1025,7 +980,7 @@ describe('DocumentDetailComponent', () => {
it('should display native pdf viewer if enabled', () => { it('should display native pdf viewer if enabled', () => {
initNormally() initNormally()
component.document.archived_file_name = 'file.pdf' component.document.archived_file_name = 'file.pdf'
settingsService.set(SETTINGS_KEYS.USE_NATIVE_PDF_VIEWER, true) jest.spyOn(settingsService, 'get').mockReturnValue(true)
expect(component.useNativePdfViewer).toBeTruthy() expect(component.useNativePdfViewer).toBeTruthy()
fixture.detectChanges() fixture.detectChanges()
expect(fixture.debugElement.query(By.css('object'))).not.toBeNull() expect(fixture.debugElement.query(By.css('object'))).not.toBeNull()

View File

@@ -8,7 +8,7 @@ import {
FormsModule, FormsModule,
ReactiveFormsModule, ReactiveFormsModule,
} from '@angular/forms' } from '@angular/forms'
import { ActivatedRoute, Router, RouterModule } from '@angular/router' import { ActivatedRoute, Router } from '@angular/router'
import { import {
NgbDateStruct, NgbDateStruct,
NgbDropdownModule, NgbDropdownModule,
@@ -84,7 +84,6 @@ import { ToastService } from 'src/app/services/toast.service'
import { getFilenameFromContentDisposition } from 'src/app/utils/http' import { getFilenameFromContentDisposition } from 'src/app/utils/http'
import { ISODateAdapter } from 'src/app/utils/ngb-iso-date-adapter' import { ISODateAdapter } from 'src/app/utils/ngb-iso-date-adapter'
import * as UTIF from 'utif' import * as UTIF from 'utif'
import { DocumentDetailFieldID } from '../admin/settings/settings.component'
import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component' import { ConfirmDialogComponent } from '../common/confirm-dialog/confirm-dialog.component'
import { PasswordRemovalConfirmDialogComponent } from '../common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component' import { PasswordRemovalConfirmDialogComponent } from '../common/confirm-dialog/password-removal-confirm-dialog/password-removal-confirm-dialog.component'
import { CustomFieldsDropdownComponent } from '../common/custom-fields-dropdown/custom-fields-dropdown.component' import { CustomFieldsDropdownComponent } from '../common/custom-fields-dropdown/custom-fields-dropdown.component'
@@ -125,7 +124,6 @@ enum DocumentDetailNavIDs {
Notes = 5, Notes = 5,
Permissions = 6, Permissions = 6,
History = 7, History = 7,
Duplicates = 8,
} }
enum ContentRenderType { enum ContentRenderType {
@@ -183,7 +181,6 @@ export enum ZoomSetting {
NgxBootstrapIconsModule, NgxBootstrapIconsModule,
PdfViewerModule, PdfViewerModule,
TextAreaComponent, TextAreaComponent,
RouterModule,
], ],
}) })
export class DocumentDetailComponent export class DocumentDetailComponent
@@ -282,8 +279,6 @@ export class DocumentDetailComponent
public readonly DataType = DataType public readonly DataType = DataType
public readonly DocumentDetailFieldID = DocumentDetailFieldID
@ViewChild('nav') nav: NgbNav @ViewChild('nav') nav: NgbNav
@ViewChild('pdfPreview') set pdfPreview(element) { @ViewChild('pdfPreview') set pdfPreview(element) {
// this gets called when component added or removed from DOM // this gets called when component added or removed from DOM
@@ -330,12 +325,6 @@ export class DocumentDetailComponent
return this.settings.get(SETTINGS_KEYS.DOCUMENT_EDITING_OVERLAY_THUMBNAIL) return this.settings.get(SETTINGS_KEYS.DOCUMENT_EDITING_OVERLAY_THUMBNAIL)
} }
isFieldHidden(fieldId: DocumentDetailFieldID): boolean {
return this.settings
.get(SETTINGS_KEYS.DOCUMENT_DETAILS_HIDDEN_FIELDS)
.includes(fieldId)
}
private getRenderType(mimeType: string): ContentRenderType { private getRenderType(mimeType: string): ContentRenderType {
if (!mimeType) return ContentRenderType.Unknown if (!mimeType) return ContentRenderType.Unknown
if (mimeType === 'application/pdf') { if (mimeType === 'application/pdf') {
@@ -465,11 +454,6 @@ export class DocumentDetailComponent
const openDocument = this.openDocumentService.getOpenDocument( const openDocument = this.openDocumentService.getOpenDocument(
this.documentId this.documentId
) )
// update duplicate documents if present
if (openDocument && doc?.duplicate_documents) {
openDocument.duplicate_documents = doc.duplicate_documents
this.openDocumentService.save()
}
const useDoc = openDocument || doc const useDoc = openDocument || doc
if (openDocument) { if (openDocument) {
if ( if (
@@ -720,13 +704,6 @@ export class DocumentDetailComponent
} }
this.title = this.documentTitlePipe.transform(doc.title) this.title = this.documentTitlePipe.transform(doc.title)
this.prepareForm(doc) this.prepareForm(doc)
if (
this.activeNavID === DocumentDetailNavIDs.Duplicates &&
!doc?.duplicate_documents?.length
) {
this.activeNavID = DocumentDetailNavIDs.Details
}
} }
get customFieldFormFields(): FormArray { get customFieldFormFields(): FormArray {

View File

@@ -159,8 +159,6 @@ export interface Document extends ObjectWithPermissions {
page_count?: number page_count?: number
duplicate_documents?: Document[]
// Frontend only // Frontend only
__changedFields?: string[] __changedFields?: string[]
} }

View File

@@ -1,4 +1,3 @@
import { Document } from './document'
import { ObjectWithId } from './object-with-id' import { ObjectWithId } from './object-with-id'
export enum PaperlessTaskType { export enum PaperlessTaskType {
@@ -43,7 +42,5 @@ export interface PaperlessTask extends ObjectWithId {
related_document?: number related_document?: number
duplicate_documents?: Document[]
owner?: number owner?: number
} }

View File

@@ -70,8 +70,6 @@ export const SETTINGS_KEYS = {
'general-settings:document-editing:remove-inbox-tags', 'general-settings:document-editing:remove-inbox-tags',
DOCUMENT_EDITING_OVERLAY_THUMBNAIL: DOCUMENT_EDITING_OVERLAY_THUMBNAIL:
'general-settings:document-editing:overlay-thumbnail', 'general-settings:document-editing:overlay-thumbnail',
DOCUMENT_DETAILS_HIDDEN_FIELDS:
'general-settings:document-details:hidden-fields',
SEARCH_DB_ONLY: 'general-settings:search:db-only', SEARCH_DB_ONLY: 'general-settings:search:db-only',
SEARCH_FULL_TYPE: 'general-settings:search:more-link', SEARCH_FULL_TYPE: 'general-settings:search:more-link',
EMPTY_TRASH_DELAY: 'trash_delay', EMPTY_TRASH_DELAY: 'trash_delay',
@@ -257,11 +255,6 @@ export const SETTINGS: UiSetting[] = [
type: 'boolean', type: 'boolean',
default: true, default: true,
}, },
{
key: SETTINGS_KEYS.DOCUMENT_DETAILS_HIDDEN_FIELDS,
type: 'array',
default: [],
},
{ {
key: SETTINGS_KEYS.SEARCH_DB_ONLY, key: SETTINGS_KEYS.SEARCH_DB_ONLY,
type: 'boolean', type: 'boolean',

View File

@@ -779,45 +779,19 @@ class ConsumerPreflightPlugin(
Q(checksum=checksum) | Q(archive_checksum=checksum), Q(checksum=checksum) | Q(archive_checksum=checksum),
) )
if existing_doc.exists(): if existing_doc.exists():
existing_doc = existing_doc.order_by("-created") msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
duplicates_in_trash = existing_doc.filter(deleted_at__isnull=False) log_msg = f"Not consuming {self.filename}: It is a duplicate of {existing_doc.get().title} (#{existing_doc.get().pk})."
log_msg = (
f"Consuming duplicate {self.filename}: "
f"{existing_doc.count()} existing document(s) share the same content."
)
if duplicates_in_trash.exists(): if existing_doc.first().deleted_at is not None:
log_msg += " Note: at least one existing document is in the trash." msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
log_msg += " Note: existing document is in the trash."
self.log.warning(log_msg)
if settings.CONSUMER_DELETE_DUPLICATES: if settings.CONSUMER_DELETE_DUPLICATES:
duplicate = existing_doc.first()
duplicate_label = (
duplicate.title
or duplicate.original_filename
or (Path(duplicate.filename).name if duplicate.filename else None)
or str(duplicate.pk)
)
Path(self.input_doc.original_file).unlink() Path(self.input_doc.original_file).unlink()
self._fail(
failure_msg = ( msg,
f"Not consuming {self.filename}: " log_msg,
f"It is a duplicate of {duplicate_label} (#{duplicate.pk})" )
)
status_msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
if duplicates_in_trash.exists():
status_msg = (
ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
)
failure_msg += " Note: existing document is in the trash."
self._fail(
status_msg,
failure_msg,
)
def pre_check_directories(self): def pre_check_directories(self):
""" """

View File

@@ -1,598 +0,0 @@
import math
import uuid
from time import perf_counter
from django.contrib.auth import get_user_model
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.db import reset_queries
from django.db.models import Count
from django.db.models import Q
from django.db.models import Subquery
from guardian.shortcuts import assign_perm
from documents.models import CustomField
from documents.models import CustomFieldInstance
from documents.models import Document
from documents.models import Tag
from documents.permissions import get_objects_for_user_owner_aware
from documents.permissions import permitted_document_ids
class Command(BaseCommand):
# e.g. manage.py document_perf_benchmark --documents 500000 --chunk-size 5000 --tags 40 --tags-per-doc 3 --custom-fields 6 --custom-fields-per-doc 2
help = (
"Seed a synthetic dataset and benchmark permission-filtered document queries "
"for superusers vs non-superusers."
)
def add_arguments(self, parser):
parser.add_argument(
"--documents",
type=int,
default=10000,
help="Total documents to generate (default: 10,000)",
)
parser.add_argument(
"--owner-ratio",
type=float,
default=0.6,
help="Fraction owned by the benchmarked user (default: 0.6)",
)
parser.add_argument(
"--unowned-ratio",
type=float,
default=0.1,
help="Fraction of unowned documents (default: 0.1)",
)
parser.add_argument(
"--shared-ratio",
type=float,
default=0.25,
help=(
"Fraction of other-user documents that are shared via object perms "
"with the benchmarked user (default: 0.25)"
),
)
parser.add_argument(
"--chunk-size",
type=int,
default=2000,
help="Bulk create size for documents (default: 2000)",
)
parser.add_argument(
"--iterations",
type=int,
default=3,
help="Number of timing runs per query shape (default: 3)",
)
parser.add_argument(
"--prefix",
default="perf-benchmark",
help="Title prefix used to mark generated documents (default: perf-benchmark)",
)
parser.add_argument(
"--username",
default="perf_user",
help="Username of the non-superuser to benchmark (default: perf_user)",
)
parser.add_argument(
"--other-username",
default="perf_owner",
help="Username used for documents not owned by the benchmarked user (default: perf_owner)",
)
parser.add_argument(
"--super-username",
default="perf_admin",
help="Username of the superuser baseline (default: perf_admin)",
)
parser.add_argument(
"--tags",
type=int,
default=0,
help="Number of tags to create and assign (default: 0)",
)
parser.add_argument(
"--tags-per-doc",
type=int,
default=1,
help="How many tags to attach to each document (default: 1)",
)
parser.add_argument(
"--custom-fields",
type=int,
default=0,
help="Number of string custom fields to create (default: 0)",
)
parser.add_argument(
"--custom-fields-per-doc",
type=int,
default=1,
help="How many custom field instances per document (default: 1)",
)
parser.add_argument(
"--skip-tags",
action="store_true",
help="Skip tag document_count benchmarks (useful for large datasets on Postgres)",
)
parser.add_argument(
"--skip-custom-fields",
action="store_true",
help="Skip custom field document_count benchmarks",
)
parser.add_argument(
"--reuse-existing",
action="store_true",
help="Keep previously generated documents with the given prefix instead of recreating",
)
parser.add_argument(
"--cleanup",
action="store_true",
help="Delete previously generated documents with the given prefix and exit",
)
def handle(self, *args, **options):
# keep options for downstream checks
self.options = options
document_total = options["documents"]
owner_ratio = options["owner_ratio"]
unowned_ratio = options["unowned_ratio"]
shared_ratio = options["shared_ratio"]
chunk_size = options["chunk_size"]
iterations = options["iterations"]
prefix = options["prefix"]
tags = options["tags"]
tags_per_doc = options["tags_per_doc"]
custom_fields = options["custom_fields"]
custom_fields_per_doc = options["custom_fields_per_doc"]
self._validate_ratios(owner_ratio, unowned_ratio)
if tags_per_doc < 0 or custom_fields_per_doc < 0:
raise CommandError("Per-document counts must be non-negative")
target_user, other_user, superuser = self._ensure_users(options)
skip_seed = False
if options["cleanup"]:
removed = self._cleanup(prefix)
self.stdout.write(
self.style.SUCCESS(f"Removed {removed} generated documents"),
)
return
if not options["reuse_existing"]:
removed = self._cleanup(prefix)
if removed:
self.stdout.write(f"Removed existing generated documents: {removed}")
else:
existing = Document.objects.filter(title__startswith=prefix).count()
if existing:
skip_seed = True
self.stdout.write(
f"Reusing existing dataset with prefix '{prefix}': {existing} docs",
)
if skip_seed:
dataset_size = Document.objects.filter(title__startswith=prefix).count()
self.stdout.write(
self.style.SUCCESS(
f"Dataset ready (reused): {dataset_size} docs | prefix={prefix}",
),
)
else:
self.stdout.write(
f"Seeding {document_total} documents (owner_ratio={owner_ratio}, "
f"unowned_ratio={unowned_ratio}, shared_ratio={shared_ratio})",
)
created_counts = self._seed_documents(
total=document_total,
owner_ratio=owner_ratio,
unowned_ratio=unowned_ratio,
shared_ratio=shared_ratio,
chunk_size=chunk_size,
prefix=prefix,
target_user=target_user,
other_user=other_user,
)
created_tags = []
if tags:
created_tags = self._seed_tags(prefix=prefix, count=tags)
if tags_per_doc and created_tags:
self._assign_tags_to_documents(
prefix=prefix,
tags=created_tags,
tags_per_doc=tags_per_doc,
chunk_size=chunk_size,
)
created_custom_fields = []
if custom_fields:
created_custom_fields = self._seed_custom_fields(prefix, custom_fields)
if custom_fields_per_doc and created_custom_fields:
self._seed_custom_field_instances(
prefix=prefix,
custom_fields=created_custom_fields,
per_doc=custom_fields_per_doc,
chunk_size=chunk_size,
)
dataset_size = Document.objects.filter(title__startswith=prefix).count()
self.stdout.write(
self.style.SUCCESS(
f"Dataset ready: {dataset_size} docs | owned by target {created_counts['owned']} | "
f"owned by other {created_counts['other_owned']} | unowned {created_counts['unowned']} | "
f"shared-perms {created_counts['shared']} | tags {len(created_tags)} | "
f"custom fields {len(created_custom_fields)}",
),
)
self.stdout.write("\nRunning benchmarks...\n")
self._run_benchmarks(
iterations=iterations,
target_user=target_user,
superuser=superuser,
prefix=prefix,
)
def _validate_ratios(self, owner_ratio: float, unowned_ratio: float):
if owner_ratio < 0 or unowned_ratio < 0:
raise CommandError("Ratios must be non-negative")
if owner_ratio + unowned_ratio > 1:
raise CommandError("owner-ratio + unowned-ratio cannot exceed 1.0")
def _ensure_users(self, options):
User = get_user_model()
target_user, _ = User.objects.get_or_create(
username=options["username"],
defaults={"email": "perf_user@example.com"},
)
other_user, _ = User.objects.get_or_create(
username=options["other_username"],
defaults={"email": "perf_owner@example.com"},
)
superuser, _ = User.objects.get_or_create(
username=options["super_username"],
defaults={
"email": "perf_admin@example.com",
"is_staff": True,
"is_superuser": True,
},
)
return target_user, other_user, superuser
def _cleanup(self, prefix: str) -> int:
docs_qs = Document.global_objects.filter(title__startswith=prefix)
doc_count = docs_qs.count()
if doc_count:
docs_qs.hard_delete()
tag_count = Tag.objects.filter(name__startswith=prefix).count()
if tag_count:
Tag.objects.filter(name__startswith=prefix).delete()
cf_qs = CustomField.objects.filter(name__startswith=prefix)
cf_count = cf_qs.count()
if cf_count:
cf_qs.delete()
cfi_qs = CustomFieldInstance.global_objects.filter(
document__title__startswith=prefix,
)
cfi_count = cfi_qs.count()
if cfi_count:
cfi_qs.hard_delete()
return doc_count + tag_count + cf_count + cfi_count
def _seed_documents(
self,
*,
total: int,
owner_ratio: float,
unowned_ratio: float,
shared_ratio: float,
chunk_size: int,
prefix: str,
target_user,
other_user,
) -> dict[str, int]:
target_count = math.floor(total * owner_ratio)
unowned_count = math.floor(total * unowned_ratio)
other_count = total - target_count - unowned_count
documents: list[Document] = []
other_docs: list[Document] = []
for idx in range(total):
if idx < target_count:
owner = target_user
elif idx < target_count + other_count:
owner = other_user
else:
owner = None
doc = Document(
owner=owner,
title=f"{prefix}-{idx:07d}",
mime_type="application/pdf",
checksum=self._unique_checksum(idx),
page_count=1,
)
if owner is other_user:
other_docs.append(doc)
documents.append(doc)
if len(documents) >= chunk_size:
Document.objects.bulk_create(documents, batch_size=chunk_size)
documents.clear()
if documents:
Document.objects.bulk_create(documents, batch_size=chunk_size)
shared_target = math.floor(len(other_docs) * shared_ratio)
for doc in other_docs[:shared_target]:
assign_perm("documents.view_document", target_user, doc)
return {
"owned": target_count,
"other_owned": other_count,
"unowned": unowned_count,
"shared": shared_target,
}
def _seed_tags(self, *, prefix: str, count: int) -> list[Tag]:
tags = [
Tag(
name=f"{prefix}-tag-{idx:03d}",
)
for idx in range(count)
]
Tag.objects.bulk_create(tags, ignore_conflicts=True)
return list(Tag.objects.filter(name__startswith=prefix))
def _assign_tags_to_documents(
self,
*,
prefix: str,
tags: list[Tag],
tags_per_doc: int,
chunk_size: int,
):
if not tags or tags_per_doc < 1:
return
rels = []
through = Document.tags.through
tag_ids = [t.id for t in tags]
tag_count = len(tag_ids)
iterator = (
Document.objects.filter(title__startswith=prefix)
.values_list(
"id",
flat=True,
)
.iterator()
)
for idx, doc_id in enumerate(iterator):
start = idx % tag_count
chosen = set()
for offset in range(tags_per_doc):
tag_id = tag_ids[(start + offset) % tag_count]
if tag_id in chosen:
continue
chosen.add(tag_id)
rels.append(through(document_id=doc_id, tag_id=tag_id))
if len(rels) >= chunk_size:
through.objects.bulk_create(rels, ignore_conflicts=True)
rels.clear()
if rels:
through.objects.bulk_create(rels, ignore_conflicts=True)
def _seed_custom_fields(self, prefix: str, count: int) -> list[CustomField]:
fields = [
CustomField(
name=f"{prefix}-cf-{idx:03d}",
data_type=CustomField.FieldDataType.STRING,
)
for idx in range(count)
]
CustomField.objects.bulk_create(fields, ignore_conflicts=True)
return list(CustomField.objects.filter(name__startswith=prefix))
def _seed_custom_field_instances(
self,
*,
prefix: str,
custom_fields: list[CustomField],
per_doc: int,
chunk_size: int,
):
if not custom_fields or per_doc < 1:
return
instances = []
cf_ids = [cf.id for cf in custom_fields]
cf_count = len(cf_ids)
iterator = (
Document.objects.filter(title__startswith=prefix)
.values_list(
"id",
flat=True,
)
.iterator()
)
for idx, doc_id in enumerate(iterator):
start = idx % cf_count
for offset in range(per_doc):
cf_id = cf_ids[(start + offset) % cf_count]
instances.append(
CustomFieldInstance(
document_id=doc_id,
field_id=cf_id,
value_text=f"val-{doc_id}-{cf_id}",
),
)
if len(instances) >= chunk_size:
CustomFieldInstance.objects.bulk_create(
instances,
batch_size=chunk_size,
ignore_conflicts=True,
)
instances.clear()
if instances:
CustomFieldInstance.objects.bulk_create(
instances,
batch_size=chunk_size,
ignore_conflicts=True,
)
def _run_benchmarks(self, *, iterations: int, target_user, superuser, prefix: str):
self.stdout.write("-> doc counts")
self._time_query(
label="non-superuser: id__in(values_list flat=True)",
iterations=iterations,
fn=lambda: self._count_with_values_list(target_user),
)
self._time_query(
label="non-superuser: id__in(Subquery(values_list))",
iterations=iterations,
fn=lambda: self._count_with_subquery(target_user),
)
self._time_query(
label="superuser baseline",
iterations=iterations,
fn=lambda: Document.objects.count(),
)
if not self.options.get("skip_tags"):
self.stdout.write("-> tag counts")
self._time_tag_counts(
iterations=iterations,
prefix=prefix,
user=target_user,
)
if not self.options.get("skip_custom_fields"):
self.stdout.write("-> custom field counts")
self._time_custom_field_counts(
iterations=iterations,
prefix=prefix,
user=target_user,
superuser=superuser,
)
def _count_with_values_list(self, user) -> int:
qs = get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
)
return Document.objects.filter(id__in=qs.values_list("id", flat=True)).count()
def _count_with_subquery(self, user) -> int:
qs = get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
)
subquery = Subquery(qs.values_list("id"))
return Document.objects.filter(id__in=subquery).count()
def _document_filter(self, user, *, use_subquery: bool):
if user is None or getattr(user, "is_superuser", False):
return Q(documents__deleted_at__isnull=True)
qs = get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
)
ids = (
Subquery(qs.values_list("id"))
if use_subquery
else qs.values_list("id", flat=True)
)
return Q(documents__deleted_at__isnull=True, documents__id__in=ids)
def _tag_queryset(self, *, prefix: str, filter_q: Q):
return Tag.objects.filter(name__startswith=prefix).annotate(
document_count=Count("documents", filter=filter_q),
)
def _time_tag_counts(self, *, iterations: int, prefix: str, user):
if not Tag.objects.filter(name__startswith=prefix).exists():
return
self._time_query(
label="tag document_count (grouped)",
iterations=iterations,
fn=lambda: list(
Tag.documents.through.objects.filter(
document_id__in=Subquery(permitted_document_ids(user)),
)
.values("tag_id")
.annotate(c=Count("document_id"))
.values_list("tag_id", "c"),
),
)
def _time_custom_field_counts(
self,
*,
iterations: int,
prefix: str,
user,
superuser,
):
if not CustomField.objects.filter(name__startswith=prefix).exists():
return
permitted = Subquery(permitted_document_ids(user))
super_permitted = CustomFieldInstance.objects.filter(
document__deleted_at__isnull=True,
).values_list("document_id")
def _run(ids_subquery):
return list(
CustomFieldInstance.objects.filter(
document_id__in=ids_subquery,
field__name__startswith=prefix,
)
.values("field_id")
.annotate(c=Count("document_id"))
.values_list("field_id", "c"),
)
self._time_query(
label="custom fields document_count (grouped permitted)",
iterations=iterations,
fn=lambda: _run(permitted),
)
self._time_query(
label="custom fields document_count superuser baseline",
iterations=iterations,
fn=lambda: _run(super_permitted),
)
def _time_query(self, *, label: str, iterations: int, fn):
durations = []
for _ in range(iterations):
reset_queries()
start = perf_counter()
fn()
durations.append(perf_counter() - start)
avg = sum(durations) / len(durations)
self.stdout.write(
f"{label}: min={min(durations):.4f}s avg={avg:.4f}s max={max(durations):.4f}s",
)
def _unique_checksum(self, idx: int) -> str:
return f"{uuid.uuid4().hex}{idx:08d}"[:32]

View File

@@ -1,23 +0,0 @@
# Generated by Django 5.2.7 on 2026-01-14 17:45
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0005_workflowtrigger_filter_has_any_correspondents_and_more"),
]
operations = [
migrations.AlterField(
model_name="document",
name="checksum",
field=models.CharField(
editable=False,
max_length=32,
verbose_name="checksum",
help_text="The checksum of the original document.",
),
),
]

View File

@@ -1,25 +0,0 @@
# Generated by Django 5.2.6 on 2026-01-24 07:33
import django.db.models.functions.text
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0006_alter_document_checksum_unique"),
]
operations = [
migrations.AddField(
model_name="document",
name="content_length",
field=models.GeneratedField(
db_persist=True,
expression=django.db.models.functions.text.Length("content"),
null=False,
help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
output_field=models.PositiveIntegerField(default=0),
),
),
]

View File

@@ -20,9 +20,7 @@ if settings.AUDIT_LOG_ENABLED:
from auditlog.registry import auditlog from auditlog.registry import auditlog
from django.db.models import Case from django.db.models import Case
from django.db.models import PositiveIntegerField
from django.db.models.functions import Cast from django.db.models.functions import Cast
from django.db.models.functions import Length
from django.db.models.functions import Substr from django.db.models.functions import Substr
from django_softdelete.models import SoftDeleteModel from django_softdelete.models import SoftDeleteModel
@@ -194,15 +192,6 @@ class Document(SoftDeleteModel, ModelWithOwner):
), ),
) )
content_length = models.GeneratedField(
expression=Length("content"),
output_field=PositiveIntegerField(default=0),
db_persist=True,
null=False,
serialize=False,
help_text="Length of the content field in characters. Automatically maintained by the database for faster statistics computation.",
)
mime_type = models.CharField(_("mime type"), max_length=256, editable=False) mime_type = models.CharField(_("mime type"), max_length=256, editable=False)
tags = models.ManyToManyField( tags = models.ManyToManyField(
@@ -216,6 +205,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
_("checksum"), _("checksum"),
max_length=32, max_length=32,
editable=False, editable=False,
unique=True,
help_text=_("The checksum of the original document."), help_text=_("The checksum of the original document."),
) )
@@ -956,7 +946,7 @@ if settings.AUDIT_LOG_ENABLED:
auditlog.register( auditlog.register(
Document, Document,
m2m_fields={"tags"}, m2m_fields={"tags"},
exclude_fields=["content_length", "modified"], exclude_fields=["modified"],
) )
auditlog.register(Correspondent) auditlog.register(Correspondent)
auditlog.register(Tag) auditlog.register(Tag)

View File

@@ -139,48 +139,22 @@ def get_document_count_filter_for_user(user):
if getattr(user, "is_superuser", False): if getattr(user, "is_superuser", False):
return Q(documents__deleted_at__isnull=True) return Q(documents__deleted_at__isnull=True)
return Q( return Q(
documents__id__in=permitted_document_ids(user), documents__deleted_at__isnull=True,
documents__id__in=get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
).values_list("id", flat=True),
) )
def permitted_document_ids(user): def get_objects_for_user_owner_aware(user, perms, Model) -> QuerySet:
""" objects_owned = Model.objects.filter(owner=user)
Return a Subquery of permitted, non-deleted document IDs for the user. objects_unowned = Model.objects.filter(owner__isnull=True)
Used to avoid repeated joins to the Document table in count annotations.
"""
if user is None or not getattr(user, "is_authenticated", False):
return Document.objects.none().values_list("id")
qs = get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
).filter(deleted_at__isnull=True)
return qs.values_list("id")
def get_objects_for_user_owner_aware(
user,
perms,
Model,
*,
include_deleted=False,
) -> QuerySet:
"""
Returns objects the user owns, are unowned, or has explicit perms.
When include_deleted is True, soft-deleted items are also included.
"""
manager = (
Model.global_objects
if include_deleted and hasattr(Model, "global_objects")
else Model.objects
)
objects_owned = manager.filter(owner=user)
objects_unowned = manager.filter(owner__isnull=True)
objects_with_perms = get_objects_for_user( objects_with_perms = get_objects_for_user(
user=user, user=user,
perms=perms, perms=perms,
klass=manager.all(), klass=Model,
accept_global_perms=False, accept_global_perms=False,
) )
return objects_owned | objects_unowned | objects_with_perms return objects_owned | objects_unowned | objects_with_perms

View File

@@ -23,8 +23,6 @@ from django.core.validators import MinValueValidator
from django.core.validators import RegexValidator from django.core.validators import RegexValidator
from django.core.validators import integer_validator from django.core.validators import integer_validator
from django.db.models import Count from django.db.models import Count
from django.db.models import Q
from django.db.models import Subquery
from django.db.models.functions import Lower from django.db.models.functions import Lower
from django.utils.crypto import get_random_string from django.utils.crypto import get_random_string
from django.utils.dateparse import parse_datetime from django.utils.dateparse import parse_datetime
@@ -72,9 +70,8 @@ from documents.models import WorkflowActionEmail
from documents.models import WorkflowActionWebhook from documents.models import WorkflowActionWebhook
from documents.models import WorkflowTrigger from documents.models import WorkflowTrigger
from documents.parsers import is_mime_type_supported from documents.parsers import is_mime_type_supported
from documents.permissions import get_document_count_filter_for_user
from documents.permissions import get_groups_with_only_permission from documents.permissions import get_groups_with_only_permission
from documents.permissions import get_objects_for_user_owner_aware
from documents.permissions import permitted_document_ids
from documents.permissions import set_permissions_for_object from documents.permissions import set_permissions_for_object
from documents.regex import validate_regex_pattern from documents.regex import validate_regex_pattern
from documents.templating.filepath import validate_filepath_template_and_render from documents.templating.filepath import validate_filepath_template_and_render
@@ -85,9 +82,6 @@ from documents.validators import url_validator
if TYPE_CHECKING: if TYPE_CHECKING:
from collections.abc import Iterable from collections.abc import Iterable
from django.db.models.query import QuerySet
logger = logging.getLogger("paperless.serializers") logger = logging.getLogger("paperless.serializers")
@@ -590,41 +584,18 @@ class TagSerializer(MatchingModelSerializer, OwnedObjectSerializer):
if children_map is not None: if children_map is not None:
children = children_map.get(obj.pk, []) children = children_map.get(obj.pk, [])
else: else:
filter_q = self.context.get("document_count_filter")
request = self.context.get("request") request = self.context.get("request")
user = getattr(request, "user", None) if request else None if filter_q is None:
user = getattr(request, "user", None) if request else None
filter_q = get_document_count_filter_for_user(user)
self.context["document_count_filter"] = filter_q
filter_kind = self.context.get("document_count_filter") children = (
if filter_kind is None: obj.get_children_queryset()
filter_kind = ( .select_related("owner")
"superuser" .annotate(document_count=Count("documents", filter=filter_q))
if user and getattr(user, "is_superuser", False) )
else "restricted"
)
self.context["document_count_filter"] = filter_kind
queryset = obj.get_children_queryset().select_related("owner")
if filter_kind == "superuser":
children = queryset.annotate(
document_count=Count(
"documents",
filter=Q(documents__deleted_at__isnull=True),
distinct=True,
),
)
else:
permitted_ids = Subquery(permitted_document_ids(user))
counts = dict(
Tag.documents.through.objects.filter(
document_id__in=permitted_ids,
)
.values("tag_id")
.annotate(c=Count("document_id"))
.values_list("tag_id", "c"),
)
children = list(queryset)
for child in children:
child.document_count = counts.get(child.id, 0)
view = self.context.get("view") view = self.context.get("view")
ordering = ( ordering = (
@@ -633,11 +604,7 @@ class TagSerializer(MatchingModelSerializer, OwnedObjectSerializer):
else None else None
) )
ordering = ordering or (Lower("name"),) ordering = ordering or (Lower("name"),)
if hasattr(children, "order_by"): children = children.order_by(*ordering)
children = children.order_by(*ordering)
else:
# children is a list (pre-fetched); apply basic ordering on name
children = sorted(children, key=lambda c: (c.name or "").lower())
serializer = TagSerializer( serializer = TagSerializer(
children, children,
@@ -1047,32 +1014,6 @@ class NotesSerializer(serializers.ModelSerializer):
return ret return ret
def _get_viewable_duplicates(
document: Document,
user: User | None,
) -> QuerySet[Document]:
checksums = {document.checksum}
if document.archive_checksum:
checksums.add(document.archive_checksum)
duplicates = Document.global_objects.filter(
Q(checksum__in=checksums) | Q(archive_checksum__in=checksums),
).exclude(pk=document.pk)
duplicates = duplicates.order_by("-created")
allowed = get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
include_deleted=True,
)
return duplicates.filter(id__in=allowed)
class DuplicateDocumentSummarySerializer(serializers.Serializer):
id = serializers.IntegerField()
title = serializers.CharField()
deleted_at = serializers.DateTimeField(allow_null=True)
@extend_schema_serializer( @extend_schema_serializer(
deprecate_fields=["created_date"], deprecate_fields=["created_date"],
) )
@@ -1090,7 +1031,6 @@ class DocumentSerializer(
archived_file_name = SerializerMethodField() archived_file_name = SerializerMethodField()
created_date = serializers.DateField(required=False) created_date = serializers.DateField(required=False)
page_count = SerializerMethodField() page_count = SerializerMethodField()
duplicate_documents = SerializerMethodField()
notes = NotesSerializer(many=True, required=False, read_only=True) notes = NotesSerializer(many=True, required=False, read_only=True)
@@ -1116,16 +1056,6 @@ class DocumentSerializer(
def get_page_count(self, obj) -> int | None: def get_page_count(self, obj) -> int | None:
return obj.page_count return obj.page_count
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
def get_duplicate_documents(self, obj):
view = self.context.get("view")
if view and getattr(view, "action", None) != "retrieve":
return []
request = self.context.get("request")
user = request.user if request else None
duplicates = _get_viewable_duplicates(obj, user)
return list(duplicates.values("id", "title", "deleted_at"))
def get_original_file_name(self, obj) -> str | None: def get_original_file_name(self, obj) -> str | None:
return obj.original_filename return obj.original_filename
@@ -1303,7 +1233,6 @@ class DocumentSerializer(
"archive_serial_number", "archive_serial_number",
"original_file_name", "original_file_name",
"archived_file_name", "archived_file_name",
"duplicate_documents",
"owner", "owner",
"permissions", "permissions",
"user_can_change", "user_can_change",
@@ -2165,12 +2094,10 @@ class TasksViewSerializer(OwnedObjectSerializer):
"result", "result",
"acknowledged", "acknowledged",
"related_document", "related_document",
"duplicate_documents",
"owner", "owner",
) )
related_document = serializers.SerializerMethodField() related_document = serializers.SerializerMethodField()
duplicate_documents = serializers.SerializerMethodField()
created_doc_re = re.compile(r"New document id (\d+) created") created_doc_re = re.compile(r"New document id (\d+) created")
duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)") duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")
@@ -2195,17 +2122,6 @@ class TasksViewSerializer(OwnedObjectSerializer):
return result return result
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
def get_duplicate_documents(self, obj):
related_document = self.get_related_document(obj)
request = self.context.get("request")
user = request.user if request else None
document = Document.global_objects.filter(pk=related_document).first()
if not related_document or not user or not document:
return []
duplicates = _get_viewable_duplicates(document, user)
return list(duplicates.values("id", "title", "deleted_at"))
class RunTaskViewSerializer(serializers.Serializer): class RunTaskViewSerializer(serializers.Serializer):
task_name = serializers.ChoiceField( task_name = serializers.ChoiceField(

View File

@@ -131,10 +131,6 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
self.assertIn("content", results_full[0]) self.assertIn("content", results_full[0])
self.assertIn("id", results_full[0]) self.assertIn("id", results_full[0])
# Content length is used internally for performance reasons.
# No need to expose this field.
self.assertNotIn("content_length", results_full[0])
response = self.client.get("/api/documents/?fields=id", format="json") response = self.client.get("/api/documents/?fields=id", format="json")
self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.status_code, status.HTTP_200_OK)
results = response.data["results"] results = response.data["results"]

View File

@@ -7,7 +7,6 @@ from django.contrib.auth.models import User
from rest_framework import status from rest_framework import status
from rest_framework.test import APITestCase from rest_framework.test import APITestCase
from documents.models import Document
from documents.models import PaperlessTask from documents.models import PaperlessTask
from documents.tests.utils import DirectoriesMixin from documents.tests.utils import DirectoriesMixin
from documents.views import TasksViewSet from documents.views import TasksViewSet
@@ -259,7 +258,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
task_id=str(uuid.uuid4()), task_id=str(uuid.uuid4()),
task_file_name="task_one.pdf", task_file_name="task_one.pdf",
status=celery.states.FAILURE, status=celery.states.FAILURE,
result="test.pdf: Unexpected error during ingestion.", result="test.pdf: Not consuming test.pdf: It is a duplicate.",
) )
response = self.client.get(self.ENDPOINT) response = self.client.get(self.ENDPOINT)
@@ -271,7 +270,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
self.assertEqual( self.assertEqual(
returned_data["result"], returned_data["result"],
"test.pdf: Unexpected error during ingestion.", "test.pdf: Not consuming test.pdf: It is a duplicate.",
) )
def test_task_name_webui(self): def test_task_name_webui(self):
@@ -326,34 +325,20 @@ class TestTasks(DirectoriesMixin, APITestCase):
self.assertEqual(returned_data["task_file_name"], "anothertest.pdf") self.assertEqual(returned_data["task_file_name"], "anothertest.pdf")
def test_task_result_duplicate_warning_includes_count(self): def test_task_result_failed_duplicate_includes_related_doc(self):
""" """
GIVEN: GIVEN:
- A celery task succeeds, but a duplicate exists - A celery task failed with a duplicate error
WHEN: WHEN:
- API call is made to get tasks - API call is made to get tasks
THEN: THEN:
- The returned data includes duplicate warning metadata - The returned data includes a related document link
""" """
checksum = "duplicate-checksum"
Document.objects.create(
title="Existing",
content="",
mime_type="application/pdf",
checksum=checksum,
)
created_doc = Document.objects.create(
title="Created",
content="",
mime_type="application/pdf",
checksum=checksum,
archive_checksum="another-checksum",
)
PaperlessTask.objects.create( PaperlessTask.objects.create(
task_id=str(uuid.uuid4()), task_id=str(uuid.uuid4()),
task_file_name="task_one.pdf", task_file_name="task_one.pdf",
status=celery.states.SUCCESS, status=celery.states.FAILURE,
result=f"Success. New document id {created_doc.pk} created", result="Not consuming task_one.pdf: It is a duplicate of task_one_existing.pdf (#1234).",
) )
response = self.client.get(self.ENDPOINT) response = self.client.get(self.ENDPOINT)
@@ -363,7 +348,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
returned_data = response.data[0] returned_data = response.data[0]
self.assertEqual(returned_data["related_document"], str(created_doc.pk)) self.assertEqual(returned_data["related_document"], "1234")
def test_run_train_classifier_task(self): def test_run_train_classifier_task(self):
""" """

View File

@@ -485,21 +485,21 @@ class TestConsumer(
with self.get_consumer(self.get_test_file()) as consumer: with self.get_consumer(self.get_test_file()) as consumer:
consumer.run() consumer.run()
with self.get_consumer(self.get_test_file()) as consumer: with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
consumer.run() with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
self.assertEqual(Document.objects.count(), 2) self._assert_first_last_send_progress(last_status="FAILED")
self._assert_first_last_send_progress()
def testDuplicates2(self): def testDuplicates2(self):
with self.get_consumer(self.get_test_file()) as consumer: with self.get_consumer(self.get_test_file()) as consumer:
consumer.run() consumer.run()
with self.get_consumer(self.get_test_archive_file()) as consumer: with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
consumer.run() with self.get_consumer(self.get_test_archive_file()) as consumer:
consumer.run()
self.assertEqual(Document.objects.count(), 2) self._assert_first_last_send_progress(last_status="FAILED")
self._assert_first_last_send_progress()
def testDuplicates3(self): def testDuplicates3(self):
with self.get_consumer(self.get_test_archive_file()) as consumer: with self.get_consumer(self.get_test_archive_file()) as consumer:
@@ -513,10 +513,9 @@ class TestConsumer(
Document.objects.all().delete() Document.objects.all().delete()
with self.get_consumer(self.get_test_file()) as consumer: with self.assertRaisesMessage(ConsumerError, "document is in the trash"):
consumer.run() with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
self.assertEqual(Document.objects.count(), 1)
def testAsnExists(self): def testAsnExists(self):
with self.get_consumer( with self.get_consumer(
@@ -719,45 +718,12 @@ class TestConsumer(
dst = self.get_test_file() dst = self.get_test_file()
self.assertIsFile(dst) self.assertIsFile(dst)
expected_message = ( with self.assertRaises(ConsumerError):
f"{dst.name}: Not consuming {dst.name}: "
f"It is a duplicate of {document.title} (#{document.pk})"
)
with self.assertRaisesMessage(ConsumerError, expected_message):
with self.get_consumer(dst) as consumer: with self.get_consumer(dst) as consumer:
consumer.run() consumer.run()
self.assertIsNotFile(dst) self.assertIsNotFile(dst)
self.assertEqual(Document.objects.count(), 1) self._assert_first_last_send_progress(last_status="FAILED")
self._assert_first_last_send_progress(last_status=ProgressStatusOptions.FAILED)
@override_settings(CONSUMER_DELETE_DUPLICATES=True)
def test_delete_duplicate_in_trash(self):
dst = self.get_test_file()
with self.get_consumer(dst) as consumer:
consumer.run()
# Move the existing document to trash
document = Document.objects.first()
document.delete()
dst = self.get_test_file()
self.assertIsFile(dst)
expected_message = (
f"{dst.name}: Not consuming {dst.name}: "
f"It is a duplicate of {document.title} (#{document.pk})"
f" Note: existing document is in the trash."
)
with self.assertRaisesMessage(ConsumerError, expected_message):
with self.get_consumer(dst) as consumer:
consumer.run()
self.assertIsNotFile(dst)
self.assertEqual(Document.global_objects.count(), 1)
self.assertEqual(Document.objects.count(), 0)
@override_settings(CONSUMER_DELETE_DUPLICATES=False) @override_settings(CONSUMER_DELETE_DUPLICATES=False)
def test_no_delete_duplicate(self): def test_no_delete_duplicate(self):
@@ -777,12 +743,15 @@ class TestConsumer(
dst = self.get_test_file() dst = self.get_test_file()
self.assertIsFile(dst) self.assertIsFile(dst)
with self.get_consumer(dst) as consumer: with self.assertRaisesRegex(
consumer.run() ConsumerError,
r"sample\.pdf: Not consuming sample\.pdf: It is a duplicate of sample \(#\d+\)",
):
with self.get_consumer(dst) as consumer:
consumer.run()
self.assertIsNotFile(dst) self.assertIsFile(dst)
self.assertEqual(Document.objects.count(), 2) self._assert_first_last_send_progress(last_status="FAILED")
self._assert_first_last_send_progress()
@override_settings(FILENAME_FORMAT="{title}") @override_settings(FILENAME_FORMAT="{title}")
@mock.patch("documents.parsers.document_consumer_declaration.send") @mock.patch("documents.parsers.document_consumer_declaration.send")

View File

@@ -241,10 +241,6 @@ class TestExportImport(
checksum = hashlib.md5(f.read()).hexdigest() checksum = hashlib.md5(f.read()).hexdigest()
self.assertEqual(checksum, element["fields"]["checksum"]) self.assertEqual(checksum, element["fields"]["checksum"])
# Generated field "content_length" should not be exported,
# it is automatically computed during import.
self.assertNotIn("content_length", element["fields"])
if document_exporter.EXPORTER_ARCHIVE_NAME in element: if document_exporter.EXPORTER_ARCHIVE_NAME in element:
fname = ( fname = (
self.target / element[document_exporter.EXPORTER_ARCHIVE_NAME] self.target / element[document_exporter.EXPORTER_ARCHIVE_NAME]

View File

@@ -33,9 +33,9 @@ from django.db.models import IntegerField
from django.db.models import Max from django.db.models import Max
from django.db.models import Model from django.db.models import Model
from django.db.models import Q from django.db.models import Q
from django.db.models import Subquery
from django.db.models import Sum from django.db.models import Sum
from django.db.models import When from django.db.models import When
from django.db.models.functions import Length
from django.db.models.functions import Lower from django.db.models.functions import Lower
from django.db.models.manager import Manager from django.db.models.manager import Manager
from django.http import FileResponse from django.http import FileResponse
@@ -154,7 +154,6 @@ from documents.permissions import ViewDocumentsPermissions
from documents.permissions import get_document_count_filter_for_user from documents.permissions import get_document_count_filter_for_user
from documents.permissions import get_objects_for_user_owner_aware from documents.permissions import get_objects_for_user_owner_aware
from documents.permissions import has_perms_owner_aware from documents.permissions import has_perms_owner_aware
from documents.permissions import permitted_document_ids
from documents.permissions import set_permissions_for_object from documents.permissions import set_permissions_for_object
from documents.schema import generate_object_with_permissions_schema from documents.schema import generate_object_with_permissions_schema
from documents.serialisers import AcknowledgeTasksViewSerializer from documents.serialisers import AcknowledgeTasksViewSerializer
@@ -2327,19 +2326,23 @@ class StatisticsView(GenericAPIView):
user = request.user if request.user is not None else None user = request.user if request.user is not None else None
documents = ( documents = (
Document.objects.all() (
if user is None Document.objects.all()
else get_objects_for_user_owner_aware( if user is None
user, else get_objects_for_user_owner_aware(
"documents.view_document", user,
Document, "documents.view_document",
Document,
)
) )
.only("mime_type", "content")
.prefetch_related("tags")
) )
tags = ( tags = (
Tag.objects.all() Tag.objects.all()
if user is None if user is None
else get_objects_for_user_owner_aware(user, "documents.view_tag", Tag) else get_objects_for_user_owner_aware(user, "documents.view_tag", Tag)
).only("id", "is_inbox_tag") )
correspondent_count = ( correspondent_count = (
Correspondent.objects.count() Correspondent.objects.count()
if user is None if user is None
@@ -2368,33 +2371,31 @@ class StatisticsView(GenericAPIView):
).count() ).count()
) )
inbox_tag_pks = list( documents_total = documents.count()
tags.filter(is_inbox_tag=True).values_list("pk", flat=True),
) inbox_tags = tags.filter(is_inbox_tag=True)
documents_inbox = ( documents_inbox = (
documents.filter(tags__id__in=inbox_tag_pks).values("id").distinct().count() documents.filter(tags__id__in=inbox_tags).distinct().count()
if inbox_tag_pks if inbox_tags.exists()
else None else None
) )
# Single SQL request for document stats and mime type counts document_file_type_counts = (
mime_type_stats = list(
documents.values("mime_type") documents.values("mime_type")
.annotate( .annotate(mime_type_count=Count("mime_type"))
mime_type_count=Count("id"), .order_by("-mime_type_count")
mime_type_chars=Sum("content_length"), if documents_total > 0
) else []
.order_by("-mime_type_count"),
) )
# Calculate totals from grouped results character_count = (
documents_total = sum(row["mime_type_count"] for row in mime_type_stats) documents.annotate(
character_count = sum(row["mime_type_chars"] or 0 for row in mime_type_stats) characters=Length("content"),
document_file_type_counts = [ )
{"mime_type": row["mime_type"], "mime_type_count": row["mime_type_count"]} .aggregate(Sum("characters"))
for row in mime_type_stats .get("characters__sum")
] )
current_asn = Document.objects.aggregate( current_asn = Document.objects.aggregate(
Max("archive_serial_number", default=0), Max("archive_serial_number", default=0),
@@ -2407,9 +2408,11 @@ class StatisticsView(GenericAPIView):
"documents_total": documents_total, "documents_total": documents_total,
"documents_inbox": documents_inbox, "documents_inbox": documents_inbox,
"inbox_tag": ( "inbox_tag": (
inbox_tag_pks[0] if inbox_tag_pks else None inbox_tags.first().pk if inbox_tags.exists() else None
), # backwards compatibility ), # backwards compatibility
"inbox_tags": (inbox_tag_pks if inbox_tag_pks else None), "inbox_tags": (
[tag.pk for tag in inbox_tags] if inbox_tags.exists() else None
),
"document_file_type_counts": document_file_type_counts, "document_file_type_counts": document_file_type_counts,
"character_count": character_count, "character_count": character_count,
"tag_count": len(tags), "tag_count": len(tags),
@@ -3009,32 +3012,27 @@ class CustomFieldViewSet(ModelViewSet):
queryset = CustomField.objects.all().order_by("-created") queryset = CustomField.objects.all().order_by("-created")
def get_queryset(self): def get_queryset(self):
user = self.request.user filter = (
if user is None or user.is_superuser: Q(fields__document__deleted_at__isnull=True)
return ( if self.request.user is None or self.request.user.is_superuser
super() else (
.get_queryset() Q(
.annotate( fields__document__deleted_at__isnull=True,
document_count=Count( fields__document__id__in=get_objects_for_user_owner_aware(
"fields", self.request.user,
filter=Q(fields__document__deleted_at__isnull=True), "documents.view_document",
distinct=True, Document,
), ).values_list("id", flat=True),
) )
) )
)
permitted_ids = Subquery(permitted_document_ids(user))
return ( return (
super() super()
.get_queryset() .get_queryset()
.annotate( .annotate(
document_count=Count( document_count=Count(
"fields", "fields",
filter=Q( filter=filter,
fields__document__deleted_at__isnull=True,
fields__document_id__in=permitted_ids,
),
distinct=True,
), ),
) )
) )

File diff suppressed because it is too large Load Diff