Compare commits

..

13 Commits

Author SHA1 Message Date
shamoon
7795740f2d Fix migration 2026-01-24 20:07:52 -08:00
shamoon
e036ea972a Fix/refactor 2026-01-24 20:07:15 -08:00
shamoon
2523100a52 Backend coverage 2026-01-24 20:07:14 -08:00
shamoon
3ea21f3d20 Refactor serializer 2026-01-24 20:07:14 -08:00
shamoon
e19ef49ed2 Frontend coverage 2026-01-24 20:07:13 -08:00
shamoon
b710fc2907 Some random cleanups 2026-01-24 20:07:13 -08:00
shamoon
21985e5d84 Fix schema 2026-01-24 20:07:12 -08:00
shamoon
107f58c4ae Nice, UX for doc in trash 2026-01-24 20:07:12 -08:00
shamoon
1b8fd1fffa Make these anchors 2026-01-24 20:07:11 -08:00
shamoon
13e45fd45c Treat CONSUMER_DELETE_DUPLICATES as a hard no 2026-01-24 20:07:11 -08:00
shamoon
bbffaf22d2 Ok lets make duplicates a tab, nice 2026-01-24 20:07:10 -08:00
shamoon
fdb45a8134 Drop DuplicateDocument 2026-01-24 20:07:10 -08:00
shamoon
a8e1344339 Core elements, migration, consumer modifications 2026-01-24 20:07:09 -08:00
21 changed files with 312 additions and 447 deletions

View File

@@ -1146,8 +1146,9 @@ via the consumption directory, you can disable the consumer to save resources.
#### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES} #### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES}
: When the consumer detects a duplicate document, it will not touch : As of version 3.0 Paperless-ngx allows duplicate documents to be consumed by default, _except_ when
the original document. This default behavior can be changed here. this setting is enabled. When enabled, Paperless will check if a document with the same hash already
exists in the system and delete the duplicate file from the consumption directory without consuming it.
Defaults to false. Defaults to false.

View File

@@ -97,6 +97,12 @@
<br/><em>(<ng-container i18n>click for full output</ng-container>)</em> <br/><em>(<ng-container i18n>click for full output</ng-container>)</em>
} }
</ng-template> </ng-template>
@if (task.duplicate_documents?.length > 0) {
<div class="small text-warning-emphasis d-flex align-items-center gap-1">
<i-bs class="lh-1" width="1em" height="1em" name="exclamation-triangle"></i-bs>
<span i18n>Duplicate(s) detected</span>
</div>
}
</td> </td>
} }
<td class="d-lg-none"> <td class="d-lg-none">

View File

@@ -412,9 +412,6 @@ describe('WorkflowEditDialogComponent', () => {
return newFilter return newFilter
} }
const correspondentAny = addFilterOfType(TriggerFilterType.CorrespondentAny)
correspondentAny.get('values').setValue([11])
const correspondentIs = addFilterOfType(TriggerFilterType.CorrespondentIs) const correspondentIs = addFilterOfType(TriggerFilterType.CorrespondentIs)
correspondentIs.get('values').setValue(1) correspondentIs.get('values').setValue(1)
@@ -424,18 +421,12 @@ describe('WorkflowEditDialogComponent', () => {
const documentTypeIs = addFilterOfType(TriggerFilterType.DocumentTypeIs) const documentTypeIs = addFilterOfType(TriggerFilterType.DocumentTypeIs)
documentTypeIs.get('values').setValue(1) documentTypeIs.get('values').setValue(1)
const documentTypeAny = addFilterOfType(TriggerFilterType.DocumentTypeAny)
documentTypeAny.get('values').setValue([12])
const documentTypeNot = addFilterOfType(TriggerFilterType.DocumentTypeNot) const documentTypeNot = addFilterOfType(TriggerFilterType.DocumentTypeNot)
documentTypeNot.get('values').setValue([1]) documentTypeNot.get('values').setValue([1])
const storagePathIs = addFilterOfType(TriggerFilterType.StoragePathIs) const storagePathIs = addFilterOfType(TriggerFilterType.StoragePathIs)
storagePathIs.get('values').setValue(1) storagePathIs.get('values').setValue(1)
const storagePathAny = addFilterOfType(TriggerFilterType.StoragePathAny)
storagePathAny.get('values').setValue([13])
const storagePathNot = addFilterOfType(TriggerFilterType.StoragePathNot) const storagePathNot = addFilterOfType(TriggerFilterType.StoragePathNot)
storagePathNot.get('values').setValue([1]) storagePathNot.get('values').setValue([1])
@@ -450,13 +441,10 @@ describe('WorkflowEditDialogComponent', () => {
expect(formValues.triggers[0].filter_has_tags).toEqual([1]) expect(formValues.triggers[0].filter_has_tags).toEqual([1])
expect(formValues.triggers[0].filter_has_all_tags).toEqual([2, 3]) expect(formValues.triggers[0].filter_has_all_tags).toEqual([2, 3])
expect(formValues.triggers[0].filter_has_not_tags).toEqual([4]) expect(formValues.triggers[0].filter_has_not_tags).toEqual([4])
expect(formValues.triggers[0].filter_has_any_correspondents).toEqual([11])
expect(formValues.triggers[0].filter_has_correspondent).toEqual(1) expect(formValues.triggers[0].filter_has_correspondent).toEqual(1)
expect(formValues.triggers[0].filter_has_not_correspondents).toEqual([1]) expect(formValues.triggers[0].filter_has_not_correspondents).toEqual([1])
expect(formValues.triggers[0].filter_has_any_document_types).toEqual([12])
expect(formValues.triggers[0].filter_has_document_type).toEqual(1) expect(formValues.triggers[0].filter_has_document_type).toEqual(1)
expect(formValues.triggers[0].filter_has_not_document_types).toEqual([1]) expect(formValues.triggers[0].filter_has_not_document_types).toEqual([1])
expect(formValues.triggers[0].filter_has_any_storage_paths).toEqual([13])
expect(formValues.triggers[0].filter_has_storage_path).toEqual(1) expect(formValues.triggers[0].filter_has_storage_path).toEqual(1)
expect(formValues.triggers[0].filter_has_not_storage_paths).toEqual([1]) expect(formValues.triggers[0].filter_has_not_storage_paths).toEqual([1])
expect(formValues.triggers[0].filter_custom_field_query).toEqual( expect(formValues.triggers[0].filter_custom_field_query).toEqual(
@@ -519,22 +507,16 @@ describe('WorkflowEditDialogComponent', () => {
setFilter(TriggerFilterType.TagsAll, 11) setFilter(TriggerFilterType.TagsAll, 11)
setFilter(TriggerFilterType.TagsNone, 12) setFilter(TriggerFilterType.TagsNone, 12)
setFilter(TriggerFilterType.CorrespondentAny, 16)
setFilter(TriggerFilterType.CorrespondentNot, 13) setFilter(TriggerFilterType.CorrespondentNot, 13)
setFilter(TriggerFilterType.DocumentTypeAny, 17)
setFilter(TriggerFilterType.DocumentTypeNot, 14) setFilter(TriggerFilterType.DocumentTypeNot, 14)
setFilter(TriggerFilterType.StoragePathAny, 18)
setFilter(TriggerFilterType.StoragePathNot, 15) setFilter(TriggerFilterType.StoragePathNot, 15)
const formValues = component['getFormValues']() const formValues = component['getFormValues']()
expect(formValues.triggers[0].filter_has_all_tags).toEqual([11]) expect(formValues.triggers[0].filter_has_all_tags).toEqual([11])
expect(formValues.triggers[0].filter_has_not_tags).toEqual([12]) expect(formValues.triggers[0].filter_has_not_tags).toEqual([12])
expect(formValues.triggers[0].filter_has_any_correspondents).toEqual([16])
expect(formValues.triggers[0].filter_has_not_correspondents).toEqual([13]) expect(formValues.triggers[0].filter_has_not_correspondents).toEqual([13])
expect(formValues.triggers[0].filter_has_any_document_types).toEqual([17])
expect(formValues.triggers[0].filter_has_not_document_types).toEqual([14]) expect(formValues.triggers[0].filter_has_not_document_types).toEqual([14])
expect(formValues.triggers[0].filter_has_any_storage_paths).toEqual([18])
expect(formValues.triggers[0].filter_has_not_storage_paths).toEqual([15]) expect(formValues.triggers[0].filter_has_not_storage_paths).toEqual([15])
}) })
@@ -658,11 +640,8 @@ describe('WorkflowEditDialogComponent', () => {
filter_has_tags: [], filter_has_tags: [],
filter_has_all_tags: [], filter_has_all_tags: [],
filter_has_not_tags: [], filter_has_not_tags: [],
filter_has_any_correspondents: [],
filter_has_not_correspondents: [], filter_has_not_correspondents: [],
filter_has_any_document_types: [],
filter_has_not_document_types: [], filter_has_not_document_types: [],
filter_has_any_storage_paths: [],
filter_has_not_storage_paths: [], filter_has_not_storage_paths: [],
filter_has_correspondent: null, filter_has_correspondent: null,
filter_has_document_type: null, filter_has_document_type: null,
@@ -720,14 +699,11 @@ describe('WorkflowEditDialogComponent', () => {
trigger.filter_has_tags = [1] trigger.filter_has_tags = [1]
trigger.filter_has_all_tags = [2, 3] trigger.filter_has_all_tags = [2, 3]
trigger.filter_has_not_tags = [4] trigger.filter_has_not_tags = [4]
trigger.filter_has_any_correspondents = [10] as any
trigger.filter_has_correspondent = 5 as any trigger.filter_has_correspondent = 5 as any
trigger.filter_has_not_correspondents = [6] as any trigger.filter_has_not_correspondents = [6] as any
trigger.filter_has_document_type = 7 as any trigger.filter_has_document_type = 7 as any
trigger.filter_has_any_document_types = [11] as any
trigger.filter_has_not_document_types = [8] as any trigger.filter_has_not_document_types = [8] as any
trigger.filter_has_storage_path = 9 as any trigger.filter_has_storage_path = 9 as any
trigger.filter_has_any_storage_paths = [12] as any
trigger.filter_has_not_storage_paths = [10] as any trigger.filter_has_not_storage_paths = [10] as any
trigger.filter_custom_field_query = JSON.stringify([ trigger.filter_custom_field_query = JSON.stringify([
'AND', 'AND',
@@ -738,8 +714,8 @@ describe('WorkflowEditDialogComponent', () => {
component.ngOnInit() component.ngOnInit()
const triggerGroup = component.triggerFields.at(0) as FormGroup const triggerGroup = component.triggerFields.at(0) as FormGroup
const filters = component.getFiltersFormArray(triggerGroup) const filters = component.getFiltersFormArray(triggerGroup)
expect(filters.length).toBe(13) expect(filters.length).toBe(10)
const customFieldFilter = filters.at(12) as FormGroup const customFieldFilter = filters.at(9) as FormGroup
expect(customFieldFilter.get('type').value).toBe( expect(customFieldFilter.get('type').value).toBe(
TriggerFilterType.CustomFieldQuery TriggerFilterType.CustomFieldQuery
) )
@@ -748,27 +724,12 @@ describe('WorkflowEditDialogComponent', () => {
}) })
it('should expose select metadata helpers', () => { it('should expose select metadata helpers', () => {
expect(component.isSelectMultiple(TriggerFilterType.CorrespondentAny)).toBe(
true
)
expect(component.isSelectMultiple(TriggerFilterType.CorrespondentNot)).toBe( expect(component.isSelectMultiple(TriggerFilterType.CorrespondentNot)).toBe(
true true
) )
expect(component.isSelectMultiple(TriggerFilterType.CorrespondentIs)).toBe( expect(component.isSelectMultiple(TriggerFilterType.CorrespondentIs)).toBe(
false false
) )
expect(component.isSelectMultiple(TriggerFilterType.DocumentTypeAny)).toBe(
true
)
expect(component.isSelectMultiple(TriggerFilterType.DocumentTypeIs)).toBe(
false
)
expect(component.isSelectMultiple(TriggerFilterType.StoragePathAny)).toBe(
true
)
expect(component.isSelectMultiple(TriggerFilterType.StoragePathIs)).toBe(
false
)
component.correspondents = [{ id: 1, name: 'C1' } as any] component.correspondents = [{ id: 1, name: 'C1' } as any]
component.documentTypes = [{ id: 2, name: 'DT' } as any] component.documentTypes = [{ id: 2, name: 'DT' } as any]
@@ -780,15 +741,9 @@ describe('WorkflowEditDialogComponent', () => {
expect( expect(
component.getFilterSelectItems(TriggerFilterType.DocumentTypeIs) component.getFilterSelectItems(TriggerFilterType.DocumentTypeIs)
).toEqual(component.documentTypes) ).toEqual(component.documentTypes)
expect(
component.getFilterSelectItems(TriggerFilterType.DocumentTypeAny)
).toEqual(component.documentTypes)
expect( expect(
component.getFilterSelectItems(TriggerFilterType.StoragePathIs) component.getFilterSelectItems(TriggerFilterType.StoragePathIs)
).toEqual(component.storagePaths) ).toEqual(component.storagePaths)
expect(
component.getFilterSelectItems(TriggerFilterType.StoragePathAny)
).toEqual(component.storagePaths)
expect(component.getFilterSelectItems(TriggerFilterType.TagsAll)).toEqual( expect(component.getFilterSelectItems(TriggerFilterType.TagsAll)).toEqual(
[] []
) )

View File

@@ -145,13 +145,10 @@ export enum TriggerFilterType {
TagsAny = 'tags_any', TagsAny = 'tags_any',
TagsAll = 'tags_all', TagsAll = 'tags_all',
TagsNone = 'tags_none', TagsNone = 'tags_none',
CorrespondentAny = 'correspondent_any',
CorrespondentIs = 'correspondent_is', CorrespondentIs = 'correspondent_is',
CorrespondentNot = 'correspondent_not', CorrespondentNot = 'correspondent_not',
DocumentTypeAny = 'document_type_any',
DocumentTypeIs = 'document_type_is', DocumentTypeIs = 'document_type_is',
DocumentTypeNot = 'document_type_not', DocumentTypeNot = 'document_type_not',
StoragePathAny = 'storage_path_any',
StoragePathIs = 'storage_path_is', StoragePathIs = 'storage_path_is',
StoragePathNot = 'storage_path_not', StoragePathNot = 'storage_path_not',
CustomFieldQuery = 'custom_field_query', CustomFieldQuery = 'custom_field_query',
@@ -175,11 +172,8 @@ type TriggerFilterAggregate = {
filter_has_tags: number[] filter_has_tags: number[]
filter_has_all_tags: number[] filter_has_all_tags: number[]
filter_has_not_tags: number[] filter_has_not_tags: number[]
filter_has_any_correspondents: number[]
filter_has_not_correspondents: number[] filter_has_not_correspondents: number[]
filter_has_any_document_types: number[]
filter_has_not_document_types: number[] filter_has_not_document_types: number[]
filter_has_any_storage_paths: number[]
filter_has_not_storage_paths: number[] filter_has_not_storage_paths: number[]
filter_has_correspondent: number | null filter_has_correspondent: number | null
filter_has_document_type: number | null filter_has_document_type: number | null
@@ -225,14 +219,6 @@ const TRIGGER_FILTER_DEFINITIONS: TriggerFilterDefinition[] = [
allowMultipleEntries: false, allowMultipleEntries: false,
allowMultipleValues: true, allowMultipleValues: true,
}, },
{
id: TriggerFilterType.CorrespondentAny,
name: $localize`Has any of these correspondents`,
inputType: 'select',
allowMultipleEntries: false,
allowMultipleValues: true,
selectItems: 'correspondents',
},
{ {
id: TriggerFilterType.CorrespondentIs, id: TriggerFilterType.CorrespondentIs,
name: $localize`Has correspondent`, name: $localize`Has correspondent`,
@@ -257,14 +243,6 @@ const TRIGGER_FILTER_DEFINITIONS: TriggerFilterDefinition[] = [
allowMultipleValues: false, allowMultipleValues: false,
selectItems: 'documentTypes', selectItems: 'documentTypes',
}, },
{
id: TriggerFilterType.DocumentTypeAny,
name: $localize`Has any of these document types`,
inputType: 'select',
allowMultipleEntries: false,
allowMultipleValues: true,
selectItems: 'documentTypes',
},
{ {
id: TriggerFilterType.DocumentTypeNot, id: TriggerFilterType.DocumentTypeNot,
name: $localize`Does not have document types`, name: $localize`Does not have document types`,
@@ -281,14 +259,6 @@ const TRIGGER_FILTER_DEFINITIONS: TriggerFilterDefinition[] = [
allowMultipleValues: false, allowMultipleValues: false,
selectItems: 'storagePaths', selectItems: 'storagePaths',
}, },
{
id: TriggerFilterType.StoragePathAny,
name: $localize`Has any of these storage paths`,
inputType: 'select',
allowMultipleEntries: false,
allowMultipleValues: true,
selectItems: 'storagePaths',
},
{ {
id: TriggerFilterType.StoragePathNot, id: TriggerFilterType.StoragePathNot,
name: $localize`Does not have storage paths`, name: $localize`Does not have storage paths`,
@@ -336,15 +306,6 @@ const FILTER_HANDLERS: Record<TriggerFilterType, FilterHandler> = {
extract: (trigger) => trigger.filter_has_not_tags, extract: (trigger) => trigger.filter_has_not_tags,
hasValue: (value) => Array.isArray(value) && value.length > 0, hasValue: (value) => Array.isArray(value) && value.length > 0,
}, },
[TriggerFilterType.CorrespondentAny]: {
apply: (aggregate, values) => {
aggregate.filter_has_any_correspondents = Array.isArray(values)
? [...values]
: [values]
},
extract: (trigger) => trigger.filter_has_any_correspondents,
hasValue: (value) => Array.isArray(value) && value.length > 0,
},
[TriggerFilterType.CorrespondentIs]: { [TriggerFilterType.CorrespondentIs]: {
apply: (aggregate, values) => { apply: (aggregate, values) => {
aggregate.filter_has_correspondent = Array.isArray(values) aggregate.filter_has_correspondent = Array.isArray(values)
@@ -372,15 +333,6 @@ const FILTER_HANDLERS: Record<TriggerFilterType, FilterHandler> = {
extract: (trigger) => trigger.filter_has_document_type, extract: (trigger) => trigger.filter_has_document_type,
hasValue: (value) => value !== null && value !== undefined, hasValue: (value) => value !== null && value !== undefined,
}, },
[TriggerFilterType.DocumentTypeAny]: {
apply: (aggregate, values) => {
aggregate.filter_has_any_document_types = Array.isArray(values)
? [...values]
: [values]
},
extract: (trigger) => trigger.filter_has_any_document_types,
hasValue: (value) => Array.isArray(value) && value.length > 0,
},
[TriggerFilterType.DocumentTypeNot]: { [TriggerFilterType.DocumentTypeNot]: {
apply: (aggregate, values) => { apply: (aggregate, values) => {
aggregate.filter_has_not_document_types = Array.isArray(values) aggregate.filter_has_not_document_types = Array.isArray(values)
@@ -399,15 +351,6 @@ const FILTER_HANDLERS: Record<TriggerFilterType, FilterHandler> = {
extract: (trigger) => trigger.filter_has_storage_path, extract: (trigger) => trigger.filter_has_storage_path,
hasValue: (value) => value !== null && value !== undefined, hasValue: (value) => value !== null && value !== undefined,
}, },
[TriggerFilterType.StoragePathAny]: {
apply: (aggregate, values) => {
aggregate.filter_has_any_storage_paths = Array.isArray(values)
? [...values]
: [values]
},
extract: (trigger) => trigger.filter_has_any_storage_paths,
hasValue: (value) => Array.isArray(value) && value.length > 0,
},
[TriggerFilterType.StoragePathNot]: { [TriggerFilterType.StoragePathNot]: {
apply: (aggregate, values) => { apply: (aggregate, values) => {
aggregate.filter_has_not_storage_paths = Array.isArray(values) aggregate.filter_has_not_storage_paths = Array.isArray(values)
@@ -699,11 +642,8 @@ export class WorkflowEditDialogComponent
filter_has_tags: [], filter_has_tags: [],
filter_has_all_tags: [], filter_has_all_tags: [],
filter_has_not_tags: [], filter_has_not_tags: [],
filter_has_any_correspondents: [],
filter_has_not_correspondents: [], filter_has_not_correspondents: [],
filter_has_any_document_types: [],
filter_has_not_document_types: [], filter_has_not_document_types: [],
filter_has_any_storage_paths: [],
filter_has_not_storage_paths: [], filter_has_not_storage_paths: [],
filter_has_correspondent: null, filter_has_correspondent: null,
filter_has_document_type: null, filter_has_document_type: null,
@@ -730,16 +670,10 @@ export class WorkflowEditDialogComponent
trigger.filter_has_tags = aggregate.filter_has_tags trigger.filter_has_tags = aggregate.filter_has_tags
trigger.filter_has_all_tags = aggregate.filter_has_all_tags trigger.filter_has_all_tags = aggregate.filter_has_all_tags
trigger.filter_has_not_tags = aggregate.filter_has_not_tags trigger.filter_has_not_tags = aggregate.filter_has_not_tags
trigger.filter_has_any_correspondents =
aggregate.filter_has_any_correspondents
trigger.filter_has_not_correspondents = trigger.filter_has_not_correspondents =
aggregate.filter_has_not_correspondents aggregate.filter_has_not_correspondents
trigger.filter_has_any_document_types =
aggregate.filter_has_any_document_types
trigger.filter_has_not_document_types = trigger.filter_has_not_document_types =
aggregate.filter_has_not_document_types aggregate.filter_has_not_document_types
trigger.filter_has_any_storage_paths =
aggregate.filter_has_any_storage_paths
trigger.filter_has_not_storage_paths = trigger.filter_has_not_storage_paths =
aggregate.filter_has_not_storage_paths aggregate.filter_has_not_storage_paths
trigger.filter_has_correspondent = trigger.filter_has_correspondent =
@@ -922,11 +856,8 @@ export class WorkflowEditDialogComponent
case TriggerFilterType.TagsAny: case TriggerFilterType.TagsAny:
case TriggerFilterType.TagsAll: case TriggerFilterType.TagsAll:
case TriggerFilterType.TagsNone: case TriggerFilterType.TagsNone:
case TriggerFilterType.CorrespondentAny:
case TriggerFilterType.CorrespondentNot: case TriggerFilterType.CorrespondentNot:
case TriggerFilterType.DocumentTypeAny:
case TriggerFilterType.DocumentTypeNot: case TriggerFilterType.DocumentTypeNot:
case TriggerFilterType.StoragePathAny:
case TriggerFilterType.StoragePathNot: case TriggerFilterType.StoragePathNot:
return true return true
default: default:
@@ -1248,11 +1179,8 @@ export class WorkflowEditDialogComponent
filter_has_tags: [], filter_has_tags: [],
filter_has_all_tags: [], filter_has_all_tags: [],
filter_has_not_tags: [], filter_has_not_tags: [],
filter_has_any_correspondents: [],
filter_has_not_correspondents: [], filter_has_not_correspondents: [],
filter_has_any_document_types: [],
filter_has_not_document_types: [], filter_has_not_document_types: [],
filter_has_any_storage_paths: [],
filter_has_not_storage_paths: [], filter_has_not_storage_paths: [],
filter_custom_field_query: null, filter_custom_field_query: null,
filter_has_correspondent: null, filter_has_correspondent: null,

View File

@@ -370,6 +370,37 @@
</ng-template> </ng-template>
</li> </li>
} }
@if (document?.duplicate_documents?.length) {
<li [ngbNavItem]="DocumentDetailNavIDs.Duplicates">
<a class="text-nowrap" ngbNavLink i18n>
Duplicates
<span class="badge text-bg-secondary ms-1">{{ document.duplicate_documents.length }}</span>
</a>
<ng-template ngbNavContent>
<div class="d-flex flex-column gap-2">
<div class="fst-italic" i18n>Duplicate documents detected:</div>
<div class="list-group">
@for (duplicate of document.duplicate_documents; track duplicate.id) {
<a
class="list-group-item list-group-item-action d-flex justify-content-between align-items-center"
[routerLink]="['/documents', duplicate.id, 'details']"
[class.disabled]="duplicate.deleted_at"
>
<span class="d-flex align-items-center gap-2">
<span>{{ duplicate.title || ('#' + duplicate.id) }}</span>
@if (duplicate.deleted_at) {
<span class="badge text-bg-secondary" i18n>In trash</span>
}
</span>
<span class="text-secondary">#{{ duplicate.id }}</span>
</a>
}
</div>
</div>
</ng-template>
</li>
}
</ul> </ul>
<div [ngbNavOutlet]="nav" class="mt-3"></div> <div [ngbNavOutlet]="nav" class="mt-3"></div>

View File

@@ -301,16 +301,16 @@ describe('DocumentDetailComponent', () => {
.spyOn(openDocumentsService, 'openDocument') .spyOn(openDocumentsService, 'openDocument')
.mockReturnValueOnce(of(true)) .mockReturnValueOnce(of(true))
fixture.detectChanges() fixture.detectChanges()
expect(component.activeNavID).toEqual(5) // DocumentDetailNavIDs.Notes expect(component.activeNavID).toEqual(component.DocumentDetailNavIDs.Notes)
}) })
it('should change url on tab switch', () => { it('should change url on tab switch', () => {
initNormally() initNormally()
const navigateSpy = jest.spyOn(router, 'navigate') const navigateSpy = jest.spyOn(router, 'navigate')
component.nav.select(5) component.nav.select(component.DocumentDetailNavIDs.Notes)
component.nav.navChange.next({ component.nav.navChange.next({
activeId: 1, activeId: 1,
nextId: 5, nextId: component.DocumentDetailNavIDs.Notes,
preventDefault: () => {}, preventDefault: () => {},
}) })
fixture.detectChanges() fixture.detectChanges()
@@ -352,6 +352,18 @@ describe('DocumentDetailComponent', () => {
expect(component.document).toEqual(doc) expect(component.document).toEqual(doc)
}) })
it('should fall back to details tab when duplicates tab is active but no duplicates', () => {
initNormally()
component.activeNavID = component.DocumentDetailNavIDs.Duplicates
const noDupDoc = { ...doc, duplicate_documents: [] }
component.updateComponent(noDupDoc)
expect(component.activeNavID).toEqual(
component.DocumentDetailNavIDs.Details
)
})
it('should load already-opened document via param', () => { it('should load already-opened document via param', () => {
initNormally() initNormally()
jest.spyOn(documentService, 'get').mockReturnValueOnce(of(doc)) jest.spyOn(documentService, 'get').mockReturnValueOnce(of(doc))
@@ -367,6 +379,38 @@ describe('DocumentDetailComponent', () => {
expect(component.document).toEqual(doc) expect(component.document).toEqual(doc)
}) })
it('should update cached open document duplicates when reloading an open doc', () => {
const openDoc = { ...doc, duplicate_documents: [{ id: 1, title: 'Old' }] }
const updatedDuplicates = [
{ id: 2, title: 'Newer duplicate', deleted_at: null },
]
jest
.spyOn(activatedRoute, 'paramMap', 'get')
.mockReturnValue(of(convertToParamMap({ id: 3, section: 'details' })))
jest.spyOn(documentService, 'get').mockReturnValue(
of({
...doc,
modified: new Date('2024-01-02T00:00:00Z'),
duplicate_documents: updatedDuplicates,
})
)
jest.spyOn(openDocumentsService, 'getOpenDocument').mockReturnValue(openDoc)
const saveSpy = jest.spyOn(openDocumentsService, 'save')
jest.spyOn(openDocumentsService, 'openDocument').mockReturnValue(of(true))
jest.spyOn(customFieldsService, 'listAll').mockReturnValue(
of({
count: customFields.length,
all: customFields.map((f) => f.id),
results: customFields,
})
)
fixture.detectChanges()
expect(openDoc.duplicate_documents).toEqual(updatedDuplicates)
expect(saveSpy).toHaveBeenCalled()
})
it('should disable form if user cannot edit', () => { it('should disable form if user cannot edit', () => {
currentUserHasObjectPermissions = false currentUserHasObjectPermissions = false
initNormally() initNormally()

View File

@@ -8,7 +8,7 @@ import {
FormsModule, FormsModule,
ReactiveFormsModule, ReactiveFormsModule,
} from '@angular/forms' } from '@angular/forms'
import { ActivatedRoute, Router } from '@angular/router' import { ActivatedRoute, Router, RouterModule } from '@angular/router'
import { import {
NgbDateStruct, NgbDateStruct,
NgbDropdownModule, NgbDropdownModule,
@@ -124,6 +124,7 @@ enum DocumentDetailNavIDs {
Notes = 5, Notes = 5,
Permissions = 6, Permissions = 6,
History = 7, History = 7,
Duplicates = 8,
} }
enum ContentRenderType { enum ContentRenderType {
@@ -181,6 +182,7 @@ export enum ZoomSetting {
NgxBootstrapIconsModule, NgxBootstrapIconsModule,
PdfViewerModule, PdfViewerModule,
TextAreaComponent, TextAreaComponent,
RouterModule,
], ],
}) })
export class DocumentDetailComponent export class DocumentDetailComponent
@@ -454,6 +456,11 @@ export class DocumentDetailComponent
const openDocument = this.openDocumentService.getOpenDocument( const openDocument = this.openDocumentService.getOpenDocument(
this.documentId this.documentId
) )
// update duplicate documents if present
if (openDocument && doc?.duplicate_documents) {
openDocument.duplicate_documents = doc.duplicate_documents
this.openDocumentService.save()
}
const useDoc = openDocument || doc const useDoc = openDocument || doc
if (openDocument) { if (openDocument) {
if ( if (
@@ -704,6 +711,13 @@ export class DocumentDetailComponent
} }
this.title = this.documentTitlePipe.transform(doc.title) this.title = this.documentTitlePipe.transform(doc.title)
this.prepareForm(doc) this.prepareForm(doc)
if (
this.activeNavID === DocumentDetailNavIDs.Duplicates &&
!doc?.duplicate_documents?.length
) {
this.activeNavID = DocumentDetailNavIDs.Details
}
} }
get customFieldFormFields(): FormArray { get customFieldFormFields(): FormArray {

View File

@@ -159,6 +159,8 @@ export interface Document extends ObjectWithPermissions {
page_count?: number page_count?: number
duplicate_documents?: Document[]
// Frontend only // Frontend only
__changedFields?: string[] __changedFields?: string[]
} }

View File

@@ -1,3 +1,4 @@
import { Document } from './document'
import { ObjectWithId } from './object-with-id' import { ObjectWithId } from './object-with-id'
export enum PaperlessTaskType { export enum PaperlessTaskType {
@@ -42,5 +43,7 @@ export interface PaperlessTask extends ObjectWithId {
related_document?: number related_document?: number
duplicate_documents?: Document[]
owner?: number owner?: number
} }

View File

@@ -44,16 +44,10 @@ export interface WorkflowTrigger extends ObjectWithId {
filter_has_not_tags?: number[] // Tag.id[] filter_has_not_tags?: number[] // Tag.id[]
filter_has_any_correspondents?: number[] // Correspondent.id[]
filter_has_not_correspondents?: number[] // Correspondent.id[] filter_has_not_correspondents?: number[] // Correspondent.id[]
filter_has_any_document_types?: number[] // DocumentType.id[]
filter_has_not_document_types?: number[] // DocumentType.id[] filter_has_not_document_types?: number[] // DocumentType.id[]
filter_has_any_storage_paths?: number[] // StoragePath.id[]
filter_has_not_storage_paths?: number[] // StoragePath.id[] filter_has_not_storage_paths?: number[] // StoragePath.id[]
filter_custom_field_query?: string filter_custom_field_query?: string

View File

@@ -779,19 +779,45 @@ class ConsumerPreflightPlugin(
Q(checksum=checksum) | Q(archive_checksum=checksum), Q(checksum=checksum) | Q(archive_checksum=checksum),
) )
if existing_doc.exists(): if existing_doc.exists():
msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS existing_doc = existing_doc.order_by("-created")
log_msg = f"Not consuming {self.filename}: It is a duplicate of {existing_doc.get().title} (#{existing_doc.get().pk})." duplicates_in_trash = existing_doc.filter(deleted_at__isnull=False)
log_msg = (
f"Consuming duplicate {self.filename}: "
f"{existing_doc.count()} existing document(s) share the same content."
)
if existing_doc.first().deleted_at is not None: if duplicates_in_trash.exists():
msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH log_msg += " Note: at least one existing document is in the trash."
log_msg += " Note: existing document is in the trash."
self.log.warning(log_msg)
if settings.CONSUMER_DELETE_DUPLICATES: if settings.CONSUMER_DELETE_DUPLICATES:
duplicate = existing_doc.first()
duplicate_label = (
duplicate.title
or duplicate.original_filename
or (Path(duplicate.filename).name if duplicate.filename else None)
or str(duplicate.pk)
)
Path(self.input_doc.original_file).unlink() Path(self.input_doc.original_file).unlink()
self._fail(
msg, failure_msg = (
log_msg, f"Not consuming {self.filename}: "
) f"It is a duplicate of {duplicate_label} (#{duplicate.pk})"
)
status_msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
if duplicates_in_trash.exists():
status_msg = (
ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
)
failure_msg += " Note: existing document is in the trash."
self._fail(
status_msg,
failure_msg,
)
def pre_check_directories(self): def pre_check_directories(self):
""" """

View File

@@ -403,18 +403,6 @@ def existing_document_matches_workflow(
f"Document tags {list(document.tags.all())} include excluded tags {list(trigger_has_not_tags_qs)}", f"Document tags {list(document.tags.all())} include excluded tags {list(trigger_has_not_tags_qs)}",
) )
allowed_correspondent_ids = set(
trigger.filter_has_any_correspondents.values_list("id", flat=True),
)
if (
allowed_correspondent_ids
and document.correspondent_id not in allowed_correspondent_ids
):
return (
False,
f"Document correspondent {document.correspondent} is not one of {list(trigger.filter_has_any_correspondents.all())}",
)
# Document correspondent vs trigger has_correspondent # Document correspondent vs trigger has_correspondent
if ( if (
trigger.filter_has_correspondent_id is not None trigger.filter_has_correspondent_id is not None
@@ -436,17 +424,6 @@ def existing_document_matches_workflow(
f"Document correspondent {document.correspondent} is excluded by {list(trigger.filter_has_not_correspondents.all())}", f"Document correspondent {document.correspondent} is excluded by {list(trigger.filter_has_not_correspondents.all())}",
) )
allowed_document_type_ids = set(
trigger.filter_has_any_document_types.values_list("id", flat=True),
)
if allowed_document_type_ids and (
document.document_type_id not in allowed_document_type_ids
):
return (
False,
f"Document doc type {document.document_type} is not one of {list(trigger.filter_has_any_document_types.all())}",
)
# Document document_type vs trigger has_document_type # Document document_type vs trigger has_document_type
if ( if (
trigger.filter_has_document_type_id is not None trigger.filter_has_document_type_id is not None
@@ -468,17 +445,6 @@ def existing_document_matches_workflow(
f"Document doc type {document.document_type} is excluded by {list(trigger.filter_has_not_document_types.all())}", f"Document doc type {document.document_type} is excluded by {list(trigger.filter_has_not_document_types.all())}",
) )
allowed_storage_path_ids = set(
trigger.filter_has_any_storage_paths.values_list("id", flat=True),
)
if allowed_storage_path_ids and (
document.storage_path_id not in allowed_storage_path_ids
):
return (
False,
f"Document storage path {document.storage_path} is not one of {list(trigger.filter_has_any_storage_paths.all())}",
)
# Document storage_path vs trigger has_storage_path # Document storage_path vs trigger has_storage_path
if ( if (
trigger.filter_has_storage_path_id is not None trigger.filter_has_storage_path_id is not None
@@ -566,10 +532,6 @@ def prefilter_documents_by_workflowtrigger(
# Correspondent, DocumentType, etc. filtering # Correspondent, DocumentType, etc. filtering
if trigger.filter_has_any_correspondents.exists():
documents = documents.filter(
correspondent__in=trigger.filter_has_any_correspondents.all(),
)
if trigger.filter_has_correspondent is not None: if trigger.filter_has_correspondent is not None:
documents = documents.filter( documents = documents.filter(
correspondent=trigger.filter_has_correspondent, correspondent=trigger.filter_has_correspondent,
@@ -579,10 +541,6 @@ def prefilter_documents_by_workflowtrigger(
correspondent__in=trigger.filter_has_not_correspondents.all(), correspondent__in=trigger.filter_has_not_correspondents.all(),
) )
if trigger.filter_has_any_document_types.exists():
documents = documents.filter(
document_type__in=trigger.filter_has_any_document_types.all(),
)
if trigger.filter_has_document_type is not None: if trigger.filter_has_document_type is not None:
documents = documents.filter( documents = documents.filter(
document_type=trigger.filter_has_document_type, document_type=trigger.filter_has_document_type,
@@ -592,10 +550,6 @@ def prefilter_documents_by_workflowtrigger(
document_type__in=trigger.filter_has_not_document_types.all(), document_type__in=trigger.filter_has_not_document_types.all(),
) )
if trigger.filter_has_any_storage_paths.exists():
documents = documents.filter(
storage_path__in=trigger.filter_has_any_storage_paths.all(),
)
if trigger.filter_has_storage_path is not None: if trigger.filter_has_storage_path is not None:
documents = documents.filter( documents = documents.filter(
storage_path=trigger.filter_has_storage_path, storage_path=trigger.filter_has_storage_path,
@@ -650,11 +604,8 @@ def document_matches_workflow(
"filter_has_tags", "filter_has_tags",
"filter_has_all_tags", "filter_has_all_tags",
"filter_has_not_tags", "filter_has_not_tags",
"filter_has_any_document_types",
"filter_has_not_document_types", "filter_has_not_document_types",
"filter_has_any_correspondents",
"filter_has_not_correspondents", "filter_has_not_correspondents",
"filter_has_any_storage_paths",
"filter_has_not_storage_paths", "filter_has_not_storage_paths",
) )
) )

View File

@@ -0,0 +1,23 @@
# Generated by Django 5.2.7 on 2026-01-14 17:45
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0004_remove_document_storage_type"),
]
operations = [
migrations.AlterField(
model_name="document",
name="checksum",
field=models.CharField(
editable=False,
max_length=32,
verbose_name="checksum",
help_text="The checksum of the original document.",
),
),
]

View File

@@ -1,43 +0,0 @@
# Generated by Django 5.2.7 on 2025-12-17 22:25
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "0004_remove_document_storage_type"),
]
operations = [
migrations.AddField(
model_name="workflowtrigger",
name="filter_has_any_correspondents",
field=models.ManyToManyField(
blank=True,
related_name="workflowtriggers_has_any_correspondent",
to="documents.correspondent",
verbose_name="has one of these correspondents",
),
),
migrations.AddField(
model_name="workflowtrigger",
name="filter_has_any_document_types",
field=models.ManyToManyField(
blank=True,
related_name="workflowtriggers_has_any_document_type",
to="documents.documenttype",
verbose_name="has one of these document types",
),
),
migrations.AddField(
model_name="workflowtrigger",
name="filter_has_any_storage_paths",
field=models.ManyToManyField(
blank=True,
related_name="workflowtriggers_has_any_storage_path",
to="documents.storagepath",
verbose_name="has one of these storage paths",
),
),
]

View File

@@ -205,7 +205,6 @@ class Document(SoftDeleteModel, ModelWithOwner):
_("checksum"), _("checksum"),
max_length=32, max_length=32,
editable=False, editable=False,
unique=True,
help_text=_("The checksum of the original document."), help_text=_("The checksum of the original document."),
) )
@@ -1066,13 +1065,6 @@ class WorkflowTrigger(models.Model):
verbose_name=_("has this document type"), verbose_name=_("has this document type"),
) )
filter_has_any_document_types = models.ManyToManyField(
DocumentType,
blank=True,
related_name="workflowtriggers_has_any_document_type",
verbose_name=_("has one of these document types"),
)
filter_has_not_document_types = models.ManyToManyField( filter_has_not_document_types = models.ManyToManyField(
DocumentType, DocumentType,
blank=True, blank=True,
@@ -1095,13 +1087,6 @@ class WorkflowTrigger(models.Model):
verbose_name=_("does not have these correspondent(s)"), verbose_name=_("does not have these correspondent(s)"),
) )
filter_has_any_correspondents = models.ManyToManyField(
Correspondent,
blank=True,
related_name="workflowtriggers_has_any_correspondent",
verbose_name=_("has one of these correspondents"),
)
filter_has_storage_path = models.ForeignKey( filter_has_storage_path = models.ForeignKey(
StoragePath, StoragePath,
null=True, null=True,
@@ -1110,13 +1095,6 @@ class WorkflowTrigger(models.Model):
verbose_name=_("has this storage path"), verbose_name=_("has this storage path"),
) )
filter_has_any_storage_paths = models.ManyToManyField(
StoragePath,
blank=True,
related_name="workflowtriggers_has_any_storage_path",
verbose_name=_("has one of these storage paths"),
)
filter_has_not_storage_paths = models.ManyToManyField( filter_has_not_storage_paths = models.ManyToManyField(
StoragePath, StoragePath,
blank=True, blank=True,

View File

@@ -148,13 +148,29 @@ def get_document_count_filter_for_user(user):
) )
def get_objects_for_user_owner_aware(user, perms, Model) -> QuerySet: def get_objects_for_user_owner_aware(
objects_owned = Model.objects.filter(owner=user) user,
objects_unowned = Model.objects.filter(owner__isnull=True) perms,
Model,
*,
include_deleted=False,
) -> QuerySet:
"""
Returns objects the user owns, are unowned, or has explicit perms.
When include_deleted is True, soft-deleted items are also included.
"""
manager = (
Model.global_objects
if include_deleted and hasattr(Model, "global_objects")
else Model.objects
)
objects_owned = manager.filter(owner=user)
objects_unowned = manager.filter(owner__isnull=True)
objects_with_perms = get_objects_for_user( objects_with_perms = get_objects_for_user(
user=user, user=user,
perms=perms, perms=perms,
klass=Model, klass=manager.all(),
accept_global_perms=False, accept_global_perms=False,
) )
return objects_owned | objects_unowned | objects_with_perms return objects_owned | objects_unowned | objects_with_perms

View File

@@ -23,6 +23,7 @@ from django.core.validators import MinValueValidator
from django.core.validators import RegexValidator from django.core.validators import RegexValidator
from django.core.validators import integer_validator from django.core.validators import integer_validator
from django.db.models import Count from django.db.models import Count
from django.db.models import Q
from django.db.models.functions import Lower from django.db.models.functions import Lower
from django.utils.crypto import get_random_string from django.utils.crypto import get_random_string
from django.utils.dateparse import parse_datetime from django.utils.dateparse import parse_datetime
@@ -72,6 +73,7 @@ from documents.models import WorkflowTrigger
from documents.parsers import is_mime_type_supported from documents.parsers import is_mime_type_supported
from documents.permissions import get_document_count_filter_for_user from documents.permissions import get_document_count_filter_for_user
from documents.permissions import get_groups_with_only_permission from documents.permissions import get_groups_with_only_permission
from documents.permissions import get_objects_for_user_owner_aware
from documents.permissions import set_permissions_for_object from documents.permissions import set_permissions_for_object
from documents.regex import validate_regex_pattern from documents.regex import validate_regex_pattern
from documents.templating.filepath import validate_filepath_template_and_render from documents.templating.filepath import validate_filepath_template_and_render
@@ -1014,6 +1016,29 @@ class NotesSerializer(serializers.ModelSerializer):
return ret return ret
def _get_viewable_duplicates(document: Document, user: User | None):
checksums = {document.checksum}
if document.archive_checksum:
checksums.add(document.archive_checksum)
duplicates = Document.global_objects.filter(
Q(checksum__in=checksums) | Q(archive_checksum__in=checksums),
).exclude(pk=document.pk)
duplicates = duplicates.order_by("-created")
allowed = get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
include_deleted=True,
)
return duplicates.filter(id__in=allowed.values_list("id", flat=True))
class DuplicateDocumentSummarySerializer(serializers.Serializer):
id = serializers.IntegerField()
title = serializers.CharField()
deleted_at = serializers.DateTimeField(allow_null=True)
@extend_schema_serializer( @extend_schema_serializer(
deprecate_fields=["created_date"], deprecate_fields=["created_date"],
) )
@@ -1031,6 +1056,7 @@ class DocumentSerializer(
archived_file_name = SerializerMethodField() archived_file_name = SerializerMethodField()
created_date = serializers.DateField(required=False) created_date = serializers.DateField(required=False)
page_count = SerializerMethodField() page_count = SerializerMethodField()
duplicate_documents = SerializerMethodField()
notes = NotesSerializer(many=True, required=False, read_only=True) notes = NotesSerializer(many=True, required=False, read_only=True)
@@ -1056,6 +1082,16 @@ class DocumentSerializer(
def get_page_count(self, obj) -> int | None: def get_page_count(self, obj) -> int | None:
return obj.page_count return obj.page_count
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
def get_duplicate_documents(self, obj):
view = self.context.get("view")
if view and getattr(view, "action", None) != "retrieve":
return []
request = self.context.get("request")
user = request.user if request else None
duplicates = _get_viewable_duplicates(obj, user)
return list(duplicates.values("id", "title", "deleted_at"))
def get_original_file_name(self, obj) -> str | None: def get_original_file_name(self, obj) -> str | None:
return obj.original_filename return obj.original_filename
@@ -1233,6 +1269,7 @@ class DocumentSerializer(
"archive_serial_number", "archive_serial_number",
"original_file_name", "original_file_name",
"archived_file_name", "archived_file_name",
"duplicate_documents",
"owner", "owner",
"permissions", "permissions",
"user_can_change", "user_can_change",
@@ -2094,10 +2131,12 @@ class TasksViewSerializer(OwnedObjectSerializer):
"result", "result",
"acknowledged", "acknowledged",
"related_document", "related_document",
"duplicate_documents",
"owner", "owner",
) )
related_document = serializers.SerializerMethodField() related_document = serializers.SerializerMethodField()
duplicate_documents = serializers.SerializerMethodField()
created_doc_re = re.compile(r"New document id (\d+) created") created_doc_re = re.compile(r"New document id (\d+) created")
duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)") duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")
@@ -2122,6 +2161,17 @@ class TasksViewSerializer(OwnedObjectSerializer):
return result return result
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
def get_duplicate_documents(self, obj):
related_document = self.get_related_document(obj)
request = self.context.get("request")
user = request.user if request else None
document = Document.global_objects.filter(pk=related_document).first()
if not related_document or not user or not document:
return []
duplicates = _get_viewable_duplicates(document, user)
return list(duplicates.values("id", "title", "deleted_at"))
class RunTaskViewSerializer(serializers.Serializer): class RunTaskViewSerializer(serializers.Serializer):
task_name = serializers.ChoiceField( task_name = serializers.ChoiceField(
@@ -2299,11 +2349,8 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer):
"filter_has_all_tags", "filter_has_all_tags",
"filter_has_not_tags", "filter_has_not_tags",
"filter_custom_field_query", "filter_custom_field_query",
"filter_has_any_correspondents",
"filter_has_not_correspondents", "filter_has_not_correspondents",
"filter_has_any_document_types",
"filter_has_not_document_types", "filter_has_not_document_types",
"filter_has_any_storage_paths",
"filter_has_not_storage_paths", "filter_has_not_storage_paths",
"filter_has_correspondent", "filter_has_correspondent",
"filter_has_document_type", "filter_has_document_type",
@@ -2541,26 +2588,14 @@ class WorkflowSerializer(serializers.ModelSerializer):
filter_has_tags = trigger.pop("filter_has_tags", None) filter_has_tags = trigger.pop("filter_has_tags", None)
filter_has_all_tags = trigger.pop("filter_has_all_tags", None) filter_has_all_tags = trigger.pop("filter_has_all_tags", None)
filter_has_not_tags = trigger.pop("filter_has_not_tags", None) filter_has_not_tags = trigger.pop("filter_has_not_tags", None)
filter_has_any_correspondents = trigger.pop(
"filter_has_any_correspondents",
None,
)
filter_has_not_correspondents = trigger.pop( filter_has_not_correspondents = trigger.pop(
"filter_has_not_correspondents", "filter_has_not_correspondents",
None, None,
) )
filter_has_any_document_types = trigger.pop(
"filter_has_any_document_types",
None,
)
filter_has_not_document_types = trigger.pop( filter_has_not_document_types = trigger.pop(
"filter_has_not_document_types", "filter_has_not_document_types",
None, None,
) )
filter_has_any_storage_paths = trigger.pop(
"filter_has_any_storage_paths",
None,
)
filter_has_not_storage_paths = trigger.pop( filter_has_not_storage_paths = trigger.pop(
"filter_has_not_storage_paths", "filter_has_not_storage_paths",
None, None,
@@ -2577,26 +2612,14 @@ class WorkflowSerializer(serializers.ModelSerializer):
trigger_instance.filter_has_all_tags.set(filter_has_all_tags) trigger_instance.filter_has_all_tags.set(filter_has_all_tags)
if filter_has_not_tags is not None: if filter_has_not_tags is not None:
trigger_instance.filter_has_not_tags.set(filter_has_not_tags) trigger_instance.filter_has_not_tags.set(filter_has_not_tags)
if filter_has_any_correspondents is not None:
trigger_instance.filter_has_any_correspondents.set(
filter_has_any_correspondents,
)
if filter_has_not_correspondents is not None: if filter_has_not_correspondents is not None:
trigger_instance.filter_has_not_correspondents.set( trigger_instance.filter_has_not_correspondents.set(
filter_has_not_correspondents, filter_has_not_correspondents,
) )
if filter_has_any_document_types is not None:
trigger_instance.filter_has_any_document_types.set(
filter_has_any_document_types,
)
if filter_has_not_document_types is not None: if filter_has_not_document_types is not None:
trigger_instance.filter_has_not_document_types.set( trigger_instance.filter_has_not_document_types.set(
filter_has_not_document_types, filter_has_not_document_types,
) )
if filter_has_any_storage_paths is not None:
trigger_instance.filter_has_any_storage_paths.set(
filter_has_any_storage_paths,
)
if filter_has_not_storage_paths is not None: if filter_has_not_storage_paths is not None:
trigger_instance.filter_has_not_storage_paths.set( trigger_instance.filter_has_not_storage_paths.set(
filter_has_not_storage_paths, filter_has_not_storage_paths,

View File

@@ -7,6 +7,7 @@ from django.contrib.auth.models import User
from rest_framework import status from rest_framework import status
from rest_framework.test import APITestCase from rest_framework.test import APITestCase
from documents.models import Document
from documents.models import PaperlessTask from documents.models import PaperlessTask
from documents.tests.utils import DirectoriesMixin from documents.tests.utils import DirectoriesMixin
from documents.views import TasksViewSet from documents.views import TasksViewSet
@@ -258,7 +259,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
task_id=str(uuid.uuid4()), task_id=str(uuid.uuid4()),
task_file_name="task_one.pdf", task_file_name="task_one.pdf",
status=celery.states.FAILURE, status=celery.states.FAILURE,
result="test.pdf: Not consuming test.pdf: It is a duplicate.", result="test.pdf: Unexpected error during ingestion.",
) )
response = self.client.get(self.ENDPOINT) response = self.client.get(self.ENDPOINT)
@@ -270,7 +271,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
self.assertEqual( self.assertEqual(
returned_data["result"], returned_data["result"],
"test.pdf: Not consuming test.pdf: It is a duplicate.", "test.pdf: Unexpected error during ingestion.",
) )
def test_task_name_webui(self): def test_task_name_webui(self):
@@ -325,20 +326,34 @@ class TestTasks(DirectoriesMixin, APITestCase):
self.assertEqual(returned_data["task_file_name"], "anothertest.pdf") self.assertEqual(returned_data["task_file_name"], "anothertest.pdf")
def test_task_result_failed_duplicate_includes_related_doc(self): def test_task_result_duplicate_warning_includes_count(self):
""" """
GIVEN: GIVEN:
- A celery task failed with a duplicate error - A celery task succeeds, but a duplicate exists
WHEN: WHEN:
- API call is made to get tasks - API call is made to get tasks
THEN: THEN:
- The returned data includes a related document link - The returned data includes duplicate warning metadata
""" """
checksum = "duplicate-checksum"
Document.objects.create(
title="Existing",
content="",
mime_type="application/pdf",
checksum=checksum,
)
created_doc = Document.objects.create(
title="Created",
content="",
mime_type="application/pdf",
checksum=checksum,
archive_checksum="another-checksum",
)
PaperlessTask.objects.create( PaperlessTask.objects.create(
task_id=str(uuid.uuid4()), task_id=str(uuid.uuid4()),
task_file_name="task_one.pdf", task_file_name="task_one.pdf",
status=celery.states.FAILURE, status=celery.states.SUCCESS,
result="Not consuming task_one.pdf: It is a duplicate of task_one_existing.pdf (#1234).", result=f"Success. New document id {created_doc.pk} created",
) )
response = self.client.get(self.ENDPOINT) response = self.client.get(self.ENDPOINT)
@@ -348,7 +363,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
returned_data = response.data[0] returned_data = response.data[0]
self.assertEqual(returned_data["related_document"], "1234") self.assertEqual(returned_data["related_document"], str(created_doc.pk))
def test_run_train_classifier_task(self): def test_run_train_classifier_task(self):
""" """

View File

@@ -186,11 +186,8 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
"filter_has_tags": [self.t1.id], "filter_has_tags": [self.t1.id],
"filter_has_all_tags": [self.t2.id], "filter_has_all_tags": [self.t2.id],
"filter_has_not_tags": [self.t3.id], "filter_has_not_tags": [self.t3.id],
"filter_has_any_correspondents": [self.c.id],
"filter_has_not_correspondents": [self.c2.id], "filter_has_not_correspondents": [self.c2.id],
"filter_has_any_document_types": [self.dt.id],
"filter_has_not_document_types": [self.dt2.id], "filter_has_not_document_types": [self.dt2.id],
"filter_has_any_storage_paths": [self.sp.id],
"filter_has_not_storage_paths": [self.sp2.id], "filter_has_not_storage_paths": [self.sp2.id],
"filter_custom_field_query": json.dumps( "filter_custom_field_query": json.dumps(
[ [
@@ -251,26 +248,14 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
set(trigger.filter_has_not_tags.values_list("id", flat=True)), set(trigger.filter_has_not_tags.values_list("id", flat=True)),
{self.t3.id}, {self.t3.id},
) )
self.assertSetEqual(
set(trigger.filter_has_any_correspondents.values_list("id", flat=True)),
{self.c.id},
)
self.assertSetEqual( self.assertSetEqual(
set(trigger.filter_has_not_correspondents.values_list("id", flat=True)), set(trigger.filter_has_not_correspondents.values_list("id", flat=True)),
{self.c2.id}, {self.c2.id},
) )
self.assertSetEqual(
set(trigger.filter_has_any_document_types.values_list("id", flat=True)),
{self.dt.id},
)
self.assertSetEqual( self.assertSetEqual(
set(trigger.filter_has_not_document_types.values_list("id", flat=True)), set(trigger.filter_has_not_document_types.values_list("id", flat=True)),
{self.dt2.id}, {self.dt2.id},
) )
self.assertSetEqual(
set(trigger.filter_has_any_storage_paths.values_list("id", flat=True)),
{self.sp.id},
)
self.assertSetEqual( self.assertSetEqual(
set(trigger.filter_has_not_storage_paths.values_list("id", flat=True)), set(trigger.filter_has_not_storage_paths.values_list("id", flat=True)),
{self.sp2.id}, {self.sp2.id},
@@ -434,11 +419,8 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
"filter_has_tags": [self.t1.id], "filter_has_tags": [self.t1.id],
"filter_has_all_tags": [self.t2.id], "filter_has_all_tags": [self.t2.id],
"filter_has_not_tags": [self.t3.id], "filter_has_not_tags": [self.t3.id],
"filter_has_any_correspondents": [self.c.id],
"filter_has_not_correspondents": [self.c2.id], "filter_has_not_correspondents": [self.c2.id],
"filter_has_any_document_types": [self.dt.id],
"filter_has_not_document_types": [self.dt2.id], "filter_has_not_document_types": [self.dt2.id],
"filter_has_any_storage_paths": [self.sp.id],
"filter_has_not_storage_paths": [self.sp2.id], "filter_has_not_storage_paths": [self.sp2.id],
"filter_custom_field_query": json.dumps( "filter_custom_field_query": json.dumps(
["AND", [[self.cf1.id, "exact", "value"]]], ["AND", [[self.cf1.id, "exact", "value"]]],
@@ -468,26 +450,14 @@ class TestApiWorkflows(DirectoriesMixin, APITestCase):
workflow.triggers.first().filter_has_not_tags.first(), workflow.triggers.first().filter_has_not_tags.first(),
self.t3, self.t3,
) )
self.assertEqual(
workflow.triggers.first().filter_has_any_correspondents.first(),
self.c,
)
self.assertEqual( self.assertEqual(
workflow.triggers.first().filter_has_not_correspondents.first(), workflow.triggers.first().filter_has_not_correspondents.first(),
self.c2, self.c2,
) )
self.assertEqual(
workflow.triggers.first().filter_has_any_document_types.first(),
self.dt,
)
self.assertEqual( self.assertEqual(
workflow.triggers.first().filter_has_not_document_types.first(), workflow.triggers.first().filter_has_not_document_types.first(),
self.dt2, self.dt2,
) )
self.assertEqual(
workflow.triggers.first().filter_has_any_storage_paths.first(),
self.sp,
)
self.assertEqual( self.assertEqual(
workflow.triggers.first().filter_has_not_storage_paths.first(), workflow.triggers.first().filter_has_not_storage_paths.first(),
self.sp2, self.sp2,

View File

@@ -485,21 +485,21 @@ class TestConsumer(
with self.get_consumer(self.get_test_file()) as consumer: with self.get_consumer(self.get_test_file()) as consumer:
consumer.run() consumer.run()
with self.assertRaisesMessage(ConsumerError, "It is a duplicate"): with self.get_consumer(self.get_test_file()) as consumer:
with self.get_consumer(self.get_test_file()) as consumer: consumer.run()
consumer.run()
self._assert_first_last_send_progress(last_status="FAILED") self.assertEqual(Document.objects.count(), 2)
self._assert_first_last_send_progress()
def testDuplicates2(self): def testDuplicates2(self):
with self.get_consumer(self.get_test_file()) as consumer: with self.get_consumer(self.get_test_file()) as consumer:
consumer.run() consumer.run()
with self.assertRaisesMessage(ConsumerError, "It is a duplicate"): with self.get_consumer(self.get_test_archive_file()) as consumer:
with self.get_consumer(self.get_test_archive_file()) as consumer: consumer.run()
consumer.run()
self._assert_first_last_send_progress(last_status="FAILED") self.assertEqual(Document.objects.count(), 2)
self._assert_first_last_send_progress()
def testDuplicates3(self): def testDuplicates3(self):
with self.get_consumer(self.get_test_archive_file()) as consumer: with self.get_consumer(self.get_test_archive_file()) as consumer:
@@ -513,9 +513,10 @@ class TestConsumer(
Document.objects.all().delete() Document.objects.all().delete()
with self.assertRaisesMessage(ConsumerError, "document is in the trash"): with self.get_consumer(self.get_test_file()) as consumer:
with self.get_consumer(self.get_test_file()) as consumer: consumer.run()
consumer.run()
self.assertEqual(Document.objects.count(), 1)
def testAsnExists(self): def testAsnExists(self):
with self.get_consumer( with self.get_consumer(
@@ -718,12 +719,45 @@ class TestConsumer(
dst = self.get_test_file() dst = self.get_test_file()
self.assertIsFile(dst) self.assertIsFile(dst)
with self.assertRaises(ConsumerError): expected_message = (
f"{dst.name}: Not consuming {dst.name}: "
f"It is a duplicate of {document.title} (#{document.pk})"
)
with self.assertRaisesMessage(ConsumerError, expected_message):
with self.get_consumer(dst) as consumer: with self.get_consumer(dst) as consumer:
consumer.run() consumer.run()
self.assertIsNotFile(dst) self.assertIsNotFile(dst)
self._assert_first_last_send_progress(last_status="FAILED") self.assertEqual(Document.objects.count(), 1)
self._assert_first_last_send_progress(last_status=ProgressStatusOptions.FAILED)
@override_settings(CONSUMER_DELETE_DUPLICATES=True)
def test_delete_duplicate_in_trash(self):
dst = self.get_test_file()
with self.get_consumer(dst) as consumer:
consumer.run()
# Move the existing document to trash
document = Document.objects.first()
document.delete()
dst = self.get_test_file()
self.assertIsFile(dst)
expected_message = (
f"{dst.name}: Not consuming {dst.name}: "
f"It is a duplicate of {document.title} (#{document.pk})"
f" Note: existing document is in the trash."
)
with self.assertRaisesMessage(ConsumerError, expected_message):
with self.get_consumer(dst) as consumer:
consumer.run()
self.assertIsNotFile(dst)
self.assertEqual(Document.global_objects.count(), 1)
self.assertEqual(Document.objects.count(), 0)
@override_settings(CONSUMER_DELETE_DUPLICATES=False) @override_settings(CONSUMER_DELETE_DUPLICATES=False)
def test_no_delete_duplicate(self): def test_no_delete_duplicate(self):
@@ -743,15 +777,12 @@ class TestConsumer(
dst = self.get_test_file() dst = self.get_test_file()
self.assertIsFile(dst) self.assertIsFile(dst)
with self.assertRaisesRegex( with self.get_consumer(dst) as consumer:
ConsumerError, consumer.run()
r"sample\.pdf: Not consuming sample\.pdf: It is a duplicate of sample \(#\d+\)",
):
with self.get_consumer(dst) as consumer:
consumer.run()
self.assertIsFile(dst) self.assertIsNotFile(dst)
self._assert_first_last_send_progress(last_status="FAILED") self.assertEqual(Document.objects.count(), 2)
self._assert_first_last_send_progress()
@override_settings(FILENAME_FORMAT="{title}") @override_settings(FILENAME_FORMAT="{title}")
@mock.patch("documents.parsers.document_consumer_declaration.send") @mock.patch("documents.parsers.document_consumer_declaration.send")

View File

@@ -1276,76 +1276,6 @@ class TestWorkflows(
) )
self.assertIn(expected_str, cm.output[1]) self.assertIn(expected_str, cm.output[1])
def test_document_added_any_filters(self):
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
)
trigger.filter_has_any_correspondents.set([self.c])
trigger.filter_has_any_document_types.set([self.dt])
trigger.filter_has_any_storage_paths.set([self.sp])
matching_doc = Document.objects.create(
title="sample test",
correspondent=self.c,
document_type=self.dt,
storage_path=self.sp,
original_filename="sample.pdf",
checksum="checksum-any-match",
)
matched, reason = existing_document_matches_workflow(matching_doc, trigger)
self.assertTrue(matched)
self.assertIsNone(reason)
wrong_correspondent = Document.objects.create(
title="wrong correspondent",
correspondent=self.c2,
document_type=self.dt,
storage_path=self.sp,
original_filename="sample2.pdf",
)
matched, reason = existing_document_matches_workflow(
wrong_correspondent,
trigger,
)
self.assertFalse(matched)
self.assertIn("correspondent", reason)
other_document_type = DocumentType.objects.create(name="Other")
wrong_document_type = Document.objects.create(
title="wrong doc type",
correspondent=self.c,
document_type=other_document_type,
storage_path=self.sp,
original_filename="sample3.pdf",
checksum="checksum-wrong-doc-type",
)
matched, reason = existing_document_matches_workflow(
wrong_document_type,
trigger,
)
self.assertFalse(matched)
self.assertIn("doc type", reason)
other_storage_path = StoragePath.objects.create(
name="Other path",
path="/other/",
)
wrong_storage_path = Document.objects.create(
title="wrong storage",
correspondent=self.c,
document_type=self.dt,
storage_path=other_storage_path,
original_filename="sample4.pdf",
checksum="checksum-wrong-storage-path",
)
matched, reason = existing_document_matches_workflow(
wrong_storage_path,
trigger,
)
self.assertFalse(matched)
self.assertIn("storage path", reason)
def test_document_added_custom_field_query_no_match(self): def test_document_added_custom_field_query_no_match(self):
trigger = WorkflowTrigger.objects.create( trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED, type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
@@ -1454,39 +1384,6 @@ class TestWorkflows(
self.assertIn(doc1, filtered) self.assertIn(doc1, filtered)
self.assertNotIn(doc2, filtered) self.assertNotIn(doc2, filtered)
def test_prefilter_documents_any_filters(self):
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
)
trigger.filter_has_any_correspondents.set([self.c])
trigger.filter_has_any_document_types.set([self.dt])
trigger.filter_has_any_storage_paths.set([self.sp])
allowed_document = Document.objects.create(
title="allowed",
correspondent=self.c,
document_type=self.dt,
storage_path=self.sp,
original_filename="doc-allowed.pdf",
checksum="checksum-any-allowed",
)
blocked_document = Document.objects.create(
title="blocked",
correspondent=self.c2,
document_type=self.dt,
storage_path=self.sp,
original_filename="doc-blocked.pdf",
checksum="checksum-any-blocked",
)
filtered = prefilter_documents_by_workflowtrigger(
Document.objects.all(),
trigger,
)
self.assertIn(allowed_document, filtered)
self.assertNotIn(blocked_document, filtered)
def test_consumption_trigger_requires_filter_configuration(self): def test_consumption_trigger_requires_filter_configuration(self):
serializer = WorkflowTriggerSerializer( serializer = WorkflowTriggerSerializer(
data={ data={