mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-01-18 22:14:22 -06:00
Compare commits
31 Commits
feature-95
...
feature-47
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4f941d3190 | ||
|
|
7f658dc93a | ||
|
|
5944c21be5 | ||
|
|
12ac170a67 | ||
|
|
31ba831a9a | ||
|
|
47ddb266dd | ||
|
|
681ae581bd | ||
|
|
aa4b685a07 | ||
|
|
cd1070bd3f | ||
|
|
ef661ae101 | ||
|
|
b5413525c4 | ||
|
|
efbd0c1bfa | ||
|
|
1e595a5aab | ||
|
|
62248f5702 | ||
|
|
fa6a0a81f4 | ||
|
|
b2541f3e8c | ||
|
|
f8ab81cef7 | ||
|
|
e9f7993ba5 | ||
|
|
3ea5e05137 | ||
|
|
56fddf1e58 | ||
|
|
d447a9fb32 | ||
|
|
155d69b211 | ||
|
|
4a7f9fa984 | ||
|
|
c471c201ee | ||
|
|
a9548afb42 | ||
|
|
939b2f7553 | ||
|
|
8b58718fff | ||
|
|
ad78c436c0 | ||
|
|
c6697cd82b | ||
|
|
0689c8ad3a | ||
|
|
825e9ca14c |
36
.github/workflows/ci-docker.yml
vendored
36
.github/workflows/ci-docker.yml
vendored
@@ -35,7 +35,7 @@ jobs:
|
|||||||
contents: read
|
contents: read
|
||||||
packages: write
|
packages: write
|
||||||
outputs:
|
outputs:
|
||||||
can-push: ${{ steps.check-push.outputs.can-push }}
|
should-push: ${{ steps.check-push.outputs.should-push }}
|
||||||
push-external: ${{ steps.check-push.outputs.push-external }}
|
push-external: ${{ steps.check-push.outputs.push-external }}
|
||||||
repository: ${{ steps.repo.outputs.name }}
|
repository: ${{ steps.repo.outputs.name }}
|
||||||
ref-name: ${{ steps.ref.outputs.name }}
|
ref-name: ${{ steps.ref.outputs.name }}
|
||||||
@@ -59,16 +59,28 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
REF_NAME: ${{ steps.ref.outputs.name }}
|
REF_NAME: ${{ steps.ref.outputs.name }}
|
||||||
run: |
|
run: |
|
||||||
# can-push: Can we push to GHCR?
|
# should-push: Should we push to GHCR?
|
||||||
# True for: pushes, or PRs from the same repo (not forks)
|
# True for:
|
||||||
can_push=${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
|
# 1. Pushes (tags/dev/beta) - filtered via the workflow triggers
|
||||||
echo "can-push=${can_push}"
|
# 2. Internal PRs where the branch name starts with 'feature-' or 'fix-' - filtered here when a PR is synced
|
||||||
echo "can-push=${can_push}" >> $GITHUB_OUTPUT
|
|
||||||
|
should_push="false"
|
||||||
|
|
||||||
|
if [[ "${{ github.event_name }}" == "push" ]]; then
|
||||||
|
should_push="true"
|
||||||
|
elif [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
|
||||||
|
if [[ "${REF_NAME}" == feature-* || "${REF_NAME}" == fix-* ]]; then
|
||||||
|
should_push="true"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "should-push=${should_push}"
|
||||||
|
echo "should-push=${should_push}" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
# push-external: Should we also push to Docker Hub and Quay.io?
|
# push-external: Should we also push to Docker Hub and Quay.io?
|
||||||
# Only for main repo on dev/beta branches or version tags
|
# Only for main repo on dev/beta branches or version tags
|
||||||
push_external="false"
|
push_external="false"
|
||||||
if [[ "${can_push}" == "true" && "${{ github.repository_owner }}" == "paperless-ngx" ]]; then
|
if [[ "${should_push}" == "true" && "${{ github.repository_owner }}" == "paperless-ngx" ]]; then
|
||||||
case "${REF_NAME}" in
|
case "${REF_NAME}" in
|
||||||
dev|beta)
|
dev|beta)
|
||||||
push_external="true"
|
push_external="true"
|
||||||
@@ -125,20 +137,20 @@ jobs:
|
|||||||
labels: ${{ steps.docker-meta.outputs.labels }}
|
labels: ${{ steps.docker-meta.outputs.labels }}
|
||||||
build-args: |
|
build-args: |
|
||||||
PNGX_TAG_VERSION=${{ steps.docker-meta.outputs.version }}
|
PNGX_TAG_VERSION=${{ steps.docker-meta.outputs.version }}
|
||||||
outputs: type=image,name=${{ env.REGISTRY }}/${{ steps.repo.outputs.name }},push-by-digest=true,name-canonical=true,push=${{ steps.check-push.outputs.can-push }}
|
outputs: type=image,name=${{ env.REGISTRY }}/${{ steps.repo.outputs.name }},push-by-digest=true,name-canonical=true,push=${{ steps.check-push.outputs.should-push }}
|
||||||
cache-from: |
|
cache-from: |
|
||||||
type=registry,ref=${{ env.REGISTRY }}/${{ steps.repo.outputs.name }}/cache/app:${{ steps.ref.outputs.cache-ref }}-${{ matrix.arch }}
|
type=registry,ref=${{ env.REGISTRY }}/${{ steps.repo.outputs.name }}/cache/app:${{ steps.ref.outputs.cache-ref }}-${{ matrix.arch }}
|
||||||
type=registry,ref=${{ env.REGISTRY }}/${{ steps.repo.outputs.name }}/cache/app:dev-${{ matrix.arch }}
|
type=registry,ref=${{ env.REGISTRY }}/${{ steps.repo.outputs.name }}/cache/app:dev-${{ matrix.arch }}
|
||||||
cache-to: ${{ steps.check-push.outputs.can-push == 'true' && format('type=registry,mode=max,ref={0}/{1}/cache/app:{2}-{3}', env.REGISTRY, steps.repo.outputs.name, steps.ref.outputs.cache-ref, matrix.arch) || '' }}
|
cache-to: ${{ steps.check-push.outputs.should-push == 'true' && format('type=registry,mode=max,ref={0}/{1}/cache/app:{2}-{3}', env.REGISTRY, steps.repo.outputs.name, steps.ref.outputs.cache-ref, matrix.arch) || '' }}
|
||||||
- name: Export digest
|
- name: Export digest
|
||||||
if: steps.check-push.outputs.can-push == 'true'
|
if: steps.check-push.outputs.should-push == 'true'
|
||||||
run: |
|
run: |
|
||||||
mkdir -p /tmp/digests
|
mkdir -p /tmp/digests
|
||||||
digest="${{ steps.build.outputs.digest }}"
|
digest="${{ steps.build.outputs.digest }}"
|
||||||
echo "digest=${digest}"
|
echo "digest=${digest}"
|
||||||
touch "/tmp/digests/${digest#sha256:}"
|
touch "/tmp/digests/${digest#sha256:}"
|
||||||
- name: Upload digest
|
- name: Upload digest
|
||||||
if: steps.check-push.outputs.can-push == 'true'
|
if: steps.check-push.outputs.should-push == 'true'
|
||||||
uses: actions/upload-artifact@v6.0.0
|
uses: actions/upload-artifact@v6.0.0
|
||||||
with:
|
with:
|
||||||
name: digests-${{ matrix.arch }}
|
name: digests-${{ matrix.arch }}
|
||||||
@@ -149,7 +161,7 @@ jobs:
|
|||||||
name: Merge and Push Manifest
|
name: Merge and Push Manifest
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
needs: build-arch
|
needs: build-arch
|
||||||
if: needs.build-arch.outputs.can-push == 'true'
|
if: needs.build-arch.outputs.should-push == 'true'
|
||||||
permissions:
|
permissions:
|
||||||
contents: read
|
contents: read
|
||||||
packages: write
|
packages: write
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ RUN set -eux \
|
|||||||
# Purpose: Installs s6-overlay and rootfs
|
# Purpose: Installs s6-overlay and rootfs
|
||||||
# Comments:
|
# Comments:
|
||||||
# - Don't leave anything extra in here either
|
# - Don't leave anything extra in here either
|
||||||
FROM ghcr.io/astral-sh/uv:0.9.15-python3.12-trixie-slim AS s6-overlay-base
|
FROM ghcr.io/astral-sh/uv:0.9.26-python3.12-trixie-slim AS s6-overlay-base
|
||||||
|
|
||||||
WORKDIR /usr/src/s6
|
WORKDIR /usr/src/s6
|
||||||
|
|
||||||
|
|||||||
@@ -1146,8 +1146,9 @@ via the consumption directory, you can disable the consumer to save resources.
|
|||||||
|
|
||||||
#### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES}
|
#### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES}
|
||||||
|
|
||||||
: When the consumer detects a duplicate document, it will not touch
|
: As of version 3.0 Paperless-ngx allows duplicate documents to be consumed by default, _except_ when
|
||||||
the original document. This default behavior can be changed here.
|
this setting is enabled. When enabled, Paperless will check if a document with the same hash already
|
||||||
|
exists in the system and delete the duplicate file from the consumption directory without consuming it.
|
||||||
|
|
||||||
Defaults to false.
|
Defaults to false.
|
||||||
|
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ dependencies = [
|
|||||||
# Only patch versions are guaranteed to not introduce breaking changes.
|
# Only patch versions are guaranteed to not introduce breaking changes.
|
||||||
"django~=5.2.5",
|
"django~=5.2.5",
|
||||||
"django-allauth[mfa,socialaccount]~=65.12.1",
|
"django-allauth[mfa,socialaccount]~=65.12.1",
|
||||||
"django-auditlog~=3.3.0",
|
"django-auditlog~=3.4.1",
|
||||||
"django-cachalot~=2.8.0",
|
"django-cachalot~=2.8.0",
|
||||||
"django-celery-results~=2.6.0",
|
"django-celery-results~=2.6.0",
|
||||||
"django-compression-middleware~=0.5.0",
|
"django-compression-middleware~=0.5.0",
|
||||||
@@ -47,20 +47,20 @@ dependencies = [
|
|||||||
"faiss-cpu>=1.10",
|
"faiss-cpu>=1.10",
|
||||||
"filelock~=3.20.0",
|
"filelock~=3.20.0",
|
||||||
"flower~=2.0.1",
|
"flower~=2.0.1",
|
||||||
"gotenberg-client~=0.12.0",
|
"gotenberg-client~=0.13.1",
|
||||||
"httpx-oauth~=0.16",
|
"httpx-oauth~=0.16",
|
||||||
"imap-tools~=1.11.0",
|
"imap-tools~=1.11.0",
|
||||||
"inotifyrecursive~=0.3",
|
"inotifyrecursive~=0.3",
|
||||||
"jinja2~=3.1.5",
|
"jinja2~=3.1.5",
|
||||||
"langdetect~=1.0.9",
|
"langdetect~=1.0.9",
|
||||||
"llama-index-core>=0.12.33.post1",
|
"llama-index-core>=0.14.12",
|
||||||
"llama-index-embeddings-huggingface>=0.5.3",
|
"llama-index-embeddings-huggingface>=0.6.1",
|
||||||
"llama-index-embeddings-openai>=0.3.1",
|
"llama-index-embeddings-openai>=0.5.1",
|
||||||
"llama-index-llms-ollama>=0.5.4",
|
"llama-index-llms-ollama>=0.9.1",
|
||||||
"llama-index-llms-openai>=0.3.38",
|
"llama-index-llms-openai>=0.6.13",
|
||||||
"llama-index-vector-stores-faiss>=0.3",
|
"llama-index-vector-stores-faiss>=0.5.2",
|
||||||
"nltk~=3.9.1",
|
"nltk~=3.9.1",
|
||||||
"ocrmypdf~=16.12.0",
|
"ocrmypdf~=16.13.0",
|
||||||
"openai>=1.76",
|
"openai>=1.76",
|
||||||
"pathvalidate~=3.3.1",
|
"pathvalidate~=3.3.1",
|
||||||
"pdf2image~=1.17.0",
|
"pdf2image~=1.17.0",
|
||||||
@@ -77,7 +77,7 @@ dependencies = [
|
|||||||
"sentence-transformers>=4.1",
|
"sentence-transformers>=4.1",
|
||||||
"setproctitle~=1.3.4",
|
"setproctitle~=1.3.4",
|
||||||
"tika-client~=0.10.0",
|
"tika-client~=0.10.0",
|
||||||
"torch~=2.7.0",
|
"torch~=2.9.1",
|
||||||
"tqdm~=4.67.1",
|
"tqdm~=4.67.1",
|
||||||
"watchdog~=6.0",
|
"watchdog~=6.0",
|
||||||
"whitenoise~=6.9",
|
"whitenoise~=6.9",
|
||||||
@@ -92,7 +92,7 @@ optional-dependencies.postgres = [
|
|||||||
"psycopg[c,pool]==3.2.12",
|
"psycopg[c,pool]==3.2.12",
|
||||||
# Direct dependency for proper resolution of the pre-built wheels
|
# Direct dependency for proper resolution of the pre-built wheels
|
||||||
"psycopg-c==3.2.12",
|
"psycopg-c==3.2.12",
|
||||||
"psycopg-pool==3.2.7",
|
"psycopg-pool==3.3",
|
||||||
]
|
]
|
||||||
optional-dependencies.webserver = [
|
optional-dependencies.webserver = [
|
||||||
"granian[uvloop]~=2.5.1",
|
"granian[uvloop]~=2.5.1",
|
||||||
@@ -127,7 +127,7 @@ testing = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
lint = [
|
lint = [
|
||||||
"pre-commit~=4.4.0",
|
"pre-commit~=4.5.1",
|
||||||
"pre-commit-uv~=4.2.0",
|
"pre-commit-uv~=4.2.0",
|
||||||
"ruff~=0.14.0",
|
"ruff~=0.14.0",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -97,6 +97,12 @@
|
|||||||
<br/><em>(<ng-container i18n>click for full output</ng-container>)</em>
|
<br/><em>(<ng-container i18n>click for full output</ng-container>)</em>
|
||||||
}
|
}
|
||||||
</ng-template>
|
</ng-template>
|
||||||
|
@if (task.duplicate_documents?.length > 0) {
|
||||||
|
<div class="small text-warning-emphasis d-flex align-items-center gap-1">
|
||||||
|
<i-bs class="lh-1" width="1em" height="1em" name="exclamation-triangle"></i-bs>
|
||||||
|
<span i18n>Duplicate(s) detected</span>
|
||||||
|
</div>
|
||||||
|
}
|
||||||
</td>
|
</td>
|
||||||
}
|
}
|
||||||
<td class="d-lg-none">
|
<td class="d-lg-none">
|
||||||
|
|||||||
@@ -28,7 +28,7 @@
|
|||||||
</button>
|
</button>
|
||||||
</ng-template>
|
</ng-template>
|
||||||
<ng-template ng-option-tmp let-item="item" let-index="index" let-search="searchTerm">
|
<ng-template ng-option-tmp let-item="item" let-index="index" let-search="searchTerm">
|
||||||
<div class="tag-option-row d-flex align-items-center">
|
<div class="tag-option-row d-flex align-items-center" [class.w-auto]="!getTag(item.id)?.parent">
|
||||||
@if (item.id && tags) {
|
@if (item.id && tags) {
|
||||||
@if (getTag(item.id)?.parent) {
|
@if (getTag(item.id)?.parent) {
|
||||||
<i-bs name="list-nested" class="me-1"></i-bs>
|
<i-bs name="list-nested" class="me-1"></i-bs>
|
||||||
|
|||||||
@@ -23,7 +23,7 @@
|
|||||||
|
|
||||||
// Dropdown hierarchy reveal for ng-select options
|
// Dropdown hierarchy reveal for ng-select options
|
||||||
::ng-deep .ng-dropdown-panel .ng-option {
|
::ng-deep .ng-dropdown-panel .ng-option {
|
||||||
overflow-x: scroll;
|
overflow-x: scroll !important;
|
||||||
|
|
||||||
.tag-option-row {
|
.tag-option-row {
|
||||||
font-size: 1rem;
|
font-size: 1rem;
|
||||||
|
|||||||
@@ -370,6 +370,37 @@
|
|||||||
</ng-template>
|
</ng-template>
|
||||||
</li>
|
</li>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@if (document?.duplicate_documents?.length) {
|
||||||
|
<li [ngbNavItem]="DocumentDetailNavIDs.Duplicates">
|
||||||
|
<a class="text-nowrap" ngbNavLink i18n>
|
||||||
|
Duplicates
|
||||||
|
<span class="badge text-bg-secondary ms-1">{{ document.duplicate_documents.length }}</span>
|
||||||
|
</a>
|
||||||
|
<ng-template ngbNavContent>
|
||||||
|
<div class="d-flex flex-column gap-2">
|
||||||
|
<div class="fst-italic" i18n>Duplicate documents detected:</div>
|
||||||
|
<div class="list-group">
|
||||||
|
@for (duplicate of document.duplicate_documents; track duplicate.id) {
|
||||||
|
<a
|
||||||
|
class="list-group-item list-group-item-action d-flex justify-content-between align-items-center"
|
||||||
|
[routerLink]="['/documents', duplicate.id, 'details']"
|
||||||
|
[class.disabled]="duplicate.deleted_at"
|
||||||
|
>
|
||||||
|
<span class="d-flex align-items-center gap-2">
|
||||||
|
<span>{{ duplicate.title || ('#' + duplicate.id) }}</span>
|
||||||
|
@if (duplicate.deleted_at) {
|
||||||
|
<span class="badge text-bg-secondary" i18n>In trash</span>
|
||||||
|
}
|
||||||
|
</span>
|
||||||
|
<span class="text-secondary">#{{ duplicate.id }}</span>
|
||||||
|
</a>
|
||||||
|
}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</ng-template>
|
||||||
|
</li>
|
||||||
|
}
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<div [ngbNavOutlet]="nav" class="mt-3"></div>
|
<div [ngbNavOutlet]="nav" class="mt-3"></div>
|
||||||
|
|||||||
@@ -301,16 +301,16 @@ describe('DocumentDetailComponent', () => {
|
|||||||
.spyOn(openDocumentsService, 'openDocument')
|
.spyOn(openDocumentsService, 'openDocument')
|
||||||
.mockReturnValueOnce(of(true))
|
.mockReturnValueOnce(of(true))
|
||||||
fixture.detectChanges()
|
fixture.detectChanges()
|
||||||
expect(component.activeNavID).toEqual(5) // DocumentDetailNavIDs.Notes
|
expect(component.activeNavID).toEqual(component.DocumentDetailNavIDs.Notes)
|
||||||
})
|
})
|
||||||
|
|
||||||
it('should change url on tab switch', () => {
|
it('should change url on tab switch', () => {
|
||||||
initNormally()
|
initNormally()
|
||||||
const navigateSpy = jest.spyOn(router, 'navigate')
|
const navigateSpy = jest.spyOn(router, 'navigate')
|
||||||
component.nav.select(5)
|
component.nav.select(component.DocumentDetailNavIDs.Notes)
|
||||||
component.nav.navChange.next({
|
component.nav.navChange.next({
|
||||||
activeId: 1,
|
activeId: 1,
|
||||||
nextId: 5,
|
nextId: component.DocumentDetailNavIDs.Notes,
|
||||||
preventDefault: () => {},
|
preventDefault: () => {},
|
||||||
})
|
})
|
||||||
fixture.detectChanges()
|
fixture.detectChanges()
|
||||||
@@ -352,6 +352,18 @@ describe('DocumentDetailComponent', () => {
|
|||||||
expect(component.document).toEqual(doc)
|
expect(component.document).toEqual(doc)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('should fall back to details tab when duplicates tab is active but no duplicates', () => {
|
||||||
|
initNormally()
|
||||||
|
component.activeNavID = component.DocumentDetailNavIDs.Duplicates
|
||||||
|
const noDupDoc = { ...doc, duplicate_documents: [] }
|
||||||
|
|
||||||
|
component.updateComponent(noDupDoc)
|
||||||
|
|
||||||
|
expect(component.activeNavID).toEqual(
|
||||||
|
component.DocumentDetailNavIDs.Details
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
it('should load already-opened document via param', () => {
|
it('should load already-opened document via param', () => {
|
||||||
initNormally()
|
initNormally()
|
||||||
jest.spyOn(documentService, 'get').mockReturnValueOnce(of(doc))
|
jest.spyOn(documentService, 'get').mockReturnValueOnce(of(doc))
|
||||||
@@ -367,6 +379,38 @@ describe('DocumentDetailComponent', () => {
|
|||||||
expect(component.document).toEqual(doc)
|
expect(component.document).toEqual(doc)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('should update cached open document duplicates when reloading an open doc', () => {
|
||||||
|
const openDoc = { ...doc, duplicate_documents: [{ id: 1, title: 'Old' }] }
|
||||||
|
const updatedDuplicates = [
|
||||||
|
{ id: 2, title: 'Newer duplicate', deleted_at: null },
|
||||||
|
]
|
||||||
|
jest
|
||||||
|
.spyOn(activatedRoute, 'paramMap', 'get')
|
||||||
|
.mockReturnValue(of(convertToParamMap({ id: 3, section: 'details' })))
|
||||||
|
jest.spyOn(documentService, 'get').mockReturnValue(
|
||||||
|
of({
|
||||||
|
...doc,
|
||||||
|
modified: new Date('2024-01-02T00:00:00Z'),
|
||||||
|
duplicate_documents: updatedDuplicates,
|
||||||
|
})
|
||||||
|
)
|
||||||
|
jest.spyOn(openDocumentsService, 'getOpenDocument').mockReturnValue(openDoc)
|
||||||
|
const saveSpy = jest.spyOn(openDocumentsService, 'save')
|
||||||
|
jest.spyOn(openDocumentsService, 'openDocument').mockReturnValue(of(true))
|
||||||
|
jest.spyOn(customFieldsService, 'listAll').mockReturnValue(
|
||||||
|
of({
|
||||||
|
count: customFields.length,
|
||||||
|
all: customFields.map((f) => f.id),
|
||||||
|
results: customFields,
|
||||||
|
})
|
||||||
|
)
|
||||||
|
|
||||||
|
fixture.detectChanges()
|
||||||
|
|
||||||
|
expect(openDoc.duplicate_documents).toEqual(updatedDuplicates)
|
||||||
|
expect(saveSpy).toHaveBeenCalled()
|
||||||
|
})
|
||||||
|
|
||||||
it('should disable form if user cannot edit', () => {
|
it('should disable form if user cannot edit', () => {
|
||||||
currentUserHasObjectPermissions = false
|
currentUserHasObjectPermissions = false
|
||||||
initNormally()
|
initNormally()
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import {
|
|||||||
FormsModule,
|
FormsModule,
|
||||||
ReactiveFormsModule,
|
ReactiveFormsModule,
|
||||||
} from '@angular/forms'
|
} from '@angular/forms'
|
||||||
import { ActivatedRoute, Router } from '@angular/router'
|
import { ActivatedRoute, Router, RouterModule } from '@angular/router'
|
||||||
import {
|
import {
|
||||||
NgbDateStruct,
|
NgbDateStruct,
|
||||||
NgbDropdownModule,
|
NgbDropdownModule,
|
||||||
@@ -124,6 +124,7 @@ enum DocumentDetailNavIDs {
|
|||||||
Notes = 5,
|
Notes = 5,
|
||||||
Permissions = 6,
|
Permissions = 6,
|
||||||
History = 7,
|
History = 7,
|
||||||
|
Duplicates = 8,
|
||||||
}
|
}
|
||||||
|
|
||||||
enum ContentRenderType {
|
enum ContentRenderType {
|
||||||
@@ -181,6 +182,7 @@ export enum ZoomSetting {
|
|||||||
NgxBootstrapIconsModule,
|
NgxBootstrapIconsModule,
|
||||||
PdfViewerModule,
|
PdfViewerModule,
|
||||||
TextAreaComponent,
|
TextAreaComponent,
|
||||||
|
RouterModule,
|
||||||
],
|
],
|
||||||
})
|
})
|
||||||
export class DocumentDetailComponent
|
export class DocumentDetailComponent
|
||||||
@@ -285,10 +287,10 @@ export class DocumentDetailComponent
|
|||||||
if (
|
if (
|
||||||
element &&
|
element &&
|
||||||
element.nativeElement.offsetParent !== null &&
|
element.nativeElement.offsetParent !== null &&
|
||||||
this.nav?.activeId == 4
|
this.nav?.activeId == DocumentDetailNavIDs.Preview
|
||||||
) {
|
) {
|
||||||
// it's visible
|
// it's visible
|
||||||
setTimeout(() => this.nav?.select(1))
|
setTimeout(() => this.nav?.select(DocumentDetailNavIDs.Details))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -454,6 +456,11 @@ export class DocumentDetailComponent
|
|||||||
const openDocument = this.openDocumentService.getOpenDocument(
|
const openDocument = this.openDocumentService.getOpenDocument(
|
||||||
this.documentId
|
this.documentId
|
||||||
)
|
)
|
||||||
|
// update duplicate documents if present
|
||||||
|
if (openDocument && doc?.duplicate_documents) {
|
||||||
|
openDocument.duplicate_documents = doc.duplicate_documents
|
||||||
|
this.openDocumentService.save()
|
||||||
|
}
|
||||||
const useDoc = openDocument || doc
|
const useDoc = openDocument || doc
|
||||||
if (openDocument) {
|
if (openDocument) {
|
||||||
if (
|
if (
|
||||||
@@ -704,6 +711,13 @@ export class DocumentDetailComponent
|
|||||||
}
|
}
|
||||||
this.title = this.documentTitlePipe.transform(doc.title)
|
this.title = this.documentTitlePipe.transform(doc.title)
|
||||||
this.prepareForm(doc)
|
this.prepareForm(doc)
|
||||||
|
|
||||||
|
if (
|
||||||
|
this.activeNavID === DocumentDetailNavIDs.Duplicates &&
|
||||||
|
!doc?.duplicate_documents?.length
|
||||||
|
) {
|
||||||
|
this.activeNavID = DocumentDetailNavIDs.Details
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
get customFieldFormFields(): FormArray {
|
get customFieldFormFields(): FormArray {
|
||||||
|
|||||||
@@ -159,6 +159,8 @@ export interface Document extends ObjectWithPermissions {
|
|||||||
|
|
||||||
page_count?: number
|
page_count?: number
|
||||||
|
|
||||||
|
duplicate_documents?: Document[]
|
||||||
|
|
||||||
// Frontend only
|
// Frontend only
|
||||||
__changedFields?: string[]
|
__changedFields?: string[]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import { Document } from './document'
|
||||||
import { ObjectWithId } from './object-with-id'
|
import { ObjectWithId } from './object-with-id'
|
||||||
|
|
||||||
export enum PaperlessTaskType {
|
export enum PaperlessTaskType {
|
||||||
@@ -42,5 +43,7 @@ export interface PaperlessTask extends ObjectWithId {
|
|||||||
|
|
||||||
related_document?: number
|
related_document?: number
|
||||||
|
|
||||||
|
duplicate_documents?: Document[]
|
||||||
|
|
||||||
owner?: number
|
owner?: number
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -785,19 +785,45 @@ class ConsumerPreflightPlugin(
|
|||||||
Q(checksum=checksum) | Q(archive_checksum=checksum),
|
Q(checksum=checksum) | Q(archive_checksum=checksum),
|
||||||
)
|
)
|
||||||
if existing_doc.exists():
|
if existing_doc.exists():
|
||||||
msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
|
existing_doc = existing_doc.order_by("-created")
|
||||||
log_msg = f"Not consuming {self.filename}: It is a duplicate of {existing_doc.get().title} (#{existing_doc.get().pk})."
|
duplicates_in_trash = existing_doc.filter(deleted_at__isnull=False)
|
||||||
|
log_msg = (
|
||||||
|
f"Consuming duplicate {self.filename}: "
|
||||||
|
f"{existing_doc.count()} existing document(s) share the same content."
|
||||||
|
)
|
||||||
|
|
||||||
if existing_doc.first().deleted_at is not None:
|
if duplicates_in_trash.exists():
|
||||||
msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
|
log_msg += " Note: at least one existing document is in the trash."
|
||||||
log_msg += " Note: existing document is in the trash."
|
|
||||||
|
self.log.warning(log_msg)
|
||||||
|
|
||||||
if settings.CONSUMER_DELETE_DUPLICATES:
|
if settings.CONSUMER_DELETE_DUPLICATES:
|
||||||
|
duplicate = existing_doc.first()
|
||||||
|
duplicate_label = (
|
||||||
|
duplicate.title
|
||||||
|
or duplicate.original_filename
|
||||||
|
or (Path(duplicate.filename).name if duplicate.filename else None)
|
||||||
|
or str(duplicate.pk)
|
||||||
|
)
|
||||||
|
|
||||||
Path(self.input_doc.original_file).unlink()
|
Path(self.input_doc.original_file).unlink()
|
||||||
self._fail(
|
|
||||||
msg,
|
failure_msg = (
|
||||||
log_msg,
|
f"Not consuming {self.filename}: "
|
||||||
)
|
f"It is a duplicate of {duplicate_label} (#{duplicate.pk})"
|
||||||
|
)
|
||||||
|
status_msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
|
||||||
|
|
||||||
|
if duplicates_in_trash.exists():
|
||||||
|
status_msg = (
|
||||||
|
ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
|
||||||
|
)
|
||||||
|
failure_msg += " Note: existing document is in the trash."
|
||||||
|
|
||||||
|
self._fail(
|
||||||
|
status_msg,
|
||||||
|
failure_msg,
|
||||||
|
)
|
||||||
|
|
||||||
def pre_check_directories(self):
|
def pre_check_directories(self):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ def populate_action_order(apps, schema_editor):
|
|||||||
|
|
||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
dependencies = [
|
dependencies = [
|
||||||
("documents", "1075_alter_paperlesstask_task_name"),
|
("documents", "1074_workflowrun_deleted_at_workflowrun_restored_at_and_more"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
@@ -6,7 +6,7 @@ from django.db import models
|
|||||||
|
|
||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
dependencies = [
|
dependencies = [
|
||||||
("documents", "1074_workflowrun_deleted_at_workflowrun_restored_at_and_more"),
|
("documents", "1075_workflowaction_order"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
# Generated by Django 5.2.7 on 2026-01-14 17:45
|
||||||
|
|
||||||
|
from django.db import migrations
|
||||||
|
from django.db import models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
dependencies = [
|
||||||
|
("documents", "1076_alter_paperlesstask_task_name"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="document",
|
||||||
|
name="checksum",
|
||||||
|
field=models.CharField(
|
||||||
|
editable=False,
|
||||||
|
max_length=32,
|
||||||
|
verbose_name="checksum",
|
||||||
|
help_text="The checksum of the original document.",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -212,7 +212,6 @@ class Document(SoftDeleteModel, ModelWithOwner):
|
|||||||
_("checksum"),
|
_("checksum"),
|
||||||
max_length=32,
|
max_length=32,
|
||||||
editable=False,
|
editable=False,
|
||||||
unique=True,
|
|
||||||
help_text=_("The checksum of the original document."),
|
help_text=_("The checksum of the original document."),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -148,13 +148,29 @@ def get_document_count_filter_for_user(user):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_objects_for_user_owner_aware(user, perms, Model) -> QuerySet:
|
def get_objects_for_user_owner_aware(
|
||||||
objects_owned = Model.objects.filter(owner=user)
|
user,
|
||||||
objects_unowned = Model.objects.filter(owner__isnull=True)
|
perms,
|
||||||
|
Model,
|
||||||
|
*,
|
||||||
|
include_deleted=False,
|
||||||
|
) -> QuerySet:
|
||||||
|
"""
|
||||||
|
Returns objects the user owns, are unowned, or has explicit perms.
|
||||||
|
When include_deleted is True, soft-deleted items are also included.
|
||||||
|
"""
|
||||||
|
manager = (
|
||||||
|
Model.global_objects
|
||||||
|
if include_deleted and hasattr(Model, "global_objects")
|
||||||
|
else Model.objects
|
||||||
|
)
|
||||||
|
|
||||||
|
objects_owned = manager.filter(owner=user)
|
||||||
|
objects_unowned = manager.filter(owner__isnull=True)
|
||||||
objects_with_perms = get_objects_for_user(
|
objects_with_perms = get_objects_for_user(
|
||||||
user=user,
|
user=user,
|
||||||
perms=perms,
|
perms=perms,
|
||||||
klass=Model,
|
klass=manager.all(),
|
||||||
accept_global_perms=False,
|
accept_global_perms=False,
|
||||||
)
|
)
|
||||||
return objects_owned | objects_unowned | objects_with_perms
|
return objects_owned | objects_unowned | objects_with_perms
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ from django.core.validators import MinValueValidator
|
|||||||
from django.core.validators import RegexValidator
|
from django.core.validators import RegexValidator
|
||||||
from django.core.validators import integer_validator
|
from django.core.validators import integer_validator
|
||||||
from django.db.models import Count
|
from django.db.models import Count
|
||||||
|
from django.db.models import Q
|
||||||
from django.db.models.functions import Lower
|
from django.db.models.functions import Lower
|
||||||
from django.utils.crypto import get_random_string
|
from django.utils.crypto import get_random_string
|
||||||
from django.utils.dateparse import parse_datetime
|
from django.utils.dateparse import parse_datetime
|
||||||
@@ -72,6 +73,7 @@ from documents.models import WorkflowTrigger
|
|||||||
from documents.parsers import is_mime_type_supported
|
from documents.parsers import is_mime_type_supported
|
||||||
from documents.permissions import get_document_count_filter_for_user
|
from documents.permissions import get_document_count_filter_for_user
|
||||||
from documents.permissions import get_groups_with_only_permission
|
from documents.permissions import get_groups_with_only_permission
|
||||||
|
from documents.permissions import get_objects_for_user_owner_aware
|
||||||
from documents.permissions import set_permissions_for_object
|
from documents.permissions import set_permissions_for_object
|
||||||
from documents.regex import validate_regex_pattern
|
from documents.regex import validate_regex_pattern
|
||||||
from documents.templating.filepath import validate_filepath_template_and_render
|
from documents.templating.filepath import validate_filepath_template_and_render
|
||||||
@@ -1014,6 +1016,29 @@ class NotesSerializer(serializers.ModelSerializer):
|
|||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
||||||
|
def _get_viewable_duplicates(document: Document, user: User | None):
|
||||||
|
checksums = {document.checksum}
|
||||||
|
if document.archive_checksum:
|
||||||
|
checksums.add(document.archive_checksum)
|
||||||
|
duplicates = Document.global_objects.filter(
|
||||||
|
Q(checksum__in=checksums) | Q(archive_checksum__in=checksums),
|
||||||
|
).exclude(pk=document.pk)
|
||||||
|
duplicates = duplicates.order_by("-created")
|
||||||
|
allowed = get_objects_for_user_owner_aware(
|
||||||
|
user,
|
||||||
|
"documents.view_document",
|
||||||
|
Document,
|
||||||
|
include_deleted=True,
|
||||||
|
)
|
||||||
|
return duplicates.filter(id__in=allowed.values_list("id", flat=True))
|
||||||
|
|
||||||
|
|
||||||
|
class DuplicateDocumentSummarySerializer(serializers.Serializer):
|
||||||
|
id = serializers.IntegerField()
|
||||||
|
title = serializers.CharField()
|
||||||
|
deleted_at = serializers.DateTimeField(allow_null=True)
|
||||||
|
|
||||||
|
|
||||||
@extend_schema_serializer(
|
@extend_schema_serializer(
|
||||||
deprecate_fields=["created_date"],
|
deprecate_fields=["created_date"],
|
||||||
)
|
)
|
||||||
@@ -1031,6 +1056,7 @@ class DocumentSerializer(
|
|||||||
archived_file_name = SerializerMethodField()
|
archived_file_name = SerializerMethodField()
|
||||||
created_date = serializers.DateField(required=False)
|
created_date = serializers.DateField(required=False)
|
||||||
page_count = SerializerMethodField()
|
page_count = SerializerMethodField()
|
||||||
|
duplicate_documents = SerializerMethodField()
|
||||||
|
|
||||||
notes = NotesSerializer(many=True, required=False, read_only=True)
|
notes = NotesSerializer(many=True, required=False, read_only=True)
|
||||||
|
|
||||||
@@ -1056,6 +1082,16 @@ class DocumentSerializer(
|
|||||||
def get_page_count(self, obj) -> int | None:
|
def get_page_count(self, obj) -> int | None:
|
||||||
return obj.page_count
|
return obj.page_count
|
||||||
|
|
||||||
|
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
|
||||||
|
def get_duplicate_documents(self, obj):
|
||||||
|
view = self.context.get("view")
|
||||||
|
if view and getattr(view, "action", None) != "retrieve":
|
||||||
|
return []
|
||||||
|
request = self.context.get("request")
|
||||||
|
user = request.user if request else None
|
||||||
|
duplicates = _get_viewable_duplicates(obj, user)
|
||||||
|
return list(duplicates.values("id", "title", "deleted_at"))
|
||||||
|
|
||||||
def get_original_file_name(self, obj) -> str | None:
|
def get_original_file_name(self, obj) -> str | None:
|
||||||
return obj.original_filename
|
return obj.original_filename
|
||||||
|
|
||||||
@@ -1233,6 +1269,7 @@ class DocumentSerializer(
|
|||||||
"archive_serial_number",
|
"archive_serial_number",
|
||||||
"original_file_name",
|
"original_file_name",
|
||||||
"archived_file_name",
|
"archived_file_name",
|
||||||
|
"duplicate_documents",
|
||||||
"owner",
|
"owner",
|
||||||
"permissions",
|
"permissions",
|
||||||
"user_can_change",
|
"user_can_change",
|
||||||
@@ -2094,10 +2131,12 @@ class TasksViewSerializer(OwnedObjectSerializer):
|
|||||||
"result",
|
"result",
|
||||||
"acknowledged",
|
"acknowledged",
|
||||||
"related_document",
|
"related_document",
|
||||||
|
"duplicate_documents",
|
||||||
"owner",
|
"owner",
|
||||||
)
|
)
|
||||||
|
|
||||||
related_document = serializers.SerializerMethodField()
|
related_document = serializers.SerializerMethodField()
|
||||||
|
duplicate_documents = serializers.SerializerMethodField()
|
||||||
created_doc_re = re.compile(r"New document id (\d+) created")
|
created_doc_re = re.compile(r"New document id (\d+) created")
|
||||||
duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")
|
duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")
|
||||||
|
|
||||||
@@ -2122,6 +2161,17 @@ class TasksViewSerializer(OwnedObjectSerializer):
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
|
||||||
|
def get_duplicate_documents(self, obj):
|
||||||
|
related_document = self.get_related_document(obj)
|
||||||
|
request = self.context.get("request")
|
||||||
|
user = request.user if request else None
|
||||||
|
document = Document.global_objects.filter(pk=related_document).first()
|
||||||
|
if not related_document or not user or not document:
|
||||||
|
return []
|
||||||
|
duplicates = _get_viewable_duplicates(document, user)
|
||||||
|
return list(duplicates.values("id", "title", "deleted_at"))
|
||||||
|
|
||||||
|
|
||||||
class RunTaskViewSerializer(serializers.Serializer):
|
class RunTaskViewSerializer(serializers.Serializer):
|
||||||
task_name = serializers.ChoiceField(
|
task_name = serializers.ChoiceField(
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from django.contrib.auth.models import User
|
|||||||
from rest_framework import status
|
from rest_framework import status
|
||||||
from rest_framework.test import APITestCase
|
from rest_framework.test import APITestCase
|
||||||
|
|
||||||
|
from documents.models import Document
|
||||||
from documents.models import PaperlessTask
|
from documents.models import PaperlessTask
|
||||||
from documents.tests.utils import DirectoriesMixin
|
from documents.tests.utils import DirectoriesMixin
|
||||||
from documents.views import TasksViewSet
|
from documents.views import TasksViewSet
|
||||||
@@ -258,7 +259,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
|
|||||||
task_id=str(uuid.uuid4()),
|
task_id=str(uuid.uuid4()),
|
||||||
task_file_name="task_one.pdf",
|
task_file_name="task_one.pdf",
|
||||||
status=celery.states.FAILURE,
|
status=celery.states.FAILURE,
|
||||||
result="test.pdf: Not consuming test.pdf: It is a duplicate.",
|
result="test.pdf: Unexpected error during ingestion.",
|
||||||
)
|
)
|
||||||
|
|
||||||
response = self.client.get(self.ENDPOINT)
|
response = self.client.get(self.ENDPOINT)
|
||||||
@@ -270,7 +271,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
|
|||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
returned_data["result"],
|
returned_data["result"],
|
||||||
"test.pdf: Not consuming test.pdf: It is a duplicate.",
|
"test.pdf: Unexpected error during ingestion.",
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_task_name_webui(self):
|
def test_task_name_webui(self):
|
||||||
@@ -325,20 +326,34 @@ class TestTasks(DirectoriesMixin, APITestCase):
|
|||||||
|
|
||||||
self.assertEqual(returned_data["task_file_name"], "anothertest.pdf")
|
self.assertEqual(returned_data["task_file_name"], "anothertest.pdf")
|
||||||
|
|
||||||
def test_task_result_failed_duplicate_includes_related_doc(self):
|
def test_task_result_duplicate_warning_includes_count(self):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- A celery task failed with a duplicate error
|
- A celery task succeeds, but a duplicate exists
|
||||||
WHEN:
|
WHEN:
|
||||||
- API call is made to get tasks
|
- API call is made to get tasks
|
||||||
THEN:
|
THEN:
|
||||||
- The returned data includes a related document link
|
- The returned data includes duplicate warning metadata
|
||||||
"""
|
"""
|
||||||
|
checksum = "duplicate-checksum"
|
||||||
|
Document.objects.create(
|
||||||
|
title="Existing",
|
||||||
|
content="",
|
||||||
|
mime_type="application/pdf",
|
||||||
|
checksum=checksum,
|
||||||
|
archive_checksum="another-checksum",
|
||||||
|
)
|
||||||
|
created_doc = Document.objects.create(
|
||||||
|
title="Created",
|
||||||
|
content="",
|
||||||
|
mime_type="application/pdf",
|
||||||
|
checksum=checksum,
|
||||||
|
)
|
||||||
PaperlessTask.objects.create(
|
PaperlessTask.objects.create(
|
||||||
task_id=str(uuid.uuid4()),
|
task_id=str(uuid.uuid4()),
|
||||||
task_file_name="task_one.pdf",
|
task_file_name="task_one.pdf",
|
||||||
status=celery.states.FAILURE,
|
status=celery.states.SUCCESS,
|
||||||
result="Not consuming task_one.pdf: It is a duplicate of task_one_existing.pdf (#1234).",
|
result=f"Success. New document id {created_doc.pk} created",
|
||||||
)
|
)
|
||||||
|
|
||||||
response = self.client.get(self.ENDPOINT)
|
response = self.client.get(self.ENDPOINT)
|
||||||
@@ -348,7 +363,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
|
|||||||
|
|
||||||
returned_data = response.data[0]
|
returned_data = response.data[0]
|
||||||
|
|
||||||
self.assertEqual(returned_data["related_document"], "1234")
|
self.assertEqual(returned_data["related_document"], str(created_doc.pk))
|
||||||
|
|
||||||
def test_run_train_classifier_task(self):
|
def test_run_train_classifier_task(self):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -485,21 +485,21 @@ class TestConsumer(
|
|||||||
with self.get_consumer(self.get_test_file()) as consumer:
|
with self.get_consumer(self.get_test_file()) as consumer:
|
||||||
consumer.run()
|
consumer.run()
|
||||||
|
|
||||||
with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
|
with self.get_consumer(self.get_test_file()) as consumer:
|
||||||
with self.get_consumer(self.get_test_file()) as consumer:
|
consumer.run()
|
||||||
consumer.run()
|
|
||||||
|
|
||||||
self._assert_first_last_send_progress(last_status="FAILED")
|
self.assertEqual(Document.objects.count(), 2)
|
||||||
|
self._assert_first_last_send_progress()
|
||||||
|
|
||||||
def testDuplicates2(self):
|
def testDuplicates2(self):
|
||||||
with self.get_consumer(self.get_test_file()) as consumer:
|
with self.get_consumer(self.get_test_file()) as consumer:
|
||||||
consumer.run()
|
consumer.run()
|
||||||
|
|
||||||
with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
|
with self.get_consumer(self.get_test_archive_file()) as consumer:
|
||||||
with self.get_consumer(self.get_test_archive_file()) as consumer:
|
consumer.run()
|
||||||
consumer.run()
|
|
||||||
|
|
||||||
self._assert_first_last_send_progress(last_status="FAILED")
|
self.assertEqual(Document.objects.count(), 2)
|
||||||
|
self._assert_first_last_send_progress()
|
||||||
|
|
||||||
def testDuplicates3(self):
|
def testDuplicates3(self):
|
||||||
with self.get_consumer(self.get_test_archive_file()) as consumer:
|
with self.get_consumer(self.get_test_archive_file()) as consumer:
|
||||||
@@ -513,9 +513,10 @@ class TestConsumer(
|
|||||||
|
|
||||||
Document.objects.all().delete()
|
Document.objects.all().delete()
|
||||||
|
|
||||||
with self.assertRaisesMessage(ConsumerError, "document is in the trash"):
|
with self.get_consumer(self.get_test_file()) as consumer:
|
||||||
with self.get_consumer(self.get_test_file()) as consumer:
|
consumer.run()
|
||||||
consumer.run()
|
|
||||||
|
self.assertEqual(Document.objects.count(), 1)
|
||||||
|
|
||||||
def testAsnExists(self):
|
def testAsnExists(self):
|
||||||
with self.get_consumer(
|
with self.get_consumer(
|
||||||
@@ -718,12 +719,45 @@ class TestConsumer(
|
|||||||
dst = self.get_test_file()
|
dst = self.get_test_file()
|
||||||
self.assertIsFile(dst)
|
self.assertIsFile(dst)
|
||||||
|
|
||||||
with self.assertRaises(ConsumerError):
|
expected_message = (
|
||||||
|
f"{dst.name}: Not consuming {dst.name}: "
|
||||||
|
f"It is a duplicate of {document.title} (#{document.pk})"
|
||||||
|
)
|
||||||
|
|
||||||
|
with self.assertRaisesMessage(ConsumerError, expected_message):
|
||||||
with self.get_consumer(dst) as consumer:
|
with self.get_consumer(dst) as consumer:
|
||||||
consumer.run()
|
consumer.run()
|
||||||
|
|
||||||
self.assertIsNotFile(dst)
|
self.assertIsNotFile(dst)
|
||||||
self._assert_first_last_send_progress(last_status="FAILED")
|
self.assertEqual(Document.objects.count(), 1)
|
||||||
|
self._assert_first_last_send_progress(last_status=ProgressStatusOptions.FAILED)
|
||||||
|
|
||||||
|
@override_settings(CONSUMER_DELETE_DUPLICATES=True)
|
||||||
|
def test_delete_duplicate_in_trash(self):
|
||||||
|
dst = self.get_test_file()
|
||||||
|
with self.get_consumer(dst) as consumer:
|
||||||
|
consumer.run()
|
||||||
|
|
||||||
|
# Move the existing document to trash
|
||||||
|
document = Document.objects.first()
|
||||||
|
document.delete()
|
||||||
|
|
||||||
|
dst = self.get_test_file()
|
||||||
|
self.assertIsFile(dst)
|
||||||
|
|
||||||
|
expected_message = (
|
||||||
|
f"{dst.name}: Not consuming {dst.name}: "
|
||||||
|
f"It is a duplicate of {document.title} (#{document.pk})"
|
||||||
|
f" Note: existing document is in the trash."
|
||||||
|
)
|
||||||
|
|
||||||
|
with self.assertRaisesMessage(ConsumerError, expected_message):
|
||||||
|
with self.get_consumer(dst) as consumer:
|
||||||
|
consumer.run()
|
||||||
|
|
||||||
|
self.assertIsNotFile(dst)
|
||||||
|
self.assertEqual(Document.global_objects.count(), 1)
|
||||||
|
self.assertEqual(Document.objects.count(), 0)
|
||||||
|
|
||||||
@override_settings(CONSUMER_DELETE_DUPLICATES=False)
|
@override_settings(CONSUMER_DELETE_DUPLICATES=False)
|
||||||
def test_no_delete_duplicate(self):
|
def test_no_delete_duplicate(self):
|
||||||
@@ -743,15 +777,12 @@ class TestConsumer(
|
|||||||
dst = self.get_test_file()
|
dst = self.get_test_file()
|
||||||
self.assertIsFile(dst)
|
self.assertIsFile(dst)
|
||||||
|
|
||||||
with self.assertRaisesRegex(
|
with self.get_consumer(dst) as consumer:
|
||||||
ConsumerError,
|
consumer.run()
|
||||||
r"sample\.pdf: Not consuming sample\.pdf: It is a duplicate of sample \(#\d+\)",
|
|
||||||
):
|
|
||||||
with self.get_consumer(dst) as consumer:
|
|
||||||
consumer.run()
|
|
||||||
|
|
||||||
self.assertIsFile(dst)
|
self.assertIsNotFile(dst)
|
||||||
self._assert_first_last_send_progress(last_status="FAILED")
|
self.assertEqual(Document.objects.count(), 2)
|
||||||
|
self._assert_first_last_send_progress()
|
||||||
|
|
||||||
@override_settings(FILENAME_FORMAT="{title}")
|
@override_settings(FILENAME_FORMAT="{title}")
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
||||||
|
|||||||
@@ -11,14 +11,12 @@ from paperless_ai.chat import stream_chat_with_documents
|
|||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
def patch_embed_model():
|
def patch_embed_model():
|
||||||
from llama_index.core import settings as llama_settings
|
from llama_index.core import settings as llama_settings
|
||||||
|
from llama_index.core.embeddings.mock_embed_model import MockEmbedding
|
||||||
|
|
||||||
mock_embed_model = MagicMock()
|
# Use a real BaseEmbedding subclass to satisfy llama-index 0.14 validation
|
||||||
mock_embed_model._get_text_embedding_batch.return_value = [
|
llama_settings.Settings.embed_model = MockEmbedding(embed_dim=1536)
|
||||||
[0.1] * 1536,
|
|
||||||
] # 1 vector per input
|
|
||||||
llama_settings.Settings._embed_model = mock_embed_model
|
|
||||||
yield
|
yield
|
||||||
llama_settings.Settings._embed_model = None
|
llama_settings.Settings.embed_model = None
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
|
|||||||
Reference in New Issue
Block a user