Compare commits


12 Commits

Author SHA1 Message Date
shamoon b2b9e0c64b Fix/refactor 2026-01-18 21:43:50 -08:00
shamoon 5944c21be5 Backend coverage 2026-01-18 16:27:52 -08:00
shamoon 12ac170a67 Refactor serializer 2026-01-18 16:27:52 -08:00
shamoon 31ba831a9a Frontend coverage 2026-01-18 16:27:52 -08:00
shamoon 47ddb266dd Some random cleanups 2026-01-18 16:27:52 -08:00
shamoon 681ae581bd Fix schema 2026-01-18 16:27:52 -08:00
shamoon aa4b685a07 Nice, UX for doc in trash 2026-01-18 16:27:52 -08:00
shamoon cd1070bd3f Make these anchors 2026-01-18 16:27:52 -08:00
shamoon ef661ae101 Treat CONSUMER_DELETE_DUPLICATES as a hard no 2026-01-18 16:27:52 -08:00
shamoon b5413525c4 Ok lets make duplicates a tab, nice 2026-01-18 16:27:52 -08:00
shamoon efbd0c1bfa Drop DuplicateDocument 2026-01-18 16:27:52 -08:00
shamoon 1e595a5aab Core elements, migration, consumer modifications 2026-01-18 16:27:52 -08:00
34 changed files with 324 additions and 441 deletions

View File

@@ -44,7 +44,6 @@ include-labels:
- 'notable'
exclude-labels:
- 'skip-changelog'
filter-by-commitish: true
category-template: '### $TITLE'
change-template: '- $TITLE @$AUTHOR ([#$NUMBER]($URL))'
change-title-escapes: '\<*_&#@'

View File

@@ -8,11 +8,6 @@ echo "${log_prefix} Apply database migrations..."
cd "${PAPERLESS_SRC_DIR}"
if [[ "${PAPERLESS_MIGRATION_MODE:-0}" == "1" ]]; then
echo "${log_prefix} Migration mode enabled, skipping migrations."
exit 0
fi
# The whole migrate, with flock, needs to run as the right user
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
exec s6-setlock -n "${data_dir}/migration_lock" python3 manage.py migrate --skip-checks --no-input

View File

@@ -9,15 +9,7 @@ echo "${log_prefix} Running Django checks"
cd "${PAPERLESS_SRC_DIR}"
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
if [[ "${PAPERLESS_MIGRATION_MODE:-0}" == "1" ]]; then
python3 manage_migration.py check
else
python3 manage.py check
fi
python3 manage.py check
else
if [[ "${PAPERLESS_MIGRATION_MODE:-0}" == "1" ]]; then
s6-setuidgid paperless python3 manage_migration.py check
else
s6-setuidgid paperless python3 manage.py check
fi
s6-setuidgid paperless python3 manage.py check
fi

View File

@@ -13,14 +13,8 @@ if [[ -n "${PAPERLESS_FORCE_SCRIPT_NAME}" ]]; then
export GRANIAN_URL_PATH_PREFIX=${PAPERLESS_FORCE_SCRIPT_NAME}
fi
if [[ "${PAPERLESS_MIGRATION_MODE:-0}" == "1" ]]; then
app_module="paperless.migration_asgi:application"
else
app_module="paperless.asgi:application"
fi
if [[ -n "${USER_IS_NON_ROOT}" ]]; then
exec granian --interface asginl --ws --loop uvloop "${app_module}"
exec granian --interface asginl --ws --loop uvloop "paperless.asgi:application"
else
exec s6-setuidgid paperless granian --interface asginl --ws --loop uvloop "${app_module}"
exec s6-setuidgid paperless granian --interface asginl --ws --loop uvloop "paperless.asgi:application"
fi

View File

@@ -1,21 +1,5 @@
# Changelog
## paperless-ngx 2.20.5
### Bug Fixes
- Fix: ensure horizontal scroll for long tag names in list, wrap tags without parent [@shamoon](https://github.com/shamoon) ([#11811](https://github.com/paperless-ngx/paperless-ngx/pull/11811))
- Fix: use explicit order field for workflow actions [@shamoon](https://github.com/shamoon) [@stumpylog](https://github.com/stumpylog) ([#11781](https://github.com/paperless-ngx/paperless-ngx/pull/11781))
### All App Changes
<details>
<summary>2 changes</summary>
- Fix: ensure horizontal scroll for long tag names in list, wrap tags without parent [@shamoon](https://github.com/shamoon) ([#11811](https://github.com/paperless-ngx/paperless-ngx/pull/11811))
- Fix: use explicit order field for workflow actions [@shamoon](https://github.com/shamoon) [@stumpylog](https://github.com/stumpylog) ([#11781](https://github.com/paperless-ngx/paperless-ngx/pull/11781))
</details>
## paperless-ngx 2.20.4
### Security

View File

@@ -1146,8 +1146,9 @@ via the consumption directory, you can disable the consumer to save resources.
#### [`PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>`](#PAPERLESS_CONSUMER_DELETE_DUPLICATES) {#PAPERLESS_CONSUMER_DELETE_DUPLICATES}
: When the consumer detects a duplicate document, it will not touch
the original document. This default behavior can be changed here.
: As of version 3.0, Paperless-ngx allows duplicate documents to be consumed by default, _except_ when
this setting is enabled. When enabled, Paperless checks whether a document with the same hash already
exists in the system and, if so, deletes the duplicate file from the consumption directory without consuming it.
Defaults to false.
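
To make the new semantics concrete, here is a minimal, hypothetical sketch of the decision the consumer makes under this setting (the function name and signature are illustrative only, not the actual Paperless-ngx implementation):

import hashlib
from pathlib import Path

def handle_possible_duplicate(
    input_path: Path,
    existing_checksums: set[str],
    delete_duplicates: bool,
) -> bool:
    """Return True if the file should still be consumed."""
    checksum = hashlib.md5(input_path.read_bytes()).hexdigest()
    if checksum not in existing_checksums:
        return True  # no matching document, consume normally
    if delete_duplicates:
        # PAPERLESS_CONSUMER_DELETE_DUPLICATES=true: hard no, drop the file
        input_path.unlink()
        return False
    # Default since 3.0: the duplicate is consumed anyway (a warning is logged)
    return True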

View File

@@ -1,6 +1,6 @@
[project]
name = "paperless-ngx"
version = "2.20.5"
version = "2.20.4"
description = "A community-supported supercharged document management system: scan, index and archive all your physical documents"
readme = "README.md"
requires-python = ">=3.10"

View File

@@ -1,6 +1,6 @@
{
"name": "paperless-ngx-ui",
"version": "2.20.5",
"version": "2.20.4",
"scripts": {
"preinstall": "npx only-allow pnpm",
"ng": "ng",

View File

@@ -97,6 +97,12 @@
<br/><em>(<ng-container i18n>click for full output</ng-container>)</em>
}
</ng-template>
@if (task.duplicate_documents?.length > 0) {
<div class="small text-warning-emphasis d-flex align-items-center gap-1">
<i-bs class="lh-1" width="1em" height="1em" name="exclamation-triangle"></i-bs>
<span i18n>Duplicate(s) detected</span>
</div>
}
</td>
}
<td class="d-lg-none">

View File

@@ -22,8 +22,8 @@
}
// Dropdown hierarchy reveal for ng-select options
:host ::ng-deep .ng-dropdown-panel .ng-option {
overflow-x: auto !important;
::ng-deep .ng-dropdown-panel .ng-option {
overflow-x: scroll !important;
.tag-option-row {
font-size: 1rem;
@@ -41,12 +41,12 @@
}
}
:host ::ng-deep .ng-dropdown-panel .ng-option:hover .hierarchy-reveal,
:host ::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-reveal {
::ng-deep .ng-dropdown-panel .ng-option:hover .hierarchy-reveal,
::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-reveal {
max-width: 1000px;
}
::ng-deep .ng-dropdown-panel .ng-option:hover .hierarchy-indicator,
:host ::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-indicator {
::ng-deep .ng-dropdown-panel .ng-option.ng-option-marked .hierarchy-indicator {
background: transparent;
}

View File

@@ -370,6 +370,37 @@
</ng-template>
</li>
}
@if (document?.duplicate_documents?.length) {
<li [ngbNavItem]="DocumentDetailNavIDs.Duplicates">
<a class="text-nowrap" ngbNavLink i18n>
Duplicates
<span class="badge text-bg-secondary ms-1">{{ document.duplicate_documents.length }}</span>
</a>
<ng-template ngbNavContent>
<div class="d-flex flex-column gap-2">
<div class="fst-italic" i18n>Duplicate documents detected:</div>
<div class="list-group">
@for (duplicate of document.duplicate_documents; track duplicate.id) {
<a
class="list-group-item list-group-item-action d-flex justify-content-between align-items-center"
[routerLink]="['/documents', duplicate.id, 'details']"
[class.disabled]="duplicate.deleted_at"
>
<span class="d-flex align-items-center gap-2">
<span>{{ duplicate.title || ('#' + duplicate.id) }}</span>
@if (duplicate.deleted_at) {
<span class="badge text-bg-secondary" i18n>In trash</span>
}
</span>
<span class="text-secondary">#{{ duplicate.id }}</span>
</a>
}
</div>
</div>
</ng-template>
</li>
}
</ul>
<div [ngbNavOutlet]="nav" class="mt-3"></div>

View File

@@ -301,16 +301,16 @@ describe('DocumentDetailComponent', () => {
.spyOn(openDocumentsService, 'openDocument')
.mockReturnValueOnce(of(true))
fixture.detectChanges()
expect(component.activeNavID).toEqual(5) // DocumentDetailNavIDs.Notes
expect(component.activeNavID).toEqual(component.DocumentDetailNavIDs.Notes)
})
it('should change url on tab switch', () => {
initNormally()
const navigateSpy = jest.spyOn(router, 'navigate')
component.nav.select(5)
component.nav.select(component.DocumentDetailNavIDs.Notes)
component.nav.navChange.next({
activeId: 1,
nextId: 5,
nextId: component.DocumentDetailNavIDs.Notes,
preventDefault: () => {},
})
fixture.detectChanges()
@@ -352,6 +352,18 @@ describe('DocumentDetailComponent', () => {
expect(component.document).toEqual(doc)
})
it('should fall back to details tab when duplicates tab is active but no duplicates', () => {
initNormally()
component.activeNavID = component.DocumentDetailNavIDs.Duplicates
const noDupDoc = { ...doc, duplicate_documents: [] }
component.updateComponent(noDupDoc)
expect(component.activeNavID).toEqual(
component.DocumentDetailNavIDs.Details
)
})
it('should load already-opened document via param', () => {
initNormally()
jest.spyOn(documentService, 'get').mockReturnValueOnce(of(doc))
@@ -367,6 +379,38 @@ describe('DocumentDetailComponent', () => {
expect(component.document).toEqual(doc)
})
it('should update cached open document duplicates when reloading an open doc', () => {
const openDoc = { ...doc, duplicate_documents: [{ id: 1, title: 'Old' }] }
const updatedDuplicates = [
{ id: 2, title: 'Newer duplicate', deleted_at: null },
]
jest
.spyOn(activatedRoute, 'paramMap', 'get')
.mockReturnValue(of(convertToParamMap({ id: 3, section: 'details' })))
jest.spyOn(documentService, 'get').mockReturnValue(
of({
...doc,
modified: new Date('2024-01-02T00:00:00Z'),
duplicate_documents: updatedDuplicates,
})
)
jest.spyOn(openDocumentsService, 'getOpenDocument').mockReturnValue(openDoc)
const saveSpy = jest.spyOn(openDocumentsService, 'save')
jest.spyOn(openDocumentsService, 'openDocument').mockReturnValue(of(true))
jest.spyOn(customFieldsService, 'listAll').mockReturnValue(
of({
count: customFields.length,
all: customFields.map((f) => f.id),
results: customFields,
})
)
fixture.detectChanges()
expect(openDoc.duplicate_documents).toEqual(updatedDuplicates)
expect(saveSpy).toHaveBeenCalled()
})
it('should disable form if user cannot edit', () => {
currentUserHasObjectPermissions = false
initNormally()

View File

@@ -8,7 +8,7 @@ import {
FormsModule,
ReactiveFormsModule,
} from '@angular/forms'
import { ActivatedRoute, Router } from '@angular/router'
import { ActivatedRoute, Router, RouterModule } from '@angular/router'
import {
NgbDateStruct,
NgbDropdownModule,
@@ -124,6 +124,7 @@ enum DocumentDetailNavIDs {
Notes = 5,
Permissions = 6,
History = 7,
Duplicates = 8,
}
enum ContentRenderType {
@@ -181,6 +182,7 @@ export enum ZoomSetting {
NgxBootstrapIconsModule,
PdfViewerModule,
TextAreaComponent,
RouterModule,
],
})
export class DocumentDetailComponent
@@ -454,6 +456,11 @@ export class DocumentDetailComponent
const openDocument = this.openDocumentService.getOpenDocument(
this.documentId
)
// update duplicate documents if present
if (openDocument && doc?.duplicate_documents) {
openDocument.duplicate_documents = doc.duplicate_documents
this.openDocumentService.save()
}
const useDoc = openDocument || doc
if (openDocument) {
if (
@@ -704,6 +711,13 @@ export class DocumentDetailComponent
}
this.title = this.documentTitlePipe.transform(doc.title)
this.prepareForm(doc)
if (
this.activeNavID === DocumentDetailNavIDs.Duplicates &&
!doc?.duplicate_documents?.length
) {
this.activeNavID = DocumentDetailNavIDs.Details
}
}
get customFieldFormFields(): FormArray {

View File

@@ -159,6 +159,8 @@ export interface Document extends ObjectWithPermissions {
page_count?: number
duplicate_documents?: Document[]
// Frontend only
__changedFields?: string[]
}

View File

@@ -1,3 +1,4 @@
import { Document } from './document'
import { ObjectWithId } from './object-with-id'
export enum PaperlessTaskType {
@@ -42,5 +43,7 @@ export interface PaperlessTask extends ObjectWithId {
related_document?: number
duplicate_documents?: Document[]
owner?: number
}

View File

@@ -6,7 +6,7 @@ export const environment = {
apiVersion: '9', // match src/paperless/settings.py
appTitle: 'Paperless-ngx',
tag: 'prod',
version: '2.20.5',
version: '2.20.4',
webSocketHost: window.location.host,
webSocketProtocol: window.location.protocol == 'https:' ? 'wss:' : 'ws:',
webSocketBaseUrl: base_url.pathname + 'ws/',

View File

@@ -785,19 +785,45 @@ class ConsumerPreflightPlugin(
Q(checksum=checksum) | Q(archive_checksum=checksum),
)
if existing_doc.exists():
msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
log_msg = f"Not consuming {self.filename}: It is a duplicate of {existing_doc.get().title} (#{existing_doc.get().pk})."
existing_doc = existing_doc.order_by("-created")
duplicates_in_trash = existing_doc.filter(deleted_at__isnull=False)
log_msg = (
f"Consuming duplicate {self.filename}: "
f"{existing_doc.count()} existing document(s) share the same content."
)
if existing_doc.first().deleted_at is not None:
msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
log_msg += " Note: existing document is in the trash."
if duplicates_in_trash.exists():
log_msg += " Note: at least one existing document is in the trash."
self.log.warning(log_msg)
if settings.CONSUMER_DELETE_DUPLICATES:
duplicate = existing_doc.first()
duplicate_label = (
duplicate.title
or duplicate.original_filename
or (Path(duplicate.filename).name if duplicate.filename else None)
or str(duplicate.pk)
)
Path(self.input_doc.original_file).unlink()
self._fail(
msg,
log_msg,
)
failure_msg = (
f"Not consuming {self.filename}: "
f"It is a duplicate of {duplicate_label} (#{duplicate.pk})"
)
status_msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
if duplicates_in_trash.exists():
status_msg = (
ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
)
failure_msg += " Note: existing document is in the trash."
self._fail(
status_msg,
failure_msg,
)
def pre_check_directories(self):
"""

View File

@@ -0,0 +1,23 @@
# Generated by Django 5.2.7 on 2026-01-14 17:45
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
dependencies = [
("documents", "1076_alter_paperlesstask_task_name"),
]
operations = [
migrations.AlterField(
model_name="document",
name="checksum",
field=models.CharField(
editable=False,
max_length=32,
verbose_name="checksum",
help_text="The checksum of the original document.",
),
),
]

View File

@@ -212,7 +212,6 @@ class Document(SoftDeleteModel, ModelWithOwner):
_("checksum"),
max_length=32,
editable=False,
unique=True,
help_text=_("The checksum of the original document."),
)

View File

@@ -148,13 +148,29 @@ def get_document_count_filter_for_user(user):
)
def get_objects_for_user_owner_aware(user, perms, Model) -> QuerySet:
objects_owned = Model.objects.filter(owner=user)
objects_unowned = Model.objects.filter(owner__isnull=True)
def get_objects_for_user_owner_aware(
user,
perms,
Model,
*,
include_deleted=False,
) -> QuerySet:
"""
Return objects the user owns, that are unowned, or for which the user has explicit perms.
When include_deleted is True, soft-deleted items are also included.
"""
manager = (
Model.global_objects
if include_deleted and hasattr(Model, "global_objects")
else Model.objects
)
objects_owned = manager.filter(owner=user)
objects_unowned = manager.filter(owner__isnull=True)
objects_with_perms = get_objects_for_user(
user=user,
perms=perms,
klass=Model,
klass=manager.all(),
accept_global_perms=False,
)
return objects_owned | objects_unowned | objects_with_perms
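
Since the helper now accepts an `include_deleted` flag, a hedged usage sketch (assuming a Django shell with the paperless apps loaded; the `demo` account is a placeholder) looks like this:

from django.contrib.auth.models import User

from documents.models import Document
from documents.permissions import get_objects_for_user_owner_aware

# Placeholder account; any user with object-level document permissions works.
user = User.objects.get(username="demo")

# include_deleted=True switches to Model.global_objects (when available), so
# duplicates sitting in the trash are still returned for permission checks.
viewable_docs = get_objects_for_user_owner_aware(
    user,
    "documents.view_document",
    Document,
    include_deleted=True,
)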

View File

@@ -23,6 +23,7 @@ from django.core.validators import MinValueValidator
from django.core.validators import RegexValidator
from django.core.validators import integer_validator
from django.db.models import Count
from django.db.models import Q
from django.db.models.functions import Lower
from django.utils.crypto import get_random_string
from django.utils.dateparse import parse_datetime
@@ -72,6 +73,7 @@ from documents.models import WorkflowTrigger
from documents.parsers import is_mime_type_supported
from documents.permissions import get_document_count_filter_for_user
from documents.permissions import get_groups_with_only_permission
from documents.permissions import get_objects_for_user_owner_aware
from documents.permissions import set_permissions_for_object
from documents.regex import validate_regex_pattern
from documents.templating.filepath import validate_filepath_template_and_render
@@ -1014,6 +1016,29 @@ class NotesSerializer(serializers.ModelSerializer):
return ret
def _get_viewable_duplicates(document: Document, user: User | None):
checksums = {document.checksum}
if document.archive_checksum:
checksums.add(document.archive_checksum)
duplicates = Document.global_objects.filter(
Q(checksum__in=checksums) | Q(archive_checksum__in=checksums),
).exclude(pk=document.pk)
duplicates = duplicates.order_by("-created")
allowed = get_objects_for_user_owner_aware(
user,
"documents.view_document",
Document,
include_deleted=True,
)
return duplicates.filter(id__in=allowed.values_list("id", flat=True))
class DuplicateDocumentSummarySerializer(serializers.Serializer):
id = serializers.IntegerField()
title = serializers.CharField()
deleted_at = serializers.DateTimeField(allow_null=True)
@extend_schema_serializer(
deprecate_fields=["created_date"],
)
@@ -1031,6 +1056,7 @@ class DocumentSerializer(
archived_file_name = SerializerMethodField()
created_date = serializers.DateField(required=False)
page_count = SerializerMethodField()
duplicate_documents = SerializerMethodField()
notes = NotesSerializer(many=True, required=False, read_only=True)
@@ -1056,6 +1082,16 @@ class DocumentSerializer(
def get_page_count(self, obj) -> int | None:
return obj.page_count
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
def get_duplicate_documents(self, obj):
view = self.context.get("view")
if view and getattr(view, "action", None) != "retrieve":
return []
request = self.context.get("request")
user = request.user if request else None
duplicates = _get_viewable_duplicates(obj, user)
return list(duplicates.values("id", "title", "deleted_at"))
def get_original_file_name(self, obj) -> str | None:
return obj.original_filename
@@ -1233,6 +1269,7 @@ class DocumentSerializer(
"archive_serial_number",
"original_file_name",
"archived_file_name",
"duplicate_documents",
"owner",
"permissions",
"user_can_change",
@@ -2094,10 +2131,12 @@ class TasksViewSerializer(OwnedObjectSerializer):
"result",
"acknowledged",
"related_document",
"duplicate_documents",
"owner",
)
related_document = serializers.SerializerMethodField()
duplicate_documents = serializers.SerializerMethodField()
created_doc_re = re.compile(r"New document id (\d+) created")
duplicate_doc_re = re.compile(r"It is a duplicate of .* \(#(\d+)\)")
@@ -2122,6 +2161,17 @@ class TasksViewSerializer(OwnedObjectSerializer):
return result
@extend_schema_field(DuplicateDocumentSummarySerializer(many=True))
def get_duplicate_documents(self, obj):
related_document = self.get_related_document(obj)
request = self.context.get("request")
user = request.user if request else None
document = Document.global_objects.filter(pk=related_document).first()
if not related_document or not user or not document:
return []
duplicates = _get_viewable_duplicates(document, user)
return list(duplicates.values("id", "title", "deleted_at"))
class RunTaskViewSerializer(serializers.Serializer):
task_name = serializers.ChoiceField(

View File

@@ -7,6 +7,7 @@ from django.contrib.auth.models import User
from rest_framework import status
from rest_framework.test import APITestCase
from documents.models import Document
from documents.models import PaperlessTask
from documents.tests.utils import DirectoriesMixin
from documents.views import TasksViewSet
@@ -258,7 +259,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
task_id=str(uuid.uuid4()),
task_file_name="task_one.pdf",
status=celery.states.FAILURE,
result="test.pdf: Not consuming test.pdf: It is a duplicate.",
result="test.pdf: Unexpected error during ingestion.",
)
response = self.client.get(self.ENDPOINT)
@@ -270,7 +271,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
self.assertEqual(
returned_data["result"],
"test.pdf: Not consuming test.pdf: It is a duplicate.",
"test.pdf: Unexpected error during ingestion.",
)
def test_task_name_webui(self):
@@ -325,20 +326,34 @@ class TestTasks(DirectoriesMixin, APITestCase):
self.assertEqual(returned_data["task_file_name"], "anothertest.pdf")
def test_task_result_failed_duplicate_includes_related_doc(self):
def test_task_result_duplicate_warning_includes_count(self):
"""
GIVEN:
- A celery task failed with a duplicate error
- A celery task succeeds, but a duplicate exists
WHEN:
- API call is made to get tasks
THEN:
- The returned data includes a related document link
- The returned data includes duplicate warning metadata
"""
checksum = "duplicate-checksum"
Document.objects.create(
title="Existing",
content="",
mime_type="application/pdf",
checksum=checksum,
)
created_doc = Document.objects.create(
title="Created",
content="",
mime_type="application/pdf",
checksum=checksum,
archive_checksum="another-checksum",
)
PaperlessTask.objects.create(
task_id=str(uuid.uuid4()),
task_file_name="task_one.pdf",
status=celery.states.FAILURE,
result="Not consuming task_one.pdf: It is a duplicate of task_one_existing.pdf (#1234).",
status=celery.states.SUCCESS,
result=f"Success. New document id {created_doc.pk} created",
)
response = self.client.get(self.ENDPOINT)
@@ -348,7 +363,7 @@ class TestTasks(DirectoriesMixin, APITestCase):
returned_data = response.data[0]
self.assertEqual(returned_data["related_document"], "1234")
self.assertEqual(returned_data["related_document"], str(created_doc.pk))
def test_run_train_classifier_task(self):
"""

View File

@@ -485,21 +485,21 @@ class TestConsumer(
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
self._assert_first_last_send_progress(last_status="FAILED")
self.assertEqual(Document.objects.count(), 2)
self._assert_first_last_send_progress()
def testDuplicates2(self):
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
with self.assertRaisesMessage(ConsumerError, "It is a duplicate"):
with self.get_consumer(self.get_test_archive_file()) as consumer:
consumer.run()
with self.get_consumer(self.get_test_archive_file()) as consumer:
consumer.run()
self._assert_first_last_send_progress(last_status="FAILED")
self.assertEqual(Document.objects.count(), 2)
self._assert_first_last_send_progress()
def testDuplicates3(self):
with self.get_consumer(self.get_test_archive_file()) as consumer:
@@ -513,9 +513,10 @@ class TestConsumer(
Document.objects.all().delete()
with self.assertRaisesMessage(ConsumerError, "document is in the trash"):
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
with self.get_consumer(self.get_test_file()) as consumer:
consumer.run()
self.assertEqual(Document.objects.count(), 1)
def testAsnExists(self):
with self.get_consumer(
@@ -718,12 +719,45 @@ class TestConsumer(
dst = self.get_test_file()
self.assertIsFile(dst)
with self.assertRaises(ConsumerError):
expected_message = (
f"{dst.name}: Not consuming {dst.name}: "
f"It is a duplicate of {document.title} (#{document.pk})"
)
with self.assertRaisesMessage(ConsumerError, expected_message):
with self.get_consumer(dst) as consumer:
consumer.run()
self.assertIsNotFile(dst)
self._assert_first_last_send_progress(last_status="FAILED")
self.assertEqual(Document.objects.count(), 1)
self._assert_first_last_send_progress(last_status=ProgressStatusOptions.FAILED)
@override_settings(CONSUMER_DELETE_DUPLICATES=True)
def test_delete_duplicate_in_trash(self):
dst = self.get_test_file()
with self.get_consumer(dst) as consumer:
consumer.run()
# Move the existing document to trash
document = Document.objects.first()
document.delete()
dst = self.get_test_file()
self.assertIsFile(dst)
expected_message = (
f"{dst.name}: Not consuming {dst.name}: "
f"It is a duplicate of {document.title} (#{document.pk})"
f" Note: existing document is in the trash."
)
with self.assertRaisesMessage(ConsumerError, expected_message):
with self.get_consumer(dst) as consumer:
consumer.run()
self.assertIsNotFile(dst)
self.assertEqual(Document.global_objects.count(), 1)
self.assertEqual(Document.objects.count(), 0)
@override_settings(CONSUMER_DELETE_DUPLICATES=False)
def test_no_delete_duplicate(self):
@@ -743,15 +777,12 @@ class TestConsumer(
dst = self.get_test_file()
self.assertIsFile(dst)
with self.assertRaisesRegex(
ConsumerError,
r"sample\.pdf: Not consuming sample\.pdf: It is a duplicate of sample \(#\d+\)",
):
with self.get_consumer(dst) as consumer:
consumer.run()
with self.get_consumer(dst) as consumer:
consumer.run()
self.assertIsFile(dst)
self._assert_first_last_send_progress(last_status="FAILED")
self.assertIsNotFile(dst)
self.assertEqual(Document.objects.count(), 2)
self._assert_first_last_send_progress()
@override_settings(FILENAME_FORMAT="{title}")
@mock.patch("documents.parsers.document_consumer_declaration.send")

View File

@@ -1,13 +0,0 @@
#!/usr/bin/env python3
import os
import sys
if __name__ == "__main__":
os.environ.setdefault(
"DJANGO_SETTINGS_MODULE",
"paperless_migration.settings",
)
from django.core.management import execute_from_command_line
execute_from_command_line(sys.argv)

View File

@@ -1,7 +0,0 @@
import os
from django.core.asgi import get_asgi_application
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless_migration.settings")
application = get_asgi_application()

View File

@@ -1,6 +1,6 @@
from typing import Final
__version__: Final[tuple[int, int, int]] = (2, 20, 5)
__version__: Final[tuple[int, int, int]] = (2, 20, 4)
# Version string like X.Y.Z
__full_version_str__: Final[str] = ".".join(map(str, __version__))
# Version string like X.Y

View File

@@ -1,6 +0,0 @@
from django.apps import AppConfig
class PaperlessMigrationConfig(AppConfig):
default_auto_field = "django.db.models.BigAutoField"
name = "paperless_migration"

View File

@@ -1,193 +0,0 @@
"""Settings for migration-mode Django instance."""
from __future__ import annotations
import os
from pathlib import Path
from typing import Any
from dotenv import load_dotenv
BASE_DIR = Path(__file__).resolve().parent.parent
DEBUG = False
ALLOWED_HOSTS = ["*"]
# Tap paperless.conf if it's available
for path in [
os.getenv("PAPERLESS_CONFIGURATION_PATH"),
"../paperless.conf",
"/etc/paperless.conf",
"/usr/local/etc/paperless.conf",
]:
if path and Path(path).exists():
load_dotenv(path)
break
def __get_path(
key: str,
default: str | Path,
) -> Path:
if key in os.environ:
return Path(os.environ[key]).resolve()
return Path(default).resolve()
DATA_DIR = __get_path("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
def _parse_db_settings() -> dict[str, dict[str, Any]]:
databases: dict[str, dict[str, Any]] = {
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": DATA_DIR / "db.sqlite3",
"OPTIONS": {},
},
}
if os.getenv("PAPERLESS_DBHOST"):
databases["sqlite"] = databases["default"].copy()
databases["default"] = {
"HOST": os.getenv("PAPERLESS_DBHOST"),
"NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
"USER": os.getenv("PAPERLESS_DBUSER", "paperless"),
"PASSWORD": os.getenv("PAPERLESS_DBPASS", "paperless"),
"OPTIONS": {},
}
if os.getenv("PAPERLESS_DBPORT"):
databases["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")
if os.getenv("PAPERLESS_DBENGINE") == "mariadb":
engine = "django.db.backends.mysql"
options = {
"read_default_file": "/etc/mysql/my.cnf",
"charset": "utf8mb4",
"ssl_mode": os.getenv("PAPERLESS_DBSSLMODE", "PREFERRED"),
"ssl": {
"ca": os.getenv("PAPERLESS_DBSSLROOTCERT"),
"cert": os.getenv("PAPERLESS_DBSSLCERT"),
"key": os.getenv("PAPERLESS_DBSSLKEY"),
},
}
else:
engine = "django.db.backends.postgresql"
options = {
"sslmode": os.getenv("PAPERLESS_DBSSLMODE", "prefer"),
"sslrootcert": os.getenv("PAPERLESS_DBSSLROOTCERT"),
"sslcert": os.getenv("PAPERLESS_DBSSLCERT"),
"sslkey": os.getenv("PAPERLESS_DBSSLKEY"),
}
databases["default"]["ENGINE"] = engine
databases["default"]["OPTIONS"].update(options)
if os.getenv("PAPERLESS_DB_TIMEOUT") is not None:
timeout = int(os.getenv("PAPERLESS_DB_TIMEOUT"))
if databases["default"]["ENGINE"] == "django.db.backends.sqlite3":
databases["default"]["OPTIONS"].update({"timeout": timeout})
else:
databases["default"]["OPTIONS"].update({"connect_timeout": timeout})
databases["sqlite"]["OPTIONS"].update({"timeout": timeout})
return databases
DATABASES = _parse_db_settings()
SECRET_KEY = os.getenv(
"PAPERLESS_SECRET_KEY",
"e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee",
)
AUTH_PASSWORD_VALIDATORS = [
{
"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
},
{
"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
},
{
"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
},
{
"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
},
]
LANGUAGE_CODE = "en-us"
TIME_ZONE = "UTC"
USE_I18N = True
USE_TZ = True
CSRF_TRUSTED_ORIGINS: list[str] = []
INSTALLED_APPS = [
"django.contrib.auth",
"django.contrib.contenttypes",
"django.contrib.sessions",
"django.contrib.messages",
"django.contrib.staticfiles",
"allauth",
"allauth.account",
"allauth.socialaccount",
"allauth.mfa",
"paperless_migration",
]
MIDDLEWARE = [
"django.middleware.security.SecurityMiddleware",
"django.contrib.sessions.middleware.SessionMiddleware",
"django.middleware.common.CommonMiddleware",
"django.middleware.csrf.CsrfViewMiddleware",
"django.contrib.auth.middleware.AuthenticationMiddleware",
"django.contrib.messages.middleware.MessageMiddleware",
"django.middleware.clickjacking.XFrameOptionsMiddleware",
"allauth.account.middleware.AccountMiddleware",
]
ROOT_URLCONF = "paperless_migration.urls"
TEMPLATES = [
{
"BACKEND": "django.template.backends.django.DjangoTemplates",
"DIRS": [],
"APP_DIRS": True,
"OPTIONS": {
"context_processors": [
"django.template.context_processors.request",
"django.contrib.auth.context_processors.auth",
"django.contrib.messages.context_processors.messages",
],
},
},
]
WSGI_APPLICATION = "paperless_migration.wsgi.application"
AUTHENTICATION_BACKENDS = [
"django.contrib.auth.backends.ModelBackend",
"allauth.account.auth_backends.AuthenticationBackend",
]
STATIC_URL = "/static/"
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
LOGIN_URL = "/accounts/login/"
LOGIN_REDIRECT_URL = "/migration/"
LOGOUT_REDIRECT_URL = "/accounts/login/?loggedout=1"
ACCOUNT_ADAPTER = "allauth.account.adapter.DefaultAccountAdapter"
ACCOUNT_AUTHENTICATED_LOGIN_REDIRECTS = False
SOCIALACCOUNT_ADAPTER = "allauth.socialaccount.adapter.DefaultSocialAccountAdapter"
SOCIALACCOUNT_ENABLED = False
SESSION_ENGINE = "django.contrib.sessions.backends.db"
MIGRATION_EXPORT_PATH = os.getenv(
"PAPERLESS_MIGRATION_EXPORT_PATH",
"/data/export.json",
)
MIGRATION_TRANSFORMED_PATH = os.getenv(
"PAPERLESS_MIGRATION_TRANSFORMED_PATH",
"/data/export.v3.json",
)

View File

@@ -1,61 +0,0 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Paperless-ngx Migration Mode</title>
</head>
<body>
<main>
<h1>Migration Mode</h1>
<p>
This instance is running in migration mode. Use this interface to run
the v2 → v3 migration.
</p>
{% if messages %}
<ul>
{% for message in messages %}
<li>{{ message }}</li>
{% endfor %}
</ul>
{% endif %}
<section>
<h2>Step 1 — Export (v2)</h2>
<p>Expected export file:</p>
<ul>
<li><strong>Path:</strong> {{ export_path }}</li>
<li><strong>Status:</strong> {{ export_exists|yesno:"Found,Missing" }}</li>
</ul>
<form method="post">
{% csrf_token %}
<button type="submit" name="action" value="check">
Re-check export
</button>
</form>
</section>
<section>
<h2>Step 2 — Transform</h2>
<p>Expected transformed file:</p>
<ul>
<li><strong>Path:</strong> {{ transformed_path }}</li>
<li><strong>Status:</strong> {{ transformed_exists|yesno:"Found,Missing" }}</li>
</ul>
<form method="post">
{% csrf_token %}
<button type="submit" name="action" value="transform">
Transform export
</button>
</form>
</section>
<section>
<h2>Step 3 — Import (v3)</h2>
<form method="post">
{% csrf_token %}
<button type="submit" name="action" value="import">
Import transformed data
</button>
</form>
</section>
</main>
</body>
</html>

View File

@@ -1,9 +0,0 @@
from django.urls import include
from django.urls import path
from paperless_migration import views
urlpatterns = [
path("accounts/", include("allauth.urls")),
path("migration/", views.migration_home, name="migration_home"),
]

View File

@@ -1,46 +0,0 @@
from pathlib import Path
from django.contrib import messages
from django.contrib.auth.decorators import login_required
from django.http import HttpResponseForbidden
from django.shortcuts import redirect
from django.shortcuts import render
from django.views.decorators.http import require_http_methods
from paperless_migration import settings
@login_required
@require_http_methods(["GET", "POST"])
def migration_home(request):
if not request.user.is_superuser:
return HttpResponseForbidden("Superuser access required")
export_path = Path(settings.MIGRATION_EXPORT_PATH)
transformed_path = Path(settings.MIGRATION_TRANSFORMED_PATH)
if request.method == "POST":
action = request.POST.get("action")
if action == "check":
messages.success(request, "Checked export paths.")
elif action == "transform":
messages.info(
request,
"Transform step is not implemented yet.",
)
elif action == "import":
messages.info(
request,
"Import step is not implemented yet.",
)
else:
messages.error(request, "Unknown action.")
return redirect("migration_home")
context = {
"export_path": export_path,
"export_exists": export_path.exists(),
"transformed_path": transformed_path,
"transformed_exists": transformed_path.exists(),
}
return render(request, "paperless_migration/migration_home.html", context)

View File

@@ -1,7 +0,0 @@
import os
from django.core.wsgi import get_wsgi_application
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless_migration.settings")
application = get_wsgi_application()

uv.lock generated
View File

@@ -2934,7 +2934,7 @@ wheels = [
[[package]]
name = "paperless-ngx"
version = "2.20.5"
version = "2.20.4"
source = { virtual = "." }
dependencies = [
{ name = "azure-ai-documentintelligence", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },