Compare commits


1 Commit

Author: Antoine Mérino
SHA1: 65aed2405c
Message: Documentation: update notes for DB pool size (#11600)
Date: 2025-12-30 13:06:21 -08:00
31 changed files with 122 additions and 410 deletions

View File

@@ -115,7 +115,7 @@ jobs:
--frozen \
mkdocs gh-deploy --force --no-history
- name: Upload artifact
uses: actions/upload-artifact@v6
uses: actions/upload-artifact@v5
with:
name: documentation
path: site/
@@ -215,7 +215,7 @@ jobs:
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
- name: Cache frontend dependencies
id: cache-frontend-deps
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: |
~/.pnpm-store
@@ -248,7 +248,7 @@ jobs:
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
- name: Cache frontend dependencies
id: cache-frontend-deps
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: |
~/.pnpm-store
@@ -301,7 +301,7 @@ jobs:
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
- name: Cache frontend dependencies
id: cache-frontend-deps
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: |
~/.pnpm-store
@@ -333,7 +333,7 @@ jobs:
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
- name: Cache frontend dependencies
id: cache-frontend-deps
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: |
~/.pnpm-store
@@ -476,7 +476,7 @@ jobs:
docker cp frontend-extract:/usr/src/paperless/src/documents/static/frontend src/documents/static/frontend/
- name: Upload frontend artifact
if: steps.build-vars.outputs.can-push == 'true'
uses: actions/upload-artifact@v6
uses: actions/upload-artifact@v5
with:
name: frontend-compiled
path: src/documents/static/frontend/
@@ -510,12 +510,12 @@ jobs:
sudo apt-get update -qq
sudo apt-get install -qq --no-install-recommends gettext liblept5
- name: Download frontend artifact
uses: actions/download-artifact@v7
uses: actions/download-artifact@v6
with:
name: frontend-compiled
path: src/documents/static/frontend/
- name: Download documentation artifact
uses: actions/download-artifact@v7
uses: actions/download-artifact@v6
with:
name: documentation
path: docs/_build/html/
@@ -578,7 +578,7 @@ jobs:
sudo chown -R 1000:1000 paperless-ngx/
tar -cJf paperless-ngx.tar.xz paperless-ngx/
- name: Upload release artifact
uses: actions/upload-artifact@v6
uses: actions/upload-artifact@v5
with:
name: release
path: dist/paperless-ngx.tar.xz
@@ -595,7 +595,7 @@ jobs:
if: github.ref_type == 'tag' && (startsWith(github.ref_name, 'v') || contains(github.ref_name, '-beta.rc'))
steps:
- name: Download release artifact
uses: actions/download-artifact@v7
uses: actions/download-artifact@v6
with:
name: release
path: ./

View File

@@ -37,7 +37,7 @@ jobs:
if: github.repository_owner == 'paperless-ngx'
runs-on: ubuntu-24.04
steps:
- uses: dessant/lock-threads@v6
- uses: dessant/lock-threads@v5
with:
issue-inactive-days: '30'
pr-inactive-days: '30'

View File

@@ -12,11 +12,9 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v6
env:
GH_REF: ${{ github.ref }} # sonar rule:githubactions:S7630 - avoid injection
with:
token: ${{ secrets.PNGX_BOT_PAT }}
ref: ${{ env.GH_REF }}
ref: ${{ github.head_ref }}
- name: Set up Python
id: setup-python
uses: actions/setup-python@v6
@@ -47,7 +45,7 @@ jobs:
cache-dependency-path: 'src-ui/pnpm-lock.yaml'
- name: Cache frontend dependencies
id: cache-frontend-deps
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: |
~/.pnpm-store

View File

@@ -170,11 +170,18 @@ Available options are `postgresql` and `mariadb`.
!!! note
A small pool is typically sufficient — for example, a size of 4.
Make sure your PostgreSQL server's max_connections setting is large enough to handle:
`(Paperless workers + Celery workers) × pool size + safety margin`
For example, with 4 Paperless workers and 2 Celery workers, and a pool size of 4:
(4 + 2) × 4 + 10 = 34 connections required.
A pool of 8-10 connections per worker is typically sufficient.
If you encounter error messages such as `couldn't get a connection`
or database connection timeouts, you probably need to increase the pool size.
!!! warning
Make sure your PostgreSQL `max_connections` setting is large enough to handle the connection pools:
`(NB_PAPERLESS_WORKERS + NB_CELERY_WORKERS) × POOL_SIZE + SAFETY_MARGIN`. For example, with
4 Paperless workers and 2 Celery workers, and a pool size of 8: `(4 + 2) × 8 + 10 = 58`,
so `max_connections = 60` (or even more) is appropriate.
This assumes only Paperless-ngx connects to your PostgreSQL instance. If you have other applications,
you should increase `max_connections` accordingly.
#### [`PAPERLESS_DB_READ_CACHE_ENABLED=<bool>`](#PAPERLESS_DB_READ_CACHE_ENABLED) {#PAPERLESS_DB_READ_CACHE_ENABLED}
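The sizing rule in the updated note is easy to sanity-check. A minimal sketch, using illustrative names rather than actual Paperless-ngx settings:

```python
# Sanity check of the documented sizing formula (illustrative names only).
def required_max_connections(
    paperless_workers: int,
    celery_workers: int,
    pool_size: int,
    safety_margin: int = 10,
) -> int:
    """(Paperless workers + Celery workers) × pool size + safety margin."""
    return (paperless_workers + celery_workers) * pool_size + safety_margin

# The two worked examples from the note and the warning above:
assert required_max_connections(4, 2, pool_size=4) == 34
assert required_max_connections(4, 2, pool_size=8) == 58
```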

View File

@@ -31,7 +31,6 @@
"fi-FI": "src/locale/messages.fi_FI.xlf",
"fr-FR": "src/locale/messages.fr_FR.xlf",
"hu-HU": "src/locale/messages.hu_HU.xlf",
"id-ID": "src/locale/messages.id_ID.xlf",
"it-IT": "src/locale/messages.it_IT.xlf",
"ja-JP": "src/locale/messages.ja_JP.xlf",
"lb-LU": "src/locale/messages.lb_LU.xlf",

View File

@@ -10028,186 +10028,179 @@
<context context-type="linenumber">135</context>
</context-group>
</trans-unit>
<trans-unit id="8312065814232621608" datatype="html">
<source>Indonesian</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">141</context>
</context-group>
</trans-unit>
<trans-unit id="2935232983274991580" datatype="html">
<source>Italian</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">147</context>
<context context-type="linenumber">141</context>
</context-group>
</trans-unit>
<trans-unit id="6924606686202701860" datatype="html">
<source>Japanese</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">153</context>
<context context-type="linenumber">147</context>
</context-group>
</trans-unit>
<trans-unit id="6145439649200570157" datatype="html">
<source>Korean</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">159</context>
<context context-type="linenumber">153</context>
</context-group>
</trans-unit>
<trans-unit id="1334425850005897370" datatype="html">
<source>Luxembourgish</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">165</context>
<context context-type="linenumber">159</context>
</context-group>
</trans-unit>
<trans-unit id="3071065188816255493" datatype="html">
<source>Dutch</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">171</context>
<context context-type="linenumber">165</context>
</context-group>
</trans-unit>
<trans-unit id="8069284467804715623" datatype="html">
<source>Norwegian</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">177</context>
<context context-type="linenumber">171</context>
</context-group>
</trans-unit>
<trans-unit id="4977087909184008115" datatype="html">
<source>Persian</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">183</context>
<context context-type="linenumber">177</context>
</context-group>
</trans-unit>
<trans-unit id="792060551707690640" datatype="html">
<source>Polish</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">189</context>
<context context-type="linenumber">183</context>
</context-group>
</trans-unit>
<trans-unit id="9184513005098760425" datatype="html">
<source>Portuguese (Brazil)</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">195</context>
<context context-type="linenumber">189</context>
</context-group>
</trans-unit>
<trans-unit id="153799456510623899" datatype="html">
<source>Portuguese</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">201</context>
<context context-type="linenumber">195</context>
</context-group>
</trans-unit>
<trans-unit id="8118856427047826368" datatype="html">
<source>Romanian</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">207</context>
<context context-type="linenumber">201</context>
</context-group>
</trans-unit>
<trans-unit id="7137419789978325708" datatype="html">
<source>Russian</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">213</context>
<context context-type="linenumber">207</context>
</context-group>
</trans-unit>
<trans-unit id="9102963095355753902" datatype="html">
<source>Slovak</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">219</context>
<context context-type="linenumber">213</context>
</context-group>
</trans-unit>
<trans-unit id="4287008301409320881" datatype="html">
<source>Slovenian</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">225</context>
<context context-type="linenumber">219</context>
</context-group>
</trans-unit>
<trans-unit id="8608389829607915090" datatype="html">
<source>Serbian</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">231</context>
<context context-type="linenumber">225</context>
</context-group>
</trans-unit>
<trans-unit id="499386805970351976" datatype="html">
<source>Swedish</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">237</context>
<context context-type="linenumber">231</context>
</context-group>
</trans-unit>
<trans-unit id="5682359291233237791" datatype="html">
<source>Turkish</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">243</context>
<context context-type="linenumber">237</context>
</context-group>
</trans-unit>
<trans-unit id="3578644052206125685" datatype="html">
<source>Ukrainian</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">249</context>
<context context-type="linenumber">243</context>
</context-group>
</trans-unit>
<trans-unit id="3611216939636790848" datatype="html">
<source>Vietnamese</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">255</context>
<context context-type="linenumber">249</context>
</context-group>
</trans-unit>
<trans-unit id="4689443708886954687" datatype="html">
<source>Chinese Simplified</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">261</context>
<context context-type="linenumber">255</context>
</context-group>
</trans-unit>
<trans-unit id="8082606363137705994" datatype="html">
<source>Chinese Traditional</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">267</context>
<context context-type="linenumber">261</context>
</context-group>
</trans-unit>
<trans-unit id="4912706592792948707" datatype="html">
<source>ISO 8601</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">275</context>
<context context-type="linenumber">269</context>
</context-group>
</trans-unit>
<trans-unit id="313643372755303297" datatype="html">
<source>Successfully completed one-time migratration of settings to the database!</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">609</context>
<context context-type="linenumber">603</context>
</context-group>
</trans-unit>
<trans-unit id="5558341108007064934" datatype="html">
<source>Unable to migrate settings to the database, please try saving manually.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">610</context>
<context context-type="linenumber">604</context>
</context-group>
</trans-unit>
<trans-unit id="1168781785897678748" datatype="html">
<source>You can restart the tour from the settings page.</source>
<context-group purpose="location">
<context context-type="sourcefile">src/app/services/settings.service.ts</context>
<context context-type="linenumber">683</context>
<context context-type="linenumber">677</context>
</context-group>
</trans-unit>
<trans-unit id="3852289441366561594" datatype="html">

View File

@@ -28,7 +28,6 @@ import localeFa from '@angular/common/locales/fa'
import localeFi from '@angular/common/locales/fi'
import localeFr from '@angular/common/locales/fr'
import localeHu from '@angular/common/locales/hu'
import localeId from '@angular/common/locales/id'
import localeIt from '@angular/common/locales/it'
import localeJa from '@angular/common/locales/ja'
import localeKo from '@angular/common/locales/ko'
@@ -64,7 +63,6 @@ registerLocaleData(localeFa)
registerLocaleData(localeFi)
registerLocaleData(localeFr)
registerLocaleData(localeHu)
registerLocaleData(localeId)
registerLocaleData(localeIt)
registerLocaleData(localeJa)
registerLocaleData(localeKo)

View File

@@ -14,7 +14,7 @@
@if (previewText) {
<div class="bg-light p-3 overflow-auto whitespace-preserve" width="100%">{{previewText}}</div>
} @else {
<object [data]="previewUrl | safeUrl" width="100%" class="bg-light" [class.p-2]="!isPdf"></object>
<object [data]="previewURL | safeUrl" width="100%" class="bg-light" [class.p-2]="!isPdf"></object>
}
} @else {
@if (requiresPassword) {
@@ -24,7 +24,7 @@
}
@if (!requiresPassword) {
<pdf-viewer
[src]="previewUrl"
[src]="previewURL"
[original-size]="false"
[show-borders]="false"
[show-all]="true"

View File

@@ -71,7 +71,7 @@ export class PreviewPopupComponent implements OnDestroy {
return (this.isPdf && this.useNativePdfViewer) || !this.isPdf
}
get previewUrl() {
get previewURL() {
return this.documentService.getPreviewUrl(this.document.id)
}
@@ -93,7 +93,7 @@ export class PreviewPopupComponent implements OnDestroy {
init() {
if (this.document.mime_type?.includes('text')) {
this.http
.get(this.previewUrl, { responseType: 'text' })
.get(this.previewURL, { responseType: 'text' })
.pipe(first(), takeUntil(this.unsubscribeNotifier))
.subscribe({
next: (res) => {
@@ -126,6 +126,10 @@ export class PreviewPopupComponent implements OnDestroy {
}
}
get previewUrl() {
return this.documentService.getPreviewUrl(this.document.id)
}
mouseEnterPreview() {
this.mouseOnPreview = true
if (!this.popover.isOpen()) {

View File

@@ -379,7 +379,7 @@
<ng-template #previewContent>
<div class="thumb-preview position-absolute pe-none text-center" [class.fade]="previewLoaded">
@if (showThumbnailOverlay) {
<img [src]="thumbUrl" class="mx-auto" [attr.width]="previewZoomScale === 'page-fit' ? 'auto' : '100%'" [attr.height]="previewZoomScale === 'page-fit' ? '100%' : 'auto'" alt="Document loading..." i18n-alt />
<img [src]="thumbUrl | safeUrl" class="mx-auto" [attr.width]="previewZoomScale === 'page-fit' ? 'auto' : '100%'" [attr.height]="previewZoomScale === 'page-fit' ? '100%' : 'auto'" alt="Document loading..." i18n-alt />
}
<div class="position-absolute top-0 start-0 m-2 p-2 d-flex align-items-center justify-content-center">
<div>
@@ -414,7 +414,7 @@
}
@case (ContentRenderType.Image) {
<div class="preview-sticky">
<img [src]="previewUrl" width="100%" height="100%" alt="{{title}}" />
<img [src]="previewUrl | safeUrl" width="100%" height="100%" alt="{{title}}" />
</div>
}
@case (ContentRenderType.TIFF) {

View File

@@ -136,12 +136,6 @@ const LANGUAGE_OPTIONS = [
englishName: 'Hungarian',
dateInputFormat: 'yyyy.mm.dd',
},
{
code: 'id-id',
name: $localize`Indonesian`,
englishName: 'Indonesian',
dateInputFormat: 'dd-mm-yyyy',
},
{
code: 'it-it',
name: $localize`Italian`,

View File

@@ -171,7 +171,6 @@ import localeFa from '@angular/common/locales/fa'
import localeFi from '@angular/common/locales/fi'
import localeFr from '@angular/common/locales/fr'
import localeHu from '@angular/common/locales/hu'
import localeId from '@angular/common/locales/id'
import localeIt from '@angular/common/locales/it'
import localeJa from '@angular/common/locales/ja'
import localeKo from '@angular/common/locales/ko'
@@ -210,7 +209,6 @@ registerLocaleData(localeFa)
registerLocaleData(localeFi)
registerLocaleData(localeFr)
registerLocaleData(localeHu)
registerLocaleData(localeId)
registerLocaleData(localeIt)
registerLocaleData(localeJa)
registerLocaleData(localeKo)

View File

@@ -186,11 +186,7 @@ class BarcodePlugin(ConsumeTaskPlugin):
# Update/overwrite an ASN if possible
# After splitting, as otherwise each split document gets the same ASN
if (
self.settings.barcode_enable_asn
and not self.metadata.skip_asn
and (located_asn := self.asn) is not None
):
if self.settings.barcode_enable_asn and (located_asn := self.asn) is not None:
logger.info(f"Found ASN in barcode: {located_asn}")
self.metadata.asn = located_asn

View File

@@ -433,8 +433,6 @@ def merge(
if user is not None:
overrides.owner_id = user.id
# Avoid copying or detecting ASN from merged PDFs to prevent collision
overrides.skip_asn = True
logger.info("Adding merged document to the task queue.")

View File

@@ -46,7 +46,6 @@ from documents.signals.handlers import run_workflows
from documents.templating.workflows import parse_w_workflow_placeholders
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
from documents.utils import normalize_nfc
from documents.utils import run_subprocess
from paperless_mail.parsers import MailDocumentParser
@@ -112,12 +111,7 @@ class ConsumerPluginMixin:
self.renew_logging_group()
self.metadata.filename = normalize_nfc(self.metadata.filename)
self.metadata.title = normalize_nfc(self.metadata.title)
self.filename = normalize_nfc(
self.metadata.filename or self.input_doc.original_file.name,
)
self.filename = self.metadata.filename or self.input_doc.original_file.name
def _send_progress(
self,
@@ -658,8 +652,6 @@ class ConsumerPlugin(
f"Error occurred parsing title override '{self.metadata.title}', falling back to original. Exception: {e}",
)
title = normalize_nfc(title)
file_for_checksum = (
self.unmodified_original
if self.unmodified_original is not None
@@ -704,7 +696,7 @@ class ConsumerPlugin(
pk=self.metadata.storage_path_id,
)
if self.metadata.asn is not None and not self.metadata.skip_asn:
if self.metadata.asn is not None:
document.archive_serial_number = self.metadata.asn
if self.metadata.owner_id:
@@ -820,8 +812,8 @@ class ConsumerPreflightPlugin(
"""
Check that if override_asn is given, it is unique and within a valid range
"""
if self.metadata.skip_asn or self.metadata.asn is None:
# if skip is set or ASN is None
if self.metadata.asn is None:
# check not necessary in case no ASN gets set
return
# Validate the range is above zero and less than uint32_t max
# otherwise, Whoosh can't handle it in the index

View File

@@ -22,7 +22,7 @@ class DocumentMetadataOverrides:
document_type_id: int | None = None
tag_ids: list[int] | None = None
storage_path_id: int | None = None
created: datetime.date | None = None
created: datetime.datetime | None = None
asn: int | None = None
owner_id: int | None = None
view_users: list[int] | None = None
@@ -30,7 +30,6 @@ class DocumentMetadataOverrides:
change_users: list[int] | None = None
change_groups: list[int] | None = None
custom_fields: dict | None = None
skip_asn: bool = False
def update(self, other: "DocumentMetadataOverrides") -> "DocumentMetadataOverrides":
"""
@@ -50,8 +49,6 @@ class DocumentMetadataOverrides:
self.storage_path_id = other.storage_path_id
if other.owner_id is not None:
self.owner_id = other.owner_id
if other.skip_asn:
self.skip_asn = True
# merge
if self.tag_ids is None:
@@ -103,7 +100,6 @@ class DocumentMetadataOverrides:
overrides.storage_path_id = doc.storage_path.id if doc.storage_path else None
overrides.owner_id = doc.owner.id if doc.owner else None
overrides.tag_ids = list(doc.tags.values_list("id", flat=True))
overrides.created = doc.created
overrides.view_users = list(
get_users_with_perms(

View File

@@ -6,7 +6,6 @@ from django.conf import settings
from documents.models import Document
from documents.templating.filepath import validate_filepath_template_and_render
from documents.templating.utils import convert_format_str_to_template_format
from documents.utils import normalize_nfc
def create_source_path_directory(source_path: Path) -> None:
@@ -56,11 +55,11 @@ def generate_unique_filename(doc, *, archive_filename=False) -> Path:
"""
if archive_filename:
old_filename: Path | None = (
Path(normalize_nfc(doc.archive_filename)) if doc.archive_filename else None
Path(doc.archive_filename) if doc.archive_filename else None
)
root = settings.ARCHIVE_DIR
else:
old_filename = Path(normalize_nfc(doc.filename)) if doc.filename else None
old_filename = Path(doc.filename) if doc.filename else None
root = settings.ORIGINALS_DIR
# If generating archive filenames, try to make a name that is similar to
@@ -92,7 +91,7 @@ def generate_unique_filename(doc, *, archive_filename=False) -> Path:
)
if new_filename == old_filename:
# still the same as before.
return Path(normalize_nfc(str(new_filename)))
return new_filename
if (root / new_filename).exists():
counter += 1
@@ -120,7 +119,7 @@ def format_filename(document: Document, template_str: str) -> str | None:
"none",
) # backward compatibility
return normalize_nfc(rendered_filename)
return rendered_filename
def generate_filename(
@@ -175,4 +174,4 @@ def generate_filename(
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
full_path = full_path.with_suffix(full_path.suffix + ".gpg")
return Path(normalize_nfc(str(full_path)))
return full_path

View File

@@ -41,7 +41,6 @@ from documents.models import PaperlessTask
from documents.models import ShareLink
from documents.models import StoragePath
from documents.models import Tag
from documents.utils import normalize_nfc
if TYPE_CHECKING:
from collections.abc import Callable
@@ -163,11 +162,7 @@ class TitleContentFilter(Filter):
def filter(self, qs, value):
value = value.strip() if isinstance(value, str) else value
if value:
normalized = normalize_nfc(value) or ""
folded = normalized.casefold()
return qs.filter(
Q(title__icontains=folded) | Q(content__icontains=folded),
)
return qs.filter(Q(title__icontains=value) | Q(content__icontains=value))
else:
return qs

View File

@@ -3,7 +3,6 @@ from __future__ import annotations
import logging
import math
import re
import unicodedata
from collections import Counter
from contextlib import contextmanager
from datetime import datetime
@@ -59,14 +58,6 @@ if TYPE_CHECKING:
logger = logging.getLogger("paperless.index")
def _normalize_for_index(value: str | None) -> str | None:
"""Normalize text to NFC for consistent search/index matching."""
if value is None:
return None
return unicodedata.normalize("NFC", value)
def get_schema() -> Schema:
return Schema(
id=NUMERIC(stored=True, unique=True),
@@ -172,41 +163,37 @@ def update_document(writer: AsyncWriter, doc: Document) -> None:
viewer_ids: str = ",".join([str(u.id) for u in users_with_perms])
writer.update_document(
id=doc.pk,
title=_normalize_for_index(doc.title),
content=_normalize_for_index(doc.content),
correspondent=_normalize_for_index(
doc.correspondent.name if doc.correspondent else None,
),
title=doc.title,
content=doc.content,
correspondent=doc.correspondent.name if doc.correspondent else None,
correspondent_id=doc.correspondent.id if doc.correspondent else None,
has_correspondent=doc.correspondent is not None,
tag=_normalize_for_index(tags) if tags else None,
tag=tags if tags else None,
tag_id=tags_ids if tags_ids else None,
has_tag=len(tags) > 0,
type=_normalize_for_index(
doc.document_type.name if doc.document_type else None,
),
type=doc.document_type.name if doc.document_type else None,
type_id=doc.document_type.id if doc.document_type else None,
has_type=doc.document_type is not None,
created=datetime.combine(doc.created, time.min),
added=doc.added,
asn=asn,
modified=doc.modified,
path=_normalize_for_index(doc.storage_path.name if doc.storage_path else None),
path=doc.storage_path.name if doc.storage_path else None,
path_id=doc.storage_path.id if doc.storage_path else None,
has_path=doc.storage_path is not None,
notes=_normalize_for_index(notes),
notes=notes,
num_notes=len(notes),
custom_fields=_normalize_for_index(custom_fields),
custom_fields=custom_fields,
custom_field_count=len(doc.custom_fields.all()),
has_custom_fields=len(custom_fields) > 0,
custom_fields_id=custom_fields_ids if custom_fields_ids else None,
owner=_normalize_for_index(doc.owner.username if doc.owner else None),
owner=doc.owner.username if doc.owner else None,
owner_id=doc.owner.id if doc.owner else None,
has_owner=doc.owner is not None,
viewer_id=viewer_ids if viewer_ids else None,
checksum=doc.checksum,
page_count=doc.page_count,
original_filename=_normalize_for_index(doc.original_filename),
original_filename=doc.original_filename,
is_shared=len(viewer_ids) > 0,
)
logger.debug(f"Index updated for document {doc.pk}.")
@@ -434,7 +421,7 @@ class LocalDateParser(English):
class DelayedFullTextQuery(DelayedQuery):
def _get_query(self) -> tuple:
q_str = _normalize_for_index(self.query_params["query"]) or ""
q_str = self.query_params["query"]
q_str = rewrite_natural_date_keywords(q_str)
qp = MultifieldParser(
[
@@ -473,12 +460,7 @@ class DelayedFullTextQuery(DelayedQuery):
class DelayedMoreLikeThisQuery(DelayedQuery):
def _get_query(self) -> tuple:
more_like_doc_id = int(self.query_params["more_like_id"])
content = (
_normalize_for_index(
Document.objects.get(id=more_like_doc_id).content,
)
or ""
)
content = Document.objects.get(id=more_like_doc_id).content
docnum = self.searcher.document_number(id=more_like_doc_id)
kts = self.searcher.key_terms_from_text(
@@ -506,7 +488,6 @@ def autocomplete(
Mimics whoosh.reading.IndexReader.most_distinctive_terms with permissions
and without scoring
"""
term = _normalize_for_index(term) or ""
terms = []
with ix.searcher(weighting=TF_IDF()) as s:

View File

@@ -2,12 +2,10 @@ from __future__ import annotations
import logging
import re
import unicodedata
from fnmatch import fnmatch
from fnmatch import translate as fnmatch_translate
from typing import TYPE_CHECKING
from django.db.models import Q
from rest_framework import serializers
from documents.data_models import ConsumableDocument
@@ -23,7 +21,6 @@ from documents.models import Workflow
from documents.models import WorkflowTrigger
from documents.permissions import get_objects_for_user_owner_aware
from documents.regex import safe_regex_search
from documents.utils import normalize_nfc
if TYPE_CHECKING:
from django.db.models import QuerySet
@@ -33,34 +30,6 @@ if TYPE_CHECKING:
logger = logging.getLogger("paperless.matching")
def _normalize_glob_value(value: str) -> str:
"""Normalize strings for glob-style matching (case-insensitive)."""
return (normalize_nfc(value) or "").casefold()
def _normalized_fnmatch(name: str, pattern: str) -> bool:
"""Canonicalize Unicode and compare using fnmatch semantics."""
return fnmatch(_normalize_glob_value(name), _normalize_glob_value(pattern))
def _glob_regex_variants(pattern: str) -> list[str]:
"""
Build regex patterns that match both NFC and NFD forms of a glob pattern.
Using both forms lets DB prefilters remain Unicode-normalization agnostic.
"""
regexes = set()
for normalized in {
normalize_nfc(pattern) or "",
unicodedata.normalize("NFD", pattern),
}:
regex = fnmatch_translate(normalized).lstrip("^").rstrip("$")
regexes.add(regex)
return list(regexes)
def log_reason(
matching_model: MatchingModel | WorkflowTrigger,
document: Document,
@@ -336,9 +305,9 @@ def consumable_document_matches_workflow(
if (
trigger.filter_filename is not None
and len(trigger.filter_filename) > 0
and not _normalized_fnmatch(
document.original_file.name,
trigger.filter_filename,
and not fnmatch(
document.original_file.name.lower(),
trigger.filter_filename.lower(),
)
):
reason = (
@@ -359,7 +328,7 @@ def consumable_document_matches_workflow(
if (
trigger.filter_path is not None
and len(trigger.filter_path) > 0
and not _normalized_fnmatch(
and not fnmatch(
match_against,
trigger.filter_path,
)
@@ -523,9 +492,9 @@ def existing_document_matches_workflow(
trigger.filter_filename is not None
and len(trigger.filter_filename) > 0
and document.original_filename is not None
and not _normalized_fnmatch(
document.original_filename,
trigger.filter_filename,
and not fnmatch(
document.original_filename.lower(),
trigger.filter_filename.lower(),
)
):
return (
@@ -604,11 +573,8 @@ def prefilter_documents_by_workflowtrigger(
documents = documents.annotate(**annotations).filter(custom_field_q)
if trigger.filter_filename:
regexes = _glob_regex_variants(trigger.filter_filename)
filename_q = Q()
for regex in regexes:
filename_q |= Q(original_filename__iregex=regex)
documents = documents.filter(filename_q)
regex = fnmatch_translate(trigger.filter_filename).lstrip("^").rstrip("$")
documents = documents.filter(original_filename__iregex=regex)
return documents
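For context on the hunk above: the reverted prefilter turns a user-supplied filename glob into a regex and strips any anchoring so the database can apply it case-insensitively via `__iregex`. A minimal standalone sketch of that conversion (the glob value is illustrative):

```python
import re
from fnmatch import translate as fnmatch_translate

# Convert a filename glob to a regex the way the prefilter above does,
# dropping any leading "^" / trailing "$" anchors translate() may emit.
glob = "*invoice*"
regex = fnmatch_translate(glob).lstrip("^").rstrip("$")

# The regex accepts the same names the glob would, case-insensitively.
assert re.match(regex, "2024-Invoice-042.pdf", flags=re.IGNORECASE)
assert not re.match(regex, "receipt.pdf", flags=re.IGNORECASE)
```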

View File

@@ -18,8 +18,6 @@ from django.core.exceptions import ValidationError
from django.core.validators import DecimalValidator
from django.core.validators import EmailValidator
from django.core.validators import MaxLengthValidator
from django.core.validators import MaxValueValidator
from django.core.validators import MinValueValidator
from django.core.validators import RegexValidator
from django.core.validators import integer_validator
from django.db.models import Count
@@ -877,13 +875,6 @@ class CustomFieldInstanceSerializer(serializers.ModelSerializer):
uri_validator(data["value"])
elif field.data_type == CustomField.FieldDataType.INT:
integer_validator(data["value"])
try:
value_int = int(data["value"])
except (TypeError, ValueError):
raise serializers.ValidationError("Enter a valid integer.")
# Keep values within the PostgreSQL integer range
MinValueValidator(-2147483648)(value_int)
MaxValueValidator(2147483647)(value_int)
elif (
field.data_type == CustomField.FieldDataType.MONETARY
and data["value"] != ""

View File

@@ -1664,44 +1664,6 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
self.consume_file_mock.assert_not_called()
def test_patch_document_integer_custom_field_out_of_range(self):
"""
GIVEN:
- An integer custom field
- A document
WHEN:
- Patching the document with an integer value exceeding PostgreSQL's range
THEN:
- HTTP 400 is returned (validation catches the overflow)
- No custom field instance is created
"""
cf_int = CustomField.objects.create(
name="intfield",
data_type=CustomField.FieldDataType.INT,
)
doc = Document.objects.create(
title="Doc",
checksum="123",
mime_type="application/pdf",
)
response = self.client.patch(
f"/api/documents/{doc.pk}/",
{
"custom_fields": [
{
"field": cf_int.pk,
"value": 2**31, # overflow for PostgreSQL integer fields
},
],
},
format="json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn("custom_fields", response.data)
self.assertEqual(CustomFieldInstance.objects.count(), 0)
def test_upload_with_webui_source(self):
"""
GIVEN: A document with a source file

View File

@@ -89,23 +89,6 @@ class TestDocumentSearchApi(DirectoriesMixin, APITestCase):
self.assertEqual(len(results), 0)
self.assertCountEqual(response.data["all"], [])
def test_search_handles_diacritics_normalization(self):
doc = Document.objects.create(
title="certida\u0303o de nascimento",
content="birth record without keyword",
checksum="D",
pk=10,
)
with AsyncWriter(index.open_index()) as writer:
index.update_document(writer, doc)
response = self.client.get("/api/documents/?query=certidão")
self.assertEqual(response.status_code, status.HTTP_200_OK)
results = response.data["results"]
self.assertEqual(response.data["count"], 1)
self.assertEqual(len(results), 1)
self.assertEqual(results[0]["id"], doc.id)
def test_search_custom_field_ordering(self):
custom_field = CustomField.objects.create(
name="Sortable field",

View File

@@ -581,7 +581,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
- Consume file should be called
"""
doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
metadata_document_id = self.doc2.id
metadata_document_id = self.doc1.id
user = User.objects.create(username="test_user")
result = bulk_edit.merge(
@@ -602,14 +602,11 @@ class TestPDFActions(DirectoriesMixin, TestCase):
expected_filename,
)
self.assertEqual(consume_file_args[1].title, None)
self.assertTrue(consume_file_args[1].skip_asn)
# With metadata_document_id overrides
result = bulk_edit.merge(doc_ids, metadata_document_id=metadata_document_id)
consume_file_args, _ = mock_consume_file.call_args
self.assertEqual(consume_file_args[1].title, "B (merged)")
self.assertEqual(consume_file_args[1].created, self.doc2.created)
self.assertTrue(consume_file_args[1].skip_asn)
self.assertEqual(consume_file_args[1].title, "A (merged)")
self.assertEqual(result, "OK")
@@ -650,7 +647,6 @@ class TestPDFActions(DirectoriesMixin, TestCase):
expected_filename,
)
self.assertEqual(consume_file_args[1].title, None)
self.assertTrue(consume_file_args[1].skip_asn)
delete_documents_args, _ = mock_delete_documents.call_args
self.assertEqual(

View File

@@ -290,23 +290,6 @@ class TestConsumer(
self._assert_first_last_send_progress()
def test_override_filename_normalized(self):
filename = self.get_test_file()
override_filename = "Inhaltsu\u0308bersicht.pdf"
with self.get_consumer(
filename,
DocumentMetadataOverrides(filename=override_filename),
) as consumer:
consumer.run()
document = Document.objects.first()
self.assertIsNotNone(document)
self.assertEqual(document.original_filename, "Inhaltsübersicht.pdf")
self.assertEqual(document.title, "Inhaltsübersicht")
self._assert_first_last_send_progress()
def testOverrideTitle(self):
with self.get_consumer(
self.get_test_file(),
@@ -321,25 +304,6 @@ class TestConsumer(
self.assertEqual(document.title, "Override Title")
self._assert_first_last_send_progress()
@override_settings(FILENAME_FORMAT="{{ title }}")
def test_filename_format_normalized(self):
filename = self.get_test_file()
title = "Inhaltsu\u0308bersicht Faszination"
with self.get_consumer(
filename,
DocumentMetadataOverrides(title=title),
) as consumer:
consumer.run()
document = Document.objects.first()
self.assertIsNotNone(document)
self.assertEqual(document.title, "Inhaltsübersicht Faszination")
self.assertEqual(document.filename, "Inhaltsübersicht Faszination.pdf")
self.assertIsFile(document.source_path)
self._assert_first_last_send_progress()
def testOverrideCorrespondent(self):
c = Correspondent.objects.create(name="test")
@@ -448,14 +412,6 @@ class TestConsumer(
self.assertEqual(document.archive_serial_number, 123)
self._assert_first_last_send_progress()
def testMetadataOverridesSkipAsnPropagation(self):
overrides = DocumentMetadataOverrides()
incoming = DocumentMetadataOverrides(skip_asn=True)
overrides.update(incoming)
self.assertTrue(overrides.skip_asn)
def testOverrideTitlePlaceholders(self):
c = Correspondent.objects.create(name="Correspondent Name")
dt = DocumentType.objects.create(name="DocType Name")

View File

@@ -557,50 +557,6 @@ class TestWorkflows(
expected_str = f"Document filename {test_file.name} does not match"
self.assertIn(expected_str, cm.output[1])
def test_workflow_match_filename_diacritics_normalized(self):
"""
GIVEN:
- Consumption workflow filtering on filename with diacritics
WHEN:
- File with decomposed Unicode filename is consumed
THEN:
- Workflow still matches and applies overrides
"""
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
sources=f"{DocumentSource.ApiUpload},{DocumentSource.ConsumeFolder},{DocumentSource.MailFetch}",
filter_filename="*račun*",
)
action = WorkflowAction.objects.create(
assign_title="Diacritics matched",
)
action.save()
w = Workflow.objects.create(
name="Workflow 1",
order=0,
)
w.triggers.add(trigger)
w.actions.add(action)
w.save()
decomposed_name = "rac\u030cun.pdf"
test_file = shutil.copy(
self.SAMPLE_DIR / "simple.pdf",
self.dirs.scratch_dir / decomposed_name,
)
with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
tasks.consume_file(
ConsumableDocument(
source=DocumentSource.ConsumeFolder,
original_file=test_file,
),
None,
)
document = Document.objects.first()
self.assertEqual(document.title, "Diacritics matched")
def test_workflow_no_match_path(self):
"""
GIVEN:
@@ -990,35 +946,6 @@ class TestWorkflows(
self.assertEqual(doc.correspondent, self.c2)
self.assertEqual(doc.title, f"Doc created in {created.year}")
def test_document_added_filename_diacritics_normalized(self):
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,
filter_filename="*račun*",
)
action = WorkflowAction.objects.create(
assign_title="Matched diacritics",
)
w = Workflow.objects.create(
name="Workflow 1",
order=0,
)
w.triggers.add(trigger)
w.actions.add(action)
w.save()
doc = Document.objects.create(
title="sample test",
correspondent=self.c,
original_filename="rac\u030cun.pdf",
)
document_consumption_finished.send(
sender=self.__class__,
document=doc,
)
self.assertEqual(doc.title, "Matched diacritics")
def test_document_added_no_match_filename(self):
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_ADDED,

View File

@@ -1,7 +1,5 @@
import logging
import shutil
import unicodedata
from os import PathLike
from os import utime
from pathlib import Path
from subprocess import CompletedProcess
@@ -18,14 +16,6 @@ def _coerce_to_path(
return Path(source).resolve(), Path(dest).resolve()
def normalize_nfc(value: str | PathLike[str] | None) -> str | None:
"""Return NFC-normalized string for filesystem-safe comparisons."""
if value is None:
return None
return unicodedata.normalize("NFC", str(value))
def copy_basic_file_stats(source: Path | str, dest: Path | str) -> None:
"""
Copies only the m_time and a_time attributes from source to destination.
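For context on the helper removed above: NFC normalization collapses visually identical but byte-different Unicode spellings, which is what the deleted tests (e.g. `Inhaltsu\u0308bersicht.pdf`) exercised. A minimal illustration:

```python
import unicodedata

# "ü" can be precomposed (U+00FC) or decomposed ("u" + combining diaeresis,
# U+0075 U+0308); the spellings render identically but compare unequal.
composed = "Inhalts\u00fcbersicht.pdf"
decomposed = "Inhaltsu\u0308bersicht.pdf"
assert composed != decomposed

# NFC normalization, as the removed normalize_nfc() helper did, makes the
# decomposed form equal to the precomposed one.
assert unicodedata.normalize("NFC", decomposed) == composed
```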

View File

@@ -708,7 +708,6 @@ class DocumentViewSet(
"title",
"correspondent__name",
"document_type__name",
"storage_path__name",
"created",
"modified",
"added",

View File

@@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: paperless-ngx\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-12-29 14:49+0000\n"
"POT-Creation-Date: 2025-12-12 17:41+0000\n"
"PO-Revision-Date: 2022-02-17 04:17\n"
"Last-Translator: \n"
"Language-Team: English\n"
@@ -1219,35 +1219,35 @@ msgstr ""
msgid "workflow runs"
msgstr ""
#: documents/serialisers.py:642
#: documents/serialisers.py:640
msgid "Invalid color."
msgstr ""
#: documents/serialisers.py:1835
#: documents/serialisers.py:1826
#, python-format
msgid "File type %(type)s not supported"
msgstr ""
#: documents/serialisers.py:1879
#: documents/serialisers.py:1870
#, python-format
msgid "Custom field id must be an integer: %(id)s"
msgstr ""
#: documents/serialisers.py:1886
#: documents/serialisers.py:1877
#, python-format
msgid "Custom field with id %(id)s does not exist"
msgstr ""
#: documents/serialisers.py:1903 documents/serialisers.py:1913
#: documents/serialisers.py:1894 documents/serialisers.py:1904
msgid ""
"Custom fields must be a list of integers or an object mapping ids to values."
msgstr ""
#: documents/serialisers.py:1908
#: documents/serialisers.py:1899
msgid "Some custom fields don't exist or were specified twice."
msgstr ""
#: documents/serialisers.py:2023
#: documents/serialisers.py:2014
msgid "Invalid variable detected."
msgstr ""
@@ -1767,86 +1767,82 @@ msgid "Hungarian"
msgstr ""
#: paperless/settings.py:789
msgid "Indonesian"
msgstr ""
#: paperless/settings.py:790
msgid "Italian"
msgstr ""
#: paperless/settings.py:791
#: paperless/settings.py:790
msgid "Japanese"
msgstr ""
#: paperless/settings.py:792
#: paperless/settings.py:791
msgid "Korean"
msgstr ""
#: paperless/settings.py:793
#: paperless/settings.py:792
msgid "Luxembourgish"
msgstr ""
#: paperless/settings.py:794
#: paperless/settings.py:793
msgid "Norwegian"
msgstr ""
#: paperless/settings.py:795
#: paperless/settings.py:794
msgid "Dutch"
msgstr ""
#: paperless/settings.py:796
#: paperless/settings.py:795
msgid "Polish"
msgstr ""
#: paperless/settings.py:797
#: paperless/settings.py:796
msgid "Portuguese (Brazil)"
msgstr ""
#: paperless/settings.py:798
#: paperless/settings.py:797
msgid "Portuguese"
msgstr ""
#: paperless/settings.py:799
#: paperless/settings.py:798
msgid "Romanian"
msgstr ""
#: paperless/settings.py:800
#: paperless/settings.py:799
msgid "Russian"
msgstr ""
#: paperless/settings.py:801
#: paperless/settings.py:800
msgid "Slovak"
msgstr ""
#: paperless/settings.py:802
#: paperless/settings.py:801
msgid "Slovenian"
msgstr ""
#: paperless/settings.py:803
#: paperless/settings.py:802
msgid "Serbian"
msgstr ""
#: paperless/settings.py:804
#: paperless/settings.py:803
msgid "Swedish"
msgstr ""
#: paperless/settings.py:805
#: paperless/settings.py:804
msgid "Turkish"
msgstr ""
#: paperless/settings.py:806
#: paperless/settings.py:805
msgid "Ukrainian"
msgstr ""
#: paperless/settings.py:807
#: paperless/settings.py:806
msgid "Vietnamese"
msgstr ""
#: paperless/settings.py:808
#: paperless/settings.py:807
msgid "Chinese Simplified"
msgstr ""
#: paperless/settings.py:809
#: paperless/settings.py:808
msgid "Chinese Traditional"
msgstr ""

View File

@@ -786,7 +786,6 @@ LANGUAGES = [
("fi-fi", _("Finnish")),
("fr-fr", _("French")),
("hu-hu", _("Hungarian")),
("id-id", _("Indonesian")),
("it-it", _("Italian")),
("ja-jp", _("Japanese")),
("ko-kr", _("Korean")),

View File

@@ -1108,7 +1108,6 @@ class TestMail(
self.assertEqual(len(self.mailMocker.bogus_mailbox.messages), 2)
self.assertEqual(len(self.mailMocker.bogus_mailbox.messages_spam), 1)
@pytest.mark.flaky(reruns=4)
def test_error_skip_rule(self):
account = MailAccount.objects.create(
name="test2",