mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-08-07 19:08:32 -05:00
Compare commits
1 Commits
dependabot
...
chore/manu
Author | SHA1 | Date | |
---|---|---|---|
![]() |
36ead3d08e |
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
@@ -15,7 +15,6 @@ env:
|
||||
DEFAULT_UV_VERSION: "0.8.x"
|
||||
# This is the default version of Python to use in most steps which aren't specific
|
||||
DEFAULT_PYTHON_VERSION: "3.11"
|
||||
NLTK_DATA: "/usr/share/nltk_data"
|
||||
jobs:
|
||||
pre-commit:
|
||||
# We want to run on external PRs, but not on our own internal PRs as they'll be run
|
||||
@@ -122,11 +121,8 @@ jobs:
|
||||
- name: List installed Python dependencies
|
||||
run: |
|
||||
uv pip list
|
||||
- name: Install or update NLTK dependencies
|
||||
run: uv run python -m nltk.downloader punkt punkt_tab snowball_data stopwords -d ${{ env.NLTK_DATA }}
|
||||
- name: Tests
|
||||
env:
|
||||
NLTK_DATA: ${{ env.NLTK_DATA }}
|
||||
PAPERLESS_CI_TEST: 1
|
||||
# Enable paperless_mail testing against real server
|
||||
PAPERLESS_MAIL_TEST_HOST: ${{ secrets.TEST_MAIL_HOST }}
|
||||
|
@@ -31,7 +31,7 @@ repos:
|
||||
rev: v2.4.1
|
||||
hooks:
|
||||
- id: codespell
|
||||
exclude: "(^src-ui/src/locale/)|(^src-ui/pnpm-lock.yaml)|(^src-ui/e2e/)|(^src/paperless_mail/tests/samples/)|(^src/documents/tests/samples/)"
|
||||
exclude: "(^src-ui/src/locale/)|(^src-ui/pnpm-lock.yaml)|(^src-ui/e2e/)|(^src/paperless_mail/tests/samples/)"
|
||||
exclude_types:
|
||||
- pofile
|
||||
- json
|
||||
|
@@ -179,14 +179,10 @@ following:
|
||||
|
||||
### Database Upgrades
|
||||
|
||||
In general, Paperless-ngx supports the current versions of PostgreSQL and MariaDB, and it is generally
|
||||
In general, paperless does not require a specific version of PostgreSQL or MariaDB and it is
|
||||
safe to update them to newer versions. However, you should always take a backup and follow
|
||||
the instructions from your database's documentation for how to upgrade between major versions.
|
||||
|
||||
!!! note
|
||||
|
||||
As of Paperless-ngx v2.18, the minimum supported version of PostgreSQL is 13.
|
||||
|
||||
For PostgreSQL, refer to [Upgrading a PostgreSQL Cluster](https://www.postgresql.org/docs/current/upgrading.html).
|
||||
|
||||
For MariaDB, refer to [Upgrading MariaDB](https://mariadb.com/kb/en/upgrading/).
|
||||
|
@@ -159,23 +159,6 @@ Available options are `postgresql` and `mariadb`.
|
||||
|
||||
Defaults to unset, which uses Django’s built-in defaults.
|
||||
|
||||
#### [`PAPERLESS_DB_POOLSIZE=<int>`](#PAPERLESS_DB_POOLSIZE) {#PAPERLESS_DB_POOLSIZE}
|
||||
|
||||
: Defines the maximum number of database connections to keep in the pool.
|
||||
|
||||
Only applies to PostgreSQL. This setting is ignored for other database engines.
|
||||
|
||||
The value must be greater than or equal to 1 to be used.
|
||||
Defaults to unset, which disables connection pooling.
|
||||
|
||||
!!! note
|
||||
|
||||
A small pool is typically sufficient — for example, a size of 4.
|
||||
Make sure your PostgreSQL server's max_connections setting is large enough to handle:
|
||||
```(Paperless workers + Celery workers) × pool size + safety margin```
|
||||
For example, with 4 Paperless workers and 2 Celery workers, and a pool size of 4:
|
||||
(4 + 2) × 4 + 10 = 34 connections required.
|
||||
|
||||
#### [`PAPERLESS_DB_READ_CACHE_ENABLED=<bool>`](#PAPERLESS_DB_READ_CACHE_ENABLED) {#PAPERLESS_DB_READ_CACHE_ENABLED}
|
||||
|
||||
: Caches the database read query results into Redis. This can significantly improve application response times by caching database queries, at the cost of slightly increased memory usage.
|
||||
|
@@ -30,9 +30,6 @@ Each document has data fields that you can assign to them:
|
||||
- A _document type_ is used to demarcate the type of a document such
|
||||
as letter, bank statement, invoice, contract, etc. It is used to
|
||||
identify what a document is about.
|
||||
- The document _storage path_ is the location where the document files
|
||||
are stored. See [Storage Paths](advanced_usage.md#storage-paths) for
|
||||
more information.
|
||||
- The _date added_ of a document is the date the document was scanned
|
||||
into paperless. You cannot and should not change this date.
|
||||
- The _date created_ of a document is the date the document was
|
||||
|
@@ -23,22 +23,22 @@ dependencies = [
|
||||
"dateparser~=1.2",
|
||||
# WARNING: django does not use semver.
|
||||
# Only patch versions are guaranteed to not introduce breaking changes.
|
||||
"django~=5.2.5",
|
||||
"django~=5.1.7",
|
||||
"django-allauth[socialaccount,mfa]~=65.4.0",
|
||||
"django-auditlog~=3.2.1",
|
||||
"django-auditlog~=3.1.2",
|
||||
"django-cachalot~=2.8.0",
|
||||
"django-celery-results~=2.6.0",
|
||||
"django-compression-middleware~=0.5.0",
|
||||
"django-cors-headers~=4.7.0",
|
||||
"django-extensions~=4.1",
|
||||
"django-filter~=25.1",
|
||||
"django-guardian~=3.0.3",
|
||||
"django-multiselectfield~=1.0.1",
|
||||
"django-guardian~=2.4.0",
|
||||
"django-multiselectfield~=0.1.13",
|
||||
"django-soft-delete~=1.0.18",
|
||||
"djangorestframework~=3.15",
|
||||
"djangorestframework-guardian~=0.4.0",
|
||||
"djangorestframework-guardian~=0.3.0",
|
||||
"drf-spectacular~=0.28",
|
||||
"drf-spectacular-sidecar~=2025.8.1",
|
||||
"drf-spectacular-sidecar~=2025.4.1",
|
||||
"drf-writable-nested~=0.7.1",
|
||||
"filelock~=3.18.0",
|
||||
"flower~=2.0.1",
|
||||
@@ -52,7 +52,6 @@ dependencies = [
|
||||
"ocrmypdf~=16.10.0",
|
||||
"pathvalidate~=3.3.1",
|
||||
"pdf2image~=1.17.0",
|
||||
"psycopg-pool",
|
||||
"python-dateutil~=2.9.0",
|
||||
"python-dotenv~=1.1.0",
|
||||
"python-gnupg~=0.5.4",
|
||||
@@ -63,7 +62,7 @@ dependencies = [
|
||||
"redis[hiredis]~=5.2.1",
|
||||
"scikit-learn~=1.7.0",
|
||||
"setproctitle~=1.3.4",
|
||||
"tika-client~=0.10.0",
|
||||
"tika-client~=0.9.0",
|
||||
"tqdm~=4.67.1",
|
||||
"watchdog~=6.0",
|
||||
"whitenoise~=6.9",
|
||||
@@ -75,10 +74,9 @@ optional-dependencies.mariadb = [
|
||||
"mysqlclient~=2.2.7",
|
||||
]
|
||||
optional-dependencies.postgres = [
|
||||
"psycopg[c,pool]==3.2.9",
|
||||
"psycopg[c]==3.2.9",
|
||||
# Direct dependency for proper resolution of the pre-built wheels
|
||||
"psycopg-c==3.2.9",
|
||||
"psycopg-pool==3.2.6",
|
||||
]
|
||||
optional-dependencies.webserver = [
|
||||
"granian[uvloop]~=2.4.1",
|
||||
@@ -103,7 +101,7 @@ testing = [
|
||||
"imagehash",
|
||||
"pytest~=8.4.1",
|
||||
"pytest-cov~=6.2.1",
|
||||
"pytest-django~=4.11.1",
|
||||
"pytest-django~=4.10.0",
|
||||
"pytest-env",
|
||||
"pytest-httpx",
|
||||
"pytest-mock",
|
||||
@@ -204,9 +202,15 @@ lint.per-file-ignores."docker/wait-for-redis.py" = [
|
||||
"INP001",
|
||||
"T201",
|
||||
]
|
||||
lint.per-file-ignores."src/documents/file_handling.py" = [
|
||||
"PTH",
|
||||
] # TODO Enable & remove
|
||||
lint.per-file-ignores."src/documents/management/commands/document_consumer.py" = [
|
||||
"PTH",
|
||||
] # TODO Enable & remove
|
||||
lint.per-file-ignores."src/documents/management/commands/document_exporter.py" = [
|
||||
"PTH",
|
||||
] # TODO Enable & remove
|
||||
lint.per-file-ignores."src/documents/migrations/1012_fix_archive_files.py" = [
|
||||
"PTH",
|
||||
] # TODO Enable & remove
|
||||
@@ -216,6 +220,9 @@ lint.per-file-ignores."src/documents/models.py" = [
|
||||
lint.per-file-ignores."src/documents/parsers.py" = [
|
||||
"PTH",
|
||||
] # TODO Enable & remove
|
||||
lint.per-file-ignores."src/documents/signals/handlers.py" = [
|
||||
"PTH",
|
||||
] # TODO Enable & remove
|
||||
lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
|
||||
"RUF001",
|
||||
]
|
||||
@@ -232,7 +239,6 @@ testpaths = [
|
||||
"src/paperless_mail/tests/",
|
||||
"src/paperless_tesseract/tests/",
|
||||
"src/paperless_tika/tests",
|
||||
"src/paperless_text/tests/",
|
||||
]
|
||||
addopts = [
|
||||
"--pythonwarnings=all",
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -11,17 +11,17 @@
|
||||
},
|
||||
"private": true,
|
||||
"dependencies": {
|
||||
"@angular/cdk": "^20.1.4",
|
||||
"@angular/common": "~20.1.4",
|
||||
"@angular/compiler": "~20.1.4",
|
||||
"@angular/core": "~20.1.4",
|
||||
"@angular/forms": "~20.1.4",
|
||||
"@angular/localize": "~20.1.4",
|
||||
"@angular/platform-browser": "~20.1.4",
|
||||
"@angular/platform-browser-dynamic": "~20.1.4",
|
||||
"@angular/router": "~20.1.4",
|
||||
"@angular/cdk": "^20.0.4",
|
||||
"@angular/common": "~20.0.6",
|
||||
"@angular/compiler": "~20.0.6",
|
||||
"@angular/core": "~20.0.6",
|
||||
"@angular/forms": "~20.0.6",
|
||||
"@angular/localize": "~20.0.6",
|
||||
"@angular/platform-browser": "~20.0.6",
|
||||
"@angular/platform-browser-dynamic": "~20.0.6",
|
||||
"@angular/router": "~20.0.6",
|
||||
"@ng-bootstrap/ng-bootstrap": "^19.0.1",
|
||||
"@ng-select/ng-select": "^20.0.1",
|
||||
"@ng-select/ng-select": "^15.1.3",
|
||||
"@ngneat/dirty-check-forms": "^3.0.3",
|
||||
"@popperjs/core": "^2.11.8",
|
||||
"bootstrap": "^5.3.7",
|
||||
@@ -32,7 +32,7 @@
|
||||
"ngx-color": "^10.0.0",
|
||||
"ngx-cookie-service": "^20.0.1",
|
||||
"ngx-device-detector": "^10.0.2",
|
||||
"ngx-ui-tour-ng-bootstrap": "^17.0.1",
|
||||
"ngx-ui-tour-ng-bootstrap": "^17.0.0",
|
||||
"rxjs": "^7.8.2",
|
||||
"tslib": "^2.8.1",
|
||||
"utif": "^3.1.0",
|
||||
@@ -42,33 +42,33 @@
|
||||
"devDependencies": {
|
||||
"@angular-builders/custom-webpack": "^20.0.0",
|
||||
"@angular-builders/jest": "^20.0.0",
|
||||
"@angular-devkit/core": "^20.1.4",
|
||||
"@angular-devkit/schematics": "^20.1.4",
|
||||
"@angular-devkit/core": "^20.0.4",
|
||||
"@angular-devkit/schematics": "^20.0.4",
|
||||
"@angular-eslint/builder": "20.1.1",
|
||||
"@angular-eslint/eslint-plugin": "20.1.1",
|
||||
"@angular-eslint/eslint-plugin-template": "20.1.1",
|
||||
"@angular-eslint/schematics": "20.1.1",
|
||||
"@angular-eslint/template-parser": "20.1.1",
|
||||
"@angular/build": "^20.1.4",
|
||||
"@angular/cli": "~20.1.4",
|
||||
"@angular/compiler-cli": "~20.1.4",
|
||||
"@angular/build": "^20.0.4",
|
||||
"@angular/cli": "~20.0.4",
|
||||
"@angular/compiler-cli": "~20.0.6",
|
||||
"@codecov/webpack-plugin": "^1.9.1",
|
||||
"@playwright/test": "^1.54.2",
|
||||
"@types/jest": "^30.0.0",
|
||||
"@types/node": "^24.1.0",
|
||||
"@typescript-eslint/eslint-plugin": "^8.38.0",
|
||||
"@typescript-eslint/parser": "^8.38.0",
|
||||
"@typescript-eslint/utils": "^8.38.0",
|
||||
"eslint": "^9.32.0",
|
||||
"jest": "30.0.5",
|
||||
"jest-environment-jsdom": "^30.0.5",
|
||||
"@playwright/test": "^1.53.2",
|
||||
"@types/jest": "^29.5.14",
|
||||
"@types/node": "^24.0.10",
|
||||
"@typescript-eslint/eslint-plugin": "^8.35.1",
|
||||
"@typescript-eslint/parser": "^8.35.1",
|
||||
"@typescript-eslint/utils": "^8.35.1",
|
||||
"eslint": "^9.30.1",
|
||||
"jest": "29.7.0",
|
||||
"jest-environment-jsdom": "^29.7.0",
|
||||
"jest-junit": "^16.0.0",
|
||||
"jest-preset-angular": "^15.0.0",
|
||||
"jest-preset-angular": "^14.5.5",
|
||||
"jest-websocket-mock": "^2.5.0",
|
||||
"prettier-plugin-organize-imports": "^4.2.0",
|
||||
"prettier-plugin-organize-imports": "^4.1.0",
|
||||
"ts-node": "~10.9.1",
|
||||
"typescript": "^5.8.3",
|
||||
"webpack": "^5.101.0"
|
||||
"webpack": "^5.99.9"
|
||||
},
|
||||
"pnpm": {
|
||||
"onlyBuiltDependencies": [
|
||||
|
5220
src-ui/pnpm-lock.yaml
generated
5220
src-ui/pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -1,16 +1,12 @@
|
||||
import '@angular/localize/init'
|
||||
import { jest } from '@jest/globals'
|
||||
import { setupZoneTestEnv } from 'jest-preset-angular/setup-env/zone'
|
||||
import { TextDecoder, TextEncoder } from 'node:util'
|
||||
import { TextDecoder, TextEncoder } from 'util'
|
||||
if (process.env.NODE_ENV === 'test') {
|
||||
setupZoneTestEnv()
|
||||
}
|
||||
;(globalThis as any).TextEncoder = TextEncoder as unknown as {
|
||||
new (): TextEncoder
|
||||
}
|
||||
;(globalThis as any).TextDecoder = TextDecoder as unknown as {
|
||||
new (): TextDecoder
|
||||
}
|
||||
global.TextEncoder = TextEncoder
|
||||
global.TextDecoder = TextDecoder
|
||||
|
||||
import { registerLocaleData } from '@angular/common'
|
||||
import localeAf from '@angular/common/locales/af'
|
||||
@@ -120,6 +116,10 @@ if (!URL.revokeObjectURL) {
|
||||
Object.defineProperty(window.URL, 'revokeObjectURL', { value: jest.fn() })
|
||||
}
|
||||
Object.defineProperty(window, 'ResizeObserver', { value: mock() })
|
||||
Object.defineProperty(window, 'location', {
|
||||
configurable: true,
|
||||
value: { reload: jest.fn() },
|
||||
})
|
||||
|
||||
HTMLCanvasElement.prototype.getContext = <
|
||||
typeof HTMLCanvasElement.prototype.getContext
|
||||
|
@@ -50,7 +50,7 @@
|
||||
<div [ngbNavOutlet]="nav" class="border-start border-end border-bottom p-3 mb-3 shadow-sm"></div>
|
||||
<div class="btn-toolbar" role="toolbar">
|
||||
<div class="btn-group me-2">
|
||||
<button type="button" (click)="discardChanges()" class="btn btn-outline-secondary" [disabled]="loading || (isDirty$ | async) === false" i18n>Discard</button>
|
||||
<button type="button" (click)="discardChanges()" class="btn btn-secondary" [disabled]="loading || (isDirty$ | async) === false" i18n>Discard</button>
|
||||
</div>
|
||||
<div class="btn-group">
|
||||
<button type="submit" class="btn btn-primary" [disabled]="loading || !configForm.valid || (isDirty$ | async) === false" i18n>Save</button>
|
||||
|
@@ -358,6 +358,6 @@
|
||||
|
||||
<div [ngbNavOutlet]="nav" class="border-start border-end border-bottom p-3 mb-3 shadow-sm"></div>
|
||||
|
||||
<button type="button" (click)="reset()" class="btn btn-outline-secondary mb-2" [disabled]="(isDirty$ | async) === false" i18n>Cancel</button>
|
||||
<button type="submit" class="btn btn-primary ms-2 mb-2" [disabled]="(isDirty$ | async) === false" i18n>Save</button>
|
||||
<button type="submit" class="btn btn-primary mb-2" [disabled]="(isDirty$ | async) === false" i18n>Save</button>
|
||||
<button type="button" (click)="reset()" class="btn btn-secondary ms-2 mb-2" [disabled]="(isDirty$ | async) === false" i18n>Cancel</button>
|
||||
</form>
|
||||
|
@@ -36,7 +36,6 @@ import { UserService } from 'src/app/services/rest/user.service'
|
||||
import { SettingsService } from 'src/app/services/settings.service'
|
||||
import { SystemStatusService } from 'src/app/services/system-status.service'
|
||||
import { Toast, ToastService } from 'src/app/services/toast.service'
|
||||
import * as navUtils from 'src/app/utils/navigation'
|
||||
import { ConfirmButtonComponent } from '../../common/confirm-button/confirm-button.component'
|
||||
import { ConfirmDialogComponent } from '../../common/confirm-dialog/confirm-dialog.component'
|
||||
import { CheckComponent } from '../../common/input/check/check.component'
|
||||
@@ -226,9 +225,6 @@ describe('SettingsComponent', () => {
|
||||
})
|
||||
|
||||
it('should offer reload if settings changes require', () => {
|
||||
const reloadSpy = jest
|
||||
.spyOn(navUtils, 'locationReload')
|
||||
.mockImplementation(() => {})
|
||||
completeSetup()
|
||||
let toast: Toast
|
||||
toastService.getToasts().subscribe((t) => (toast = t[0]))
|
||||
@@ -245,7 +241,6 @@ describe('SettingsComponent', () => {
|
||||
|
||||
expect(toast.actionName).toEqual('Reload now')
|
||||
toast.action()
|
||||
expect(reloadSpy).toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('should allow setting theme color, visually apply change immediately but not save', () => {
|
||||
@@ -274,7 +269,7 @@ describe('SettingsComponent', () => {
|
||||
)
|
||||
completeSetup(userService)
|
||||
fixture.detectChanges()
|
||||
expect(toastErrorSpy).toHaveBeenCalled()
|
||||
expect(toastErrorSpy).toBeCalled()
|
||||
})
|
||||
|
||||
it('should show errors on load if load groups failure', () => {
|
||||
@@ -286,7 +281,7 @@ describe('SettingsComponent', () => {
|
||||
)
|
||||
completeSetup(groupService)
|
||||
fixture.detectChanges()
|
||||
expect(toastErrorSpy).toHaveBeenCalled()
|
||||
expect(toastErrorSpy).toBeCalled()
|
||||
})
|
||||
|
||||
it('should load system status on initialize, show errors if needed', () => {
|
||||
|
@@ -57,7 +57,6 @@ import {
|
||||
} from 'src/app/services/settings.service'
|
||||
import { SystemStatusService } from 'src/app/services/system-status.service'
|
||||
import { Toast, ToastService } from 'src/app/services/toast.service'
|
||||
import { locationReload } from 'src/app/utils/navigation'
|
||||
import { CheckComponent } from '../../common/input/check/check.component'
|
||||
import { ColorComponent } from '../../common/input/color/color.component'
|
||||
import { PermissionsGroupComponent } from '../../common/input/permissions/permissions-group/permissions-group.component'
|
||||
@@ -551,7 +550,7 @@ export class SettingsComponent
|
||||
savedToast.content = $localize`Settings were saved successfully. Reload is required to apply some changes.`
|
||||
savedToast.actionName = $localize`Reload now`
|
||||
savedToast.action = () => {
|
||||
locationReload()
|
||||
location.reload()
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -19,7 +19,6 @@ import { GroupService } from 'src/app/services/rest/group.service'
|
||||
import { UserService } from 'src/app/services/rest/user.service'
|
||||
import { SettingsService } from 'src/app/services/settings.service'
|
||||
import { ToastService } from 'src/app/services/toast.service'
|
||||
import * as navUtils from 'src/app/utils/navigation'
|
||||
import { ConfirmDialogComponent } from '../../common/confirm-dialog/confirm-dialog.component'
|
||||
import { GroupEditDialogComponent } from '../../common/edit-dialog/group-edit-dialog/group-edit-dialog.component'
|
||||
import { UserEditDialogComponent } from '../../common/edit-dialog/user-edit-dialog/user-edit-dialog.component'
|
||||
@@ -108,7 +107,7 @@ describe('UsersAndGroupsComponent', () => {
|
||||
const toastErrorSpy = jest.spyOn(toastService, 'showError')
|
||||
const toastInfoSpy = jest.spyOn(toastService, 'showInfo')
|
||||
editDialog.failed.emit()
|
||||
expect(toastErrorSpy).toHaveBeenCalled()
|
||||
expect(toastErrorSpy).toBeCalled()
|
||||
settingsService.currentUser = users[1] // simulate logged in as different user
|
||||
editDialog.succeeded.emit(users[0])
|
||||
expect(toastInfoSpy).toHaveBeenCalledWith(
|
||||
@@ -131,7 +130,7 @@ describe('UsersAndGroupsComponent', () => {
|
||||
throwError(() => new Error('error deleting user'))
|
||||
)
|
||||
deleteDialog.confirm()
|
||||
expect(toastErrorSpy).toHaveBeenCalled()
|
||||
expect(toastErrorSpy).toBeCalled()
|
||||
deleteSpy.mockReturnValueOnce(of(true))
|
||||
deleteDialog.confirm()
|
||||
expect(listAllSpy).toHaveBeenCalled()
|
||||
@@ -143,18 +142,19 @@ describe('UsersAndGroupsComponent', () => {
|
||||
let modal: NgbModalRef
|
||||
modalService.activeInstances.subscribe((refs) => (modal = refs[0]))
|
||||
component.editUser(users[0])
|
||||
const navSpy = jest
|
||||
.spyOn(navUtils, 'setLocationHref')
|
||||
.mockImplementation(() => {})
|
||||
const editDialog = modal.componentInstance as UserEditDialogComponent
|
||||
editDialog.passwordIsSet = true
|
||||
settingsService.currentUser = users[0] // simulate logged in as same user
|
||||
editDialog.succeeded.emit(users[0])
|
||||
fixture.detectChanges()
|
||||
Object.defineProperty(window, 'location', {
|
||||
value: {
|
||||
href: 'http://localhost/',
|
||||
},
|
||||
writable: true, // possibility to override
|
||||
})
|
||||
tick(2600)
|
||||
expect(navSpy).toHaveBeenCalledWith(
|
||||
`${window.location.origin}/accounts/logout/?next=/accounts/login/?next=/`
|
||||
)
|
||||
expect(window.location.href).toContain('logout')
|
||||
}))
|
||||
|
||||
it('should support edit / create group, show error if needed', () => {
|
||||
@@ -166,7 +166,7 @@ describe('UsersAndGroupsComponent', () => {
|
||||
const toastErrorSpy = jest.spyOn(toastService, 'showError')
|
||||
const toastInfoSpy = jest.spyOn(toastService, 'showInfo')
|
||||
editDialog.failed.emit()
|
||||
expect(toastErrorSpy).toHaveBeenCalled()
|
||||
expect(toastErrorSpy).toBeCalled()
|
||||
editDialog.succeeded.emit(groups[0])
|
||||
expect(toastInfoSpy).toHaveBeenCalledWith(
|
||||
`Saved group "${groups[0].name}".`
|
||||
@@ -188,7 +188,7 @@ describe('UsersAndGroupsComponent', () => {
|
||||
throwError(() => new Error('error deleting group'))
|
||||
)
|
||||
deleteDialog.confirm()
|
||||
expect(toastErrorSpy).toHaveBeenCalled()
|
||||
expect(toastErrorSpy).toBeCalled()
|
||||
deleteSpy.mockReturnValueOnce(of(true))
|
||||
deleteDialog.confirm()
|
||||
expect(listAllSpy).toHaveBeenCalled()
|
||||
@@ -210,7 +210,7 @@ describe('UsersAndGroupsComponent', () => {
|
||||
)
|
||||
completeSetup(userService)
|
||||
fixture.detectChanges()
|
||||
expect(toastErrorSpy).toHaveBeenCalled()
|
||||
expect(toastErrorSpy).toBeCalled()
|
||||
})
|
||||
|
||||
it('should show errors on load if load groups failure', () => {
|
||||
@@ -222,6 +222,6 @@ describe('UsersAndGroupsComponent', () => {
|
||||
)
|
||||
completeSetup(groupService)
|
||||
fixture.detectChanges()
|
||||
expect(toastErrorSpy).toHaveBeenCalled()
|
||||
expect(toastErrorSpy).toBeCalled()
|
||||
})
|
||||
})
|
||||
|
@@ -10,7 +10,6 @@ import { GroupService } from 'src/app/services/rest/group.service'
|
||||
import { UserService } from 'src/app/services/rest/user.service'
|
||||
import { SettingsService } from 'src/app/services/settings.service'
|
||||
import { ToastService } from 'src/app/services/toast.service'
|
||||
import { setLocationHref } from 'src/app/utils/navigation'
|
||||
import { ConfirmDialogComponent } from '../../common/confirm-dialog/confirm-dialog.component'
|
||||
import { EditDialogMode } from '../../common/edit-dialog/edit-dialog.component'
|
||||
import { GroupEditDialogComponent } from '../../common/edit-dialog/group-edit-dialog/group-edit-dialog.component'
|
||||
@@ -94,9 +93,7 @@ export class UsersAndGroupsComponent
|
||||
$localize`Password has been changed, you will be logged out momentarily.`
|
||||
)
|
||||
setTimeout(() => {
|
||||
setLocationHref(
|
||||
`${window.location.origin}/accounts/logout/?next=/accounts/login/?next=/`
|
||||
)
|
||||
window.location.href = `${window.location.origin}/accounts/logout/?next=/accounts/login/?next=/`
|
||||
}, 2500)
|
||||
} else {
|
||||
this.toastService.showInfo(
|
||||
|
@@ -30,7 +30,7 @@
|
||||
}
|
||||
<div class="list-group-item">
|
||||
<div class="input-group input-group-sm">
|
||||
<input class="form-control" type="text" spellcheck="false" [(ngModel)]="filterText" [placeholder]="filterPlaceholder" (keyup.enter)="listFilterEnter()" #listFilterTextInput>
|
||||
<input class="form-control" type="text" [(ngModel)]="filterText" [placeholder]="filterPlaceholder" (keyup.enter)="listFilterEnter()" #listFilterTextInput>
|
||||
</div>
|
||||
</div>
|
||||
@if (selectionModel.items) {
|
||||
|
@@ -18,7 +18,6 @@ import { NgxBootstrapIconsModule, allIcons } from 'ngx-bootstrap-icons'
|
||||
import { of, throwError } from 'rxjs'
|
||||
import { ProfileService } from 'src/app/services/profile.service'
|
||||
import { ToastService } from 'src/app/services/toast.service'
|
||||
import * as navUtils from 'src/app/utils/navigation'
|
||||
import { ConfirmButtonComponent } from '../confirm-button/confirm-button.component'
|
||||
import { PasswordComponent } from '../input/password/password.component'
|
||||
import { TextComponent } from '../input/text/text.component'
|
||||
@@ -206,15 +205,16 @@ describe('ProfileEditDialogComponent', () => {
|
||||
|
||||
const updateSpy = jest.spyOn(profileService, 'update')
|
||||
updateSpy.mockReturnValue(of(null))
|
||||
const navSpy = jest
|
||||
.spyOn(navUtils, 'setLocationHref')
|
||||
.mockImplementation(() => {})
|
||||
Object.defineProperty(window, 'location', {
|
||||
value: {
|
||||
href: 'http://localhost/',
|
||||
},
|
||||
writable: true, // possibility to override
|
||||
})
|
||||
component.save()
|
||||
expect(updateSpy).toHaveBeenCalled()
|
||||
tick(2600)
|
||||
expect(navSpy).toHaveBeenCalledWith(
|
||||
`${window.location.origin}/accounts/logout/?next=/accounts/login/?next=/`
|
||||
)
|
||||
expect(window.location.href).toContain('logout')
|
||||
}))
|
||||
|
||||
it('should support auth token copy', fakeAsync(() => {
|
||||
|
@@ -21,7 +21,6 @@ import {
|
||||
import { SafeHtmlPipe } from 'src/app/pipes/safehtml.pipe'
|
||||
import { ProfileService } from 'src/app/services/profile.service'
|
||||
import { ToastService } from 'src/app/services/toast.service'
|
||||
import { setLocationHref } from 'src/app/utils/navigation'
|
||||
import { LoadingComponentWithPermissions } from '../../loading-component/loading.component'
|
||||
import { ConfirmButtonComponent } from '../confirm-button/confirm-button.component'
|
||||
import { PasswordComponent } from '../input/password/password.component'
|
||||
@@ -195,9 +194,7 @@ export class ProfileEditDialogComponent
|
||||
$localize`Password has been changed, you will be logged out momentarily.`
|
||||
)
|
||||
setTimeout(() => {
|
||||
setLocationHref(
|
||||
`${window.location.origin}/accounts/logout/?next=/accounts/login/?next=/`
|
||||
)
|
||||
window.location.href = `${window.location.origin}/accounts/logout/?next=/accounts/login/?next=/`
|
||||
}, 2500)
|
||||
}
|
||||
this.activeModal.close()
|
||||
|
@@ -188,7 +188,7 @@ describe('MailComponent', () => {
|
||||
const toastErrorSpy = jest.spyOn(toastService, 'showError')
|
||||
const toastInfoSpy = jest.spyOn(toastService, 'showInfo')
|
||||
editDialog.failed.emit()
|
||||
expect(toastErrorSpy).toHaveBeenCalled()
|
||||
expect(toastErrorSpy).toBeCalled()
|
||||
editDialog.succeeded.emit(mailAccounts[0] as any)
|
||||
expect(toastInfoSpy).toHaveBeenCalledWith(
|
||||
`Saved account "${mailAccounts[0].name}".`
|
||||
@@ -211,7 +211,7 @@ describe('MailComponent', () => {
|
||||
throwError(() => new Error('error deleting mail account'))
|
||||
)
|
||||
deleteDialog.confirm()
|
||||
expect(toastErrorSpy).toHaveBeenCalled()
|
||||
expect(toastErrorSpy).toBeCalled()
|
||||
deleteSpy.mockReturnValueOnce(of(true))
|
||||
deleteDialog.confirm()
|
||||
expect(listAllSpy).toHaveBeenCalled()
|
||||
@@ -246,7 +246,7 @@ describe('MailComponent', () => {
|
||||
const toastErrorSpy = jest.spyOn(toastService, 'showError')
|
||||
const toastInfoSpy = jest.spyOn(toastService, 'showInfo')
|
||||
editDialog.failed.emit()
|
||||
expect(toastErrorSpy).toHaveBeenCalled()
|
||||
expect(toastErrorSpy).toBeCalled()
|
||||
editDialog.succeeded.emit(mailRules[0] as any)
|
||||
expect(toastInfoSpy).toHaveBeenCalledWith(
|
||||
`Saved rule "${mailRules[0].name}".`
|
||||
@@ -280,7 +280,7 @@ describe('MailComponent', () => {
|
||||
throwError(() => new Error('error deleting mail rule "rule1"'))
|
||||
)
|
||||
deleteDialog.confirm()
|
||||
expect(toastErrorSpy).toHaveBeenCalled()
|
||||
expect(toastErrorSpy).toBeCalled()
|
||||
deleteSpy.mockReturnValueOnce(of(true))
|
||||
deleteDialog.confirm()
|
||||
expect(listAllSpy).toHaveBeenCalled()
|
||||
|
@@ -1,5 +1,4 @@
|
||||
<pngx-page-header title="{{ typeNamePlural | titlecase }}" info="View, add, edit and delete {{ typeNamePlural }}." infoLink="usage/#terms-and-definitions">
|
||||
|
||||
<pngx-page-header title="{{ typeNamePlural | titlecase }}">
|
||||
<button class="btn btn-sm btn-outline-secondary" (click)="clearSelection()" [hidden]="selectedObjects.size === 0">
|
||||
<i-bs name="x"></i-bs> <ng-container i18n>Clear selection</ng-container>
|
||||
</button>
|
||||
|
@@ -164,7 +164,7 @@ describe('ManagementListComponent', () => {
|
||||
const toastInfoSpy = jest.spyOn(toastService, 'showInfo')
|
||||
const reloadSpy = jest.spyOn(component, 'reloadData')
|
||||
|
||||
const createButton = fixture.debugElement.queryAll(By.css('button'))[4]
|
||||
const createButton = fixture.debugElement.queryAll(By.css('button'))[3]
|
||||
createButton.triggerEventHandler('click')
|
||||
|
||||
expect(modal).not.toBeUndefined()
|
||||
@@ -188,7 +188,7 @@ describe('ManagementListComponent', () => {
|
||||
const toastInfoSpy = jest.spyOn(toastService, 'showInfo')
|
||||
const reloadSpy = jest.spyOn(component, 'reloadData')
|
||||
|
||||
const editButton = fixture.debugElement.queryAll(By.css('button'))[7]
|
||||
const editButton = fixture.debugElement.queryAll(By.css('button'))[6]
|
||||
editButton.triggerEventHandler('click')
|
||||
|
||||
expect(modal).not.toBeUndefined()
|
||||
@@ -213,7 +213,7 @@ describe('ManagementListComponent', () => {
|
||||
const deleteSpy = jest.spyOn(tagService, 'delete')
|
||||
const reloadSpy = jest.spyOn(component, 'reloadData')
|
||||
|
||||
const deleteButton = fixture.debugElement.queryAll(By.css('button'))[8]
|
||||
const deleteButton = fixture.debugElement.queryAll(By.css('button'))[7]
|
||||
deleteButton.triggerEventHandler('click')
|
||||
|
||||
expect(modal).not.toBeUndefined()
|
||||
@@ -233,7 +233,7 @@ describe('ManagementListComponent', () => {
|
||||
|
||||
it('should support quick filter for objects', () => {
|
||||
const qfSpy = jest.spyOn(documentListViewService, 'quickFilter')
|
||||
const filterButton = fixture.debugElement.queryAll(By.css('button'))[9]
|
||||
const filterButton = fixture.debugElement.queryAll(By.css('button'))[8]
|
||||
filterButton.triggerEventHandler('click')
|
||||
expect(qfSpy).toHaveBeenCalledWith([
|
||||
{ rule_type: FILTER_HAS_TAGS_ALL, value: tags[0].id.toString() },
|
||||
|
@@ -70,6 +70,6 @@
|
||||
}
|
||||
</ul>
|
||||
|
||||
<button type="button" (click)="reset()" class="btn btn-outline-secondary mb-2" [disabled]="(isDirty$ | async) === false" i18n>Cancel</button>
|
||||
<button type="submit" class="btn btn-primary ms-2 mb-2" [disabled]="(isDirty$ | async) === false" i18n>Save</button>
|
||||
<button type="submit" class="btn btn-primary mb-2" [disabled]="(isDirty$ | async) === false" i18n>Save</button>
|
||||
<button type="button" (click)="reset()" class="btn btn-secondary ms-2 mb-2" [disabled]="(isDirty$ | async) === false" i18n>Cancel</button>
|
||||
</form>
|
||||
|
@@ -1,8 +0,0 @@
|
||||
/* istanbul ignore file */
|
||||
/**
 * Navigates the browser by assigning `url` to `window.location.href`.
 *
 * @param url the address to assign to the current window's location
 */
export function setLocationHref(url: string) {
|
||||
window.location.href = url
|
||||
}
|
||||
|
||||
/**
 * Reloads the current page by calling `window.location.reload()`.
 */
export function locationReload() {
|
||||
window.location.reload()
|
||||
}
|
@@ -3,8 +3,7 @@
|
||||
"compilerOptions": {
|
||||
"outDir": "./out-tsc/spec",
|
||||
"types": [
|
||||
"jest",
|
||||
"node",
|
||||
"jest"
|
||||
],
|
||||
"module": "commonjs",
|
||||
"emitDecoratorMetadata": true,
|
||||
|
@@ -1,23 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import pickle
|
||||
from binascii import hexlify
|
||||
from collections import OrderedDict
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Any
|
||||
from typing import Final
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.cache import cache
|
||||
from django.core.cache import caches
|
||||
|
||||
from documents.models import Document
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from django.core.cache.backends.base import BaseCache
|
||||
|
||||
from documents.classifier import DocumentClassifier
|
||||
|
||||
logger = logging.getLogger("paperless.caching")
|
||||
@@ -46,80 +39,6 @@ CACHE_1_MINUTE: Final[int] = 60
|
||||
CACHE_5_MINUTES: Final[int] = 5 * CACHE_1_MINUTE
|
||||
CACHE_50_MINUTES: Final[int] = 50 * CACHE_1_MINUTE
|
||||
|
||||
read_cache = caches["read-cache"]
|
||||
|
||||
|
||||
class LRUCache:
|
||||
def __init__(self, capacity: int = 128):
|
||||
self._data = OrderedDict()
|
||||
self.capacity = capacity
|
||||
|
||||
def get(self, key, default=None) -> Any | None:
|
||||
if key in self._data:
|
||||
self._data.move_to_end(key)
|
||||
return self._data[key]
|
||||
return default
|
||||
|
||||
def set(self, key, value) -> None:
|
||||
self._data[key] = value
|
||||
self._data.move_to_end(key)
|
||||
while len(self._data) > self.capacity:
|
||||
self._data.popitem(last=False)
|
||||
|
||||
|
||||
class StoredLRUCache(LRUCache):
    """
    LRU cache that can persist its entire contents as a single entry in a backend cache.

    Useful for sharing a cache across multiple workers or processes.

    Workflow:
    1. Load the cache state from the backend using `load()`.
    2. Use `get()` and `set()` locally as usual.
    3. Persist changes back to the backend using `save()`.
    """

    def __init__(
        self,
        backend_key: str,
        capacity: int = 128,
        backend: BaseCache = read_cache,
        backend_ttl=settings.CACHALOT_TIMEOUT,
    ):
        """
        :param backend_key: key under which the serialized cache is stored in the backend
        :param capacity: maximum number of entries kept locally
        :param backend: cache backend used by `load()` and `save()`
        :param backend_ttl: timeout passed to the backend when saving
        :raises ValueError: if ``backend_key`` is None
        """
        if backend_key is None:
            raise ValueError("backend_key is mandatory")
        super().__init__(capacity)
        self._backend_key = backend_key
        self._backend = backend
        self.backend_ttl = backend_ttl

    def load(self) -> None:
        """
        Load the whole cache content from backend storage.

        If no valid cached data exists in the backend, the local cache is cleared.
        """
        serialized_data = self._backend.get(self._backend_key)
        loaded = None
        if serialized_data:
            try:
                # NOTE(security): pickle.loads executes arbitrary code if the
                # payload is attacker-controlled; this relies on the cache
                # backend only ever holding data written by `save()`.
                loaded = pickle.loads(serialized_data)
            except (
                pickle.PickleError,
                AttributeError,
                EOFError,
                ImportError,
                IndexError,
                TypeError,
                ValueError,
            ):
                # Unpickling a damaged payload is documented to raise more than
                # just PickleError subclasses, so catch those as well.
                logger.warning(
                    "Cache exists in backend but could not be read (possibly invalid format)",
                )
            else:
                if not isinstance(loaded, OrderedDict):
                    # get()/set() rely on OrderedDict methods; anything else is
                    # treated like an unreadable entry.
                    logger.warning(
                        "Cache exists in backend but could not be read (possibly invalid format)",
                    )
                    loaded = None
        # Always replace the local state so that stale entries never survive a
        # failed or empty load, as promised by the docstring.
        self._data = loaded if loaded is not None else OrderedDict()

    def save(self) -> None:
        """Save the entire local cache to the backend as a serialized object.

        The backend entry will expire after the configured TTL.
        """
        self._backend.set(
            self._backend_key,
            pickle.dumps(self._data),
            self.backend_ttl,
        )
|
||||
|
||||
|
||||
def get_suggestion_cache_key(document_id: int) -> str:
|
||||
"""
|
||||
|
@@ -16,29 +16,16 @@ if TYPE_CHECKING:
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.cache import cache
|
||||
from django.core.cache import caches
|
||||
|
||||
from documents.caching import CACHE_5_MINUTES
|
||||
from documents.caching import CACHE_50_MINUTES
|
||||
from documents.caching import CLASSIFIER_HASH_KEY
|
||||
from documents.caching import CLASSIFIER_MODIFIED_KEY
|
||||
from documents.caching import CLASSIFIER_VERSION_KEY
|
||||
from documents.caching import StoredLRUCache
|
||||
from documents.models import Document
|
||||
from documents.models import MatchingModel
|
||||
|
||||
logger = logging.getLogger("paperless.classifier")
|
||||
|
||||
ADVANCED_TEXT_PROCESSING_ENABLED = (
|
||||
settings.NLTK_LANGUAGE is not None and settings.NLTK_ENABLED
|
||||
)
|
||||
|
||||
read_cache = caches["read-cache"]
|
||||
|
||||
|
||||
RE_DIGIT = re.compile(r"\d")
|
||||
RE_WORD = re.compile(r"\b[\w]+\b") # words that may contain digits
|
||||
|
||||
|
||||
class IncompatibleClassifierVersionError(Exception):
|
||||
def __init__(self, message: str, *args: object) -> None:
|
||||
@@ -105,27 +92,14 @@ class DocumentClassifier:
|
||||
self.last_auto_type_hash: bytes | None = None
|
||||
|
||||
self.data_vectorizer = None
|
||||
self.data_vectorizer_hash = None
|
||||
self.tags_binarizer = None
|
||||
self.tags_classifier = None
|
||||
self.correspondent_classifier = None
|
||||
self.document_type_classifier = None
|
||||
self.storage_path_classifier = None
|
||||
self._stemmer = None
|
||||
# 10,000 elements roughly use 200 to 500 KB per worker,
|
||||
# and also in the shared Redis cache,
|
||||
# Keep this cache small to minimize lookup and I/O latency.
|
||||
if ADVANCED_TEXT_PROCESSING_ENABLED:
|
||||
self._stem_cache = StoredLRUCache(
|
||||
f"stem_cache_v{self.FORMAT_VERSION}",
|
||||
capacity=10000,
|
||||
)
|
||||
self._stop_words = None
|
||||
|
||||
def _update_data_vectorizer_hash(self):
    """
    Recompute ``self.data_vectorizer_hash`` as the SHA-256 hex digest of the
    pickled ``self.data_vectorizer``.
    """
    self.data_vectorizer_hash = sha256(
        pickle.dumps(self.data_vectorizer),
    ).hexdigest()
|
||||
self._stemmer = None
|
||||
self._stop_words = None
|
||||
|
||||
def load(self) -> None:
|
||||
from sklearn.exceptions import InconsistentVersionWarning
|
||||
@@ -145,7 +119,6 @@ class DocumentClassifier:
|
||||
self.last_auto_type_hash = pickle.load(f)
|
||||
|
||||
self.data_vectorizer = pickle.load(f)
|
||||
self._update_data_vectorizer_hash()
|
||||
self.tags_binarizer = pickle.load(f)
|
||||
|
||||
self.tags_classifier = pickle.load(f)
|
||||
@@ -296,7 +269,7 @@ class DocumentClassifier:
|
||||
Generates the content for documents, but once at a time
|
||||
"""
|
||||
for doc in docs_queryset:
|
||||
yield self.preprocess_content(doc.content, shared_cache=False)
|
||||
yield self.preprocess_content(doc.content)
|
||||
|
||||
self.data_vectorizer = CountVectorizer(
|
||||
analyzer="word",
|
||||
@@ -374,7 +347,6 @@ class DocumentClassifier:
|
||||
|
||||
self.last_doc_change_time = latest_doc_change
|
||||
self.last_auto_type_hash = hasher.digest()
|
||||
self._update_data_vectorizer_hash()
|
||||
|
||||
# Set the classifier information into the cache
|
||||
# Caching for 50 minutes, so slightly less than the normal retrain time
|
||||
@@ -384,15 +356,30 @@ class DocumentClassifier:
|
||||
|
||||
return True
|
||||
|
||||
def _init_advanced_text_processing(self):
|
||||
if self._stop_words is None or self._stemmer is None:
|
||||
def preprocess_content(self, content: str) -> str: # pragma: no cover
|
||||
"""
|
||||
Process to contents of a document, distilling it down into
|
||||
words which are meaningful to the content
|
||||
"""
|
||||
|
||||
# Lower case the document
|
||||
content = content.lower().strip()
|
||||
# Reduce spaces
|
||||
content = re.sub(r"\s+", " ", content)
|
||||
# Get only the letters
|
||||
content = re.sub(r"[^\w\s]", " ", content)
|
||||
|
||||
# If the NLTK language is supported, do further processing
|
||||
if settings.NLTK_LANGUAGE is not None and settings.NLTK_ENABLED:
|
||||
import nltk
|
||||
from nltk.corpus import stopwords
|
||||
from nltk.stem import SnowballStemmer
|
||||
from nltk.tokenize import word_tokenize
|
||||
|
||||
# Not really hacky, since it isn't private and is documented, but
|
||||
# set the search path for NLTK data to the single location it should be in
|
||||
nltk.data.path = [settings.NLTK_DIR]
|
||||
|
||||
try:
|
||||
# Preload the corpus early, to force the lazy loader to transform
|
||||
stopwords.ensure_loaded()
|
||||
@@ -400,100 +387,41 @@ class DocumentClassifier:
|
||||
# Do some one time setup
|
||||
# Sometimes, somehow, there's multiple threads loading the corpus
|
||||
# and it's not thread safe, raising an AttributeError
|
||||
self._stemmer = SnowballStemmer(settings.NLTK_LANGUAGE)
|
||||
self._stop_words = frozenset(stopwords.words(settings.NLTK_LANGUAGE))
|
||||
if self._stemmer is None:
|
||||
self._stemmer = SnowballStemmer(settings.NLTK_LANGUAGE)
|
||||
if self._stop_words is None:
|
||||
self._stop_words = set(stopwords.words(settings.NLTK_LANGUAGE))
|
||||
|
||||
# Tokenize
|
||||
# This splits the content into tokens, roughly words
|
||||
words: list[str] = word_tokenize(
|
||||
content,
|
||||
language=settings.NLTK_LANGUAGE,
|
||||
)
|
||||
|
||||
meaningful_words = []
|
||||
for word in words:
|
||||
# Skip stop words
|
||||
# These are words like "a", "and", "the" which add little meaning
|
||||
if word in self._stop_words:
|
||||
continue
|
||||
# Stem the words
|
||||
# This reduces the words to their stems.
|
||||
# "amazement" returns "amaz"
|
||||
# "amaze" returns "amaz
|
||||
# "amazed" returns "amaz"
|
||||
meaningful_words.append(self._stemmer.stem(word))
|
||||
|
||||
return " ".join(meaningful_words)
|
||||
|
||||
except AttributeError:
|
||||
logger.debug("Could not initialize NLTK for advanced text processing.")
|
||||
return False
|
||||
return True
|
||||
|
||||
def stem_and_skip_stop_words(self, words: list[str], *, shared_cache=True):
    """
    Stem every word of ``words`` and join the results with single spaces.

    Stop words are dropped from the output. Words containing a digit are kept
    unchanged, e.g. "amazement", "amaze" and "amazed" all become "amaz" while
    a word such as "test0707" stays as it is.

    :param words: the list of words to stem
    :param shared_cache: when true, the stem cache is loaded from its backend
        before processing and saved back afterwards
    :return: the space separated stems
    """
    if shared_cache:
        self._stem_cache.load()

    stems = []
    for word in words:
        stem = self._stem_cache.get(word)
        if stem is None:
            if word in self._stop_words:
                # Stop words contribute nothing to the output
                continue
            if RE_DIGIT.search(word):
                # Assumption: words that contain numbers are never stemmed
                stem = word
            else:
                stem = self._stemmer.stem(word)
                self._stem_cache.set(word, stem)
        if stem:
            # Empty results are left out of the joined string
            stems.append(stem)

    joined = " ".join(stems)
    if shared_cache:
        self._stem_cache.save()
    return joined
|
||||
|
||||
def preprocess_content(
    self,
    content: str,
    *,
    shared_cache=True,
) -> str:
    """
    Reduce the text of a document to the words that carry its meaning.

    The stemmer cache can be shared across workers through "shared_cache";
    this is unnecessary when training the classifier.
    """
    # Keep only the word tokens (letters and digits), lower-cased and
    # separated by single spaces.
    normalized = " ".join(
        found.group().lower() for found in RE_WORD.finditer(content)
    )

    if not ADVANCED_TEXT_PROCESSING_ENABLED:
        return normalized

    from nltk.tokenize import word_tokenize

    if not self._init_advanced_text_processing():
        return normalized

    # Split the content into tokens, roughly words, then stem them and
    # drop the stop words.
    tokens = word_tokenize(normalized, language=settings.NLTK_LANGUAGE)
    return self.stem_and_skip_stop_words(tokens, shared_cache=shared_cache)
|
||||
|
||||
def _get_vectorizer_cache_key(self, content: str):
    """
    Build the cache key under which the vectorized form of ``content`` is stored.

    The key is a SHA-256 digest over the content followed by the classifier
    format version, the NLTK language and enabled settings, and the current
    vectorizer hash.

    :param content: the raw document content
    :return: a string of the form ``vectorized_content_<hex digest>``
    """
    # Named "digest" rather than "hash" so the builtin hash() is not shadowed
    digest = sha256(content.encode())
    digest.update(
        f"|{self.FORMAT_VERSION}|{settings.NLTK_LANGUAGE}|{settings.NLTK_ENABLED}|{self.data_vectorizer_hash}".encode(),
    )
    return f"vectorized_content_{digest.hexdigest()}"
|
||||
|
||||
def _vectorize(self, content: str):
    """
    Return the vectorizer output for ``content``, going through the read cache.

    On a cache hit the entry's timeout is renewed and the stored value is
    unpickled; on a miss the content is preprocessed, transformed and the
    pickled result is cached for five minutes.
    """
    cache_key = self._get_vectorizer_cache_key(content)
    cached_blob = read_cache.get(cache_key)

    if cached_blob is not None:
        # Cache hit: keep the entry alive, then deserialize it
        read_cache.touch(cache_key, CACHE_5_MINUTES)
        return pickle.loads(cached_blob)

    # Cache miss: compute the vector and remember it
    vectorized = self.data_vectorizer.transform([self.preprocess_content(content)])
    read_cache.set(cache_key, pickle.dumps(vectorized), CACHE_5_MINUTES)
    return vectorized
|
||||
|
||||
def predict_correspondent(self, content: str) -> int | None:
|
||||
if self.correspondent_classifier:
|
||||
X = self._vectorize(content)
|
||||
X = self.data_vectorizer.transform([self.preprocess_content(content)])
|
||||
correspondent_id = self.correspondent_classifier.predict(X)
|
||||
if correspondent_id != -1:
|
||||
return correspondent_id
|
||||
@@ -504,7 +432,7 @@ class DocumentClassifier:
|
||||
|
||||
def predict_document_type(self, content: str) -> int | None:
|
||||
if self.document_type_classifier:
|
||||
X = self._vectorize(content)
|
||||
X = self.data_vectorizer.transform([self.preprocess_content(content)])
|
||||
document_type_id = self.document_type_classifier.predict(X)
|
||||
if document_type_id != -1:
|
||||
return document_type_id
|
||||
@@ -517,7 +445,7 @@ class DocumentClassifier:
|
||||
from sklearn.utils.multiclass import type_of_target
|
||||
|
||||
if self.tags_classifier:
|
||||
X = self._vectorize(content)
|
||||
X = self.data_vectorizer.transform([self.preprocess_content(content)])
|
||||
y = self.tags_classifier.predict(X)
|
||||
tags_ids = self.tags_binarizer.inverse_transform(y)[0]
|
||||
if type_of_target(y).startswith("multilabel"):
|
||||
@@ -536,7 +464,7 @@ class DocumentClassifier:
|
||||
|
||||
def predict_storage_path(self, content: str) -> int | None:
|
||||
if self.storage_path_classifier:
|
||||
X = self._vectorize(content)
|
||||
X = self.data_vectorizer.transform([self.preprocess_content(content)])
|
||||
storage_path_id = self.storage_path_classifier.predict(X)
|
||||
if storage_path_id != -1:
|
||||
return storage_path_id
|
||||
|
@@ -1,5 +1,4 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
@@ -8,15 +7,19 @@ from documents.templating.filepath import validate_filepath_template_and_render
|
||||
from documents.templating.utils import convert_format_str_to_template_format
|
||||
|
||||
|
||||
def create_source_path_directory(source_path: Path) -> None:
|
||||
source_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
def create_source_path_directory(source_path):
|
||||
os.makedirs(os.path.dirname(source_path), exist_ok=True)
|
||||
|
||||
|
||||
def delete_empty_directories(directory: Path, root: Path) -> None:
|
||||
if not directory.is_dir():
|
||||
def delete_empty_directories(directory, root):
|
||||
if not os.path.isdir(directory):
|
||||
return
|
||||
|
||||
if not directory.is_relative_to(root):
|
||||
# Go up in the directory hierarchy and try to delete all directories
|
||||
directory = os.path.normpath(directory)
|
||||
root = os.path.normpath(root)
|
||||
|
||||
if not directory.startswith(root + os.path.sep):
|
||||
# don't do anything outside our originals folder.
|
||||
|
||||
# append os.path.sep so that we avoid these cases:
|
||||
@@ -24,12 +27,11 @@ def delete_empty_directories(directory: Path, root: Path) -> None:
|
||||
# root = /home/originals ("/" gets appended and startswith fails)
|
||||
return
|
||||
|
||||
# Go up in the directory hierarchy and try to delete all directories
|
||||
while directory != root:
|
||||
if not list(directory.iterdir()):
|
||||
if not os.listdir(directory):
|
||||
# it's empty
|
||||
try:
|
||||
directory.rmdir()
|
||||
os.rmdir(directory)
|
||||
except OSError:
|
||||
# whatever. empty directories aren't that bad anyway.
|
||||
return
|
||||
@@ -38,10 +40,10 @@ def delete_empty_directories(directory: Path, root: Path) -> None:
|
||||
return
|
||||
|
||||
# go one level up
|
||||
directory = directory.parent
|
||||
directory = os.path.normpath(os.path.dirname(directory))
|
||||
|
||||
|
||||
def generate_unique_filename(doc, *, archive_filename=False) -> Path:
|
||||
def generate_unique_filename(doc, *, archive_filename=False):
|
||||
"""
|
||||
Generates a unique filename for doc in settings.ORIGINALS_DIR.
|
||||
|
||||
@@ -54,32 +56,21 @@ def generate_unique_filename(doc, *, archive_filename=False) -> Path:
|
||||
|
||||
"""
|
||||
if archive_filename:
|
||||
old_filename: Path | None = (
|
||||
Path(doc.archive_filename) if doc.archive_filename else None
|
||||
)
|
||||
old_filename = doc.archive_filename
|
||||
root = settings.ARCHIVE_DIR
|
||||
else:
|
||||
old_filename = Path(doc.filename) if doc.filename else None
|
||||
old_filename = doc.filename
|
||||
root = settings.ORIGINALS_DIR
|
||||
|
||||
# If generating archive filenames, try to make a name that is similar to
|
||||
# the original filename first.
|
||||
|
||||
if archive_filename and doc.filename:
|
||||
# Generate the full path using the same logic as generate_filename
|
||||
base_generated = generate_filename(doc, archive_filename=archive_filename)
|
||||
|
||||
# Try to create a simple PDF version based on the original filename
|
||||
# but preserve any directory structure from the template
|
||||
if str(base_generated.parent) != ".":
|
||||
# Has directory structure, preserve it
|
||||
simple_pdf_name = base_generated.parent / (Path(doc.filename).stem + ".pdf")
|
||||
else:
|
||||
# No directory structure
|
||||
simple_pdf_name = Path(Path(doc.filename).stem + ".pdf")
|
||||
|
||||
if simple_pdf_name == old_filename or not (root / simple_pdf_name).exists():
|
||||
return simple_pdf_name
|
||||
new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
|
||||
if new_filename == old_filename or not os.path.exists(
|
||||
os.path.join(root, new_filename),
|
||||
):
|
||||
return new_filename
|
||||
|
||||
counter = 0
|
||||
|
||||
@@ -93,7 +84,7 @@ def generate_unique_filename(doc, *, archive_filename=False) -> Path:
|
||||
# still the same as before.
|
||||
return new_filename
|
||||
|
||||
if (root / new_filename).exists():
|
||||
if os.path.exists(os.path.join(root, new_filename)):
|
||||
counter += 1
|
||||
else:
|
||||
return new_filename
|
||||
@@ -105,8 +96,8 @@ def generate_filename(
|
||||
counter=0,
|
||||
append_gpg=True,
|
||||
archive_filename=False,
|
||||
) -> Path:
|
||||
base_path: Path | None = None
|
||||
):
|
||||
path = ""
|
||||
|
||||
def format_filename(document: Document, template_str: str) -> str | None:
|
||||
rendered_filename = validate_filepath_template_and_render(
|
||||
@@ -143,34 +134,17 @@ def generate_filename(
|
||||
|
||||
# If we have one, render it
|
||||
if filename_format is not None:
|
||||
rendered_path: str | None = format_filename(doc, filename_format)
|
||||
if rendered_path:
|
||||
base_path = Path(rendered_path)
|
||||
path = format_filename(doc, filename_format)
|
||||
|
||||
counter_str = f"_{counter:02}" if counter else ""
|
||||
filetype_str = ".pdf" if archive_filename else doc.file_type
|
||||
|
||||
if base_path:
|
||||
# Split the path into directory and filename parts
|
||||
directory = base_path.parent
|
||||
# Use the full name (not just stem) as the base filename
|
||||
base_filename = base_path.name
|
||||
|
||||
# Build the final filename with counter and filetype
|
||||
final_filename = f"{base_filename}{counter_str}{filetype_str}"
|
||||
|
||||
# If we have a directory component, include it
|
||||
if str(directory) != ".":
|
||||
full_path = directory / final_filename
|
||||
else:
|
||||
full_path = Path(final_filename)
|
||||
if path:
|
||||
filename = f"{path}{counter_str}{filetype_str}"
|
||||
else:
|
||||
# No template, use document ID
|
||||
final_filename = f"{doc.pk:07}{counter_str}{filetype_str}"
|
||||
full_path = Path(final_filename)
|
||||
filename = f"{doc.pk:07}{counter_str}{filetype_str}"
|
||||
|
||||
# Add GPG extension if needed
|
||||
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
|
||||
full_path = full_path.with_suffix(full_path.suffix + ".gpg")
|
||||
filename += ".gpg"
|
||||
|
||||
return full_path
|
||||
return filename
|
||||
|
@@ -236,7 +236,10 @@ class Command(CryptMixin, BaseCommand):
|
||||
# now make an archive in the original target, with all files stored
|
||||
if self.zip_export and temp_dir is not None:
|
||||
shutil.make_archive(
|
||||
self.original_target / options["zip_name"],
|
||||
os.path.join(
|
||||
self.original_target,
|
||||
options["zip_name"],
|
||||
),
|
||||
format="zip",
|
||||
root_dir=temp_dir.name,
|
||||
)
|
||||
@@ -339,7 +342,7 @@ class Command(CryptMixin, BaseCommand):
|
||||
)
|
||||
|
||||
if self.split_manifest:
|
||||
manifest_name = base_name.with_name(f"{base_name.stem}-manifest.json")
|
||||
manifest_name = Path(base_name + "-manifest.json")
|
||||
if self.use_folder_prefix:
|
||||
manifest_name = Path("json") / manifest_name
|
||||
manifest_name = (self.target / manifest_name).resolve()
|
||||
@@ -413,7 +416,7 @@ class Command(CryptMixin, BaseCommand):
|
||||
else:
|
||||
item.unlink()
|
||||
|
||||
def generate_base_name(self, document: Document) -> Path:
|
||||
def generate_base_name(self, document: Document) -> str:
|
||||
"""
|
||||
Generates a unique name for the document, one which hasn't already been exported (or will be)
|
||||
"""
|
||||
@@ -433,12 +436,12 @@ class Command(CryptMixin, BaseCommand):
|
||||
break
|
||||
else:
|
||||
filename_counter += 1
|
||||
return Path(base_name)
|
||||
return base_name
|
||||
|
||||
def generate_document_targets(
|
||||
self,
|
||||
document: Document,
|
||||
base_name: Path,
|
||||
base_name: str,
|
||||
document_dict: dict,
|
||||
) -> tuple[Path, Path | None, Path | None]:
|
||||
"""
|
||||
@@ -446,25 +449,25 @@ class Command(CryptMixin, BaseCommand):
|
||||
"""
|
||||
original_name = base_name
|
||||
if self.use_folder_prefix:
|
||||
original_name = Path("originals") / original_name
|
||||
original_target = (self.target / original_name).resolve()
|
||||
document_dict[EXPORTER_FILE_NAME] = str(original_name)
|
||||
original_name = os.path.join("originals", original_name)
|
||||
original_target = (self.target / Path(original_name)).resolve()
|
||||
document_dict[EXPORTER_FILE_NAME] = original_name
|
||||
|
||||
if not self.no_thumbnail:
|
||||
thumbnail_name = base_name.parent / (base_name.stem + "-thumbnail.webp")
|
||||
thumbnail_name = base_name + "-thumbnail.webp"
|
||||
if self.use_folder_prefix:
|
||||
thumbnail_name = Path("thumbnails") / thumbnail_name
|
||||
thumbnail_target = (self.target / thumbnail_name).resolve()
|
||||
document_dict[EXPORTER_THUMBNAIL_NAME] = str(thumbnail_name)
|
||||
thumbnail_name = os.path.join("thumbnails", thumbnail_name)
|
||||
thumbnail_target = (self.target / Path(thumbnail_name)).resolve()
|
||||
document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name
|
||||
else:
|
||||
thumbnail_target = None
|
||||
|
||||
if not self.no_archive and document.has_archive_version:
|
||||
archive_name = base_name.parent / (base_name.stem + "-archive.pdf")
|
||||
archive_name = base_name + "-archive.pdf"
|
||||
if self.use_folder_prefix:
|
||||
archive_name = Path("archive") / archive_name
|
||||
archive_target = (self.target / archive_name).resolve()
|
||||
document_dict[EXPORTER_ARCHIVE_NAME] = str(archive_name)
|
||||
archive_name = os.path.join("archive", archive_name)
|
||||
archive_target = (self.target / Path(archive_name)).resolve()
|
||||
document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
|
||||
else:
|
||||
archive_target = None
|
||||
|
||||
@@ -569,7 +572,7 @@ class Command(CryptMixin, BaseCommand):
|
||||
perform_copy = False
|
||||
|
||||
if target.exists():
|
||||
source_stat = source.stat()
|
||||
source_stat = os.stat(source)
|
||||
target_stat = target.stat()
|
||||
if self.compare_checksums and source_checksum:
|
||||
target_checksum = hashlib.md5(target.read_bytes()).hexdigest()
|
||||
|
@@ -125,14 +125,14 @@ class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||
messages.append(
|
||||
self.style.NOTICE(
|
||||
f"Document {result.doc_one_pk} fuzzy match"
|
||||
f" to {result.doc_two_pk} (confidence {result.ratio:.3f})\n",
|
||||
f" to {result.doc_two_pk} (confidence {result.ratio:.3f})",
|
||||
),
|
||||
)
|
||||
maybe_delete_ids.append(result.doc_two_pk)
|
||||
|
||||
if len(messages) == 0:
|
||||
messages.append(
|
||||
self.style.SUCCESS("No matches found\n"),
|
||||
self.style.SUCCESS("No matches found"),
|
||||
)
|
||||
self.stdout.writelines(
|
||||
messages,
|
||||
|
@@ -63,11 +63,11 @@ class Document:
|
||||
/ "documents"
|
||||
/ "originals"
|
||||
/ f"{self.pk:07}.{self.file_type}.gpg"
|
||||
)
|
||||
).as_posix()
|
||||
|
||||
@property
|
||||
def source_file(self):
|
||||
return self.source_path.open("rb")
|
||||
return Path(self.source_path).open("rb")
|
||||
|
||||
@property
|
||||
def file_name(self):
|
||||
|
@@ -2038,24 +2038,6 @@ class WorkflowTriggerSerializer(serializers.ModelSerializer):
|
||||
|
||||
return attrs
|
||||
|
||||
@staticmethod
|
||||
def normalize_workflow_trigger_sources(trigger):
|
||||
"""
|
||||
Convert sources to strings to handle django-multiselectfield v1.0 changes
|
||||
"""
|
||||
if trigger and "sources" in trigger:
|
||||
trigger["sources"] = [
|
||||
str(s.value if hasattr(s, "value") else s) for s in trigger["sources"]
|
||||
]
|
||||
|
||||
def create(self, validated_data):
    # Normalize the "sources" entry in place before delegating to the parent create()
    WorkflowTriggerSerializer.normalize_workflow_trigger_sources(validated_data)
    return super().create(validated_data)
|
||||
|
||||
def update(self, instance, validated_data):
    # Normalize the "sources" entry in place before delegating to the parent update()
    WorkflowTriggerSerializer.normalize_workflow_trigger_sources(validated_data)
    return super().update(instance, validated_data)
|
||||
|
||||
|
||||
class WorkflowActionEmailSerializer(serializers.ModelSerializer):
|
||||
id = serializers.IntegerField(allow_null=True, required=False)
|
||||
@@ -2220,8 +2202,6 @@ class WorkflowSerializer(serializers.ModelSerializer):
|
||||
if triggers is not None and triggers is not serializers.empty:
|
||||
for trigger in triggers:
|
||||
filter_has_tags = trigger.pop("filter_has_tags", None)
|
||||
# Convert sources to strings to handle django-multiselectfield v1.0 changes
|
||||
WorkflowTriggerSerializer.normalize_workflow_trigger_sources(trigger)
|
||||
trigger_instance, _ = WorkflowTrigger.objects.update_or_create(
|
||||
id=trigger.get("id"),
|
||||
defaults=trigger,
|
||||
|
@@ -1,8 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import httpx
|
||||
@@ -12,13 +12,11 @@ from celery.signals import before_task_publish
|
||||
from celery.signals import task_failure
|
||||
from celery.signals import task_postrun
|
||||
from celery.signals import task_prerun
|
||||
from celery.signals import worker_process_init
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import User
|
||||
from django.db import DatabaseError
|
||||
from django.db import close_old_connections
|
||||
from django.db import connections
|
||||
from django.db import models
|
||||
from django.db.models import Q
|
||||
from django.dispatch import receiver
|
||||
@@ -51,6 +49,8 @@ from documents.permissions import set_permissions_for_object
|
||||
from documents.templating.workflows import parse_w_workflow_placeholders
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
|
||||
from documents.classifier import DocumentClassifier
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.data_models import DocumentMetadataOverrides
|
||||
@@ -327,16 +327,15 @@ def cleanup_document_deletion(sender, instance, **kwargs):
|
||||
# Find a non-conflicting filename in case a document with the same
|
||||
# name was moved to trash earlier
|
||||
counter = 0
|
||||
old_filename = Path(instance.source_path).name
|
||||
old_filebase = Path(old_filename).stem
|
||||
old_fileext = Path(old_filename).suffix
|
||||
old_filename = os.path.split(instance.source_path)[1]
|
||||
(old_filebase, old_fileext) = os.path.splitext(old_filename)
|
||||
|
||||
while True:
|
||||
new_file_path = settings.EMPTY_TRASH_DIR / (
|
||||
old_filebase + (f"_{counter:02}" if counter else "") + old_fileext
|
||||
)
|
||||
|
||||
if new_file_path.exists():
|
||||
if os.path.exists(new_file_path):
|
||||
counter += 1
|
||||
else:
|
||||
break
|
||||
@@ -360,26 +359,26 @@ def cleanup_document_deletion(sender, instance, **kwargs):
|
||||
files += (instance.source_path,)
|
||||
|
||||
for filename in files:
|
||||
if filename and filename.is_file():
|
||||
if filename and os.path.isfile(filename):
|
||||
try:
|
||||
filename.unlink()
|
||||
os.unlink(filename)
|
||||
logger.debug(f"Deleted file {filename}.")
|
||||
except OSError as e:
|
||||
logger.warning(
|
||||
f"While deleting document {instance!s}, the file "
|
||||
f"{filename} could not be deleted: {e}",
|
||||
)
|
||||
elif filename and not filename.is_file():
|
||||
elif filename and not os.path.isfile(filename):
|
||||
logger.warning(f"Expected {filename} to exist, but it did not")
|
||||
|
||||
delete_empty_directories(
|
||||
Path(instance.source_path).parent,
|
||||
os.path.dirname(instance.source_path),
|
||||
root=settings.ORIGINALS_DIR,
|
||||
)
|
||||
|
||||
if instance.has_archive_version:
|
||||
delete_empty_directories(
|
||||
Path(instance.archive_path).parent,
|
||||
os.path.dirname(instance.archive_path),
|
||||
root=settings.ARCHIVE_DIR,
|
||||
)
|
||||
|
||||
@@ -400,14 +399,14 @@ def update_filename_and_move_files(
|
||||
if isinstance(instance, CustomFieldInstance):
|
||||
instance = instance.document
|
||||
|
||||
def validate_move(instance, old_path: Path, new_path: Path):
|
||||
if not old_path.is_file():
|
||||
def validate_move(instance, old_path, new_path):
|
||||
if not os.path.isfile(old_path):
|
||||
# Can't do anything if the old file does not exist anymore.
|
||||
msg = f"Document {instance!s}: File {old_path} doesn't exist."
|
||||
logger.fatal(msg)
|
||||
raise CannotMoveFilesException(msg)
|
||||
|
||||
if new_path.is_file():
|
||||
if os.path.isfile(new_path):
|
||||
# Can't do anything if the new file already exists. Skip updating file.
|
||||
msg = f"Document {instance!s}: Cannot rename file since target path {new_path} already exists."
|
||||
logger.warning(msg)
|
||||
@@ -435,20 +434,16 @@ def update_filename_and_move_files(
|
||||
old_filename = instance.filename
|
||||
old_source_path = instance.source_path
|
||||
|
||||
# Need to convert to string to be able to save it to the db
|
||||
instance.filename = str(generate_unique_filename(instance))
|
||||
instance.filename = generate_unique_filename(instance)
|
||||
move_original = old_filename != instance.filename
|
||||
|
||||
old_archive_filename = instance.archive_filename
|
||||
old_archive_path = instance.archive_path
|
||||
|
||||
if instance.has_archive_version:
|
||||
# Need to convert to string to be able to save it to the db
|
||||
instance.archive_filename = str(
|
||||
generate_unique_filename(
|
||||
instance,
|
||||
archive_filename=True,
|
||||
),
|
||||
instance.archive_filename = generate_unique_filename(
|
||||
instance,
|
||||
archive_filename=True,
|
||||
)
|
||||
|
||||
move_archive = old_archive_filename != instance.archive_filename
|
||||
@@ -490,11 +485,11 @@ def update_filename_and_move_files(
|
||||
|
||||
# Try to move files to their original location.
|
||||
try:
|
||||
if move_original and instance.source_path.is_file():
|
||||
if move_original and os.path.isfile(instance.source_path):
|
||||
logger.info("Restoring previous original path")
|
||||
shutil.move(instance.source_path, old_source_path)
|
||||
|
||||
if move_archive and instance.archive_path.is_file():
|
||||
if move_archive and os.path.isfile(instance.archive_path):
|
||||
logger.info("Restoring previous archive path")
|
||||
shutil.move(instance.archive_path, old_archive_path)
|
||||
|
||||
@@ -515,15 +510,17 @@ def update_filename_and_move_files(
|
||||
|
||||
# finally, remove any empty sub folders. This will do nothing if
|
||||
# something has failed above.
|
||||
if not old_source_path.is_file():
|
||||
if not os.path.isfile(old_source_path):
|
||||
delete_empty_directories(
|
||||
Path(old_source_path).parent,
|
||||
os.path.dirname(old_source_path),
|
||||
root=settings.ORIGINALS_DIR,
|
||||
)
|
||||
|
||||
if instance.has_archive_version and not old_archive_path.is_file():
|
||||
if instance.has_archive_version and not os.path.isfile(
|
||||
old_archive_path,
|
||||
):
|
||||
delete_empty_directories(
|
||||
Path(old_archive_path).parent,
|
||||
os.path.dirname(old_archive_path),
|
||||
root=settings.ARCHIVE_DIR,
|
||||
)
|
||||
|
||||
@@ -1220,7 +1217,10 @@ def run_workflows(
|
||||
)
|
||||
files = None
|
||||
if action.webhook.include_document:
|
||||
with original_file.open("rb") as f:
|
||||
with open(
|
||||
original_file,
|
||||
"rb",
|
||||
) as f:
|
||||
files = {
|
||||
"file": (
|
||||
filename,
|
||||
@@ -1439,18 +1439,3 @@ def task_failure_handler(
|
||||
task_instance.save()
|
||||
except Exception: # pragma: no cover
|
||||
logger.exception("Updating PaperlessTask failed")
|
||||
|
||||
|
||||
@worker_process_init.connect
def close_connection_pool_on_worker_init(**kwargs):
    """
    Close the DB connection pool of each Celery child process once it has started.

    The parent process parses the Django configuration and initializes the
    connection pools before it forks. Closing these pools after the fork
    ensures that child processes end up with a valid connection.
    """
    for connection in connections.all(initialized_only=True):
        # Only the "default" database is handled here
        if connection.alias != "default":
            continue
        # Skip connections without a pool attribute or with a falsy pool
        if getattr(connection, "pool", None):
            connection.close_pool()
|
||||
|
@@ -1,34 +0,0 @@
|
||||
Sample textual document content.
|
||||
Include as many characters as possible, to check the classifier's vectorization.
|
||||
|
||||
Hey 00, this is "a" test0707 content.
|
||||
This is an example document — created on 2025-06-25.
|
||||
|
||||
Digits: 0123456789
|
||||
Punctuation: . , ; : ! ? ' " ( ) [ ] { } — – …
|
||||
English text: The quick brown fox jumps over the lazy dog.
|
||||
English stop words: We’ve been doing it before.
|
||||
Accented Latin (diacritics): àâäæçéèêëîïôœùûüÿñ
|
||||
Arabic: لقد قام المترجم بعمل جيد
|
||||
Greek: Αλφα, Βήτα, Γάμμα, Δέλτα, Ωμέγα
|
||||
Cyrillic: Привет, как дела? Добро пожаловать!
|
||||
Chinese (Simplified): 你好,世界!今天的天气很好。
|
||||
Chinese (Traditional): 歡迎來到世界,今天天氣很好。
|
||||
Japanese (Kanji, Hiragana, Katakana): 東京へ行きます。カタカナ、ひらがな、漢字。
|
||||
Korean (Hangul): 안녕하세요. 오늘 날씨 어때요?
|
||||
Arabic: مرحبًا، كيف حالك؟
|
||||
Hebrew: שלום, מה שלומך?
|
||||
Emoji: 😀 🐍 📘 ✅ ©️ 🇺🇳
|
||||
Symbols: © ® ™ § ¶ † ‡ ∞ µ ∑ ∆ √
|
||||
Math: ∫₀^∞ x² dx = ∞, π ≈ 3.14159, ∇·E = ρ/ε₀
|
||||
Currency: 1$ € ¥ £ ₹
|
||||
Date formats: 25/06/2025, June 25, 2025, 2025年6月25日
|
||||
Quote in French: « Bonjour, ça va ? »
|
||||
Quote in German: „Guten Tag! Wie geht's?“
|
||||
Newline test:
|
||||
\r\n
|
||||
\r
|
||||
|
||||
Tab\ttest\tspacing
|
||||
/ = +) ( []) ~ * #192 +33601010101 § ¤
|
||||
End of document.
|
@@ -1 +0,0 @@
|
||||
sample textual document content include as many characters as possible to check the classifier s vectorization hey 00 this is a test0707 content this is an example document created on 2025 06 25 digits 0123456789 punctuation english text the quick brown fox jumps over the lazy dog english stop words we ve been doing it before accented latin diacritics àâäæçéèêëîïôœùûüÿñ arabic لقد قام المترجم بعمل جيد greek αλφα βήτα γάμμα δέλτα ωμέγα cyrillic привет как дела добро пожаловать chinese simplified 你好 世界 今天的天气很好 chinese traditional 歡迎來到世界 今天天氣很好 japanese kanji hiragana katakana 東京へ行きます カタカナ ひらがな 漢字 korean hangul 안녕하세요 오늘 날씨 어때요 arabic مرحب ا كيف حالك hebrew שלום מה שלומך emoji symbols µ math ₀ x² dx π 3 14159 e ρ ε₀ currency 1 date formats 25 06 2025 june 25 2025 2025年6月25日 quote in french bonjour ça va quote in german guten tag wie geht s newline test r n r tab ttest tspacing 192 33601010101 end of document
|
@@ -1 +0,0 @@
|
||||
sampl textual document content includ mani charact possibl check classifi vector hey 00 test0707 content exampl document creat 2025 06 25 digit 0123456789 punctuat english text quick brown fox jump lazi dog english stop word accent latin diacrit àâäæçéèêëîïôœùûüÿñ arab لقد قام المترجم بعمل جيد greek αλφα βήτα γάμμα δέλτα ωμέγα cyril привет как дела добро пожаловать chines simplifi 你好 世界 今天的天气很好 chines tradit 歡迎來到世界 今天天氣很好 japanes kanji hiragana katakana 東京へ行きます カタカナ ひらがな 漢字 korean hangul 안녕하세요 오늘 날씨 어때요 arab مرحب ا كيف حالك hebrew שלום מה שלומך emoji symbol µ math ₀ x² dx π 3 14159 e ρ ε₀ currenc 1 date format 25 06 2025 june 25 2025 2025年6月25日 quot french bonjour ça va quot german guten tag wie geht newlin test r n r tab ttest tspace 192 33601010101 end document
|
@@ -1,45 +0,0 @@
|
||||
import pickle
|
||||
|
||||
from documents.caching import StoredLRUCache
|
||||
|
||||
|
||||
def test_lru_cache_entries():
    """
    Exercise eviction and the save/load round trip of StoredLRUCache.
    """
    backend_ttl = 1
    # LRU cache limited to 2 elements, so adding a third key must evict one
    lru = StoredLRUCache("test_lru_cache_key", 2, backend_ttl=backend_ttl)
    for number in (1, 2):
        lru.set(number, number)
    assert lru.get(2) == 2
    assert lru.get(1) == 1

    # Key 2 was read before key 1, so it is the entry expected to be dropped
    lru.set(3, 3)
    assert lru.get(3) == 3
    assert not lru.get(2)
    assert lru.get(1) == 1

    # Persist the current state, change the in-memory cache, then reload:
    # the reloaded state is expected to replace what is held in memory
    lru.save()
    lru.set(4, 4)
    assert not lru.get(3)
    lru.load()
    assert not lru.get(4)
    assert lru.get(3) == 3
    assert lru.get(1) == 1
|
||||
|
||||
|
||||
def test_stored_lru_cache_key_ttl(mocker):
    """
    Check that save() passes the cache key, the pickled entries and the TTL to the backend.
    """
    backend = mocker.Mock()
    lru = StoredLRUCache("test_key", backend=backend, backend_ttl=321)

    # Populate two entries, then persist them
    for name, value in (("x", "X"), ("y", "Y")):
        lru.set(name, value)
    lru.save()

    # Exactly one backend.set(key, data, timeout) call is expected, all positional
    backend.set.assert_called_once()
    stored_key, payload, ttl = backend.set.call_args.args
    assert stored_key == "test_key"
    assert ttl == 321
    assert pickle.loads(payload) == {"x": "X", "y": "Y"}
|
@@ -21,7 +21,7 @@ from documents.models import Tag
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
def dummy_preprocess(content: str, **kwargs):
|
||||
def dummy_preprocess(content: str):
|
||||
"""
|
||||
Simpler, faster pre-processing for testing purposes
|
||||
"""
|
||||
@@ -223,47 +223,24 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
self.generate_test_data()
|
||||
self.classifier.train()
|
||||
|
||||
with (
|
||||
mock.patch.object(
|
||||
self.classifier.data_vectorizer,
|
||||
"transform",
|
||||
wraps=self.classifier.data_vectorizer.transform,
|
||||
) as mock_transform,
|
||||
mock.patch.object(
|
||||
self.classifier,
|
||||
"preprocess_content",
|
||||
wraps=self.classifier.preprocess_content,
|
||||
) as mock_preprocess_content,
|
||||
):
|
||||
self.assertEqual(
|
||||
self.classifier.predict_correspondent(self.doc1.content),
|
||||
self.c1.pk,
|
||||
)
|
||||
self.assertEqual(
|
||||
self.classifier.predict_correspondent(self.doc2.content),
|
||||
None,
|
||||
)
|
||||
self.assertListEqual(
|
||||
self.classifier.predict_tags(self.doc1.content),
|
||||
[self.t1.pk],
|
||||
)
|
||||
self.assertListEqual(
|
||||
self.classifier.predict_tags(self.doc2.content),
|
||||
[self.t1.pk, self.t3.pk],
|
||||
)
|
||||
self.assertEqual(
|
||||
self.classifier.predict_document_type(self.doc1.content),
|
||||
self.dt.pk,
|
||||
)
|
||||
self.assertEqual(
|
||||
self.classifier.predict_document_type(self.doc2.content),
|
||||
None,
|
||||
)
|
||||
|
||||
# Check that the classifier vectorized content and text preprocessing has been cached
|
||||
# It should be called once per document (doc1 and doc2)
|
||||
self.assertEqual(mock_preprocess_content.call_count, 2)
|
||||
self.assertEqual(mock_transform.call_count, 2)
|
||||
self.assertEqual(
|
||||
self.classifier.predict_correspondent(self.doc1.content),
|
||||
self.c1.pk,
|
||||
)
|
||||
self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
|
||||
self.assertListEqual(
|
||||
self.classifier.predict_tags(self.doc1.content),
|
||||
[self.t1.pk],
|
||||
)
|
||||
self.assertListEqual(
|
||||
self.classifier.predict_tags(self.doc2.content),
|
||||
[self.t1.pk, self.t3.pk],
|
||||
)
|
||||
self.assertEqual(
|
||||
self.classifier.predict_document_type(self.doc1.content),
|
||||
self.dt.pk,
|
||||
)
|
||||
self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)
|
||||
|
||||
def test_no_retrain_if_no_change(self):
|
||||
"""
|
||||
@@ -717,67 +694,3 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
||||
mock_load.side_effect = Exception()
|
||||
with self.assertRaises(Exception):
|
||||
load_classifier(raise_exception=True)
|
||||
|
||||
|
||||
def test_preprocess_content():
    """
    GIVEN:
        - Advanced text processing is enabled (default)
    WHEN:
        - Classifier preprocesses a document's content
    THEN:
        - Processed content matches the expected output (stemmed words)
    """
    samples_dir = Path(__file__).parent / "samples"
    # The sample files hold non-ASCII text (Arabic, CJK, emoji, ...), so decode
    # them explicitly as UTF-8 rather than with the platform's default encoding
    content = (samples_dir / "content.txt").read_text(encoding="utf-8")
    expected_preprocess_content = (
        (samples_dir / "preprocessed_content_advanced.txt")
        .read_text(encoding="utf-8")
        .rstrip()
    )

    classifier = DocumentClassifier()
    result = classifier.preprocess_content(content)
    assert result == expected_preprocess_content
|
||||
|
||||
|
||||
def test_preprocess_content_nltk_disabled():
    """
    GIVEN:
        - Advanced text processing is disabled
    WHEN:
        - Classifier preprocesses a document's content
    THEN:
        - Processed content matches the expected output (unstemmed words)
    """
    samples_dir = Path(__file__).parent / "samples"
    # The sample files hold non-ASCII text (Arabic, CJK, emoji, ...), so decode
    # them explicitly as UTF-8 rather than with the platform's default encoding
    content = (samples_dir / "content.txt").read_text(encoding="utf-8")
    expected_preprocess_content = (
        (samples_dir / "preprocessed_content.txt").read_text(encoding="utf-8").rstrip()
    )

    classifier = DocumentClassifier()
    # Switch the module-level flag off only for the duration of the call
    with mock.patch("documents.classifier.ADVANCED_TEXT_PROCESSING_ENABLED", new=False):
        result = classifier.preprocess_content(content)
    assert result == expected_preprocess_content
|
||||
|
||||
|
||||
def test_preprocess_content_nltk_load_fail(mocker):
    """
    GIVEN:
        - NLTK stop words fail to load
    WHEN:
        - Classifier preprocesses a document's content
    THEN:
        - Processed content matches the expected output (unstemmed words)
    """
    # Replace nltk.corpus with a stand-in whose stopwords.words() raises,
    # and install it before the classifier is created
    _module = mocker.MagicMock(name="nltk_corpus_mock")
    _module.stopwords.words.side_effect = AttributeError()
    mocker.patch.dict("sys.modules", {"nltk.corpus": _module})
    classifier = DocumentClassifier()

    samples_dir = Path(__file__).parent / "samples"
    # The sample files hold non-ASCII text (Arabic, CJK, emoji, ...), so decode
    # them explicitly as UTF-8 rather than with the platform's default encoding
    content = (samples_dir / "content.txt").read_text(encoding="utf-8")
    expected_preprocess_content = (
        (samples_dir / "preprocessed_content.txt").read_text(encoding="utf-8").rstrip()
    )

    result = classifier.preprocess_content(content)
    assert result == expected_preprocess_content
|
||||
|
@@ -41,9 +41,11 @@ class TestDocument(TestCase):
|
||||
Path(file_path).touch()
|
||||
Path(thumb_path).touch()
|
||||
|
||||
with mock.patch("documents.signals.handlers.Path.unlink") as mock_unlink:
|
||||
with mock.patch("documents.signals.handlers.os.unlink") as mock_unlink:
|
||||
document.delete()
|
||||
empty_trash([document.pk])
|
||||
mock_unlink.assert_any_call(file_path)
|
||||
mock_unlink.assert_any_call(thumb_path)
|
||||
self.assertEqual(mock_unlink.call_count, 2)
|
||||
|
||||
def test_document_soft_delete(self):
|
||||
@@ -61,7 +63,7 @@ class TestDocument(TestCase):
|
||||
Path(file_path).touch()
|
||||
Path(thumb_path).touch()
|
||||
|
||||
with mock.patch("documents.signals.handlers.Path.unlink") as mock_unlink:
|
||||
with mock.patch("documents.signals.handlers.os.unlink") as mock_unlink:
|
||||
document.delete()
|
||||
self.assertEqual(mock_unlink.call_count, 0)
|
||||
|
||||
|
@@ -34,12 +34,12 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
document.save()
|
||||
|
||||
self.assertEqual(generate_filename(document), Path(f"{document.pk:07d}.pdf"))
|
||||
self.assertEqual(generate_filename(document), f"{document.pk:07d}.pdf")
|
||||
|
||||
document.storage_type = Document.STORAGE_TYPE_GPG
|
||||
self.assertEqual(
|
||||
generate_filename(document),
|
||||
Path(f"{document.pk:07d}.pdf.gpg"),
|
||||
f"{document.pk:07d}.pdf.gpg",
|
||||
)
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||
@@ -58,12 +58,12 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
document.filename = generate_filename(document)
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
self.assertEqual(document.filename, Path("none/none.pdf"))
|
||||
self.assertEqual(document.filename, "none/none.pdf")
|
||||
|
||||
# Enable encryption and check again
|
||||
document.storage_type = Document.STORAGE_TYPE_GPG
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename, Path("none/none.pdf.gpg"))
|
||||
self.assertEqual(document.filename, "none/none.pdf.gpg")
|
||||
|
||||
document.save()
|
||||
|
||||
@@ -96,7 +96,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename, Path("none/none.pdf"))
|
||||
self.assertEqual(document.filename, "none/none.pdf")
|
||||
create_source_path_directory(document.source_path)
|
||||
document.source_path.touch()
|
||||
|
||||
@@ -137,7 +137,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename, Path("none/none.pdf"))
|
||||
self.assertEqual(document.filename, "none/none.pdf")
|
||||
create_source_path_directory(document.source_path)
|
||||
Path(document.source_path).touch()
|
||||
|
||||
@@ -247,7 +247,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename, Path("none/none.pdf"))
|
||||
self.assertEqual(document.filename, "none/none.pdf")
|
||||
|
||||
create_source_path_directory(document.source_path)
|
||||
|
||||
@@ -269,11 +269,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
dt = DocumentType.objects.create(name="my_doc_type")
|
||||
d = Document.objects.create(title="the_doc", mime_type="application/pdf")
|
||||
|
||||
self.assertEqual(generate_filename(d), Path("none - the_doc.pdf"))
|
||||
self.assertEqual(generate_filename(d), "none - the_doc.pdf")
|
||||
|
||||
d.document_type = dt
|
||||
|
||||
self.assertEqual(generate_filename(d), Path("my_doc_type - the_doc.pdf"))
|
||||
self.assertEqual(generate_filename(d), "my_doc_type - the_doc.pdf")
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{asn} - {title}")
|
||||
def test_asn(self):
|
||||
@@ -289,8 +289,8 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
archive_serial_number=None,
|
||||
checksum="B",
|
||||
)
|
||||
self.assertEqual(generate_filename(d1), Path("652 - the_doc.pdf"))
|
||||
self.assertEqual(generate_filename(d2), Path("none - the_doc.pdf"))
|
||||
self.assertEqual(generate_filename(d1), "652 - the_doc.pdf")
|
||||
self.assertEqual(generate_filename(d2), "none - the_doc.pdf")
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{title} {tag_list}")
|
||||
def test_tag_list(self):
|
||||
@@ -298,7 +298,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
doc.tags.create(name="tag2")
|
||||
doc.tags.create(name="tag1")
|
||||
|
||||
self.assertEqual(generate_filename(doc), Path("doc1 tag1,tag2.pdf"))
|
||||
self.assertEqual(generate_filename(doc), "doc1 tag1,tag2.pdf")
|
||||
|
||||
doc = Document.objects.create(
|
||||
title="doc2",
|
||||
@@ -306,7 +306,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(doc), Path("doc2.pdf"))
|
||||
self.assertEqual(generate_filename(doc), "doc2.pdf")
|
||||
|
||||
@override_settings(FILENAME_FORMAT="//etc/something/{title}")
|
||||
def test_filename_relative(self):
|
||||
@@ -330,11 +330,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
created=d1,
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(doc1), Path("2020-03-06.pdf"))
|
||||
self.assertEqual(generate_filename(doc1), "2020-03-06.pdf")
|
||||
|
||||
doc1.created = datetime.date(2020, 11, 16)
|
||||
|
||||
self.assertEqual(generate_filename(doc1), Path("2020-11-16.pdf"))
|
||||
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
|
||||
|
||||
@override_settings(
|
||||
FILENAME_FORMAT="{added_year}-{added_month}-{added_day}",
|
||||
@@ -347,11 +347,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
added=d1,
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(doc1), Path("232-01-09.pdf"))
|
||||
self.assertEqual(generate_filename(doc1), "232-01-09.pdf")
|
||||
|
||||
doc1.added = timezone.make_aware(datetime.datetime(2020, 11, 16, 1, 1, 1))
|
||||
|
||||
self.assertEqual(generate_filename(doc1), Path("2020-11-16.pdf"))
|
||||
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
|
||||
|
||||
@override_settings(
|
||||
FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}",
|
||||
@@ -389,11 +389,11 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
self.assertEqual(generate_filename(document), Path("0000001.pdf"))
|
||||
self.assertEqual(generate_filename(document), "0000001.pdf")
|
||||
|
||||
document.pk = 13579
|
||||
|
||||
self.assertEqual(generate_filename(document), Path("0013579.pdf"))
|
||||
self.assertEqual(generate_filename(document), "0013579.pdf")
|
||||
|
||||
@override_settings(FILENAME_FORMAT=None)
|
||||
def test_format_none(self):
|
||||
@@ -402,7 +402,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
self.assertEqual(generate_filename(document), Path("0000001.pdf"))
|
||||
self.assertEqual(generate_filename(document), "0000001.pdf")
|
||||
|
||||
def test_try_delete_empty_directories(self):
|
||||
# Create our working directory
|
||||
@@ -428,7 +428,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
self.assertEqual(generate_filename(document), Path("0000001.pdf"))
|
||||
self.assertEqual(generate_filename(document), "0000001.pdf")
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{created__year}")
|
||||
def test_invalid_format_key(self):
|
||||
@@ -437,7 +437,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
document.mime_type = "application/pdf"
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
self.assertEqual(generate_filename(document), Path("0000001.pdf"))
|
||||
self.assertEqual(generate_filename(document), "0000001.pdf")
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{title}")
|
||||
def test_duplicates(self):
|
||||
@@ -564,7 +564,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
value_select="abc123",
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(doc), Path("document_apple.pdf"))
|
||||
self.assertEqual(generate_filename(doc), "document_apple.pdf")
|
||||
|
||||
# handler should not have been called
|
||||
self.assertEqual(m.call_count, 0)
|
||||
@@ -576,7 +576,7 @@ class TestFileHandling(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
],
|
||||
}
|
||||
cf.save()
|
||||
self.assertEqual(generate_filename(doc), Path("document_aubergine.pdf"))
|
||||
self.assertEqual(generate_filename(doc), "document_aubergine.pdf")
|
||||
# handler should have been called
|
||||
self.assertEqual(m.call_count, 1)
|
||||
|
||||
@@ -897,7 +897,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
pk=1,
|
||||
checksum="1",
|
||||
)
|
||||
self.assertEqual(generate_filename(doc), Path("This. is the title.pdf"))
|
||||
self.assertEqual(generate_filename(doc), "This. is the title.pdf")
|
||||
|
||||
doc = Document.objects.create(
|
||||
title="my\\invalid/../title:yay",
|
||||
@@ -905,7 +905,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
pk=2,
|
||||
checksum="2",
|
||||
)
|
||||
self.assertEqual(generate_filename(doc), Path("my-invalid-..-title-yay.pdf"))
|
||||
self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay.pdf")
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{created}")
|
||||
def test_date(self):
|
||||
@@ -916,7 +916,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
pk=2,
|
||||
checksum="2",
|
||||
)
|
||||
self.assertEqual(generate_filename(doc), Path("2020-05-21.pdf"))
|
||||
self.assertEqual(generate_filename(doc), "2020-05-21.pdf")
|
||||
|
||||
def test_dynamic_path(self):
|
||||
"""
|
||||
@@ -935,7 +935,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
checksum="2",
|
||||
storage_path=StoragePath.objects.create(path="TestFolder/{{created}}"),
|
||||
)
|
||||
self.assertEqual(generate_filename(doc), Path("TestFolder/2020-06-25.pdf"))
|
||||
self.assertEqual(generate_filename(doc), "TestFolder/2020-06-25.pdf")
|
||||
|
||||
def test_dynamic_path_with_none(self):
|
||||
"""
|
||||
@@ -956,7 +956,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
checksum="2",
|
||||
storage_path=StoragePath.objects.create(path="{{asn}} - {{created}}"),
|
||||
)
|
||||
self.assertEqual(generate_filename(doc), Path("none - 2020-06-25.pdf"))
|
||||
self.assertEqual(generate_filename(doc), "none - 2020-06-25.pdf")
|
||||
|
||||
@override_settings(
|
||||
FILENAME_FORMAT_REMOVE_NONE=True,
|
||||
@@ -984,7 +984,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
checksum="2",
|
||||
storage_path=sp,
|
||||
)
|
||||
self.assertEqual(generate_filename(doc), Path("TestFolder/2020-06-25.pdf"))
|
||||
self.assertEqual(generate_filename(doc), "TestFolder/2020-06-25.pdf")
|
||||
|
||||
# Special case, undefined variable, then defined at the start of the template
|
||||
# This could lead to an absolute path after we remove the leading -none-, but leave the leading /
|
||||
@@ -993,7 +993,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
"{{ owner_username }}/{{ created_year }}/{{ correspondent }}/{{ title }}"
|
||||
)
|
||||
sp.save()
|
||||
self.assertEqual(generate_filename(doc), Path("2020/does not matter.pdf"))
|
||||
self.assertEqual(generate_filename(doc), "2020/does not matter.pdf")
|
||||
|
||||
def test_multiple_doc_paths(self):
|
||||
"""
|
||||
@@ -1028,14 +1028,8 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
),
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
generate_filename(doc_a),
|
||||
Path("ThisIsAFolder/4/2020-06-25.pdf"),
|
||||
)
|
||||
self.assertEqual(
|
||||
generate_filename(doc_b),
|
||||
Path("SomeImportantNone/2020-07-25.pdf"),
|
||||
)
|
||||
self.assertEqual(generate_filename(doc_a), "ThisIsAFolder/4/2020-06-25.pdf")
|
||||
self.assertEqual(generate_filename(doc_b), "SomeImportantNone/2020-07-25.pdf")
|
||||
|
||||
@override_settings(
|
||||
FILENAME_FORMAT=None,
|
||||
@@ -1070,11 +1064,8 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
),
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(doc_a), Path("0000002.pdf"))
|
||||
self.assertEqual(
|
||||
generate_filename(doc_b),
|
||||
Path("SomeImportantNone/2020-07-25.pdf"),
|
||||
)
|
||||
self.assertEqual(generate_filename(doc_a), "0000002.pdf")
|
||||
self.assertEqual(generate_filename(doc_b), "SomeImportantNone/2020-07-25.pdf")
|
||||
|
||||
@override_settings(
|
||||
FILENAME_FORMAT="{created_year_short}/{created_month_name_short}/{created_month_name}/{title}",
|
||||
@@ -1087,7 +1078,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
pk=2,
|
||||
checksum="2",
|
||||
)
|
||||
self.assertEqual(generate_filename(doc), Path("89/Dec/December/The Title.pdf"))
|
||||
self.assertEqual(generate_filename(doc), "89/Dec/December/The Title.pdf")
|
||||
|
||||
@override_settings(
|
||||
FILENAME_FORMAT="{added_year_short}/{added_month_name}/{added_month_name_short}/{title}",
|
||||
@@ -1100,7 +1091,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
pk=2,
|
||||
checksum="2",
|
||||
)
|
||||
self.assertEqual(generate_filename(doc), Path("84/August/Aug/The Title.pdf"))
|
||||
self.assertEqual(generate_filename(doc), "84/August/Aug/The Title.pdf")
|
||||
|
||||
@override_settings(
|
||||
FILENAME_FORMAT="{owner_username}/{title}",
|
||||
@@ -1133,8 +1124,8 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
checksum="3",
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(owned_doc), Path("user1/The Title.pdf"))
|
||||
self.assertEqual(generate_filename(no_owner_doc), Path("none/does matter.pdf"))
|
||||
self.assertEqual(generate_filename(owned_doc), "user1/The Title.pdf")
|
||||
self.assertEqual(generate_filename(no_owner_doc), "none/does matter.pdf")
|
||||
|
||||
@override_settings(
|
||||
FILENAME_FORMAT="{original_name}",
|
||||
@@ -1180,20 +1171,17 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
original_filename="logs.txt",
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(doc_with_original), Path("someepdf.pdf"))
|
||||
self.assertEqual(generate_filename(doc_with_original), "someepdf.pdf")
|
||||
|
||||
self.assertEqual(
|
||||
generate_filename(tricky_with_original),
|
||||
Path("some pdf with spaces and stuff.pdf"),
|
||||
"some pdf with spaces and stuff.pdf",
|
||||
)
|
||||
|
||||
self.assertEqual(generate_filename(no_original), Path("none.pdf"))
|
||||
self.assertEqual(generate_filename(no_original), "none.pdf")
|
||||
|
||||
self.assertEqual(generate_filename(text_doc), Path("logs.txt"))
|
||||
self.assertEqual(
|
||||
generate_filename(text_doc, archive_filename=True),
|
||||
Path("logs.pdf"),
|
||||
)
|
||||
self.assertEqual(generate_filename(text_doc), "logs.txt")
|
||||
self.assertEqual(generate_filename(text_doc, archive_filename=True), "logs.pdf")
|
||||
|
||||
@override_settings(
|
||||
FILENAME_FORMAT="XX{correspondent}/{title}",
|
||||
@@ -1218,7 +1206,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
|
||||
# Ensure that filename is properly generated
|
||||
document.filename = generate_filename(document)
|
||||
self.assertEqual(document.filename, Path("XX/doc1.pdf"))
|
||||
self.assertEqual(document.filename, "XX/doc1.pdf")
|
||||
|
||||
def test_complex_template_strings(self):
|
||||
"""
|
||||
@@ -1256,19 +1244,19 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
|
||||
self.assertEqual(
|
||||
generate_filename(doc_a),
|
||||
Path("somepath/some where/2020-06-25/Does Matter.pdf"),
|
||||
"somepath/some where/2020-06-25/Does Matter.pdf",
|
||||
)
|
||||
doc_a.checksum = "5"
|
||||
|
||||
self.assertEqual(
|
||||
generate_filename(doc_a),
|
||||
Path("somepath/2024-10-01/Does Matter.pdf"),
|
||||
"somepath/2024-10-01/Does Matter.pdf",
|
||||
)
|
||||
|
||||
sp.path = "{{ document.title|lower }}{{ document.archive_serial_number - 2 }}"
|
||||
sp.save()
|
||||
|
||||
self.assertEqual(generate_filename(doc_a), Path("does matter23.pdf"))
|
||||
self.assertEqual(generate_filename(doc_a), "does matter23.pdf")
|
||||
|
||||
sp.path = """
|
||||
somepath/
|
||||
@@ -1287,13 +1275,13 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
sp.save()
|
||||
self.assertEqual(
|
||||
generate_filename(doc_a),
|
||||
Path("somepath/asn-000-200/Does Matter/Does Matter.pdf"),
|
||||
"somepath/asn-000-200/Does Matter/Does Matter.pdf",
|
||||
)
|
||||
doc_a.archive_serial_number = 301
|
||||
doc_a.save()
|
||||
self.assertEqual(
|
||||
generate_filename(doc_a),
|
||||
Path("somepath/asn-201-400/asn-3xx/Does Matter.pdf"),
|
||||
"somepath/asn-201-400/asn-3xx/Does Matter.pdf",
|
||||
)
|
||||
|
||||
@override_settings(
|
||||
@@ -1322,7 +1310,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
with self.assertLogs(level=logging.WARNING) as capture:
|
||||
self.assertEqual(
|
||||
generate_filename(doc_a),
|
||||
Path("0000002.pdf"),
|
||||
"0000002.pdf",
|
||||
)
|
||||
|
||||
self.assertEqual(len(capture.output), 1)
|
||||
@@ -1357,7 +1345,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
with self.assertLogs(level=logging.WARNING) as capture:
|
||||
self.assertEqual(
|
||||
generate_filename(doc_a),
|
||||
Path("0000002.pdf"),
|
||||
"0000002.pdf",
|
||||
)
|
||||
|
||||
self.assertEqual(len(capture.output), 1)
|
||||
@@ -1425,7 +1413,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
):
|
||||
self.assertEqual(
|
||||
generate_filename(doc_a),
|
||||
Path("invoices/1234.pdf"),
|
||||
"invoices/1234.pdf",
|
||||
)
|
||||
|
||||
with override_settings(
|
||||
@@ -1439,7 +1427,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
):
|
||||
self.assertEqual(
|
||||
generate_filename(doc_a),
|
||||
Path("Some Title_ChoiceOne.pdf"),
|
||||
"Some Title_ChoiceOne.pdf",
|
||||
)
|
||||
|
||||
# Check for handling Nones well
|
||||
@@ -1448,7 +1436,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
|
||||
self.assertEqual(
|
||||
generate_filename(doc_a),
|
||||
Path("Some Title_Default Value.pdf"),
|
||||
"Some Title_Default Value.pdf",
|
||||
)
|
||||
|
||||
cf.name = "Invoice Number"
|
||||
@@ -1461,7 +1449,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
):
|
||||
self.assertEqual(
|
||||
generate_filename(doc_a),
|
||||
Path("invoices/4567.pdf"),
|
||||
"invoices/4567.pdf",
|
||||
)
|
||||
|
||||
with override_settings(
|
||||
@@ -1469,7 +1457,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
):
|
||||
self.assertEqual(
|
||||
generate_filename(doc_a),
|
||||
Path("invoices/0.pdf"),
|
||||
"invoices/0.pdf",
|
||||
)
|
||||
|
||||
def test_datetime_filter(self):
|
||||
@@ -1508,7 +1496,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
):
|
||||
self.assertEqual(
|
||||
generate_filename(doc_a),
|
||||
Path("2020/Some Title.pdf"),
|
||||
"2020/Some Title.pdf",
|
||||
)
|
||||
|
||||
with override_settings(
|
||||
@@ -1516,7 +1504,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
):
|
||||
self.assertEqual(
|
||||
generate_filename(doc_a),
|
||||
Path("2020-06-25/Some Title.pdf"),
|
||||
"2020-06-25/Some Title.pdf",
|
||||
)
|
||||
|
||||
with override_settings(
|
||||
@@ -1524,7 +1512,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
):
|
||||
self.assertEqual(
|
||||
generate_filename(doc_a),
|
||||
Path("2024-10-01/Some Title.pdf"),
|
||||
"2024-10-01/Some Title.pdf",
|
||||
)
|
||||
|
||||
def test_slugify_filter(self):
|
||||
@@ -1551,7 +1539,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
):
|
||||
self.assertEqual(
|
||||
generate_filename(doc),
|
||||
Path("some-title-with-special-characters.pdf"),
|
||||
"some-title-with-special-characters.pdf",
|
||||
)
|
||||
|
||||
# Test with correspondent name containing spaces and special chars
|
||||
@@ -1565,7 +1553,7 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
):
|
||||
self.assertEqual(
|
||||
generate_filename(doc),
|
||||
Path("johns-office-workplace/some-title-with-special-characters.pdf"),
|
||||
"johns-office-workplace/some-title-with-special-characters.pdf",
|
||||
)
|
||||
|
||||
# Test with custom fields
|
||||
@@ -1584,5 +1572,5 @@ class TestFilenameGeneration(DirectoriesMixin, TestCase):
|
||||
):
|
||||
self.assertEqual(
|
||||
generate_filename(doc),
|
||||
Path("brussels-belgium/some-title-with-special-characters.pdf"),
|
||||
"brussels-belgium/some-title-with-special-characters.pdf",
|
||||
)
|
||||
|
@@ -123,7 +123,7 @@ class TestExportImport(
|
||||
|
||||
self.trigger = WorkflowTrigger.objects.create(
|
||||
type=WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
|
||||
sources=[str(WorkflowTrigger.DocumentSourceChoices.CONSUME_FOLDER.value)],
|
||||
sources=[1],
|
||||
filter_filename="*",
|
||||
)
|
||||
self.action = WorkflowAction.objects.create(assign_title="new title")
|
||||
@@ -209,7 +209,7 @@ class TestExportImport(
|
||||
4,
|
||||
)
|
||||
|
||||
self.assertIsFile(self.target / "manifest.json")
|
||||
self.assertIsFile((self.target / "manifest.json").as_posix())
|
||||
|
||||
self.assertEqual(
|
||||
self._get_document_from_manifest(manifest, self.d1.id)["fields"]["title"],
|
||||
@@ -235,7 +235,9 @@ class TestExportImport(
|
||||
).as_posix()
|
||||
self.assertIsFile(fname)
|
||||
self.assertIsFile(
|
||||
self.target / element[document_exporter.EXPORTER_THUMBNAIL_NAME],
|
||||
(
|
||||
self.target / element[document_exporter.EXPORTER_THUMBNAIL_NAME]
|
||||
).as_posix(),
|
||||
)
|
||||
|
||||
with Path(fname).open("rb") as f:
|
||||
@@ -250,7 +252,7 @@ class TestExportImport(
|
||||
if document_exporter.EXPORTER_ARCHIVE_NAME in element:
|
||||
fname = (
|
||||
self.target / element[document_exporter.EXPORTER_ARCHIVE_NAME]
|
||||
)
|
||||
).as_posix()
|
||||
self.assertIsFile(fname)
|
||||
|
||||
with Path(fname).open("rb") as f:
|
||||
@@ -310,7 +312,7 @@ class TestExportImport(
|
||||
)
|
||||
|
||||
self._do_export()
|
||||
self.assertIsFile(self.target / "manifest.json")
|
||||
self.assertIsFile((self.target / "manifest.json").as_posix())
|
||||
|
||||
st_mtime_1 = (self.target / "manifest.json").stat().st_mtime
|
||||
|
||||
@@ -320,7 +322,7 @@ class TestExportImport(
|
||||
self._do_export()
|
||||
m.assert_not_called()
|
||||
|
||||
self.assertIsFile(self.target / "manifest.json")
|
||||
self.assertIsFile((self.target / "manifest.json").as_posix())
|
||||
st_mtime_2 = (self.target / "manifest.json").stat().st_mtime
|
||||
|
||||
Path(self.d1.source_path).touch()
|
||||
@@ -332,7 +334,7 @@ class TestExportImport(
|
||||
self.assertEqual(m.call_count, 1)
|
||||
|
||||
st_mtime_3 = (self.target / "manifest.json").stat().st_mtime
|
||||
self.assertIsFile(self.target / "manifest.json")
|
||||
self.assertIsFile((self.target / "manifest.json").as_posix())
|
||||
|
||||
self.assertNotEqual(st_mtime_1, st_mtime_2)
|
||||
self.assertNotEqual(st_mtime_2, st_mtime_3)
|
||||
@@ -350,7 +352,7 @@ class TestExportImport(
|
||||
|
||||
self._do_export()
|
||||
|
||||
self.assertIsFile(self.target / "manifest.json")
|
||||
self.assertIsFile((self.target / "manifest.json").as_posix())
|
||||
|
||||
with mock.patch(
|
||||
"documents.management.commands.document_exporter.copy_file_with_basic_stats",
|
||||
@@ -358,7 +360,7 @@ class TestExportImport(
|
||||
self._do_export()
|
||||
m.assert_not_called()
|
||||
|
||||
self.assertIsFile(self.target / "manifest.json")
|
||||
self.assertIsFile((self.target / "manifest.json").as_posix())
|
||||
|
||||
self.d2.checksum = "asdfasdgf3"
|
||||
self.d2.save()
|
||||
@@ -369,7 +371,7 @@ class TestExportImport(
|
||||
self._do_export(compare_checksums=True)
|
||||
self.assertEqual(m.call_count, 1)
|
||||
|
||||
self.assertIsFile(self.target / "manifest.json")
|
||||
self.assertIsFile((self.target / "manifest.json").as_posix())
|
||||
|
||||
def test_update_export_deleted_document(self):
|
||||
shutil.rmtree(Path(self.dirs.media_dir) / "documents")
|
||||
@@ -383,7 +385,7 @@ class TestExportImport(
|
||||
self.assertTrue(len(manifest), 7)
|
||||
doc_from_manifest = self._get_document_from_manifest(manifest, self.d3.id)
|
||||
self.assertIsFile(
|
||||
str(self.target / doc_from_manifest[EXPORTER_FILE_NAME]),
|
||||
(self.target / doc_from_manifest[EXPORTER_FILE_NAME]).as_posix(),
|
||||
)
|
||||
self.d3.delete()
|
||||
|
||||
@@ -395,12 +397,12 @@ class TestExportImport(
|
||||
self.d3.id,
|
||||
)
|
||||
self.assertIsFile(
|
||||
self.target / doc_from_manifest[EXPORTER_FILE_NAME],
|
||||
(self.target / doc_from_manifest[EXPORTER_FILE_NAME]).as_posix(),
|
||||
)
|
||||
|
||||
manifest = self._do_export(delete=True)
|
||||
self.assertIsNotFile(
|
||||
self.target / doc_from_manifest[EXPORTER_FILE_NAME],
|
||||
(self.target / doc_from_manifest[EXPORTER_FILE_NAME]).as_posix(),
|
||||
)
|
||||
|
||||
self.assertTrue(len(manifest), 6)
|
||||
@@ -414,20 +416,20 @@ class TestExportImport(
|
||||
)
|
||||
|
||||
self._do_export(use_filename_format=True)
|
||||
self.assertIsFile(self.target / "wow1" / "c.pdf")
|
||||
self.assertIsFile((self.target / "wow1" / "c.pdf").as_posix())
|
||||
|
||||
self.assertIsFile(self.target / "manifest.json")
|
||||
self.assertIsFile((self.target / "manifest.json").as_posix())
|
||||
|
||||
self.d1.title = "new_title"
|
||||
self.d1.save()
|
||||
self._do_export(use_filename_format=True, delete=True)
|
||||
self.assertIsNotFile(self.target / "wow1" / "c.pdf")
|
||||
self.assertIsNotDir(self.target / "wow1")
|
||||
self.assertIsFile(self.target / "new_title" / "c.pdf")
|
||||
self.assertIsFile(self.target / "manifest.json")
|
||||
self.assertIsFile(self.target / "wow2" / "none.pdf")
|
||||
self.assertIsNotFile((self.target / "wow1" / "c.pdf").as_posix())
|
||||
self.assertIsNotDir((self.target / "wow1").as_posix())
|
||||
self.assertIsFile((self.target / "new_title" / "c.pdf").as_posix())
|
||||
self.assertIsFile((self.target / "manifest.json").as_posix())
|
||||
self.assertIsFile((self.target / "wow2" / "none.pdf").as_posix())
|
||||
self.assertIsFile(
|
||||
self.target / "wow2" / "none_01.pdf",
|
||||
(self.target / "wow2" / "none_01.pdf").as_posix(),
|
||||
)
|
||||
|
||||
def test_export_missing_files(self):
|
||||
|
@@ -87,7 +87,7 @@ class TestFuzzyMatchCommand(TestCase):
|
||||
filename="other_test.pdf",
|
||||
)
|
||||
stdout, _ = self.call_command()
|
||||
self.assertIn("No matches found", stdout)
|
||||
self.assertEqual(stdout, "No matches found\n")
|
||||
|
||||
def test_with_matches(self):
|
||||
"""
|
||||
@@ -116,7 +116,7 @@ class TestFuzzyMatchCommand(TestCase):
|
||||
filename="other_test.pdf",
|
||||
)
|
||||
stdout, _ = self.call_command("--processes", "1")
|
||||
self.assertRegex(stdout, self.MSG_REGEX)
|
||||
self.assertRegex(stdout, self.MSG_REGEX + "\n")
|
||||
|
||||
def test_with_3_matches(self):
|
||||
"""
|
||||
@@ -152,10 +152,11 @@ class TestFuzzyMatchCommand(TestCase):
|
||||
filename="final_test.pdf",
|
||||
)
|
||||
stdout, _ = self.call_command()
|
||||
lines = [x.strip() for x in stdout.splitlines() if x.strip()]
|
||||
lines = [x.strip() for x in stdout.split("\n") if len(x.strip())]
|
||||
self.assertEqual(len(lines), 3)
|
||||
for line in lines:
|
||||
self.assertRegex(line, self.MSG_REGEX)
|
||||
self.assertRegex(lines[0], self.MSG_REGEX)
|
||||
self.assertRegex(lines[1], self.MSG_REGEX)
|
||||
self.assertRegex(lines[2], self.MSG_REGEX)
|
||||
|
||||
def test_document_deletion(self):
|
||||
"""
|
||||
@@ -196,12 +197,14 @@ class TestFuzzyMatchCommand(TestCase):
|
||||
|
||||
stdout, _ = self.call_command("--delete")
|
||||
|
||||
self.assertIn(
|
||||
lines = [x.strip() for x in stdout.split("\n") if len(x.strip())]
|
||||
self.assertEqual(len(lines), 3)
|
||||
self.assertEqual(
|
||||
lines[0],
|
||||
"The command is configured to delete documents. Use with caution",
|
||||
stdout,
|
||||
)
|
||||
self.assertRegex(stdout, self.MSG_REGEX)
|
||||
self.assertIn("Deleting 1 documents based on ratio matches", stdout)
|
||||
self.assertRegex(lines[1], self.MSG_REGEX)
|
||||
self.assertEqual(lines[2], "Deleting 1 documents based on ratio matches")
|
||||
|
||||
self.assertEqual(Document.objects.count(), 2)
|
||||
self.assertIsNotNone(Document.objects.get(pk=1))
|
||||
|
@@ -20,7 +20,7 @@ def source_path_before(self):
|
||||
if self.storage_type == STORAGE_TYPE_GPG:
|
||||
fname += ".gpg"
|
||||
|
||||
return Path(settings.ORIGINALS_DIR) / fname
|
||||
return (Path(settings.ORIGINALS_DIR) / fname).as_posix()
|
||||
|
||||
|
||||
def file_type_after(self):
|
||||
@@ -35,7 +35,7 @@ def source_path_after(doc):
|
||||
if doc.storage_type == STORAGE_TYPE_GPG:
|
||||
fname += ".gpg" # pragma: no cover
|
||||
|
||||
return Path(settings.ORIGINALS_DIR) / fname
|
||||
return (Path(settings.ORIGINALS_DIR) / fname).as_posix()
|
||||
|
||||
|
||||
@override_settings(PASSPHRASE="test")
|
||||
|
@@ -104,7 +104,7 @@ class TestReverseMigrateWorkflow(TestMigrations):
|
||||
|
||||
trigger = WorkflowTrigger.objects.create(
|
||||
type=0,
|
||||
sources=[str(DocumentSource.ConsumeFolder)],
|
||||
sources=[DocumentSource.ConsumeFolder],
|
||||
filter_path="*/path/*",
|
||||
filter_filename="*file*",
|
||||
)
|
||||
|
@@ -2,7 +2,7 @@ msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: paperless-ngx\n"
|
||||
"Report-Msgid-Bugs-To: \n"
|
||||
"POT-Creation-Date: 2025-08-02 12:55+0000\n"
|
||||
"POT-Creation-Date: 2025-07-08 21:14+0000\n"
|
||||
"PO-Revision-Date: 2022-02-17 04:17\n"
|
||||
"Last-Translator: \n"
|
||||
"Language-Team: English\n"
|
||||
@@ -1645,147 +1645,147 @@ msgstr ""
|
||||
msgid "paperless application settings"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:774
|
||||
#: paperless/settings.py:762
|
||||
msgid "English (US)"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:775
|
||||
#: paperless/settings.py:763
|
||||
msgid "Arabic"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:776
|
||||
#: paperless/settings.py:764
|
||||
msgid "Afrikaans"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:777
|
||||
#: paperless/settings.py:765
|
||||
msgid "Belarusian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:778
|
||||
#: paperless/settings.py:766
|
||||
msgid "Bulgarian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:779
|
||||
#: paperless/settings.py:767
|
||||
msgid "Catalan"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:780
|
||||
#: paperless/settings.py:768
|
||||
msgid "Czech"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:781
|
||||
#: paperless/settings.py:769
|
||||
msgid "Danish"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:782
|
||||
#: paperless/settings.py:770
|
||||
msgid "German"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:783
|
||||
#: paperless/settings.py:771
|
||||
msgid "Greek"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:784
|
||||
#: paperless/settings.py:772
|
||||
msgid "English (GB)"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:785
|
||||
#: paperless/settings.py:773
|
||||
msgid "Spanish"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:786
|
||||
#: paperless/settings.py:774
|
||||
msgid "Persian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:787
|
||||
#: paperless/settings.py:775
|
||||
msgid "Finnish"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:788
|
||||
#: paperless/settings.py:776
|
||||
msgid "French"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:789
|
||||
#: paperless/settings.py:777
|
||||
msgid "Hungarian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:790
|
||||
#: paperless/settings.py:778
|
||||
msgid "Italian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:791
|
||||
#: paperless/settings.py:779
|
||||
msgid "Japanese"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:792
|
||||
#: paperless/settings.py:780
|
||||
msgid "Korean"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:793
|
||||
#: paperless/settings.py:781
|
||||
msgid "Luxembourgish"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:794
|
||||
#: paperless/settings.py:782
|
||||
msgid "Norwegian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:795
|
||||
#: paperless/settings.py:783
|
||||
msgid "Dutch"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:796
|
||||
#: paperless/settings.py:784
|
||||
msgid "Polish"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:797
|
||||
#: paperless/settings.py:785
|
||||
msgid "Portuguese (Brazil)"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:798
|
||||
#: paperless/settings.py:786
|
||||
msgid "Portuguese"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:799
|
||||
#: paperless/settings.py:787
|
||||
msgid "Romanian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:800
|
||||
#: paperless/settings.py:788
|
||||
msgid "Russian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:801
|
||||
#: paperless/settings.py:789
|
||||
msgid "Slovak"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:802
|
||||
#: paperless/settings.py:790
|
||||
msgid "Slovenian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:803
|
||||
#: paperless/settings.py:791
|
||||
msgid "Serbian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:804
|
||||
#: paperless/settings.py:792
|
||||
msgid "Swedish"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:805
|
||||
#: paperless/settings.py:793
|
||||
msgid "Turkish"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:806
|
||||
#: paperless/settings.py:794
|
||||
msgid "Ukrainian"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:807
|
||||
#: paperless/settings.py:795
|
||||
msgid "Vietnamese"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:808
|
||||
#: paperless/settings.py:796
|
||||
msgid "Chinese Simplified"
|
||||
msgstr ""
|
||||
|
||||
#: paperless/settings.py:809
|
||||
#: paperless/settings.py:797
|
||||
msgid "Chinese Traditional"
|
||||
msgstr ""
|
||||
|
||||
|
@@ -54,7 +54,7 @@ class HttpRemoteUserMiddleware(PersistentRemoteUserMiddleware):
|
||||
|
||||
header = settings.HTTP_REMOTE_USER_HEADER_NAME
|
||||
|
||||
def __call__(self, request: HttpRequest) -> None:
|
||||
def process_request(self, request: HttpRequest) -> None:
|
||||
# If remote user auth is enabled only for the frontend, not the API,
|
||||
# then we need dont want to authenticate the user for API requests.
|
||||
if (
|
||||
@@ -62,8 +62,8 @@ class HttpRemoteUserMiddleware(PersistentRemoteUserMiddleware):
|
||||
and "paperless.auth.PaperlessRemoteUserAuthentication"
|
||||
not in settings.REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"]
|
||||
):
|
||||
return self.get_response(request)
|
||||
return super().__call__(request)
|
||||
return
|
||||
return super().process_request(request)
|
||||
|
||||
|
||||
class PaperlessRemoteUserAuthentication(authentication.RemoteUserAuthentication):
|
||||
|
@@ -214,3 +214,31 @@ def audit_log_check(app_configs, **kwargs):
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@register()
|
||||
def check_postgres_version(app_configs, **kwargs):
|
||||
"""
|
||||
Django 5.2 removed PostgreSQL 13 support and thus it will be removed in
|
||||
a future Paperless-ngx version. This check can be removed eventually.
|
||||
See https://docs.djangoproject.com/en/5.2/releases/5.2/#dropped-support-for-postgresql-13
|
||||
"""
|
||||
db_conn = connections["default"]
|
||||
result = []
|
||||
if db_conn.vendor == "postgresql":
|
||||
try:
|
||||
with db_conn.cursor() as cursor:
|
||||
cursor.execute("SHOW server_version;")
|
||||
version = cursor.fetchone()[0]
|
||||
if version.startswith("13"):
|
||||
return [
|
||||
Warning(
|
||||
"PostgreSQL 13 is deprecated and will not be supported in a future Paperless-ngx release.",
|
||||
hint="Upgrade to PostgreSQL 14 or newer.",
|
||||
),
|
||||
]
|
||||
except Exception: # pragma: no cover
|
||||
# Don't block checks on version query failure
|
||||
pass
|
||||
|
||||
return result
|
||||
|
@@ -703,9 +703,6 @@ def _parse_db_settings() -> dict:
|
||||
# Leave room for future extensibility
|
||||
if os.getenv("PAPERLESS_DBENGINE") == "mariadb":
|
||||
engine = "django.db.backends.mysql"
|
||||
# Contrary to Postgres, Django does not natively support connection pooling for MariaDB.
|
||||
# However, since MariaDB uses threads instead of forks, establishing connections is significantly faster
|
||||
# compared to PostgreSQL, so the lack of pooling is not an issue
|
||||
options = {
|
||||
"read_default_file": "/etc/mysql/my.cnf",
|
||||
"charset": "utf8mb4",
|
||||
@@ -725,15 +722,6 @@ def _parse_db_settings() -> dict:
|
||||
"sslcert": os.getenv("PAPERLESS_DBSSLCERT", None),
|
||||
"sslkey": os.getenv("PAPERLESS_DBSSLKEY", None),
|
||||
}
|
||||
if int(os.getenv("PAPERLESS_DB_POOLSIZE", 0)) > 0:
|
||||
options.update(
|
||||
{
|
||||
"pool": {
|
||||
"min_size": 1,
|
||||
"max_size": int(os.getenv("PAPERLESS_DB_POOLSIZE")),
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
databases["default"]["ENGINE"] = engine
|
||||
databases["default"]["OPTIONS"].update(options)
|
||||
|
@@ -9,6 +9,7 @@ from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
from paperless.checks import audit_log_check
|
||||
from paperless.checks import binaries_check
|
||||
from paperless.checks import check_postgres_version
|
||||
from paperless.checks import debug_mode_check
|
||||
from paperless.checks import paths_check
|
||||
from paperless.checks import settings_values_check
|
||||
@@ -262,3 +263,39 @@ class TestAuditLogChecks(TestCase):
|
||||
("auditlog table was found but audit log is disabled."),
|
||||
msg.msg,
|
||||
)
|
||||
|
||||
|
||||
class TestPostgresVersionCheck(TestCase):
|
||||
@mock.patch("paperless.checks.connections")
|
||||
def test_postgres_13_warns(self, mock_connections):
|
||||
mock_connection = mock.MagicMock()
|
||||
mock_connection.vendor = "postgresql"
|
||||
mock_cursor = mock.MagicMock()
|
||||
mock_cursor.__enter__.return_value.fetchone.return_value = ["13.11"]
|
||||
mock_connection.cursor.return_value = mock_cursor
|
||||
mock_connections.__getitem__.return_value = mock_connection
|
||||
|
||||
warnings = check_postgres_version(None)
|
||||
self.assertEqual(len(warnings), 1)
|
||||
self.assertIn("PostgreSQL 13 is deprecated", warnings[0].msg)
|
||||
|
||||
@mock.patch("paperless.checks.connections")
|
||||
def test_postgres_14_passes(self, mock_connections):
|
||||
mock_connection = mock.MagicMock()
|
||||
mock_connection.vendor = "postgresql"
|
||||
mock_cursor = mock.MagicMock()
|
||||
mock_cursor.__enter__.return_value.fetchone.return_value = ["14.10"]
|
||||
mock_connection.cursor.return_value = mock_cursor
|
||||
mock_connections.__getitem__.return_value = mock_connection
|
||||
|
||||
warnings = check_postgres_version(None)
|
||||
self.assertEqual(warnings, [])
|
||||
|
||||
@mock.patch("paperless.checks.connections")
|
||||
def test_non_postgres_skipped(self, mock_connections):
|
||||
mock_connection = mock.MagicMock()
|
||||
mock_connection.vendor = "sqlite"
|
||||
mock_connections.__getitem__.return_value = mock_connection
|
||||
|
||||
warnings = check_postgres_version(None)
|
||||
self.assertEqual(warnings, [])
|
||||
|
@@ -1,7 +1,6 @@
|
||||
import os
|
||||
from unittest import mock
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import override_settings
|
||||
from rest_framework import status
|
||||
@@ -92,7 +91,6 @@ class TestRemoteUser(DirectoriesMixin, APITestCase):
|
||||
|
||||
@override_settings(
|
||||
REST_FRAMEWORK={
|
||||
**settings.REST_FRAMEWORK,
|
||||
"DEFAULT_AUTHENTICATION_CLASSES": [
|
||||
"rest_framework.authentication.BasicAuthentication",
|
||||
"rest_framework.authentication.TokenAuthentication",
|
||||
|
@@ -16,15 +16,7 @@ class TextDocumentParser(DocumentParser):
|
||||
logging_name = "paperless.parsing.text"
|
||||
|
||||
def get_thumbnail(self, document_path: Path, mime_type, file_name=None) -> Path:
|
||||
# Avoid reading entire file into memory
|
||||
max_chars = 100_000
|
||||
file_size_limit = 50 * 1024 * 1024
|
||||
|
||||
if document_path.stat().st_size > file_size_limit:
|
||||
text = "[File too large to preview]"
|
||||
else:
|
||||
with Path(document_path).open("r", encoding="utf-8", errors="replace") as f:
|
||||
text = f.read(max_chars)
|
||||
text = self.read_file_handle_unicode_errors(document_path)
|
||||
|
||||
img = Image.new("RGB", (500, 700), color="white")
|
||||
draw = ImageDraw.Draw(img)
|
||||
@@ -33,7 +25,7 @@ class TextDocumentParser(DocumentParser):
|
||||
size=20,
|
||||
layout_engine=ImageFont.Layout.BASIC,
|
||||
)
|
||||
draw.multiline_text((5, 5), text, font=font, fill="black", spacing=4)
|
||||
draw.text((5, 5), text, font=font, fill="black")
|
||||
|
||||
out_path = self.tempdir / "thumb.webp"
|
||||
img.save(out_path, format="WEBP")
|
||||
|
@@ -1,4 +1,3 @@
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from paperless_text.parsers import TextDocumentParser
|
||||
@@ -36,26 +35,3 @@ class TestTextParser:
|
||||
|
||||
assert text_parser.get_text() == "Pantothens<EFBFBD>ure\n"
|
||||
assert text_parser.get_archive_path() is None
|
||||
|
||||
def test_thumbnail_large_file(self, text_parser: TextDocumentParser):
|
||||
"""
|
||||
GIVEN:
|
||||
- A very large text file (>50MB)
|
||||
WHEN:
|
||||
- A thumbnail is requested
|
||||
THEN:
|
||||
- A thumbnail is created without reading the entire file into memory
|
||||
"""
|
||||
with tempfile.NamedTemporaryFile(
|
||||
delete=False,
|
||||
mode="w",
|
||||
encoding="utf-8",
|
||||
suffix=".txt",
|
||||
) as tmp:
|
||||
tmp.write("A" * (51 * 1024 * 1024)) # 51 MB of 'A'
|
||||
large_file = Path(tmp.name)
|
||||
|
||||
thumb = text_parser.get_thumbnail(large_file, "text/plain")
|
||||
assert thumb.exists()
|
||||
assert thumb.is_file()
|
||||
large_file.unlink()
|
||||
|
Reference in New Issue
Block a user