Compare commits

..

4 Commits

Author SHA1 Message Date
shamoon
9a29195792 Mas testing 2025-12-31 17:06:24 -08:00
shamoon
bdd00498a1 skip_asn_if_exists 2025-12-31 12:09:56 -08:00
shamoon
92deebddd4 "Handoff" ASN when merging or editing PDFs 2025-12-31 11:50:27 -08:00
shamoon
c7efcee3d6 First, release ASNs before document replacement (and restore if needed) 2025-12-31 10:42:07 -08:00
8 changed files with 380 additions and 101 deletions

View File

@@ -68,7 +68,7 @@
"prettier-plugin-organize-imports": "^4.3.0",
"ts-node": "~10.9.1",
"typescript": "^5.8.3",
"webpack": "^5.104.1"
"webpack": "^5.103.0"
},
"packageManager": "pnpm@10.17.1",
"pnpm": {

112
src-ui/pnpm-lock.yaml generated
View File

@@ -128,7 +128,7 @@ importers:
version: 20.3.15(@angular/compiler@20.3.15)(typescript@5.8.3)
'@codecov/webpack-plugin':
specifier: ^1.9.1
version: 1.9.1(webpack@5.104.1)
version: 1.9.1(webpack@5.103.0)
'@playwright/test':
specifier: ^1.57.0
version: 1.57.0
@@ -175,8 +175,8 @@ importers:
specifier: ^5.8.3
version: 5.8.3
webpack:
specifier: ^5.104.1
version: 5.104.1
specifier: ^5.103.0
version: 5.103.0
packages:
@@ -3423,8 +3423,8 @@ packages:
base64-js@1.5.1:
resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==}
baseline-browser-mapping@2.9.11:
resolution: {integrity: sha512-Sg0xJUNDU1sJNGdfGWhVHX0kkZ+HWcvmVymJbj6NSgZZmW/8S9Y2HQ5euytnIgakgxN6papOAWiwDo1ctFDcoQ==}
baseline-browser-mapping@2.9.4:
resolution: {integrity: sha512-ZCQ9GEWl73BVm8bu5Fts8nt7MHdbt5vY9bP6WGnUh+r3l8M7CgfyTlwsgCbMC66BNxPr6Xoce3j66Ms5YUQTNA==}
hasBin: true
batch@0.6.1:
@@ -3530,8 +3530,8 @@ packages:
resolution: {integrity: sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==}
engines: {node: '>=10'}
caniuse-lite@1.0.30001762:
resolution: {integrity: sha512-PxZwGNvH7Ak8WX5iXzoK1KPZttBXNPuaOvI2ZYU7NrlM+d9Ov+TUvlLOBNGzVXAntMSMMlJPd+jY6ovrVjSmUw==}
caniuse-lite@1.0.30001759:
resolution: {integrity: sha512-Pzfx9fOKoKvevQf8oCXoyNRQ5QyxJj+3O0Rqx2V5oxT61KGx8+n6hV/IUyJeifUci2clnmmKVpvtiqRzgiWjSw==}
canvas@3.0.0:
resolution: {integrity: sha512-NtcIBY88FjymQy+g2g5qnuP5IslrbWCQ3A6rSr1PeuYxVRapRZ3BZCrDyAakvI6CuDYidgZaf55ygulFVwROdg==}
@@ -3912,8 +3912,8 @@ packages:
ee-first@1.1.1:
resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==}
electron-to-chromium@1.5.267:
resolution: {integrity: sha512-0Drusm6MVRXSOJpGbaSVgcQsuB4hEkMpHXaVstcPmhu5LIedxs1xNK/nIxmQIU/RPC0+1/o0AVZfBTkTNJOdUw==}
electron-to-chromium@1.5.266:
resolution: {integrity: sha512-kgWEglXvkEfMH7rxP5OSZZwnaDWT7J9EoZCujhnpLbfi0bbNtRkgdX2E3gt0Uer11c61qCYktB3hwkAS325sJg==}
emittery@0.13.1:
resolution: {integrity: sha512-DeWwawk6r5yR9jFgnDKYt4sLS0LmHJJi3ZOnb5/JdbYwj3nW+FxQnHIjhBKz8YLC7oRNPVM9NQ47I3CVx34eqQ==}
@@ -3946,8 +3946,8 @@ packages:
end-of-stream@1.4.4:
resolution: {integrity: sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==}
enhanced-resolve@5.18.4:
resolution: {integrity: sha512-LgQMM4WXU3QI+SYgEc2liRgznaD5ojbmY3sb8LxyguVkIg5FxdpTkvk72te2R38/TGKxH634oLxXRGY6d7AP+Q==}
enhanced-resolve@5.18.3:
resolution: {integrity: sha512-d4lC8xfavMeBjzGr2vECC3fsGXziXZQyJxD868h2M/mBI3PwAuODxAkLkq5HYuvrPYcUtiLzsTo8U3PgX3Ocww==}
engines: {node: '>=10.13.0'}
entities@4.5.0:
@@ -3987,9 +3987,6 @@ packages:
es-module-lexer@1.7.0:
resolution: {integrity: sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==}
es-module-lexer@2.0.0:
resolution: {integrity: sha512-5POEcUuZybH7IdmGsD8wlf0AI55wMecM9rVBTI/qEAy2c1kTOm3DjFYjrBdI2K3BaJjJYfYFeRtM0t9ssnRuxw==}
es-object-atoms@1.1.1:
resolution: {integrity: sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==}
engines: {node: '>= 0.4'}
@@ -6293,8 +6290,8 @@ packages:
resolution: {integrity: sha512-7NyxrTE4Anh8km8iEy7o0QYPs+0JKBTj5ZaqHg6B39erLg0qYXN3BijtShwbsNSvQ+LN75+KV+C4QR/f6Gwnpg==}
engines: {node: '>=18'}
terser-webpack-plugin@5.3.16:
resolution: {integrity: sha512-h9oBFCWrq78NyWWVcSwZarJkZ01c2AyGrzs1crmHZO3QUg9D61Wu4NPjBy69n7JqylFF5y+CsUZYmYEIZ3mR+Q==}
terser-webpack-plugin@5.3.15:
resolution: {integrity: sha512-PGkOdpRFK+rb1TzVz+msVhw4YMRT9txLF4kRqvJhGhCM324xuR3REBSHALN+l+sAhKUmz0aotnjp5D+P83mLhQ==}
engines: {node: '>= 10.13.0'}
peerDependencies:
'@swc/core': '*'
@@ -6566,8 +6563,8 @@ packages:
unrs-resolver@1.11.1:
resolution: {integrity: sha512-bSjt9pjaEBnNiGgc9rUiHGKv5l4/TGzDmYw3RhnkJGtLhbnnA/5qJj7x3dNDCRx/PJxu774LlH8lCOlB4hEfKg==}
update-browserslist-db@1.2.3:
resolution: {integrity: sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==}
update-browserslist-db@1.2.2:
resolution: {integrity: sha512-E85pfNzMQ9jpKkA7+TJAi4TJN+tBCuWh5rUcS/sv6cFi+1q9LYDwDI5dpUL0u/73EElyQ8d3TEaeW4sPedBqYA==}
hasBin: true
peerDependencies:
browserslist: '>= 4.21.0'
@@ -6713,10 +6710,6 @@ packages:
resolution: {integrity: sha512-c5EGNOiyxxV5qmTtAB7rbiXxi1ooX1pQKMLX/MIabJjRA0SJBQOjKF+KSVfHkr9U1cADPon0mRiVe/riyaiDUA==}
engines: {node: '>=10.13.0'}
watchpack@2.5.0:
resolution: {integrity: sha512-e6vZvY6xboSwLz2GD36c16+O/2Z6fKvIf4pOXptw2rY9MVwE/TXc6RGqxD3I3x0a28lwBY7DE+76uTPSsBrrCA==}
engines: {node: '>=10.13.0'}
wbuf@1.7.3:
resolution: {integrity: sha512-O84QOnr0icsbFGLS0O3bI5FswxzRr8/gHwWkDlQFskhSPryQXvrTMxjxGP4+iWYoauLoBvfDpkrOauZ+0iZpDA==}
@@ -6770,8 +6763,8 @@ packages:
webpack-virtual-modules@0.6.2:
resolution: {integrity: sha512-66/V2i5hQanC51vBQKPH4aI8NMAcBW59FVBs+rC7eGHupMyfn34q7rZIE+ETlJ+XTevqfUhVVBgSUNSW2flEUQ==}
webpack@5.104.1:
resolution: {integrity: sha512-Qphch25abbMNtekmEGJmeRUhLDbe+QfiWTiqpKYkpCOWY64v9eyl+KRRLmqOFA2AvKPpc9DC6+u2n76tQLBoaA==}
webpack@5.103.0:
resolution: {integrity: sha512-HU1JOuV1OavsZ+mfigY0j8d1TgQgbZ6M+J75zDkpEAwYeXjWSqrGJtgnPblJjd/mAyTNQ7ygw0MiKOn6etz8yw==}
engines: {node: '>=10.13.0'}
hasBin: true
peerDependencies:
@@ -6801,12 +6794,10 @@ packages:
whatwg-encoding@2.0.0:
resolution: {integrity: sha512-p41ogyeMUrw3jWclHWTQg1k05DSVXPLcVxRTYsXUk+ZooOCZLcoYgPZ/HL/D/N+uQPOtcp1me1WhBEaX02mhWg==}
engines: {node: '>=12'}
deprecated: Use @exodus/bytes instead for a more spec-conformant and faster implementation
whatwg-encoding@3.1.1:
resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==}
engines: {node: '>=18'}
deprecated: Use @exodus/bytes instead for a more spec-conformant and faster implementation
whatwg-mimetype@3.0.0:
resolution: {integrity: sha512-nt+N2dzIutVRxARx1nghPKGv1xHikU7HKdfafKkLNLindmPU/ch3U31NOCGGA/dmPcmb1VlofO0vnKAcsm0o/Q==}
@@ -7190,7 +7181,7 @@ snapshots:
dependencies:
'@ampproject/remapping': 2.3.0
'@angular-devkit/architect': 0.2000.4(chokidar@4.0.3)
'@angular-devkit/build-webpack': 0.2000.4(chokidar@4.0.3)(webpack-dev-server@5.2.1(webpack@5.104.1))(webpack@5.99.8(esbuild@0.25.5))
'@angular-devkit/build-webpack': 0.2000.4(chokidar@4.0.3)(webpack-dev-server@5.2.1(webpack@5.103.0))(webpack@5.99.8(esbuild@0.25.5))
'@angular-devkit/core': 20.0.4(chokidar@4.0.3)
'@angular/build': 20.0.4(@angular/compiler-cli@20.3.15(@angular/compiler@20.3.15)(typescript@5.8.3))(@angular/compiler@20.3.15)(@angular/core@20.3.15(@angular/compiler@20.3.15)(rxjs@7.8.2)(zone.js@0.15.1))(@angular/localize@20.3.15(@angular/compiler-cli@20.3.15(@angular/compiler@20.3.15)(typescript@5.8.3))(@angular/compiler@20.3.15))(@angular/platform-browser@20.3.15(@angular/common@20.3.15(@angular/core@20.3.15(@angular/compiler@20.3.15)(rxjs@7.8.2)(zone.js@0.15.1))(rxjs@7.8.2))(@angular/core@20.3.15(@angular/compiler@20.3.15)(rxjs@7.8.2)(zone.js@0.15.1)))(@types/node@24.10.1)(chokidar@4.0.3)(jiti@1.21.7)(less@4.3.0)(postcss@8.5.3)(terser@5.39.1)(tslib@2.8.1)(typescript@5.8.3)(yaml@2.7.0)
'@angular/compiler-cli': 20.3.15(@angular/compiler@20.3.15)(typescript@5.8.3)
@@ -7276,12 +7267,12 @@ snapshots:
- webpack-cli
- yaml
'@angular-devkit/build-webpack@0.2000.4(chokidar@4.0.3)(webpack-dev-server@5.2.1(webpack@5.104.1))(webpack@5.99.8(esbuild@0.25.5))':
'@angular-devkit/build-webpack@0.2000.4(chokidar@4.0.3)(webpack-dev-server@5.2.1(webpack@5.103.0))(webpack@5.99.8(esbuild@0.25.5))':
dependencies:
'@angular-devkit/architect': 0.2000.4(chokidar@4.0.3)
rxjs: 7.8.2
webpack: 5.99.8(esbuild@0.25.5)
webpack-dev-server: 5.2.1(webpack@5.104.1)
webpack-dev-server: 5.2.1(webpack@5.103.0)
transitivePeerDependencies:
- chokidar
@@ -8441,11 +8432,11 @@ snapshots:
unplugin: 1.16.1
zod: 3.25.76
'@codecov/webpack-plugin@1.9.1(webpack@5.104.1)':
'@codecov/webpack-plugin@1.9.1(webpack@5.103.0)':
dependencies:
'@codecov/bundler-plugin-core': 1.9.1
unplugin: 1.16.1
webpack: 5.104.1
webpack: 5.103.0
'@cspotcode/source-map-support@0.8.1':
dependencies:
@@ -10384,7 +10375,7 @@ snapshots:
autoprefixer@10.4.21(postcss@8.5.3):
dependencies:
browserslist: 4.28.1
caniuse-lite: 1.0.30001762
caniuse-lite: 1.0.30001759
fraction.js: 4.3.7
normalize-range: 0.1.2
picocolors: 1.1.1
@@ -10480,7 +10471,7 @@ snapshots:
base64-js@1.5.1:
optional: true
baseline-browser-mapping@2.9.11: {}
baseline-browser-mapping@2.9.4: {}
batch@0.6.1: {}
@@ -10576,11 +10567,11 @@ snapshots:
browserslist@4.28.1:
dependencies:
baseline-browser-mapping: 2.9.11
caniuse-lite: 1.0.30001762
electron-to-chromium: 1.5.267
baseline-browser-mapping: 2.9.4
caniuse-lite: 1.0.30001759
electron-to-chromium: 1.5.266
node-releases: 2.0.27
update-browserslist-db: 1.2.3(browserslist@4.28.1)
update-browserslist-db: 1.2.2(browserslist@4.28.1)
bs-logger@0.2.6:
dependencies:
@@ -10635,7 +10626,7 @@ snapshots:
camelcase@6.3.0: {}
caniuse-lite@1.0.30001762: {}
caniuse-lite@1.0.30001759: {}
canvas@3.0.0:
dependencies:
@@ -10977,7 +10968,7 @@ snapshots:
ee-first@1.1.1: {}
electron-to-chromium@1.5.267: {}
electron-to-chromium@1.5.266: {}
emittery@0.13.1: {}
@@ -11003,7 +10994,7 @@ snapshots:
once: 1.4.0
optional: true
enhanced-resolve@5.18.4:
enhanced-resolve@5.18.3:
dependencies:
graceful-fs: 4.2.11
tapable: 2.3.0
@@ -11033,8 +11024,6 @@ snapshots:
es-module-lexer@1.7.0: {}
es-module-lexer@2.0.0: {}
es-object-atoms@1.1.1:
dependencies:
es-errors: 1.3.0
@@ -13934,7 +13923,7 @@ snapshots:
minizlib: 3.1.0
yallist: 5.0.0
terser-webpack-plugin@5.3.16(esbuild@0.25.5)(webpack@5.99.8(esbuild@0.25.5)):
terser-webpack-plugin@5.3.15(esbuild@0.25.5)(webpack@5.99.8(esbuild@0.25.5)):
dependencies:
'@jridgewell/trace-mapping': 0.3.31
jest-worker: 27.5.1
@@ -13945,14 +13934,14 @@ snapshots:
optionalDependencies:
esbuild: 0.25.5
terser-webpack-plugin@5.3.16(webpack@5.104.1):
terser-webpack-plugin@5.3.15(webpack@5.103.0):
dependencies:
'@jridgewell/trace-mapping': 0.3.31
jest-worker: 27.5.1
schema-utils: 4.3.3
serialize-javascript: 6.0.2
terser: 5.44.1
webpack: 5.104.1
webpack: 5.103.0
terser@5.39.1:
dependencies:
@@ -14209,7 +14198,7 @@ snapshots:
'@unrs/resolver-binding-win32-ia32-msvc': 1.11.1
'@unrs/resolver-binding-win32-x64-msvc': 1.11.1
update-browserslist-db@1.2.3(browserslist@4.28.1):
update-browserslist-db@1.2.2(browserslist@4.28.1):
dependencies:
browserslist: 4.28.1
escalade: 3.2.0
@@ -14309,11 +14298,6 @@ snapshots:
glob-to-regexp: 0.4.1
graceful-fs: 4.2.11
watchpack@2.5.0:
dependencies:
glob-to-regexp: 0.4.1
graceful-fs: 4.2.11
wbuf@1.7.3:
dependencies:
minimalistic-assert: 1.0.1
@@ -14323,7 +14307,7 @@ snapshots:
webidl-conversions@7.0.0: {}
webpack-dev-middleware@7.4.2(webpack@5.104.1):
webpack-dev-middleware@7.4.2(webpack@5.103.0):
dependencies:
colorette: 2.0.20
memfs: 4.17.2
@@ -14332,7 +14316,7 @@ snapshots:
range-parser: 1.2.1
schema-utils: 4.3.3
optionalDependencies:
webpack: 5.104.1
webpack: 5.103.0
webpack-dev-middleware@7.4.2(webpack@5.99.8(esbuild@0.25.5)):
dependencies:
@@ -14345,7 +14329,7 @@ snapshots:
optionalDependencies:
webpack: 5.99.8(esbuild@0.25.5)
webpack-dev-server@5.2.1(webpack@5.104.1):
webpack-dev-server@5.2.1(webpack@5.103.0):
dependencies:
'@types/bonjour': 3.5.13
'@types/connect-history-api-fallback': 1.5.4
@@ -14373,10 +14357,10 @@ snapshots:
serve-index: 1.9.1
sockjs: 0.3.24
spdy: 4.0.2
webpack-dev-middleware: 7.4.2(webpack@5.104.1)
webpack-dev-middleware: 7.4.2(webpack@5.103.0)
ws: 8.18.3
optionalDependencies:
webpack: 5.104.1
webpack: 5.103.0
transitivePeerDependencies:
- bufferutil
- debug
@@ -14436,7 +14420,7 @@ snapshots:
webpack-virtual-modules@0.6.2: {}
webpack@5.104.1:
webpack@5.103.0:
dependencies:
'@types/eslint-scope': 3.7.7
'@types/estree': 1.0.8
@@ -14448,8 +14432,8 @@ snapshots:
acorn-import-phases: 1.0.4(acorn@8.15.0)
browserslist: 4.28.1
chrome-trace-event: 1.0.4
enhanced-resolve: 5.18.4
es-module-lexer: 2.0.0
enhanced-resolve: 5.18.3
es-module-lexer: 1.7.0
eslint-scope: 5.1.1
events: 3.3.0
glob-to-regexp: 0.4.1
@@ -14460,8 +14444,8 @@ snapshots:
neo-async: 2.6.2
schema-utils: 4.3.3
tapable: 2.3.0
terser-webpack-plugin: 5.3.16(webpack@5.104.1)
watchpack: 2.5.0
terser-webpack-plugin: 5.3.15(webpack@5.103.0)
watchpack: 2.4.4
webpack-sources: 3.3.3
transitivePeerDependencies:
- '@swc/core'
@@ -14479,7 +14463,7 @@ snapshots:
acorn: 8.15.0
browserslist: 4.28.1
chrome-trace-event: 1.0.4
enhanced-resolve: 5.18.4
enhanced-resolve: 5.18.3
es-module-lexer: 1.7.0
eslint-scope: 5.1.1
events: 3.3.0
@@ -14491,8 +14475,8 @@ snapshots:
neo-async: 2.6.2
schema-utils: 4.3.3
tapable: 2.3.0
terser-webpack-plugin: 5.3.16(esbuild@0.25.5)(webpack@5.99.8(esbuild@0.25.5))
watchpack: 2.5.0
terser-webpack-plugin: 5.3.15(esbuild@0.25.5)(webpack@5.99.8(esbuild@0.25.5))
watchpack: 2.4.4
webpack-sources: 3.3.3
transitivePeerDependencies:
- '@swc/core'

View File

@@ -16,6 +16,7 @@ from pikepdf import Pdf
from documents.converters import convert_from_tiff_to_pdf
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.models import Document
from documents.models import Tag
from documents.plugins.base import ConsumeTaskPlugin
from documents.plugins.base import StopConsumeTaskError
@@ -115,6 +116,24 @@ class BarcodePlugin(ConsumeTaskPlugin):
self._tiff_conversion_done = False
self.barcodes: list[Barcode] = []
def _apply_detected_asn(self, detected_asn: int) -> None:
"""
Apply a detected ASN to metadata if allowed.
"""
if (
self.metadata.skip_asn_if_exists
and Document.global_objects.filter(
archive_serial_number=detected_asn,
).exists()
):
logger.info(
f"Found ASN in barcode {detected_asn} but skipping because it already exists.",
)
return
logger.info(f"Found ASN in barcode: {detected_asn}")
self.metadata.asn = detected_asn
def run(self) -> None:
# Some operations may use PIL, override pixel setting if needed
maybe_override_pixel_limit()
@@ -186,13 +205,8 @@ class BarcodePlugin(ConsumeTaskPlugin):
# Update/overwrite an ASN if possible
# After splitting, as otherwise each split document gets the same ASN
if (
self.settings.barcode_enable_asn
and not self.metadata.skip_asn
and (located_asn := self.asn) is not None
):
logger.info(f"Found ASN in barcode: {located_asn}")
self.metadata.asn = located_asn
if self.settings.barcode_enable_asn and (located_asn := self.asn) is not None:
self._apply_detected_asn(located_asn)
def cleanup(self) -> None:
self.temp_dir.cleanup()

View File

@@ -7,7 +7,6 @@ from pathlib import Path
from typing import TYPE_CHECKING
from typing import Literal
from celery import chain
from celery import chord
from celery import group
from celery import shared_task
@@ -38,6 +37,42 @@ if TYPE_CHECKING:
logger: logging.Logger = logging.getLogger("paperless.bulk_edit")
@shared_task(bind=True)
def restore_archive_serial_numbers_task(
self,
backup: dict[int, int],
*args,
**kwargs,
) -> None:
restore_archive_serial_numbers(backup)
def release_archive_serial_numbers(doc_ids: list[int]) -> dict[int, int]:
"""
Clears ASNs on documents that are about to be replaced so new documents
can be assigned ASNs without uniqueness collisions. Returns a backup map
of doc_id -> previous ASN for potential restoration.
"""
qs = Document.objects.filter(
id__in=doc_ids,
archive_serial_number__isnull=False,
).only("pk", "archive_serial_number")
backup = dict(qs.values_list("pk", "archive_serial_number"))
qs.update(archive_serial_number=None)
logger.info(f"Released archive serial numbers for documents {list(backup.keys())}")
return backup
def restore_archive_serial_numbers(backup: dict[int, int]) -> None:
"""
Restores ASNs using the provided backup map, intended for
rollback when replacement consumption fails.
"""
for doc_id, asn in backup.items():
Document.objects.filter(pk=doc_id).update(archive_serial_number=asn)
logger.info(f"Restored archive serial numbers for documents {list(backup.keys())}")
def set_correspondent(
doc_ids: list[int],
correspondent: Correspondent,
@@ -386,6 +421,7 @@ def merge(
merged_pdf = pikepdf.new()
version: str = merged_pdf.pdf_version
handoff_asn: int | None = None
# use doc_ids to preserve order
for doc_id in doc_ids:
doc = qs.get(id=doc_id)
@@ -401,6 +437,8 @@ def merge(
version = max(version, pdf.pdf_version)
merged_pdf.pages.extend(pdf.pages)
affected_docs.append(doc.id)
if handoff_asn is None and doc.archive_serial_number is not None:
handoff_asn = doc.archive_serial_number
except Exception as e:
logger.exception(
f"Error merging document {doc.id}, it will not be included in the merge: {e}",
@@ -426,6 +464,8 @@ def merge(
DocumentMetadataOverrides.from_document(metadata_document)
)
overrides.title = metadata_document.title + " (merged)"
if metadata_document.archive_serial_number is not None:
handoff_asn = metadata_document.archive_serial_number
else:
overrides = DocumentMetadataOverrides()
else:
@@ -433,8 +473,11 @@ def merge(
if user is not None:
overrides.owner_id = user.id
# Avoid copying or detecting ASN from merged PDFs to prevent collision
overrides.skip_asn = True
if not delete_originals:
overrides.skip_asn_if_exists = True
if delete_originals and handoff_asn is not None:
overrides.asn = handoff_asn
logger.info("Adding merged document to the task queue.")
@@ -447,12 +490,20 @@ def merge(
)
if delete_originals:
backup = release_archive_serial_numbers(affected_docs)
logger.info(
"Queueing removal of original documents after consumption of merged document",
)
chain(consume_task, delete.si(affected_docs)).delay()
else:
consume_task.delay()
try:
consume_task.apply_async(
link=[delete.si(affected_docs)],
link_error=[restore_archive_serial_numbers_task.s(backup)],
)
except Exception:
restore_archive_serial_numbers(backup)
raise
else:
consume_task.delay()
return "OK"
@@ -494,6 +545,8 @@ def split(
overrides.title = f"{doc.title} (split {idx + 1})"
if user is not None:
overrides.owner_id = user.id
if not delete_originals:
overrides.skip_asn_if_exists = True
logger.info(
f"Adding split document with pages {split_doc} to the task queue.",
)
@@ -508,10 +561,20 @@ def split(
)
if delete_originals:
backup = release_archive_serial_numbers([doc.id])
logger.info(
"Queueing removal of original document after consumption of the split documents",
)
chord(header=consume_tasks, body=delete.si([doc.id])).delay()
try:
chord(
header=consume_tasks,
body=delete.si([doc.id]),
).apply_async(
link_error=[restore_archive_serial_numbers_task.s(backup)],
)
except Exception:
restore_archive_serial_numbers(backup)
raise
else:
group(consume_tasks).delay()
@@ -614,7 +677,10 @@ def edit_pdf(
)
if user is not None:
overrides.owner_id = user.id
if not delete_original:
overrides.skip_asn_if_exists = True
if delete_original and len(pdf_docs) == 1:
overrides.asn = doc.archive_serial_number
for idx, pdf in enumerate(pdf_docs, start=1):
filepath: Path = (
Path(tempfile.mkdtemp(dir=settings.SCRATCH_DIR))
@@ -633,7 +699,17 @@ def edit_pdf(
)
if delete_original:
chord(header=consume_tasks, body=delete.si([doc.id])).delay()
backup = release_archive_serial_numbers([doc.id])
try:
chord(
header=consume_tasks,
body=delete.si([doc.id]),
).apply_async(
link_error=[restore_archive_serial_numbers_task.s(backup)],
)
except Exception:
restore_archive_serial_numbers(backup)
raise
else:
group(consume_tasks).delay()

View File

@@ -696,7 +696,7 @@ class ConsumerPlugin(
pk=self.metadata.storage_path_id,
)
if self.metadata.asn is not None and not self.metadata.skip_asn:
if self.metadata.asn is not None:
document.archive_serial_number = self.metadata.asn
if self.metadata.owner_id:
@@ -812,8 +812,8 @@ class ConsumerPreflightPlugin(
"""
Check that if override_asn is given, it is unique and within a valid range
"""
if self.metadata.skip_asn or self.metadata.asn is None:
# if skip is set or ASN is None
if self.metadata.asn is None:
# if ASN is None
return
# Validate the range is above zero and less than uint32_t max
# otherwise, Whoosh can't handle it in the index

View File

@@ -30,7 +30,7 @@ class DocumentMetadataOverrides:
change_users: list[int] | None = None
change_groups: list[int] | None = None
custom_fields: dict | None = None
skip_asn: bool = False
skip_asn_if_exists: bool = False
def update(self, other: "DocumentMetadataOverrides") -> "DocumentMetadataOverrides":
"""
@@ -50,8 +50,8 @@ class DocumentMetadataOverrides:
self.storage_path_id = other.storage_path_id
if other.owner_id is not None:
self.owner_id = other.owner_id
if other.skip_asn:
self.skip_asn = True
if other.skip_asn_if_exists:
self.skip_asn_if_exists = True
# merge
if self.tag_ids is None:

View File

@@ -602,23 +602,21 @@ class TestPDFActions(DirectoriesMixin, TestCase):
expected_filename,
)
self.assertEqual(consume_file_args[1].title, None)
self.assertTrue(consume_file_args[1].skip_asn)
# No metadata_document_id, delete_originals False, so ASN should be None
self.assertIsNone(consume_file_args[1].asn)
# With metadata_document_id overrides
result = bulk_edit.merge(doc_ids, metadata_document_id=metadata_document_id)
consume_file_args, _ = mock_consume_file.call_args
self.assertEqual(consume_file_args[1].title, "B (merged)")
self.assertEqual(consume_file_args[1].created, self.doc2.created)
self.assertTrue(consume_file_args[1].skip_asn)
self.assertEqual(result, "OK")
@mock.patch("documents.bulk_edit.delete.si")
@mock.patch("documents.tasks.consume_file.s")
@mock.patch("documents.bulk_edit.chain")
def test_merge_and_delete_originals(
self,
mock_chain,
mock_consume_file,
mock_delete_documents,
):
@@ -632,6 +630,12 @@ class TestPDFActions(DirectoriesMixin, TestCase):
- Document deletion task should be called
"""
doc_ids = [self.doc1.id, self.doc2.id, self.doc3.id]
self.doc1.archive_serial_number = 101
self.doc2.archive_serial_number = 102
self.doc3.archive_serial_number = 103
self.doc1.save()
self.doc2.save()
self.doc3.save()
result = bulk_edit.merge(doc_ids, delete_originals=True)
self.assertEqual(result, "OK")
@@ -642,7 +646,8 @@ class TestPDFActions(DirectoriesMixin, TestCase):
mock_consume_file.assert_called()
mock_delete_documents.assert_called()
mock_chain.assert_called_once()
consume_sig = mock_consume_file.return_value
consume_sig.apply_async.assert_called_once()
consume_file_args, _ = mock_consume_file.call_args
self.assertEqual(
@@ -650,7 +655,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
expected_filename,
)
self.assertEqual(consume_file_args[1].title, None)
self.assertTrue(consume_file_args[1].skip_asn)
self.assertEqual(consume_file_args[1].asn, 101)
delete_documents_args, _ = mock_delete_documents.call_args
self.assertEqual(
@@ -658,6 +663,92 @@ class TestPDFActions(DirectoriesMixin, TestCase):
doc_ids,
)
self.doc1.refresh_from_db()
self.doc2.refresh_from_db()
self.doc3.refresh_from_db()
self.assertIsNone(self.doc1.archive_serial_number)
self.assertIsNone(self.doc2.archive_serial_number)
self.assertIsNone(self.doc3.archive_serial_number)
@mock.patch("documents.bulk_edit.delete.si")
@mock.patch("documents.tasks.consume_file.s")
def test_merge_and_delete_originals_restore_on_failure(
self,
mock_consume_file,
mock_delete_documents,
):
"""
GIVEN:
- Existing documents
WHEN:
- Merge action with deleting documents is called with 1 document
- Error occurs when queuing consume file task
THEN:
- Archive serial numbers are restored
"""
doc_ids = [self.doc1.id]
self.doc1.archive_serial_number = 111
self.doc1.save()
sig = mock.Mock()
sig.apply_async.side_effect = Exception("boom")
mock_consume_file.return_value = sig
with self.assertRaises(Exception):
bulk_edit.merge(doc_ids, delete_originals=True)
self.doc1.refresh_from_db()
self.assertEqual(self.doc1.archive_serial_number, 111)
@mock.patch("documents.bulk_edit.delete.si")
@mock.patch("documents.tasks.consume_file.s")
def test_merge_and_delete_originals_metadata_handoff(
self,
mock_consume_file,
mock_delete_documents,
):
"""
GIVEN:
- Existing documents with ASNs
WHEN:
- Merge with delete_originals=True and metadata_document_id set
THEN:
- Handoff ASN uses metadata document ASN
"""
doc_ids = [self.doc1.id, self.doc2.id]
self.doc1.archive_serial_number = 101
self.doc2.archive_serial_number = 202
self.doc1.save()
self.doc2.save()
result = bulk_edit.merge(
doc_ids,
metadata_document_id=self.doc2.id,
delete_originals=True,
)
self.assertEqual(result, "OK")
consume_file_args, _ = mock_consume_file.call_args
self.assertEqual(consume_file_args[1].asn, 202)
def test_restore_archive_serial_numbers_task(self):
"""
GIVEN:
- Existing document with no archive serial number
WHEN:
- Restore archive serial number task is called with backup data
THEN:
- Document archive serial number is restored
"""
self.doc1.archive_serial_number = 444
self.doc1.save()
Document.objects.filter(pk=self.doc1.id).update(archive_serial_number=None)
backup = {self.doc1.id: 444}
bulk_edit.restore_archive_serial_numbers_task(backup)
self.doc1.refresh_from_db()
self.assertEqual(self.doc1.archive_serial_number, 444)
@mock.patch("documents.tasks.consume_file.s")
def test_merge_with_archive_fallback(self, mock_consume_file):
"""
@@ -726,6 +817,7 @@ class TestPDFActions(DirectoriesMixin, TestCase):
self.assertEqual(mock_consume_file.call_count, 2)
consume_file_args, _ = mock_consume_file.call_args
self.assertEqual(consume_file_args[1].title, "B (split 2)")
self.assertIsNone(consume_file_args[1].asn)
self.assertEqual(result, "OK")
@@ -750,6 +842,8 @@ class TestPDFActions(DirectoriesMixin, TestCase):
"""
doc_ids = [self.doc2.id]
pages = [[1, 2], [3]]
self.doc2.archive_serial_number = 200
self.doc2.save()
result = bulk_edit.split(doc_ids, pages, delete_originals=True)
self.assertEqual(result, "OK")
@@ -767,6 +861,42 @@ class TestPDFActions(DirectoriesMixin, TestCase):
doc_ids,
)
self.doc2.refresh_from_db()
self.assertIsNone(self.doc2.archive_serial_number)
@mock.patch("documents.bulk_edit.delete.si")
@mock.patch("documents.tasks.consume_file.s")
@mock.patch("documents.bulk_edit.chord")
def test_split_restore_on_failure(
self,
mock_chord,
mock_consume_file,
mock_delete_documents,
):
"""
GIVEN:
- Existing documents
WHEN:
- Split action with deleting documents is called with 1 document and 2 page groups
- Error occurs when queuing chord task
THEN:
- Archive serial numbers are restored
"""
doc_ids = [self.doc2.id]
pages = [[1, 2]]
self.doc2.archive_serial_number = 222
self.doc2.save()
sig = mock.Mock()
sig.apply_async.side_effect = Exception("boom")
mock_chord.return_value = sig
result = bulk_edit.split(doc_ids, pages, delete_originals=True)
self.assertEqual(result, "OK")
self.doc2.refresh_from_db()
self.assertEqual(self.doc2.archive_serial_number, 222)
@mock.patch("documents.tasks.consume_file.delay")
@mock.patch("pikepdf.Pdf.save")
def test_split_with_errors(self, mock_save_pdf, mock_consume_file):
@@ -967,10 +1097,49 @@ class TestPDFActions(DirectoriesMixin, TestCase):
mock_chord.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1}, {"page": 2}]
self.doc2.archive_serial_number = 250
self.doc2.save()
result = bulk_edit.edit_pdf(doc_ids, operations, delete_original=True)
self.assertEqual(result, "OK")
mock_chord.assert_called_once()
consume_file_args, _ = mock_consume_file.call_args
self.assertEqual(consume_file_args[1].asn, 250)
self.doc2.refresh_from_db()
self.assertIsNone(self.doc2.archive_serial_number)
@mock.patch("documents.bulk_edit.delete.si")
@mock.patch("documents.tasks.consume_file.s")
@mock.patch("documents.bulk_edit.chord")
def test_edit_pdf_restore_on_failure(
self,
mock_chord,
mock_consume_file,
mock_delete_documents,
):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with delete_original=True
- Error occurs when queuing chord task
THEN:
- Archive serial numbers are restored
"""
doc_ids = [self.doc2.id]
operations = [{"page": 1}]
self.doc2.archive_serial_number = 333
self.doc2.save()
sig = mock.Mock()
sig.apply_async.side_effect = Exception("boom")
mock_chord.return_value = sig
with self.assertRaises(Exception):
bulk_edit.edit_pdf(doc_ids, operations, delete_original=True)
self.doc2.refresh_from_db()
self.assertEqual(self.doc2.archive_serial_number, 333)
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
def test_edit_pdf_with_update_document(self, mock_update_document):

View File

@@ -14,6 +14,7 @@ from django.test import override_settings
from django.utils import timezone
from guardian.core import ObjectPermissionChecker
from documents.barcodes import BarcodePlugin
from documents.consumer import ConsumerError
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
@@ -412,14 +413,6 @@ class TestConsumer(
self.assertEqual(document.archive_serial_number, 123)
self._assert_first_last_send_progress()
def testMetadataOverridesSkipAsnPropagation(self):
overrides = DocumentMetadataOverrides()
incoming = DocumentMetadataOverrides(skip_asn=True)
overrides.update(incoming)
self.assertTrue(overrides.skip_asn)
def testOverrideTitlePlaceholders(self):
c = Correspondent.objects.create(name="Correspondent Name")
dt = DocumentType.objects.create(name="DocType Name")
@@ -1240,3 +1233,46 @@ class PostConsumeTestCase(DirectoriesMixin, GetConsumerMixin, TestCase):
r"sample\.pdf: Error while executing post-consume script: Command '\[.*\]' returned non-zero exit status \d+\.",
):
consumer.run_post_consume_script(doc)
class TestMetadataOverrides(TestCase):
def test_update_skip_asn_if_exists(self):
base = DocumentMetadataOverrides()
incoming = DocumentMetadataOverrides(skip_asn_if_exists=True)
base.update(incoming)
self.assertTrue(base.skip_asn_if_exists)
class TestBarcodeApplyDetectedASN(TestCase):
"""
GIVEN:
- Existing Documents with ASN 123
WHEN:
- A BarcodePlugin which detected an ASN
THEN:
- If skip_asn_if_exists is set, and ASN exists, do not set ASN
- If skip_asn_if_exists is set, and ASN does not exist, set ASN
"""
def test_apply_detected_asn_skips_existing_when_flag_set(self):
doc = Document.objects.create(
checksum="X1",
title="D1",
archive_serial_number=123,
)
metadata = DocumentMetadataOverrides(skip_asn_if_exists=True)
plugin = BarcodePlugin(
input_doc=mock.Mock(),
metadata=metadata,
status_mgr=mock.Mock(),
base_tmp_dir=tempfile.gettempdir(),
task_id="test-task",
)
plugin._apply_detected_asn(123)
self.assertIsNone(plugin.metadata.asn)
doc.hard_delete()
plugin._apply_detected_asn(123)
self.assertEqual(plugin.metadata.asn, 123)