Compare commits

...

9 Commits

Author SHA1 Message Date
shamoon
7ea4893e42 Try joblib 2025-08-31 14:18:31 -07:00
shamoon
78255d0a99 Add some memory logging to suggestions (revert) 2025-08-31 14:07:48 -07:00
shamoon
fc4cb08bda Cache classifier loading with lru_cache 2025-08-30 20:06:18 -07:00
shamoon
875dc6602b Dont fetch doc content on suggestions 2025-08-30 20:06:18 -07:00
shamoon
8084ece274 Just build docker image in ci (revert me too) 2025-08-30 20:06:17 -07:00
shamoon
70b24c056b Add process to middleware (revert me too) 2025-08-30 20:06:17 -07:00
shamoon
26d2d63c26 MemLogMiddleware for testing (revert me) 2025-08-30 19:03:27 -07:00
GitHub Actions
107374af71 Auto translate strings 2025-08-30 16:11:17 +00:00
shamoon
a77141e133 Fix: ensure title gets marked as dirty 2025-08-30 09:09:43 -07:00
9 changed files with 719 additions and 567 deletions

1008
.github/workflows/ci.yml vendored

File diff suppressed because it is too large Load Diff

View File

@@ -53,6 +53,7 @@ dependencies = [
"ocrmypdf~=16.10.0", "ocrmypdf~=16.10.0",
"pathvalidate~=3.3.1", "pathvalidate~=3.3.1",
"pdf2image~=1.17.0", "pdf2image~=1.17.0",
"psutil>=7",
"psycopg-pool", "psycopg-pool",
"python-dateutil~=2.9.0", "python-dateutil~=2.9.0",
"python-dotenv~=1.1.0", "python-dotenv~=1.1.0",

View File

@@ -2544,11 +2544,11 @@
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1017</context> <context context-type="linenumber">1018</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1382</context> <context context-type="linenumber">1383</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context> <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -3156,7 +3156,7 @@
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">970</context> <context context-type="linenumber">971</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context> <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -6579,7 +6579,7 @@
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1381</context> <context context-type="linenumber">1382</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="6490688569532630280" datatype="html"> <trans-unit id="6490688569532630280" datatype="html">
@@ -6904,21 +6904,21 @@
<source>Next document</source> <source>Next document</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">573</context> <context context-type="linenumber">574</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="651985345816518480" datatype="html"> <trans-unit id="651985345816518480" datatype="html">
<source>Previous document</source> <source>Previous document</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">583</context> <context context-type="linenumber">584</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="2885986061416655600" datatype="html"> <trans-unit id="2885986061416655600" datatype="html">
<source>Close document</source> <source>Close document</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">591</context> <context context-type="linenumber">592</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/services/open-documents.service.ts</context> <context context-type="sourcefile">src/app/services/open-documents.service.ts</context>
@@ -6929,67 +6929,67 @@
<source>Save document</source> <source>Save document</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">598</context> <context context-type="linenumber">599</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="1784543155727940353" datatype="html"> <trans-unit id="1784543155727940353" datatype="html">
<source>Save and close / next</source> <source>Save and close / next</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">607</context> <context context-type="linenumber">608</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="5758784066858623886" datatype="html"> <trans-unit id="5758784066858623886" datatype="html">
<source>Error retrieving metadata</source> <source>Error retrieving metadata</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">659</context> <context context-type="linenumber">660</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="3456881259945295697" datatype="html"> <trans-unit id="3456881259945295697" datatype="html">
<source>Error retrieving suggestions.</source> <source>Error retrieving suggestions.</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">688</context> <context context-type="linenumber">689</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="2194092841814123758" datatype="html"> <trans-unit id="2194092841814123758" datatype="html">
<source>Document &quot;<x id="PH" equiv-text="newValues.title"/>&quot; saved successfully.</source> <source>Document &quot;<x id="PH" equiv-text="newValues.title"/>&quot; saved successfully.</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">860</context> <context context-type="linenumber">861</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">884</context> <context context-type="linenumber">885</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="6626387786259219838" datatype="html"> <trans-unit id="6626387786259219838" datatype="html">
<source>Error saving document &quot;<x id="PH" equiv-text="this.document.title"/>&quot;</source> <source>Error saving document &quot;<x id="PH" equiv-text="this.document.title"/>&quot;</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">890</context> <context context-type="linenumber">891</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="448882439049417053" datatype="html"> <trans-unit id="448882439049417053" datatype="html">
<source>Error saving document</source> <source>Error saving document</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">939</context> <context context-type="linenumber">940</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="8410796510716511826" datatype="html"> <trans-unit id="8410796510716511826" datatype="html">
<source>Do you really want to move the document &quot;<x id="PH" equiv-text="this.document.title"/>&quot; to the trash?</source> <source>Do you really want to move the document &quot;<x id="PH" equiv-text="this.document.title"/>&quot; to the trash?</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">971</context> <context context-type="linenumber">972</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="282586936710748252" datatype="html"> <trans-unit id="282586936710748252" datatype="html">
<source>Documents can be restored prior to permanent deletion.</source> <source>Documents can be restored prior to permanent deletion.</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">972</context> <context context-type="linenumber">973</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context> <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -7000,7 +7000,7 @@
<source>Move to trash</source> <source>Move to trash</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">974</context> <context context-type="linenumber">975</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context> <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -7011,14 +7011,14 @@
<source>Error deleting document</source> <source>Error deleting document</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">993</context> <context context-type="linenumber">994</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="619486176823357521" datatype="html"> <trans-unit id="619486176823357521" datatype="html">
<source>Reprocess confirm</source> <source>Reprocess confirm</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1013</context> <context context-type="linenumber">1014</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context> <context context-type="sourcefile">src/app/components/document-list/bulk-editor/bulk-editor.component.ts</context>
@@ -7029,67 +7029,67 @@
<source>This operation will permanently recreate the archive file for this document.</source> <source>This operation will permanently recreate the archive file for this document.</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1014</context> <context context-type="linenumber">1015</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="302054111564709516" datatype="html"> <trans-unit id="302054111564709516" datatype="html">
<source>The archive file will be re-generated with the current settings.</source> <source>The archive file will be re-generated with the current settings.</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1015</context> <context context-type="linenumber">1016</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="8251197608401006898" datatype="html"> <trans-unit id="8251197608401006898" datatype="html">
<source>Reprocess operation for &quot;<x id="PH" equiv-text="this.document.title"/>&quot; will begin in the background. Close and re-open or reload this document after the operation has completed to see new content.</source> <source>Reprocess operation for &quot;<x id="PH" equiv-text="this.document.title"/>&quot; will begin in the background. Close and re-open or reload this document after the operation has completed to see new content.</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1025</context> <context context-type="linenumber">1026</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="4409560272830824468" datatype="html"> <trans-unit id="4409560272830824468" datatype="html">
<source>Error executing operation</source> <source>Error executing operation</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1036</context> <context context-type="linenumber">1037</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="6030453331794586802" datatype="html"> <trans-unit id="6030453331794586802" datatype="html">
<source>Error downloading document</source> <source>Error downloading document</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1085</context> <context context-type="linenumber">1086</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="4458954481601077369" datatype="html"> <trans-unit id="4458954481601077369" datatype="html">
<source>Page Fit</source> <source>Page Fit</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1162</context> <context context-type="linenumber">1163</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="4663705961777238777" datatype="html"> <trans-unit id="4663705961777238777" datatype="html">
<source>PDF edit operation for &quot;<x id="PH" equiv-text="this.document.title"/>&quot; will begin in the background.</source> <source>PDF edit operation for &quot;<x id="PH" equiv-text="this.document.title"/>&quot; will begin in the background.</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1400</context> <context context-type="linenumber">1401</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="9043972994040261999" datatype="html"> <trans-unit id="9043972994040261999" datatype="html">
<source>Error executing PDF edit operation</source> <source>Error executing PDF edit operation</source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1412</context> <context context-type="linenumber">1413</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="6085793215710522488" datatype="html"> <trans-unit id="6085793215710522488" datatype="html">
<source>An error occurred loading tiff: <x id="PH" equiv-text="err.toString()"/></source> <source>An error occurred loading tiff: <x id="PH" equiv-text="err.toString()"/></source>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1479</context> <context context-type="linenumber">1480</context>
</context-group> </context-group>
<context-group purpose="location"> <context-group purpose="location">
<context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context> <context context-type="sourcefile">src/app/components/document-detail/document-detail.component.ts</context>
<context context-type="linenumber">1483</context> <context context-type="linenumber">1484</context>
</context-group> </context-group>
</trans-unit> </trans-unit>
<trans-unit id="4958946940233632319" datatype="html"> <trans-unit id="4958946940233632319" datatype="html">

View File

@@ -472,6 +472,7 @@ export class DocumentDetailComponent
if (titleValue !== this.titleInput.value) return if (titleValue !== this.titleInput.value) return
this.title = titleValue this.title = titleValue
this.documentForm.patchValue({ title: titleValue }) this.documentForm.patchValue({ title: titleValue })
this.documentForm.get('title').markAsDirty()
}) })
this.setupDirtyTracking(useDoc, doc) this.setupDirtyTracking(useDoc, doc)
}, },

View File

@@ -4,6 +4,7 @@ import logging
import pickle import pickle
import re import re
import warnings import warnings
from functools import lru_cache
from hashlib import sha256 from hashlib import sha256
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
@@ -50,6 +51,7 @@ class ClassifierModelCorruptError(Exception):
pass pass
@lru_cache(maxsize=1)
def load_classifier(*, raise_exception: bool = False) -> DocumentClassifier | None: def load_classifier(*, raise_exception: bool = False) -> DocumentClassifier | None:
if not settings.MODEL_FILE.is_file(): if not settings.MODEL_FILE.is_file():
logger.debug( logger.debug(
@@ -61,6 +63,11 @@ def load_classifier(*, raise_exception: bool = False) -> DocumentClassifier | No
classifier = DocumentClassifier() classifier = DocumentClassifier()
try: try:
classifier.load() classifier.load()
logger.debug("classifier_id=%s", id(classifier))
logger.debug(
"classifier_data_vectorizer_hash=%s",
classifier.data_vectorizer_hash,
)
except IncompatibleClassifierVersionError as e: except IncompatibleClassifierVersionError as e:
logger.info(f"Classifier version incompatible: {e.message}, will re-train") logger.info(f"Classifier version incompatible: {e.message}, will re-train")
@@ -96,7 +103,8 @@ class DocumentClassifier:
# v7 - Updated scikit-learn package version # v7 - Updated scikit-learn package version
# v8 - Added storage path classifier # v8 - Added storage path classifier
# v9 - Changed from hashing to time/ids for re-train check # v9 - Changed from hashing to time/ids for re-train check
FORMAT_VERSION = 9 # v10 - Switch persistence to joblib with memory-mapping to reduce load-time memory spikes
FORMAT_VERSION = 10
def __init__(self) -> None: def __init__(self) -> None:
# last time a document changed and therefore training might be required # last time a document changed and therefore training might be required
@@ -128,32 +136,51 @@ class DocumentClassifier:
).hexdigest() ).hexdigest()
def load(self) -> None: def load(self) -> None:
import joblib
from sklearn.exceptions import InconsistentVersionWarning from sklearn.exceptions import InconsistentVersionWarning
# Catch warnings for processing # Catch warnings for processing
with warnings.catch_warnings(record=True) as w: with warnings.catch_warnings(record=True) as w:
with Path(settings.MODEL_FILE).open("rb") as f: try:
schema_version = pickle.load(f) state = joblib.load(settings.MODEL_FILE, mmap_mode="r")
except Exception as err:
# As a fallback, try to detect old pickle-based and mark incompatible
try:
with Path(settings.MODEL_FILE).open("rb") as f:
_ = pickle.load(f)
raise IncompatibleClassifierVersionError(
"Cannot load classifier, incompatible versions.",
) from err
except IncompatibleClassifierVersionError:
raise
except Exception:
# Not even a readable pickle header
raise ClassifierModelCorruptError from err
if schema_version != self.FORMAT_VERSION: try:
if (
not isinstance(state, dict)
or state.get("format_version") != self.FORMAT_VERSION
):
raise IncompatibleClassifierVersionError( raise IncompatibleClassifierVersionError(
"Cannot load classifier, incompatible versions.", "Cannot load classifier, incompatible versions.",
) )
else:
try:
self.last_doc_change_time = pickle.load(f)
self.last_auto_type_hash = pickle.load(f)
self.data_vectorizer = pickle.load(f) self.last_doc_change_time = state.get("last_doc_change_time")
self._update_data_vectorizer_hash() self.last_auto_type_hash = state.get("last_auto_type_hash")
self.tags_binarizer = pickle.load(f)
self.tags_classifier = pickle.load(f) self.data_vectorizer = state.get("data_vectorizer")
self.correspondent_classifier = pickle.load(f) self._update_data_vectorizer_hash()
self.document_type_classifier = pickle.load(f) self.tags_binarizer = state.get("tags_binarizer")
self.storage_path_classifier = pickle.load(f)
except Exception as err: self.tags_classifier = state.get("tags_classifier")
raise ClassifierModelCorruptError from err self.correspondent_classifier = state.get("correspondent_classifier")
self.document_type_classifier = state.get("document_type_classifier")
self.storage_path_classifier = state.get("storage_path_classifier")
except IncompatibleClassifierVersionError:
raise
except Exception as err:
raise ClassifierModelCorruptError from err
# Check for the warning about unpickling from differing versions # Check for the warning about unpickling from differing versions
# and consider it incompatible # and consider it incompatible
@@ -171,23 +198,24 @@ class DocumentClassifier:
raise IncompatibleClassifierVersionError("sklearn version update") raise IncompatibleClassifierVersionError("sklearn version update")
def save(self) -> None:
    """
    Persist the classifier state to ``settings.MODEL_FILE`` using joblib.

    The state is written to a temporary ``.joblib.part`` file next to the
    target and then renamed over it, so readers never see a half-written
    model file.
    """
    import joblib

    target_file: Path = settings.MODEL_FILE
    target_file_temp: Path = target_file.with_suffix(".joblib.part")

    state = {
        "format_version": self.FORMAT_VERSION,
        "last_doc_change_time": self.last_doc_change_time,
        "last_auto_type_hash": self.last_auto_type_hash,
        "data_vectorizer": self.data_vectorizer,
        "tags_binarizer": self.tags_binarizer,
        "tags_classifier": self.tags_classifier,
        "correspondent_classifier": self.correspondent_classifier,
        "document_type_classifier": self.document_type_classifier,
        "storage_path_classifier": self.storage_path_classifier,
    }

    # Write uncompressed: load() opens the file with mmap_mode="r", and
    # joblib can only memory-map arrays stored in uncompressed files. With
    # compression enabled the mmap request is ignored and every array is
    # fully materialised in memory on load.
    joblib.dump(state, target_file_temp, compress=0)

    target_file_temp.rename(target_file)

View File

@@ -3,7 +3,9 @@ import logging
import os import os
import platform import platform
import re import re
import resource
import tempfile import tempfile
import time
import zipfile import zipfile
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
@@ -190,6 +192,33 @@ if settings.AUDIT_LOG_ENABLED:
logger = logging.getLogger("paperless.api") logger = logging.getLogger("paperless.api")
try:
import psutil
_PS = psutil.Process(os.getpid())
except Exception:
_PS = None
_diag_log = logging.getLogger("paperless")
def _mem_mb():
    """
    Return ``(rss_mb, peak_mb)`` for the current process.

    ``rss_mb`` is the current resident set size as reported by psutil, or 0
    when psutil could not be set up (``_PS`` is None). ``peak_mb`` is the
    process high-water mark from ``getrusage``.
    """
    rss = _PS.memory_info().rss if _PS else 0
    peak_raw = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    # ru_maxrss is reported in kilobytes on Linux but in bytes on macOS
    peak_divisor = 1024.0 * 1024.0 if platform.system() == "Darwin" else 1024.0
    return rss / (1024 * 1024), peak_raw / peak_divisor
def _mark(phase, doc_id, t0):
    """
    Emit one DEBUG line describing memory use and elapsed time at ``phase``.

    ``t0`` is a ``time.perf_counter()`` timestamp taken when handling of the
    document ``doc_id`` started; the elapsed time is logged in milliseconds.
    """
    current_mb, peak_mb = _mem_mb()
    # Elapsed time is sampled after the memory readings, as close to the log
    # call as possible.
    elapsed_ms = (time.perf_counter() - t0) * 1000
    _diag_log.debug(
        "sugg doc=%s phase=%s rss=%.1fMB peak=%.1fMB t=%.1fms",
        doc_id,
        phase,
        current_mb,
        peak_mb,
        elapsed_ms,
    )
class IndexView(TemplateView): class IndexView(TemplateView):
template_name = "index.html" template_name = "index.html"
@@ -758,7 +787,16 @@ class DocumentViewSet(
), ),
) )
def suggestions(self, request, pk=None): def suggestions(self, request, pk=None):
doc = get_object_or_404(Document.objects.select_related("owner"), pk=pk) t0 = time.perf_counter()
# Don't fetch content here
doc = get_object_or_404(
Document.objects.select_related("owner").only(
"id",
"owner_id",
),
pk=pk,
)
_mark("start", doc.pk, t0)
if request.user is not None and not has_perms_owner_aware( if request.user is not None and not has_perms_owner_aware(
request.user, request.user,
"view_document", "view_document",
@@ -769,18 +807,23 @@ class DocumentViewSet(
document_suggestions = get_suggestion_cache(doc.pk) document_suggestions = get_suggestion_cache(doc.pk)
if document_suggestions is not None: if document_suggestions is not None:
_mark("cache_hit_return", doc.pk, t0)
refresh_suggestions_cache(doc.pk) refresh_suggestions_cache(doc.pk)
return Response(document_suggestions.suggestions) return Response(document_suggestions.suggestions)
classifier = load_classifier() classifier = load_classifier()
_mark("loaded_classifier", doc.pk, t0)
dates = [] dates = []
if settings.NUMBER_OF_SUGGESTED_DATES > 0: if settings.NUMBER_OF_SUGGESTED_DATES > 0:
gen = parse_date_generator(doc.filename, doc.content) gen = parse_date_generator(doc.filename, doc.content)
_mark("before_dates", doc.pk, t0)
dates = sorted( dates = sorted(
{i for i in itertools.islice(gen, settings.NUMBER_OF_SUGGESTED_DATES)}, {i for i in itertools.islice(gen, settings.NUMBER_OF_SUGGESTED_DATES)},
) )
_mark("after_dates", doc.pk, t0)
_mark("before_match", doc.pk, t0)
resp_data = { resp_data = {
"correspondents": [ "correspondents": [
c.id for c in match_correspondents(doc, classifier, request.user) c.id for c in match_correspondents(doc, classifier, request.user)
@@ -794,9 +837,11 @@ class DocumentViewSet(
], ],
"dates": [date.strftime("%Y-%m-%d") for date in dates if date is not None], "dates": [date.strftime("%Y-%m-%d") for date in dates if date is not None],
} }
_mark("assembled_resp", doc.pk, t0)
# Cache the suggestions and the classifier hash for later # Cache the suggestions and the classifier hash for later
set_suggestions_cache(doc.pk, resp_data, classifier) set_suggestions_cache(doc.pk, resp_data, classifier)
_mark("cached", doc.pk, t0)
return Response(resp_data) return Response(resp_data)

View File

@@ -1,7 +1,14 @@
import logging
import os
import resource
import time
from django.conf import settings from django.conf import settings
from paperless import version from paperless import version
logger = logging.getLogger("middleware")
class ApiVersionMiddleware: class ApiVersionMiddleware:
def __init__(self, get_response): def __init__(self, get_response):
@@ -15,3 +22,56 @@ class ApiVersionMiddleware:
response["X-Version"] = version.__full_version_str__ response["X-Version"] = version.__full_version_str__
return response return response
try:
    import psutil

    _PSUTIL = True
except Exception:
    # Any failure while importing psutil only disables the RSS figures logged
    # below; it does not prevent this module from being imported.
    _PSUTIL = False


class MemLogMiddleware:
    """
    Diagnostic middleware that logs memory usage and wall-clock duration for
    every request at DEBUG level on the module logger.

    Logged fields:
      * rss   - resident set size after the request (0.0 without psutil)
      * Δend  - RSS change across the request (0.0 without psutil)
      * peak  - ru_maxrss of the process after the request
      * Δpeak - how much ru_maxrss grew while the request was handled
      * dur   - time spent in the wrapped handler, in milliseconds

    All memory figures are reported in MB.
    """

    def __init__(self, get_response):
        """
        :param get_response: the next middleware / view callable in the chain
        """
        # Local import: only needed here, to select the ru_maxrss unit once.
        import sys

        self.get_response = get_response
        # getrusage() reports ru_maxrss in kilobytes on Linux but in bytes on
        # macOS, so the divisor that yields MB depends on the platform.
        self._maxrss_units_per_mb = (
            1024.0 * 1024.0 if sys.platform == "darwin" else 1024.0
        )

    def __call__(self, request):
        """
        Run the wrapped handler and log memory/time statistics afterwards.

        The statistics are emitted from a ``finally`` clause, so they are
        logged whether the handler returns a response or raises.

        :param request: the incoming request; only ``method`` and ``path``
            are read, for the log line
        :return: whatever the wrapped handler returns
        """
        # capture baseline
        ru_before = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        if _PSUTIL:
            p = psutil.Process()
            rss_before = p.memory_info().rss
        else:
            rss_before = 0
        t0 = time.perf_counter()
        try:
            return self.get_response(request)
        finally:
            dur_ms = (time.perf_counter() - t0) * 1000.0
            ru_after = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
            # Convert the platform-specific ru_maxrss unit to MB
            peak_mb = ru_after / self._maxrss_units_per_mb
            peak_delta_mb = (ru_after - ru_before) / self._maxrss_units_per_mb
            if _PSUTIL:
                # memory_info().rss is in bytes
                rss_after = p.memory_info().rss
                delta_mb = (rss_after - rss_before) / (1024 * 1024)
                rss_mb = rss_after / (1024 * 1024)
            else:
                delta_mb = 0.0
                rss_mb = 0.0
            logger.debug(
                "pid=%s mem rss=%.1fMB Δend=%.1fMB peak=%.1fMB Δpeak=%.1fMB dur=%.1fms %s %s",
                os.getpid(),
                rss_mb,
                delta_mb,
                peak_mb,
                peak_delta_mb,
                dur_ms,
                request.method,
                request.path,
            )

View File

@@ -363,6 +363,7 @@ if DEBUG:
) )
MIDDLEWARE = [ MIDDLEWARE = [
"paperless.middleware.MemLogMiddleware",
"django.middleware.security.SecurityMiddleware", "django.middleware.security.SecurityMiddleware",
"whitenoise.middleware.WhiteNoiseMiddleware", "whitenoise.middleware.WhiteNoiseMiddleware",
"django.contrib.sessions.middleware.SessionMiddleware", "django.contrib.sessions.middleware.SessionMiddleware",
@@ -833,7 +834,7 @@ LOGGING = {
"disable_existing_loggers": False, "disable_existing_loggers": False,
"formatters": { "formatters": {
"verbose": { "verbose": {
"format": "[{asctime}] [{levelname}] [{name}] {message}", "format": "[{asctime}] [{levelname}] [{name}] pid={process} {message}",
"style": "{", "style": "{",
}, },
"simple": { "simple": {
@@ -878,6 +879,7 @@ LOGGING = {
"kombu": {"handlers": ["file_celery"], "level": "DEBUG"}, "kombu": {"handlers": ["file_celery"], "level": "DEBUG"},
"_granian": {"handlers": ["file_paperless"], "level": "DEBUG"}, "_granian": {"handlers": ["file_paperless"], "level": "DEBUG"},
"granian.access": {"handlers": ["file_paperless"], "level": "DEBUG"}, "granian.access": {"handlers": ["file_paperless"], "level": "DEBUG"},
"middleware": {"handlers": ["console"], "level": "DEBUG"},
}, },
} }

15
uv.lock generated
View File

@@ -2046,6 +2046,7 @@ dependencies = [
{ name = "ocrmypdf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "ocrmypdf", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pathvalidate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "pathvalidate", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "pdf2image", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "pdf2image", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "psutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "psycopg-pool", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "psycopg-pool", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "python-dateutil", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
{ name = "python-dotenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "python-dotenv", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
@@ -2182,6 +2183,7 @@ requires-dist = [
{ name = "ocrmypdf", specifier = "~=16.10.0" }, { name = "ocrmypdf", specifier = "~=16.10.0" },
{ name = "pathvalidate", specifier = "~=3.3.1" }, { name = "pathvalidate", specifier = "~=3.3.1" },
{ name = "pdf2image", specifier = "~=1.17.0" }, { name = "pdf2image", specifier = "~=1.17.0" },
{ name = "psutil", specifier = ">=7.0.0" },
{ name = "psycopg", extras = ["c", "pool"], marker = "extra == 'postgres'", specifier = "==3.2.9" }, { name = "psycopg", extras = ["c", "pool"], marker = "extra == 'postgres'", specifier = "==3.2.9" },
{ name = "psycopg-c", marker = "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'postgres'", url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_aarch64.whl" }, { name = "psycopg-c", marker = "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'postgres'", url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_aarch64.whl" },
{ name = "psycopg-c", marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'postgres'", url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_x86_64.whl" }, { name = "psycopg-c", marker = "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'postgres'", url = "https://github.com/paperless-ngx/builder/releases/download/psycopg-3.2.9/psycopg_c-3.2.9-cp312-cp312-linux_x86_64.whl" },
@@ -2548,6 +2550,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e4/ea/d836f008d33151c7a1f62caf3d8dd782e4d15f6a43897f64480c2b8de2ad/prompt_toolkit-3.0.50-py3-none-any.whl", hash = "sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198", size = 387816, upload-time = "2025-01-20T15:55:29.98Z" }, { url = "https://files.pythonhosted.org/packages/e4/ea/d836f008d33151c7a1f62caf3d8dd782e4d15f6a43897f64480c2b8de2ad/prompt_toolkit-3.0.50-py3-none-any.whl", hash = "sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198", size = 387816, upload-time = "2025-01-20T15:55:29.98Z" },
] ]
[[package]]
name = "psutil"
version = "7.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/2a/80/336820c1ad9286a4ded7e845b2eccfcb27851ab8ac6abece774a6ff4d3de/psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456", size = 497003, upload-time = "2025-02-13T21:54:07.946Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ed/e6/2d26234410f8b8abdbf891c9da62bee396583f713fb9f3325a4760875d22/psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25", size = 238051, upload-time = "2025-02-13T21:54:12.36Z" },
{ url = "https://files.pythonhosted.org/packages/04/8b/30f930733afe425e3cbfc0e1468a30a18942350c1a8816acfade80c005c4/psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da", size = 239535, upload-time = "2025-02-13T21:54:16.07Z" },
{ url = "https://files.pythonhosted.org/packages/2a/ed/d362e84620dd22876b55389248e522338ed1bf134a5edd3b8231d7207f6d/psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91", size = 275004, upload-time = "2025-02-13T21:54:18.662Z" },
{ url = "https://files.pythonhosted.org/packages/bf/b9/b0eb3f3cbcb734d930fdf839431606844a825b23eaf9a6ab371edac8162c/psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34", size = 277986, upload-time = "2025-02-13T21:54:21.811Z" },
{ url = "https://files.pythonhosted.org/packages/eb/a2/709e0fe2f093556c17fbafda93ac032257242cabcc7ff3369e2cb76a97aa/psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993", size = 279544, upload-time = "2025-02-13T21:54:24.68Z" },
]
[[package]] [[package]]
name = "psycopg" name = "psycopg"
version = "3.2.9" version = "3.2.9"