Chore: Reduce imports for a slight memory improvement (#9217)

This commit is contained in:
Trenton H 2025-02-24 15:06:14 -08:00 committed by GitHub
parent 3104417076
commit 827fcba277
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 71 additions and 26 deletions

View File

@ -26,7 +26,7 @@ extend-select = [
"T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20 "T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20
"SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim "SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim
"TID", # https://docs.astral.sh/ruff/rules/#flake8-tidy-imports-tid "TID", # https://docs.astral.sh/ruff/rules/#flake8-tidy-imports-tid
"TCH", # https://docs.astral.sh/ruff/rules/#flake8-type-checking-tch "TC", # https://docs.astral.sh/ruff/rules/#flake8-type-checking-tc
"PLC", # https://docs.astral.sh/ruff/rules/#pylint-pl "PLC", # https://docs.astral.sh/ruff/rules/#pylint-pl
"PLE", # https://docs.astral.sh/ruff/rules/#pylint-pl "PLE", # https://docs.astral.sh/ruff/rules/#pylint-pl
"RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf "RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf

View File

@ -1,3 +1,5 @@
from __future__ import annotations
import logging import logging
import re import re
import tempfile import tempfile
@ -10,7 +12,6 @@ from pdf2image import convert_from_path
from pikepdf import Page from pikepdf import Page
from pikepdf import PasswordError from pikepdf import PasswordError
from pikepdf import Pdf from pikepdf import Pdf
from PIL import Image
from documents.converters import convert_from_tiff_to_pdf from documents.converters import convert_from_tiff_to_pdf
from documents.data_models import ConsumableDocument from documents.data_models import ConsumableDocument
@ -25,6 +26,8 @@ from documents.utils import maybe_override_pixel_limit
if TYPE_CHECKING: if TYPE_CHECKING:
from collections.abc import Callable from collections.abc import Callable
from PIL import Image
logger = logging.getLogger("paperless.barcodes") logger = logging.getLogger("paperless.barcodes")

View File

@ -1,12 +1,14 @@
from __future__ import annotations
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from typing import NoReturn from typing import NoReturn
from zipfile import ZipFile
from documents.models import Document
if TYPE_CHECKING: if TYPE_CHECKING:
from collections.abc import Callable from collections.abc import Callable
from zipfile import ZipFile
from documents.models import Document
class BulkArchiveStrategy: class BulkArchiveStrategy:

View File

@ -1,8 +1,11 @@
from __future__ import annotations
import hashlib import hashlib
import itertools import itertools
import logging import logging
import tempfile import tempfile
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING
from typing import Literal from typing import Literal
from celery import chain from celery import chain
@ -10,7 +13,6 @@ from celery import chord
from celery import group from celery import group
from celery import shared_task from celery import shared_task
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import User
from django.db.models import Q from django.db.models import Q
from django.utils import timezone from django.utils import timezone
@ -29,6 +31,9 @@ from documents.tasks import bulk_update_documents
from documents.tasks import consume_file from documents.tasks import consume_file
from documents.tasks import update_document_content_maybe_archive_file from documents.tasks import update_document_content_maybe_archive_file
if TYPE_CHECKING:
from django.contrib.auth.models import User
logger: logging.Logger = logging.getLogger("paperless.bulk_edit") logger: logging.Logger = logging.getLogger("paperless.bulk_edit")

View File

@ -1,9 +1,10 @@
from __future__ import annotations
import logging import logging
from binascii import hexlify from binascii import hexlify
from dataclasses import dataclass from dataclasses import dataclass
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from typing import Final from typing import Final
from typing import Optional
from django.core.cache import cache from django.core.cache import cache
@ -80,7 +81,7 @@ def get_suggestion_cache(document_id: int) -> SuggestionCacheData | None:
def set_suggestions_cache( def set_suggestions_cache(
document_id: int, document_id: int,
suggestions: dict, suggestions: dict,
classifier: Optional["DocumentClassifier"], classifier: DocumentClassifier | None,
*, *,
timeout=CACHE_50_MINUTES, timeout=CACHE_50_MINUTES,
) -> None: ) -> None:

View File

@ -1,22 +1,22 @@
from __future__ import annotations
import logging import logging
import pickle import pickle
import re import re
import time import time
import warnings import warnings
from collections.abc import Iterator
from hashlib import sha256 from hashlib import sha256
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from typing import Optional
if TYPE_CHECKING: if TYPE_CHECKING:
from collections.abc import Iterator
from datetime import datetime from datetime import datetime
from numpy import ndarray from numpy import ndarray
from django.conf import settings from django.conf import settings
from django.core.cache import cache from django.core.cache import cache
from sklearn.exceptions import InconsistentVersionWarning
from documents.caching import CACHE_50_MINUTES from documents.caching import CACHE_50_MINUTES
from documents.caching import CLASSIFIER_HASH_KEY from documents.caching import CLASSIFIER_HASH_KEY
@ -38,7 +38,7 @@ class ClassifierModelCorruptError(Exception):
pass pass
def load_classifier(*, raise_exception: bool = False) -> Optional["DocumentClassifier"]: def load_classifier(*, raise_exception: bool = False) -> DocumentClassifier | None:
if not settings.MODEL_FILE.is_file(): if not settings.MODEL_FILE.is_file():
logger.debug( logger.debug(
"Document classification model does not exist (yet), not " "Document classification model does not exist (yet), not "
@ -103,6 +103,8 @@ class DocumentClassifier:
self._stop_words = None self._stop_words = None
def load(self) -> None: def load(self) -> None:
from sklearn.exceptions import InconsistentVersionWarning
# Catch warnings for processing # Catch warnings for processing
with warnings.catch_warnings(record=True) as w: with warnings.catch_warnings(record=True) as w:
with Path(settings.MODEL_FILE).open("rb") as f: with Path(settings.MODEL_FILE).open("rb") as f:

View File

@ -1,9 +1,11 @@
from __future__ import annotations
import functools import functools
import inspect import inspect
import json import json
import operator import operator
from collections.abc import Callable
from contextlib import contextmanager from contextlib import contextmanager
from typing import TYPE_CHECKING
from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes.models import ContentType
from django.db.models import Case from django.db.models import Case
@ -39,6 +41,9 @@ from documents.models import ShareLink
from documents.models import StoragePath from documents.models import StoragePath
from documents.models import Tag from documents.models import Tag
if TYPE_CHECKING:
from collections.abc import Callable
CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"] CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"]
ID_KWARGS = ["in", "exact"] ID_KWARGS = ["in", "exact"]
INT_KWARGS = ["exact", "gt", "gte", "lt", "lte", "isnull"] INT_KWARGS = ["exact", "gt", "gte", "lt", "lte", "isnull"]

View File

@ -1,3 +1,5 @@
from __future__ import annotations
import logging import logging
import math import math
from collections import Counter from collections import Counter
@ -5,10 +7,10 @@ from contextlib import contextmanager
from datetime import datetime from datetime import datetime
from datetime import timezone from datetime import timezone
from shutil import rmtree from shutil import rmtree
from typing import TYPE_CHECKING
from typing import Literal from typing import Literal
from django.conf import settings from django.conf import settings
from django.db.models import QuerySet
from django.utils import timezone as django_timezone from django.utils import timezone as django_timezone
from guardian.shortcuts import get_users_with_perms from guardian.shortcuts import get_users_with_perms
from whoosh import classify from whoosh import classify
@ -32,10 +34,7 @@ from whoosh.qparser import QueryParser
from whoosh.qparser.dateparse import DateParserPlugin from whoosh.qparser.dateparse import DateParserPlugin
from whoosh.qparser.dateparse import English from whoosh.qparser.dateparse import English
from whoosh.qparser.plugins import FieldsPlugin from whoosh.qparser.plugins import FieldsPlugin
from whoosh.reading import IndexReader
from whoosh.scoring import TF_IDF from whoosh.scoring import TF_IDF
from whoosh.searching import ResultsPage
from whoosh.searching import Searcher
from whoosh.util.times import timespan from whoosh.util.times import timespan
from whoosh.writing import AsyncWriter from whoosh.writing import AsyncWriter
@ -44,6 +43,12 @@ from documents.models import Document
from documents.models import Note from documents.models import Note
from documents.models import User from documents.models import User
if TYPE_CHECKING:
from django.db.models import QuerySet
from whoosh.reading import IndexReader
from whoosh.searching import ResultsPage
from whoosh.searching import Searcher
logger = logging.getLogger("paperless.index") logger = logging.getLogger("paperless.index")

View File

@ -1,8 +1,10 @@
from __future__ import annotations
import logging import logging
import re import re
from fnmatch import fnmatch from fnmatch import fnmatch
from typing import TYPE_CHECKING
from documents.classifier import DocumentClassifier
from documents.data_models import ConsumableDocument from documents.data_models import ConsumableDocument
from documents.data_models import DocumentSource from documents.data_models import DocumentSource
from documents.models import Correspondent from documents.models import Correspondent
@ -15,6 +17,9 @@ from documents.models import Workflow
from documents.models import WorkflowTrigger from documents.models import WorkflowTrigger
from documents.permissions import get_objects_for_user_owner_aware from documents.permissions import get_objects_for_user_owner_aware
if TYPE_CHECKING:
from documents.classifier import DocumentClassifier
logger = logging.getLogger("paperless.matching") logger = logging.getLogger("paperless.matching")

View File

@ -1,4 +1,5 @@
import datetime from __future__ import annotations
import logging import logging
import mimetypes import mimetypes
import os import os
@ -6,10 +7,10 @@ import re
import shutil import shutil
import subprocess import subprocess
import tempfile import tempfile
from collections.abc import Iterator
from functools import lru_cache from functools import lru_cache
from pathlib import Path from pathlib import Path
from re import Match from re import Match
from typing import TYPE_CHECKING
from django.conf import settings from django.conf import settings
from django.utils import timezone from django.utils import timezone
@ -19,6 +20,10 @@ from documents.signals import document_consumer_declaration
from documents.utils import copy_file_with_basic_stats from documents.utils import copy_file_with_basic_stats
from documents.utils import run_subprocess from documents.utils import run_subprocess
if TYPE_CHECKING:
import datetime
from collections.abc import Iterator
# This regular expression will try to find dates in the document at # This regular expression will try to find dates in the document at
# hand and will match the following formats: # hand and will match the following formats:
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits # - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
@ -106,7 +111,7 @@ def get_supported_file_extensions() -> set[str]:
return extensions return extensions
def get_parser_class_for_mime_type(mime_type: str) -> type["DocumentParser"] | None: def get_parser_class_for_mime_type(mime_type: str) -> type[DocumentParser] | None:
""" """
Returns the best parser (by weight) for the given mimetype or Returns the best parser (by weight) for the given mimetype or
None if no parser exists None if no parser exists

View File

@ -1,10 +1,12 @@
from __future__ import annotations
import datetime import datetime
import logging import logging
import math import math
import re import re
import zoneinfo import zoneinfo
from collections.abc import Iterable
from decimal import Decimal from decimal import Decimal
from typing import TYPE_CHECKING
import magic import magic
from celery import states from celery import states
@ -32,6 +34,7 @@ from rest_framework.fields import SerializerMethodField
if settings.AUDIT_LOG_ENABLED: if settings.AUDIT_LOG_ENABLED:
from auditlog.context import set_actor from auditlog.context import set_actor
from documents import bulk_edit from documents import bulk_edit
from documents.data_models import DocumentSource from documents.data_models import DocumentSource
from documents.models import Correspondent from documents.models import Correspondent
@ -60,6 +63,9 @@ from documents.templating.utils import convert_format_str_to_template_format
from documents.validators import uri_validator from documents.validators import uri_validator
from documents.validators import url_validator from documents.validators import url_validator
if TYPE_CHECKING:
from collections.abc import Iterable
logger = logging.getLogger("paperless.serializers") logger = logging.getLogger("paperless.serializers")

View File

@ -1,7 +1,9 @@
from __future__ import annotations
import logging import logging
import os import os
import shutil import shutil
from pathlib import Path from typing import TYPE_CHECKING
import httpx import httpx
from celery import shared_task from celery import shared_task
@ -23,9 +25,6 @@ from guardian.shortcuts import remove_perm
from documents import matching from documents import matching
from documents.caching import clear_document_caches from documents.caching import clear_document_caches
from documents.classifier import DocumentClassifier
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.file_handling import create_source_path_directory from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_unique_filename from documents.file_handling import generate_unique_filename
@ -46,6 +45,13 @@ from documents.permissions import get_objects_for_user_owner_aware
from documents.permissions import set_permissions_for_object from documents.permissions import set_permissions_for_object
from documents.templating.workflows import parse_w_workflow_placeholders from documents.templating.workflows import parse_w_workflow_placeholders
if TYPE_CHECKING:
from pathlib import Path
from documents.classifier import DocumentClassifier
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
logger = logging.getLogger("paperless.handlers") logger = logging.getLogger("paperless.handlers")