mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Chore: Reduce imports for a slight memory improvement (#9217)
This commit is contained in:
parent
3104417076
commit
827fcba277
@ -26,7 +26,7 @@ extend-select = [
|
|||||||
"T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20
|
"T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20
|
||||||
"SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim
|
"SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim
|
||||||
"TID", # https://docs.astral.sh/ruff/rules/#flake8-tidy-imports-tid
|
"TID", # https://docs.astral.sh/ruff/rules/#flake8-tidy-imports-tid
|
||||||
"TCH", # https://docs.astral.sh/ruff/rules/#flake8-type-checking-tch
|
"TC", # https://docs.astral.sh/ruff/rules/#flake8-type-checking-tc
|
||||||
"PLC", # https://docs.astral.sh/ruff/rules/#pylint-pl
|
"PLC", # https://docs.astral.sh/ruff/rules/#pylint-pl
|
||||||
"PLE", # https://docs.astral.sh/ruff/rules/#pylint-pl
|
"PLE", # https://docs.astral.sh/ruff/rules/#pylint-pl
|
||||||
"RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf
|
"RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import tempfile
|
import tempfile
|
||||||
@ -10,7 +12,6 @@ from pdf2image import convert_from_path
|
|||||||
from pikepdf import Page
|
from pikepdf import Page
|
||||||
from pikepdf import PasswordError
|
from pikepdf import PasswordError
|
||||||
from pikepdf import Pdf
|
from pikepdf import Pdf
|
||||||
from PIL import Image
|
|
||||||
|
|
||||||
from documents.converters import convert_from_tiff_to_pdf
|
from documents.converters import convert_from_tiff_to_pdf
|
||||||
from documents.data_models import ConsumableDocument
|
from documents.data_models import ConsumableDocument
|
||||||
@ -25,6 +26,8 @@ from documents.utils import maybe_override_pixel_limit
|
|||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.barcodes")
|
logger = logging.getLogger("paperless.barcodes")
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,12 +1,14 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
from typing import NoReturn
|
from typing import NoReturn
|
||||||
from zipfile import ZipFile
|
|
||||||
|
|
||||||
from documents.models import Document
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
|
from zipfile import ZipFile
|
||||||
|
|
||||||
|
from documents.models import Document
|
||||||
|
|
||||||
|
|
||||||
class BulkArchiveStrategy:
|
class BulkArchiveStrategy:
|
||||||
|
@ -1,8 +1,11 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import itertools
|
import itertools
|
||||||
import logging
|
import logging
|
||||||
import tempfile
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
from typing import Literal
|
from typing import Literal
|
||||||
|
|
||||||
from celery import chain
|
from celery import chain
|
||||||
@ -10,7 +13,6 @@ from celery import chord
|
|||||||
from celery import group
|
from celery import group
|
||||||
from celery import shared_task
|
from celery import shared_task
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.contrib.auth.models import User
|
|
||||||
from django.db.models import Q
|
from django.db.models import Q
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
|
||||||
@ -29,6 +31,9 @@ from documents.tasks import bulk_update_documents
|
|||||||
from documents.tasks import consume_file
|
from documents.tasks import consume_file
|
||||||
from documents.tasks import update_document_content_maybe_archive_file
|
from documents.tasks import update_document_content_maybe_archive_file
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from django.contrib.auth.models import User
|
||||||
|
|
||||||
logger: logging.Logger = logging.getLogger("paperless.bulk_edit")
|
logger: logging.Logger = logging.getLogger("paperless.bulk_edit")
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from binascii import hexlify
|
from binascii import hexlify
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
from typing import Final
|
from typing import Final
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from django.core.cache import cache
|
from django.core.cache import cache
|
||||||
|
|
||||||
@ -80,7 +81,7 @@ def get_suggestion_cache(document_id: int) -> SuggestionCacheData | None:
|
|||||||
def set_suggestions_cache(
|
def set_suggestions_cache(
|
||||||
document_id: int,
|
document_id: int,
|
||||||
suggestions: dict,
|
suggestions: dict,
|
||||||
classifier: Optional["DocumentClassifier"],
|
classifier: DocumentClassifier | None,
|
||||||
*,
|
*,
|
||||||
timeout=CACHE_50_MINUTES,
|
timeout=CACHE_50_MINUTES,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
@ -1,22 +1,22 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import pickle
|
import pickle
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import warnings
|
import warnings
|
||||||
from collections.abc import Iterator
|
|
||||||
from hashlib import sha256
|
from hashlib import sha256
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Iterator
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from numpy import ndarray
|
from numpy import ndarray
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core.cache import cache
|
from django.core.cache import cache
|
||||||
from sklearn.exceptions import InconsistentVersionWarning
|
|
||||||
|
|
||||||
from documents.caching import CACHE_50_MINUTES
|
from documents.caching import CACHE_50_MINUTES
|
||||||
from documents.caching import CLASSIFIER_HASH_KEY
|
from documents.caching import CLASSIFIER_HASH_KEY
|
||||||
@ -38,7 +38,7 @@ class ClassifierModelCorruptError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def load_classifier(*, raise_exception: bool = False) -> Optional["DocumentClassifier"]:
|
def load_classifier(*, raise_exception: bool = False) -> DocumentClassifier | None:
|
||||||
if not settings.MODEL_FILE.is_file():
|
if not settings.MODEL_FILE.is_file():
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Document classification model does not exist (yet), not "
|
"Document classification model does not exist (yet), not "
|
||||||
@ -103,6 +103,8 @@ class DocumentClassifier:
|
|||||||
self._stop_words = None
|
self._stop_words = None
|
||||||
|
|
||||||
def load(self) -> None:
|
def load(self) -> None:
|
||||||
|
from sklearn.exceptions import InconsistentVersionWarning
|
||||||
|
|
||||||
# Catch warnings for processing
|
# Catch warnings for processing
|
||||||
with warnings.catch_warnings(record=True) as w:
|
with warnings.catch_warnings(record=True) as w:
|
||||||
with Path(settings.MODEL_FILE).open("rb") as f:
|
with Path(settings.MODEL_FILE).open("rb") as f:
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import functools
|
import functools
|
||||||
import inspect
|
import inspect
|
||||||
import json
|
import json
|
||||||
import operator
|
import operator
|
||||||
from collections.abc import Callable
|
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from django.contrib.contenttypes.models import ContentType
|
from django.contrib.contenttypes.models import ContentType
|
||||||
from django.db.models import Case
|
from django.db.models import Case
|
||||||
@ -39,6 +41,9 @@ from documents.models import ShareLink
|
|||||||
from documents.models import StoragePath
|
from documents.models import StoragePath
|
||||||
from documents.models import Tag
|
from documents.models import Tag
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Callable
|
||||||
|
|
||||||
CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"]
|
CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"]
|
||||||
ID_KWARGS = ["in", "exact"]
|
ID_KWARGS = ["in", "exact"]
|
||||||
INT_KWARGS = ["exact", "gt", "gte", "lt", "lte", "isnull"]
|
INT_KWARGS = ["exact", "gt", "gte", "lt", "lte", "isnull"]
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import math
|
import math
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
@ -5,10 +7,10 @@ from contextlib import contextmanager
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from datetime import timezone
|
from datetime import timezone
|
||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
from typing import Literal
|
from typing import Literal
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.db.models import QuerySet
|
|
||||||
from django.utils import timezone as django_timezone
|
from django.utils import timezone as django_timezone
|
||||||
from guardian.shortcuts import get_users_with_perms
|
from guardian.shortcuts import get_users_with_perms
|
||||||
from whoosh import classify
|
from whoosh import classify
|
||||||
@ -32,10 +34,7 @@ from whoosh.qparser import QueryParser
|
|||||||
from whoosh.qparser.dateparse import DateParserPlugin
|
from whoosh.qparser.dateparse import DateParserPlugin
|
||||||
from whoosh.qparser.dateparse import English
|
from whoosh.qparser.dateparse import English
|
||||||
from whoosh.qparser.plugins import FieldsPlugin
|
from whoosh.qparser.plugins import FieldsPlugin
|
||||||
from whoosh.reading import IndexReader
|
|
||||||
from whoosh.scoring import TF_IDF
|
from whoosh.scoring import TF_IDF
|
||||||
from whoosh.searching import ResultsPage
|
|
||||||
from whoosh.searching import Searcher
|
|
||||||
from whoosh.util.times import timespan
|
from whoosh.util.times import timespan
|
||||||
from whoosh.writing import AsyncWriter
|
from whoosh.writing import AsyncWriter
|
||||||
|
|
||||||
@ -44,6 +43,12 @@ from documents.models import Document
|
|||||||
from documents.models import Note
|
from documents.models import Note
|
||||||
from documents.models import User
|
from documents.models import User
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from django.db.models import QuerySet
|
||||||
|
from whoosh.reading import IndexReader
|
||||||
|
from whoosh.searching import ResultsPage
|
||||||
|
from whoosh.searching import Searcher
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.index")
|
logger = logging.getLogger("paperless.index")
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from fnmatch import fnmatch
|
from fnmatch import fnmatch
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from documents.classifier import DocumentClassifier
|
|
||||||
from documents.data_models import ConsumableDocument
|
from documents.data_models import ConsumableDocument
|
||||||
from documents.data_models import DocumentSource
|
from documents.data_models import DocumentSource
|
||||||
from documents.models import Correspondent
|
from documents.models import Correspondent
|
||||||
@ -15,6 +17,9 @@ from documents.models import Workflow
|
|||||||
from documents.models import WorkflowTrigger
|
from documents.models import WorkflowTrigger
|
||||||
from documents.permissions import get_objects_for_user_owner_aware
|
from documents.permissions import get_objects_for_user_owner_aware
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from documents.classifier import DocumentClassifier
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.matching")
|
logger = logging.getLogger("paperless.matching")
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import datetime
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import os
|
import os
|
||||||
@ -6,10 +7,10 @@ import re
|
|||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
from collections.abc import Iterator
|
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from re import Match
|
from re import Match
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
@ -19,6 +20,10 @@ from documents.signals import document_consumer_declaration
|
|||||||
from documents.utils import copy_file_with_basic_stats
|
from documents.utils import copy_file_with_basic_stats
|
||||||
from documents.utils import run_subprocess
|
from documents.utils import run_subprocess
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import datetime
|
||||||
|
from collections.abc import Iterator
|
||||||
|
|
||||||
# This regular expression will try to find dates in the document at
|
# This regular expression will try to find dates in the document at
|
||||||
# hand and will match the following formats:
|
# hand and will match the following formats:
|
||||||
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
@ -106,7 +111,7 @@ def get_supported_file_extensions() -> set[str]:
|
|||||||
return extensions
|
return extensions
|
||||||
|
|
||||||
|
|
||||||
def get_parser_class_for_mime_type(mime_type: str) -> type["DocumentParser"] | None:
|
def get_parser_class_for_mime_type(mime_type: str) -> type[DocumentParser] | None:
|
||||||
"""
|
"""
|
||||||
Returns the best parser (by weight) for the given mimetype or
|
Returns the best parser (by weight) for the given mimetype or
|
||||||
None if no parser exists
|
None if no parser exists
|
||||||
|
@ -1,10 +1,12 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
import math
|
import math
|
||||||
import re
|
import re
|
||||||
import zoneinfo
|
import zoneinfo
|
||||||
from collections.abc import Iterable
|
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
import magic
|
import magic
|
||||||
from celery import states
|
from celery import states
|
||||||
@ -32,6 +34,7 @@ from rest_framework.fields import SerializerMethodField
|
|||||||
if settings.AUDIT_LOG_ENABLED:
|
if settings.AUDIT_LOG_ENABLED:
|
||||||
from auditlog.context import set_actor
|
from auditlog.context import set_actor
|
||||||
|
|
||||||
|
|
||||||
from documents import bulk_edit
|
from documents import bulk_edit
|
||||||
from documents.data_models import DocumentSource
|
from documents.data_models import DocumentSource
|
||||||
from documents.models import Correspondent
|
from documents.models import Correspondent
|
||||||
@ -60,6 +63,9 @@ from documents.templating.utils import convert_format_str_to_template_format
|
|||||||
from documents.validators import uri_validator
|
from documents.validators import uri_validator
|
||||||
from documents.validators import url_validator
|
from documents.validators import url_validator
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Iterable
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.serializers")
|
logger = logging.getLogger("paperless.serializers")
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from celery import shared_task
|
from celery import shared_task
|
||||||
@ -23,9 +25,6 @@ from guardian.shortcuts import remove_perm
|
|||||||
|
|
||||||
from documents import matching
|
from documents import matching
|
||||||
from documents.caching import clear_document_caches
|
from documents.caching import clear_document_caches
|
||||||
from documents.classifier import DocumentClassifier
|
|
||||||
from documents.data_models import ConsumableDocument
|
|
||||||
from documents.data_models import DocumentMetadataOverrides
|
|
||||||
from documents.file_handling import create_source_path_directory
|
from documents.file_handling import create_source_path_directory
|
||||||
from documents.file_handling import delete_empty_directories
|
from documents.file_handling import delete_empty_directories
|
||||||
from documents.file_handling import generate_unique_filename
|
from documents.file_handling import generate_unique_filename
|
||||||
@ -46,6 +45,13 @@ from documents.permissions import get_objects_for_user_owner_aware
|
|||||||
from documents.permissions import set_permissions_for_object
|
from documents.permissions import set_permissions_for_object
|
||||||
from documents.templating.workflows import parse_w_workflow_placeholders
|
from documents.templating.workflows import parse_w_workflow_placeholders
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from documents.classifier import DocumentClassifier
|
||||||
|
from documents.data_models import ConsumableDocument
|
||||||
|
from documents.data_models import DocumentMetadataOverrides
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.handlers")
|
logger = logging.getLogger("paperless.handlers")
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user