Merge parsers

This commit is contained in:
shamoon 2025-04-08 16:35:40 -07:00
parent ea5ec58967
commit 2cc14dd5c3
No known key found for this signature in database
28 changed files with 49 additions and 49 deletions

View File

@ -212,9 +212,6 @@ lint.per-file-ignores."src/documents/migrations/1012_fix_archive_files.py" = [
lint.per-file-ignores."src/documents/models.py" = [
"SIM115",
]
lint.per-file-ignores."src/documents/parsers.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/signals/handlers.py" = [
"PTH",
] # TODO Enable & remove
@ -254,6 +251,9 @@ lint.per-file-ignores."src/paperless/checks.py" = [
lint.per-file-ignores."src/paperless/file_handling.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless/parsers.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless/settings.py" = [
"PTH",
] # TODO Enable & remove

View File

@ -16,12 +16,12 @@ from django.core.management.base import CommandError
from watchdog.events import FileSystemEventHandler
from watchdog.observers.polling import PollingObserver
from documents.parsers import is_file_ext_supported
from documents.tasks import consume_file
from paperless.data_models import ConsumableDocument
from paperless.data_models import DocumentMetadataOverrides
from paperless.data_models import DocumentSource
from paperless.models import Tag
from paperless.parsers import is_file_ext_supported
try:
from inotifyrecursive import INotify

View File

@ -22,7 +22,6 @@ from django.db.models.signals import post_save
from filelock import FileLock
from documents.management.commands.mixins import CryptMixin
from documents.parsers import run_convert
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
from documents.settings import EXPORTER_FILE_NAME
@ -38,6 +37,7 @@ from paperless.models import Document
from paperless.models import DocumentType
from paperless.models import Note
from paperless.models import Tag
from paperless.parsers import run_convert
from paperless.utils import copy_file_with_basic_stats
if settings.AUDIT_LOG_ENABLED:

View File

@ -8,8 +8,8 @@ from django.core.management.base import BaseCommand
from documents.management.commands.mixins import MultiProcessMixin
from documents.management.commands.mixins import ProgressBarMixin
from documents.parsers import get_parser_class_for_mime_type
from paperless.models import Document
from paperless.parsers import get_parser_class_for_mime_type
def _process_document(doc_id):

View File

@ -189,9 +189,9 @@ def parse_wrapper(parser, path, mime_type, file_name):
def create_archive_version(doc, retry_count=3):
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import get_parser_class_for_mime_type
from paperless.parsers import DocumentParser
from paperless.parsers import ParseError
from paperless.parsers import get_parser_class_for_mime_type
logger.info(f"Regenerating archive document for document ID:{doc.id}")
parser_class = get_parser_class_for_mime_type(doc.mime_type)
@ -271,7 +271,7 @@ def move_old_to_new_locations(apps, schema_editor):
# check that we can regenerate affected archive versions
for doc_id in affected_document_ids:
from documents.parsers import get_parser_class_for_mime_type
from paperless.parsers import get_parser_class_for_mime_type
doc = Document.objects.get(id=doc_id)
parser_class = get_parser_class_for_mime_type(doc.mime_type)

View File

@ -9,7 +9,7 @@ from pathlib import Path
from django.conf import settings
from django.db import migrations
from documents.parsers import run_convert
from paperless.parsers import run_convert
logger = logging.getLogger("paperless.migrations")

View File

@ -10,7 +10,7 @@ import gnupg
from django.conf import settings
from django.db import migrations
from documents.parsers import run_convert
from paperless.parsers import run_convert
logger = logging.getLogger("paperless.migrations")

View File

@ -21,8 +21,6 @@ from whoosh.writing import AsyncWriter
from documents import sanity_checker
from documents.caching import clear_document_caches
from documents.parsers import DocumentParser
from documents.parsers import get_parser_class_for_mime_type
from documents.plugins.base import ConsumeTaskPlugin
from documents.plugins.base import ProgressManager
from documents.plugins.base import StopConsumeTaskError
@ -52,6 +50,8 @@ from paperless.models import Tag
from paperless.models import Workflow
from paperless.models import WorkflowRun
from paperless.models import WorkflowTrigger
from paperless.parsers import DocumentParser
from paperless.parsers import get_parser_class_for_mime_type
if settings.AUDIT_LOG_ENABLED:
from auditlog.models import LogEntry

View File

@ -17,8 +17,6 @@ from django.test import override_settings
from django.utils import timezone
from guardian.core import ObjectPermissionChecker
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.plugins.helpers import ProgressStatusOptions
from documents.tasks import sanity_check
from documents.tests.utils import DirectoriesMixin
@ -33,6 +31,8 @@ from paperless.models import Document
from paperless.models import DocumentType
from paperless.models import StoragePath
from paperless.models import Tag
from paperless.parsers import DocumentParser
from paperless.parsers import ParseError
from paperless_mail.models import MailRule
from paperless_mail.parsers import MailDocumentParser

View File

@ -3,8 +3,8 @@ from zoneinfo import ZoneInfo
from pytest_django.fixtures import SettingsWrapper
from documents.parsers import parse_date
from documents.parsers import parse_date_generator
from paperless.parsers import parse_date
from paperless.parsers import parse_date_generator
class TestDate:

View File

@ -6,10 +6,10 @@ from django.core.management import call_command
from django.test import TestCase
from documents.management.commands.document_thumbnails import _process_document
from documents.parsers import get_default_thumbnail
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from paperless.models import Document
from paperless.parsers import get_default_thumbnail
class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):

View File

@ -8,10 +8,10 @@ from unittest import mock
from django.conf import settings
from django.test import override_settings
from documents.parsers import ParseError
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from documents.tests.utils import TestMigrations
from paperless.parsers import ParseError
STORAGE_TYPE_GPG = "gpg"

View File

@ -4,9 +4,9 @@ import shutil
from django.conf import settings
from django.test import override_settings
from documents.parsers import get_default_file_extension
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import TestMigrations
from paperless.parsers import get_default_file_extension
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
STORAGE_TYPE_GPG = "gpg"

View File

@ -5,10 +5,10 @@ from django.apps import apps
from django.test import TestCase
from django.test import override_settings
from documents.parsers import get_default_file_extension
from documents.parsers import get_parser_class_for_mime_type
from documents.parsers import get_supported_file_extensions
from documents.parsers import is_file_ext_supported
from paperless.parsers import get_default_file_extension
from paperless.parsers import get_parser_class_for_mime_type
from paperless.parsers import get_supported_file_extensions
from paperless.parsers import is_file_ext_supported
from paperless_tesseract.parsers import RasterisedDocumentParser
from paperless_text.parsers import TextDocumentParser
from paperless_tika.parsers import TikaDocumentParser

View File

@ -20,12 +20,12 @@ from django.db.migrations.executor import MigrationExecutor
from django.test import TransactionTestCase
from django.test import override_settings
from documents.parsers import ParseError
from documents.plugins.helpers import ProgressStatusOptions
from paperless.consumer import ConsumerPlugin
from paperless.data_models import ConsumableDocument
from paperless.data_models import DocumentMetadataOverrides
from paperless.data_models import DocumentSource
from paperless.parsers import ParseError
def setup_directories():

View File

@ -15,10 +15,6 @@ from django.utils import timezone
from filelock import FileLock
from rest_framework.reverse import reverse
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import get_parser_class_for_mime_type
from documents.parsers import parse_date
from documents.plugins.base import AlwaysRunPluginMixin
from documents.plugins.base import ConsumeTaskPlugin
from documents.plugins.base import NoCleanupPluginMixin
@ -43,6 +39,10 @@ from paperless.models import DocumentType
from paperless.models import StoragePath
from paperless.models import Tag
from paperless.models import WorkflowTrigger
from paperless.parsers import DocumentParser
from paperless.parsers import ParseError
from paperless.parsers import get_parser_class_for_mime_type
from paperless.parsers import parse_date
from paperless.permissions import set_permissions_for_object
from paperless.utils import copy_basic_file_stats
from paperless.utils import copy_file_with_basic_stats

View File

@ -23,8 +23,8 @@ from django.db.models.functions import Cast
from django.db.models.functions import Substr
from django_softdelete.models import SoftDeleteModel
from documents.parsers import get_default_file_extension
from paperless.data_models import DocumentSource
from paperless.parsers import get_default_file_extension
DEFAULT_SINGLETON_INSTANCE_ID = 1

View File

@ -35,7 +35,6 @@ if settings.AUDIT_LOG_ENABLED:
from auditlog.context import set_actor
from documents.parsers import is_mime_type_supported
from documents.templating.filepath import validate_filepath_template_and_render
from documents.templating.utils import convert_format_str_to_template_format
from paperless import bulk_edit
@ -59,6 +58,7 @@ from paperless.models import WorkflowAction
from paperless.models import WorkflowActionEmail
from paperless.models import WorkflowActionWebhook
from paperless.models import WorkflowTrigger
from paperless.parsers import is_mime_type_supported
from paperless.permissions import get_groups_with_only_permission
from paperless.permissions import set_permissions_for_object
from paperless.validators import uri_validator

View File

@ -110,8 +110,6 @@ from documents.filters import ShareLinkFilterSet
from documents.filters import StoragePathFilterSet
from documents.filters import TagFilterSet
from documents.mail import send_email
from documents.parsers import get_parser_class_for_mime_type
from documents.parsers import parse_date_generator
from documents.schema import generate_object_with_permissions_schema
from documents.signals import document_updated
from documents.tasks import consume_file
@ -159,6 +157,8 @@ from paperless.models import UiSettings
from paperless.models import Workflow
from paperless.models import WorkflowAction
from paperless.models import WorkflowTrigger
from paperless.parsers import get_parser_class_for_mime_type
from paperless.parsers import parse_date_generator
from paperless.permissions import PaperlessAdminPermissions
from paperless.permissions import PaperlessNotePermissions
from paperless.permissions import PaperlessObjectPermissions

View File

@ -33,13 +33,13 @@ from imap_tools import errors
from imap_tools.mailbox import MailBoxTls
from imap_tools.query import LogicOperator
from documents.parsers import is_mime_type_supported
from documents.tasks import consume_file
from paperless.data_models import ConsumableDocument
from paperless.data_models import DocumentMetadataOverrides
from paperless.data_models import DocumentSource
from paperless.loggers import LoggingMixin
from paperless.models import Correspondent
from paperless.parsers import is_mime_type_supported
from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule
from paperless_mail.models import ProcessedMail

View File

@ -18,10 +18,10 @@ from imap_tools import MailAttachment
from imap_tools import MailMessage
from tika_client import TikaClient
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf
from paperless.models import OutputTypeChoices
from paperless.parsers import DocumentParser
from paperless.parsers import ParseError
from paperless.parsers import make_thumbnail_from_pdf
from paperless_mail.models import MailRule

View File

@ -10,7 +10,7 @@ from pytest_django.fixtures import SettingsWrapper
from pytest_httpx import HTTPXMock
from pytest_mock import MockerFixture
from documents.parsers import ParseError
from paperless.parsers import ParseError
from paperless_mail.parsers import MailDocumentParser

View File

@ -7,13 +7,13 @@ from typing import TYPE_CHECKING
from django.conf import settings
from PIL import Image
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf
from paperless.config import OcrConfig
from paperless.models import ArchiveFileChoices
from paperless.models import CleanChoices
from paperless.models import ModeChoices
from paperless.parsers import DocumentParser
from paperless.parsers import ParseError
from paperless.parsers import make_thumbnail_from_pdf
from paperless.utils import maybe_override_pixel_limit
from paperless.utils import run_subprocess

View File

@ -9,10 +9,10 @@ from django.test import TestCase
from django.test import override_settings
from ocrmypdf import SubprocessOutputError
from documents.parsers import ParseError
from documents.parsers import run_convert
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin
from paperless.parsers import ParseError
from paperless.parsers import run_convert
from paperless_tesseract.parsers import RasterisedDocumentParser
from paperless_tesseract.parsers import post_process_text

View File

@ -5,7 +5,7 @@ from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
from documents.parsers import DocumentParser
from paperless.parsers import DocumentParser
class TextDocumentParser(DocumentParser):

View File

@ -7,11 +7,11 @@ from gotenberg_client import GotenbergClient
from gotenberg_client.options import PdfAFormat
from tika_client import TikaClient
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf
from paperless.config import OutputTypeConfig
from paperless.models import OutputTypeChoices
from paperless.parsers import DocumentParser
from paperless.parsers import ParseError
from paperless.parsers import make_thumbnail_from_pdf
class TikaDocumentParser(DocumentParser):

View File

@ -8,7 +8,7 @@ from httpx import codes
from pytest_django.fixtures import SettingsWrapper
from pytest_httpx import HTTPXMock
from documents.parsers import ParseError
from paperless.parsers import ParseError
from paperless_tika.parsers import TikaDocumentParser