Merge parsers

This commit is contained in:
shamoon 2025-04-08 16:35:40 -07:00
parent ea5ec58967
commit 2cc14dd5c3
No known key found for this signature in database
28 changed files with 49 additions and 49 deletions

View File

@ -212,9 +212,6 @@ lint.per-file-ignores."src/documents/migrations/1012_fix_archive_files.py" = [
lint.per-file-ignores."src/documents/models.py" = [ lint.per-file-ignores."src/documents/models.py" = [
"SIM115", "SIM115",
] ]
lint.per-file-ignores."src/documents/parsers.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/signals/handlers.py" = [ lint.per-file-ignores."src/documents/signals/handlers.py" = [
"PTH", "PTH",
] # TODO Enable & remove ] # TODO Enable & remove
@ -254,6 +251,9 @@ lint.per-file-ignores."src/paperless/checks.py" = [
lint.per-file-ignores."src/paperless/file_handling.py" = [ lint.per-file-ignores."src/paperless/file_handling.py" = [
"PTH", "PTH",
] # TODO Enable & remove ] # TODO Enable & remove
lint.per-file-ignores."src/paperless/parsers.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless/settings.py" = [ lint.per-file-ignores."src/paperless/settings.py" = [
"PTH", "PTH",
] # TODO Enable & remove ] # TODO Enable & remove

View File

@ -16,12 +16,12 @@ from django.core.management.base import CommandError
from watchdog.events import FileSystemEventHandler from watchdog.events import FileSystemEventHandler
from watchdog.observers.polling import PollingObserver from watchdog.observers.polling import PollingObserver
from documents.parsers import is_file_ext_supported
from documents.tasks import consume_file from documents.tasks import consume_file
from paperless.data_models import ConsumableDocument from paperless.data_models import ConsumableDocument
from paperless.data_models import DocumentMetadataOverrides from paperless.data_models import DocumentMetadataOverrides
from paperless.data_models import DocumentSource from paperless.data_models import DocumentSource
from paperless.models import Tag from paperless.models import Tag
from paperless.parsers import is_file_ext_supported
try: try:
from inotifyrecursive import INotify from inotifyrecursive import INotify

View File

@ -22,7 +22,6 @@ from django.db.models.signals import post_save
from filelock import FileLock from filelock import FileLock
from documents.management.commands.mixins import CryptMixin from documents.management.commands.mixins import CryptMixin
from documents.parsers import run_convert
from documents.settings import EXPORTER_ARCHIVE_NAME from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
from documents.settings import EXPORTER_FILE_NAME from documents.settings import EXPORTER_FILE_NAME
@ -38,6 +37,7 @@ from paperless.models import Document
from paperless.models import DocumentType from paperless.models import DocumentType
from paperless.models import Note from paperless.models import Note
from paperless.models import Tag from paperless.models import Tag
from paperless.parsers import run_convert
from paperless.utils import copy_file_with_basic_stats from paperless.utils import copy_file_with_basic_stats
if settings.AUDIT_LOG_ENABLED: if settings.AUDIT_LOG_ENABLED:

View File

@ -8,8 +8,8 @@ from django.core.management.base import BaseCommand
from documents.management.commands.mixins import MultiProcessMixin from documents.management.commands.mixins import MultiProcessMixin
from documents.management.commands.mixins import ProgressBarMixin from documents.management.commands.mixins import ProgressBarMixin
from documents.parsers import get_parser_class_for_mime_type
from paperless.models import Document from paperless.models import Document
from paperless.parsers import get_parser_class_for_mime_type
def _process_document(doc_id): def _process_document(doc_id):

View File

@ -189,9 +189,9 @@ def parse_wrapper(parser, path, mime_type, file_name):
def create_archive_version(doc, retry_count=3): def create_archive_version(doc, retry_count=3):
from documents.parsers import DocumentParser from paperless.parsers import DocumentParser
from documents.parsers import ParseError from paperless.parsers import ParseError
from documents.parsers import get_parser_class_for_mime_type from paperless.parsers import get_parser_class_for_mime_type
logger.info(f"Regenerating archive document for document ID:{doc.id}") logger.info(f"Regenerating archive document for document ID:{doc.id}")
parser_class = get_parser_class_for_mime_type(doc.mime_type) parser_class = get_parser_class_for_mime_type(doc.mime_type)
@ -271,7 +271,7 @@ def move_old_to_new_locations(apps, schema_editor):
# check that we can regenerate affected archive versions # check that we can regenerate affected archive versions
for doc_id in affected_document_ids: for doc_id in affected_document_ids:
from documents.parsers import get_parser_class_for_mime_type from paperless.parsers import get_parser_class_for_mime_type
doc = Document.objects.get(id=doc_id) doc = Document.objects.get(id=doc_id)
parser_class = get_parser_class_for_mime_type(doc.mime_type) parser_class = get_parser_class_for_mime_type(doc.mime_type)

View File

@ -9,7 +9,7 @@ from pathlib import Path
from django.conf import settings from django.conf import settings
from django.db import migrations from django.db import migrations
from documents.parsers import run_convert from paperless.parsers import run_convert
logger = logging.getLogger("paperless.migrations") logger = logging.getLogger("paperless.migrations")

View File

@ -10,7 +10,7 @@ import gnupg
from django.conf import settings from django.conf import settings
from django.db import migrations from django.db import migrations
from documents.parsers import run_convert from paperless.parsers import run_convert
logger = logging.getLogger("paperless.migrations") logger = logging.getLogger("paperless.migrations")

View File

@ -21,8 +21,6 @@ from whoosh.writing import AsyncWriter
from documents import sanity_checker from documents import sanity_checker
from documents.caching import clear_document_caches from documents.caching import clear_document_caches
from documents.parsers import DocumentParser
from documents.parsers import get_parser_class_for_mime_type
from documents.plugins.base import ConsumeTaskPlugin from documents.plugins.base import ConsumeTaskPlugin
from documents.plugins.base import ProgressManager from documents.plugins.base import ProgressManager
from documents.plugins.base import StopConsumeTaskError from documents.plugins.base import StopConsumeTaskError
@ -52,6 +50,8 @@ from paperless.models import Tag
from paperless.models import Workflow from paperless.models import Workflow
from paperless.models import WorkflowRun from paperless.models import WorkflowRun
from paperless.models import WorkflowTrigger from paperless.models import WorkflowTrigger
from paperless.parsers import DocumentParser
from paperless.parsers import get_parser_class_for_mime_type
if settings.AUDIT_LOG_ENABLED: if settings.AUDIT_LOG_ENABLED:
from auditlog.models import LogEntry from auditlog.models import LogEntry

View File

@ -17,8 +17,6 @@ from django.test import override_settings
from django.utils import timezone from django.utils import timezone
from guardian.core import ObjectPermissionChecker from guardian.core import ObjectPermissionChecker
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.plugins.helpers import ProgressStatusOptions from documents.plugins.helpers import ProgressStatusOptions
from documents.tasks import sanity_check from documents.tasks import sanity_check
from documents.tests.utils import DirectoriesMixin from documents.tests.utils import DirectoriesMixin
@ -33,6 +31,8 @@ from paperless.models import Document
from paperless.models import DocumentType from paperless.models import DocumentType
from paperless.models import StoragePath from paperless.models import StoragePath
from paperless.models import Tag from paperless.models import Tag
from paperless.parsers import DocumentParser
from paperless.parsers import ParseError
from paperless_mail.models import MailRule from paperless_mail.models import MailRule
from paperless_mail.parsers import MailDocumentParser from paperless_mail.parsers import MailDocumentParser

View File

@ -3,8 +3,8 @@ from zoneinfo import ZoneInfo
from pytest_django.fixtures import SettingsWrapper from pytest_django.fixtures import SettingsWrapper
from documents.parsers import parse_date from paperless.parsers import parse_date
from documents.parsers import parse_date_generator from paperless.parsers import parse_date_generator
class TestDate: class TestDate:

View File

@ -6,10 +6,10 @@ from django.core.management import call_command
from django.test import TestCase from django.test import TestCase
from documents.management.commands.document_thumbnails import _process_document from documents.management.commands.document_thumbnails import _process_document
from documents.parsers import get_default_thumbnail
from documents.tests.utils import DirectoriesMixin from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin from documents.tests.utils import FileSystemAssertsMixin
from paperless.models import Document from paperless.models import Document
from paperless.parsers import get_default_thumbnail
class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase): class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):

View File

@ -8,10 +8,10 @@ from unittest import mock
from django.conf import settings from django.conf import settings
from django.test import override_settings from django.test import override_settings
from documents.parsers import ParseError
from documents.tests.utils import DirectoriesMixin from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin from documents.tests.utils import FileSystemAssertsMixin
from documents.tests.utils import TestMigrations from documents.tests.utils import TestMigrations
from paperless.parsers import ParseError
STORAGE_TYPE_GPG = "gpg" STORAGE_TYPE_GPG = "gpg"

View File

@ -4,9 +4,9 @@ import shutil
from django.conf import settings from django.conf import settings
from django.test import override_settings from django.test import override_settings
from documents.parsers import get_default_file_extension
from documents.tests.utils import DirectoriesMixin from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import TestMigrations from documents.tests.utils import TestMigrations
from paperless.parsers import get_default_file_extension
STORAGE_TYPE_UNENCRYPTED = "unencrypted" STORAGE_TYPE_UNENCRYPTED = "unencrypted"
STORAGE_TYPE_GPG = "gpg" STORAGE_TYPE_GPG = "gpg"

View File

@ -5,10 +5,10 @@ from django.apps import apps
from django.test import TestCase from django.test import TestCase
from django.test import override_settings from django.test import override_settings
from documents.parsers import get_default_file_extension from paperless.parsers import get_default_file_extension
from documents.parsers import get_parser_class_for_mime_type from paperless.parsers import get_parser_class_for_mime_type
from documents.parsers import get_supported_file_extensions from paperless.parsers import get_supported_file_extensions
from documents.parsers import is_file_ext_supported from paperless.parsers import is_file_ext_supported
from paperless_tesseract.parsers import RasterisedDocumentParser from paperless_tesseract.parsers import RasterisedDocumentParser
from paperless_text.parsers import TextDocumentParser from paperless_text.parsers import TextDocumentParser
from paperless_tika.parsers import TikaDocumentParser from paperless_tika.parsers import TikaDocumentParser

View File

@ -20,12 +20,12 @@ from django.db.migrations.executor import MigrationExecutor
from django.test import TransactionTestCase from django.test import TransactionTestCase
from django.test import override_settings from django.test import override_settings
from documents.parsers import ParseError
from documents.plugins.helpers import ProgressStatusOptions from documents.plugins.helpers import ProgressStatusOptions
from paperless.consumer import ConsumerPlugin from paperless.consumer import ConsumerPlugin
from paperless.data_models import ConsumableDocument from paperless.data_models import ConsumableDocument
from paperless.data_models import DocumentMetadataOverrides from paperless.data_models import DocumentMetadataOverrides
from paperless.data_models import DocumentSource from paperless.data_models import DocumentSource
from paperless.parsers import ParseError
def setup_directories(): def setup_directories():

View File

@ -15,10 +15,6 @@ from django.utils import timezone
from filelock import FileLock from filelock import FileLock
from rest_framework.reverse import reverse from rest_framework.reverse import reverse
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import get_parser_class_for_mime_type
from documents.parsers import parse_date
from documents.plugins.base import AlwaysRunPluginMixin from documents.plugins.base import AlwaysRunPluginMixin
from documents.plugins.base import ConsumeTaskPlugin from documents.plugins.base import ConsumeTaskPlugin
from documents.plugins.base import NoCleanupPluginMixin from documents.plugins.base import NoCleanupPluginMixin
@ -43,6 +39,10 @@ from paperless.models import DocumentType
from paperless.models import StoragePath from paperless.models import StoragePath
from paperless.models import Tag from paperless.models import Tag
from paperless.models import WorkflowTrigger from paperless.models import WorkflowTrigger
from paperless.parsers import DocumentParser
from paperless.parsers import ParseError
from paperless.parsers import get_parser_class_for_mime_type
from paperless.parsers import parse_date
from paperless.permissions import set_permissions_for_object from paperless.permissions import set_permissions_for_object
from paperless.utils import copy_basic_file_stats from paperless.utils import copy_basic_file_stats
from paperless.utils import copy_file_with_basic_stats from paperless.utils import copy_file_with_basic_stats

View File

@ -23,8 +23,8 @@ from django.db.models.functions import Cast
from django.db.models.functions import Substr from django.db.models.functions import Substr
from django_softdelete.models import SoftDeleteModel from django_softdelete.models import SoftDeleteModel
from documents.parsers import get_default_file_extension
from paperless.data_models import DocumentSource from paperless.data_models import DocumentSource
from paperless.parsers import get_default_file_extension
DEFAULT_SINGLETON_INSTANCE_ID = 1 DEFAULT_SINGLETON_INSTANCE_ID = 1

View File

@ -35,7 +35,6 @@ if settings.AUDIT_LOG_ENABLED:
from auditlog.context import set_actor from auditlog.context import set_actor
from documents.parsers import is_mime_type_supported
from documents.templating.filepath import validate_filepath_template_and_render from documents.templating.filepath import validate_filepath_template_and_render
from documents.templating.utils import convert_format_str_to_template_format from documents.templating.utils import convert_format_str_to_template_format
from paperless import bulk_edit from paperless import bulk_edit
@ -59,6 +58,7 @@ from paperless.models import WorkflowAction
from paperless.models import WorkflowActionEmail from paperless.models import WorkflowActionEmail
from paperless.models import WorkflowActionWebhook from paperless.models import WorkflowActionWebhook
from paperless.models import WorkflowTrigger from paperless.models import WorkflowTrigger
from paperless.parsers import is_mime_type_supported
from paperless.permissions import get_groups_with_only_permission from paperless.permissions import get_groups_with_only_permission
from paperless.permissions import set_permissions_for_object from paperless.permissions import set_permissions_for_object
from paperless.validators import uri_validator from paperless.validators import uri_validator

View File

@ -110,8 +110,6 @@ from documents.filters import ShareLinkFilterSet
from documents.filters import StoragePathFilterSet from documents.filters import StoragePathFilterSet
from documents.filters import TagFilterSet from documents.filters import TagFilterSet
from documents.mail import send_email from documents.mail import send_email
from documents.parsers import get_parser_class_for_mime_type
from documents.parsers import parse_date_generator
from documents.schema import generate_object_with_permissions_schema from documents.schema import generate_object_with_permissions_schema
from documents.signals import document_updated from documents.signals import document_updated
from documents.tasks import consume_file from documents.tasks import consume_file
@ -159,6 +157,8 @@ from paperless.models import UiSettings
from paperless.models import Workflow from paperless.models import Workflow
from paperless.models import WorkflowAction from paperless.models import WorkflowAction
from paperless.models import WorkflowTrigger from paperless.models import WorkflowTrigger
from paperless.parsers import get_parser_class_for_mime_type
from paperless.parsers import parse_date_generator
from paperless.permissions import PaperlessAdminPermissions from paperless.permissions import PaperlessAdminPermissions
from paperless.permissions import PaperlessNotePermissions from paperless.permissions import PaperlessNotePermissions
from paperless.permissions import PaperlessObjectPermissions from paperless.permissions import PaperlessObjectPermissions

View File

@ -33,13 +33,13 @@ from imap_tools import errors
from imap_tools.mailbox import MailBoxTls from imap_tools.mailbox import MailBoxTls
from imap_tools.query import LogicOperator from imap_tools.query import LogicOperator
from documents.parsers import is_mime_type_supported
from documents.tasks import consume_file from documents.tasks import consume_file
from paperless.data_models import ConsumableDocument from paperless.data_models import ConsumableDocument
from paperless.data_models import DocumentMetadataOverrides from paperless.data_models import DocumentMetadataOverrides
from paperless.data_models import DocumentSource from paperless.data_models import DocumentSource
from paperless.loggers import LoggingMixin from paperless.loggers import LoggingMixin
from paperless.models import Correspondent from paperless.models import Correspondent
from paperless.parsers import is_mime_type_supported
from paperless_mail.models import MailAccount from paperless_mail.models import MailAccount
from paperless_mail.models import MailRule from paperless_mail.models import MailRule
from paperless_mail.models import ProcessedMail from paperless_mail.models import ProcessedMail

View File

@ -18,10 +18,10 @@ from imap_tools import MailAttachment
from imap_tools import MailMessage from imap_tools import MailMessage
from tika_client import TikaClient from tika_client import TikaClient
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf
from paperless.models import OutputTypeChoices from paperless.models import OutputTypeChoices
from paperless.parsers import DocumentParser
from paperless.parsers import ParseError
from paperless.parsers import make_thumbnail_from_pdf
from paperless_mail.models import MailRule from paperless_mail.models import MailRule

View File

@ -10,7 +10,7 @@ from pytest_django.fixtures import SettingsWrapper
from pytest_httpx import HTTPXMock from pytest_httpx import HTTPXMock
from pytest_mock import MockerFixture from pytest_mock import MockerFixture
from documents.parsers import ParseError from paperless.parsers import ParseError
from paperless_mail.parsers import MailDocumentParser from paperless_mail.parsers import MailDocumentParser

View File

@ -7,13 +7,13 @@ from typing import TYPE_CHECKING
from django.conf import settings from django.conf import settings
from PIL import Image from PIL import Image
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf
from paperless.config import OcrConfig from paperless.config import OcrConfig
from paperless.models import ArchiveFileChoices from paperless.models import ArchiveFileChoices
from paperless.models import CleanChoices from paperless.models import CleanChoices
from paperless.models import ModeChoices from paperless.models import ModeChoices
from paperless.parsers import DocumentParser
from paperless.parsers import ParseError
from paperless.parsers import make_thumbnail_from_pdf
from paperless.utils import maybe_override_pixel_limit from paperless.utils import maybe_override_pixel_limit
from paperless.utils import run_subprocess from paperless.utils import run_subprocess

View File

@ -9,10 +9,10 @@ from django.test import TestCase
from django.test import override_settings from django.test import override_settings
from ocrmypdf import SubprocessOutputError from ocrmypdf import SubprocessOutputError
from documents.parsers import ParseError
from documents.parsers import run_convert
from documents.tests.utils import DirectoriesMixin from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin from documents.tests.utils import FileSystemAssertsMixin
from paperless.parsers import ParseError
from paperless.parsers import run_convert
from paperless_tesseract.parsers import RasterisedDocumentParser from paperless_tesseract.parsers import RasterisedDocumentParser
from paperless_tesseract.parsers import post_process_text from paperless_tesseract.parsers import post_process_text

View File

@ -5,7 +5,7 @@ from PIL import Image
from PIL import ImageDraw from PIL import ImageDraw
from PIL import ImageFont from PIL import ImageFont
from documents.parsers import DocumentParser from paperless.parsers import DocumentParser
class TextDocumentParser(DocumentParser): class TextDocumentParser(DocumentParser):

View File

@ -7,11 +7,11 @@ from gotenberg_client import GotenbergClient
from gotenberg_client.options import PdfAFormat from gotenberg_client.options import PdfAFormat
from tika_client import TikaClient from tika_client import TikaClient
from documents.parsers import DocumentParser
from documents.parsers import ParseError
from documents.parsers import make_thumbnail_from_pdf
from paperless.config import OutputTypeConfig from paperless.config import OutputTypeConfig
from paperless.models import OutputTypeChoices from paperless.models import OutputTypeChoices
from paperless.parsers import DocumentParser
from paperless.parsers import ParseError
from paperless.parsers import make_thumbnail_from_pdf
class TikaDocumentParser(DocumentParser): class TikaDocumentParser(DocumentParser):

View File

@ -8,7 +8,7 @@ from httpx import codes
from pytest_django.fixtures import SettingsWrapper from pytest_django.fixtures import SettingsWrapper
from pytest_httpx import HTTPXMock from pytest_httpx import HTTPXMock
from documents.parsers import ParseError from paperless.parsers import ParseError
from paperless_tika.parsers import TikaDocumentParser from paperless_tika.parsers import TikaDocumentParser