Chore: switch from os.path to pathlib.Path (#9060)

This commit is contained in:
Sebastian Steinbeißer 2025-03-05 22:06:01 +01:00 committed by GitHub
parent aaaa6c1393
commit 76d363f22d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 89 additions and 150 deletions

View File

@ -209,37 +209,18 @@ lint.per-file-ignores."src/documents/management/commands/document_consumer.py" =
lint.per-file-ignores."src/documents/management/commands/document_exporter.py" = [ lint.per-file-ignores."src/documents/management/commands/document_exporter.py" = [
"PTH", "PTH",
] # TODO Enable & remove ] # TODO Enable & remove
lint.per-file-ignores."src/documents/migrations/0012_auto_20160305_0040.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/migrations/0014_document_checksum.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/migrations/1003_mime_types.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/migrations/1012_fix_archive_files.py" = [ lint.per-file-ignores."src/documents/migrations/1012_fix_archive_files.py" = [
"PTH", "PTH",
] # TODO Enable & remove ] # TODO Enable & remove
lint.per-file-ignores."src/documents/models.py" = [ lint.per-file-ignores."src/documents/models.py" = [
"PTH",
"SIM115", "SIM115",
] # TODO PTH Enable & remove ]
lint.per-file-ignores."src/documents/parsers.py" = [ lint.per-file-ignores."src/documents/parsers.py" = [
"PTH", "PTH",
] # TODO Enable & remove ] # TODO Enable & remove
lint.per-file-ignores."src/documents/signals/handlers.py" = [ lint.per-file-ignores."src/documents/signals/handlers.py" = [
"PTH", "PTH",
] # TODO Enable & remove ] # TODO Enable & remove
lint.per-file-ignores."src/documents/tasks.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_api_app_config.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_classifier.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_consumer.py" = [ lint.per-file-ignores."src/documents/tests/test_consumer.py" = [
"PTH", "PTH",
] # TODO Enable & remove ] # TODO Enable & remove
@ -255,9 +236,6 @@ lint.per-file-ignores."src/documents/tests/test_management_consumer.py" = [
lint.per-file-ignores."src/documents/tests/test_management_exporter.py" = [ lint.per-file-ignores."src/documents/tests/test_management_exporter.py" = [
"PTH", "PTH",
] # TODO Enable & remove ] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_management_thumbnails.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_migration_archive_files.py" = [ lint.per-file-ignores."src/documents/tests/test_migration_archive_files.py" = [
"PTH", "PTH",
] # TODO Enable & remove ] # TODO Enable & remove
@ -270,12 +248,6 @@ lint.per-file-ignores."src/documents/tests/test_migration_mime_type.py" = [
lint.per-file-ignores."src/documents/tests/test_sanity_check.py" = [ lint.per-file-ignores."src/documents/tests/test_sanity_check.py" = [
"PTH", "PTH",
] # TODO Enable & remove ] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_tasks.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_views.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/views.py" = [ lint.per-file-ignores."src/documents/views.py" = [
"PTH", "PTH",
] # TODO Enable & remove ] # TODO Enable & remove
@ -285,34 +257,16 @@ lint.per-file-ignores."src/paperless/checks.py" = [
lint.per-file-ignores."src/paperless/settings.py" = [ lint.per-file-ignores."src/paperless/settings.py" = [
"PTH", "PTH",
] # TODO Enable & remove ] # TODO Enable & remove
lint.per-file-ignores."src/paperless/tests/test_checks.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless/urls.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless/views.py" = [ lint.per-file-ignores."src/paperless/views.py" = [
"PTH", "PTH",
] # TODO Enable & remove ] # TODO Enable & remove
lint.per-file-ignores."src/paperless_mail/mail.py" = [ lint.per-file-ignores."src/paperless_mail/mail.py" = [
"PTH", "PTH",
] # TODO Enable & remove ] # TODO Enable & remove
lint.per-file-ignores."src/paperless_mail/preprocessor.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless_tesseract/parsers.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [ lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
"PTH", "PTH",
"RUF001", "RUF001",
] # TODO PTH Enable & remove ] # TODO PTH Enable & remove
lint.per-file-ignores."src/paperless_tika/tests/test_live_tika.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless_tika/tests/test_tika_parser.py" = [
"PTH",
] # TODO Enable & remove
lint.isort.force-single-line = true lint.isort.force-single-line = true
[tool.pytest.ini_options] [tool.pytest.ini_options]

View File

@ -5,6 +5,7 @@ import re
import shutil import shutil
import subprocess import subprocess
import tempfile import tempfile
from pathlib import Path
import gnupg import gnupg
from django.conf import settings from django.conf import settings
@ -34,16 +35,16 @@ class GnuPG:
def move_documents_and_create_thumbnails(apps, schema_editor): def move_documents_and_create_thumbnails(apps, schema_editor):
os.makedirs( (Path(settings.MEDIA_ROOT) / "documents" / "originals").mkdir(
os.path.join(settings.MEDIA_ROOT, "documents", "originals"), parents=True,
exist_ok=True, exist_ok=True,
) )
os.makedirs( (Path(settings.MEDIA_ROOT) / "documents" / "thumbnails").mkdir(
os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails"), parents=True,
exist_ok=True, exist_ok=True,
) )
documents = os.listdir(os.path.join(settings.MEDIA_ROOT, "documents")) documents: list[str] = os.listdir(Path(settings.MEDIA_ROOT) / "documents")
if set(documents) == {"originals", "thumbnails"}: if set(documents) == {"originals", "thumbnails"}:
return return
@ -60,10 +61,7 @@ def move_documents_and_create_thumbnails(apps, schema_editor):
), ),
) )
try: Path(settings.SCRATCH_DIR).mkdir(parents=True, exist_ok=True)
os.makedirs(settings.SCRATCH_DIR)
except FileExistsError:
pass
for f in sorted(documents): for f in sorted(documents):
if not f.endswith("gpg"): if not f.endswith("gpg"):
@ -77,14 +75,13 @@ def move_documents_and_create_thumbnails(apps, schema_editor):
), ),
) )
thumb_temp = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR) thumb_temp: str = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
orig_temp = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR) orig_temp: str = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
orig_source = os.path.join(settings.MEDIA_ROOT, "documents", f) orig_source: Path = Path(settings.MEDIA_ROOT) / "documents" / f
orig_target = os.path.join(orig_temp, f.replace(".gpg", "")) orig_target: Path = Path(orig_temp) / f.replace(".gpg", "")
with open(orig_source, "rb") as encrypted: with orig_source.open("rb") as encrypted, orig_target.open("wb") as unencrypted:
with open(orig_target, "wb") as unencrypted:
unencrypted.write(GnuPG.decrypted(encrypted)) unencrypted.write(GnuPG.decrypted(encrypted))
subprocess.Popen( subprocess.Popen(
@ -95,27 +92,29 @@ def move_documents_and_create_thumbnails(apps, schema_editor):
"-alpha", "-alpha",
"remove", "remove",
orig_target, orig_target,
os.path.join(thumb_temp, "convert-%04d.png"), Path(thumb_temp) / "convert-%04d.png",
), ),
).wait() ).wait()
thumb_source = os.path.join(thumb_temp, "convert-0000.png") thumb_source: Path = Path(thumb_temp) / "convert-0000.png"
thumb_target = os.path.join( thumb_target: Path = (
settings.MEDIA_ROOT, Path(settings.MEDIA_ROOT)
"documents", / "documents"
"thumbnails", / "thumbnails"
re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f), / re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f)
) )
with open(thumb_source, "rb") as unencrypted: with (
with open(thumb_target, "wb") as encrypted: thumb_source.open("rb") as unencrypted,
thumb_target.open("wb") as encrypted,
):
encrypted.write(GnuPG.encrypted(unencrypted)) encrypted.write(GnuPG.encrypted(unencrypted))
shutil.rmtree(thumb_temp) shutil.rmtree(thumb_temp)
shutil.rmtree(orig_temp) shutil.rmtree(orig_temp)
shutil.move( shutil.move(
os.path.join(settings.MEDIA_ROOT, "documents", f), Path(settings.MEDIA_ROOT) / "documents" / f,
os.path.join(settings.MEDIA_ROOT, "documents", "originals", f), Path(settings.MEDIA_ROOT) / "documents" / "originals" / f,
) )

View File

@ -1,7 +1,7 @@
# Generated by Django 1.9.4 on 2016-03-28 19:09 # Generated by Django 1.9.4 on 2016-03-28 19:09
import hashlib import hashlib
import os from pathlib import Path
import django.utils.timezone import django.utils.timezone
import gnupg import gnupg
@ -58,16 +58,16 @@ class Document:
@property @property
def source_path(self): def source_path(self):
return os.path.join( return (
settings.MEDIA_ROOT, Path(settings.MEDIA_ROOT)
"documents", / "documents"
"originals", / "originals"
f"{self.pk:07}.{self.file_type}.gpg", / f"{self.pk:07}.{self.file_type}.gpg"
) ).as_posix()
@property @property
def source_file(self): def source_file(self):
return open(self.source_path, "rb") return Path(self.source_path).open("rb")
@property @property
def file_name(self): def file_name(self):

View File

@ -1,5 +1,5 @@
# Generated by Django 3.1.3 on 2020-11-20 11:21 # Generated by Django 3.1.3 on 2020-11-20 11:21
import os from pathlib import Path
import magic import magic
from django.conf import settings from django.conf import settings
@ -12,15 +12,15 @@ STORAGE_TYPE_UNENCRYPTED = "unencrypted"
STORAGE_TYPE_GPG = "gpg" STORAGE_TYPE_GPG = "gpg"
def source_path(self): def source_path(self) -> Path:
if self.filename: if self.filename:
fname = str(self.filename) fname: str = str(self.filename)
else: else:
fname = f"{self.pk:07}.{self.file_type}" fname = f"{self.pk:07}.{self.file_type}"
if self.storage_type == STORAGE_TYPE_GPG: if self.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg" fname += ".gpg"
return os.path.join(settings.ORIGINALS_DIR, fname) return Path(settings.ORIGINALS_DIR) / fname
def add_mime_types(apps, schema_editor): def add_mime_types(apps, schema_editor):
@ -28,7 +28,7 @@ def add_mime_types(apps, schema_editor):
documents = Document.objects.all() documents = Document.objects.all()
for d in documents: for d in documents:
f = open(source_path(d), "rb") with Path(source_path(d)).open("rb") as f:
if d.storage_type == STORAGE_TYPE_GPG: if d.storage_type == STORAGE_TYPE_GPG:
data = GnuPG.decrypted(f) data = GnuPG.decrypted(f)
else: else:
@ -37,15 +37,13 @@ def add_mime_types(apps, schema_editor):
d.mime_type = magic.from_buffer(data, mime=True) d.mime_type = magic.from_buffer(data, mime=True)
d.save() d.save()
f.close()
def add_file_extensions(apps, schema_editor): def add_file_extensions(apps, schema_editor):
Document = apps.get_model("documents", "Document") Document = apps.get_model("documents", "Document")
documents = Document.objects.all() documents = Document.objects.all()
for d in documents: for d in documents:
d.file_type = os.path.splitext(d.filename)[1].strip(".") d.file_type = Path(d.filename).suffix.lstrip(".")
d.save() d.save()

View File

@ -315,7 +315,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
@property @property
def source_file(self): def source_file(self):
return open(self.source_path, "rb") return Path(self.source_path).open("rb")
@property @property
def has_archive_version(self) -> bool: def has_archive_version(self) -> bool:
@ -330,7 +330,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
@property @property
def archive_file(self): def archive_file(self):
return open(self.archive_path, "rb") return Path(self.archive_path).open("rb")
def get_public_filename(self, *, archive=False, counter=0, suffix=None) -> str: def get_public_filename(self, *, archive=False, counter=0, suffix=None) -> str:
""" """
@ -367,7 +367,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
@property @property
def thumbnail_file(self): def thumbnail_file(self):
return open(self.thumbnail_path, "rb") return Path(self.thumbnail_path).open("rb")
@property @property
def created_date(self): def created_date(self):

View File

@ -272,7 +272,7 @@ def update_document_content_maybe_archive_file(document_id):
with transaction.atomic(): with transaction.atomic():
oldDocument = Document.objects.get(pk=document.pk) oldDocument = Document.objects.get(pk=document.pk)
if parser.get_archive_path(): if parser.get_archive_path():
with open(parser.get_archive_path(), "rb") as f: with Path(parser.get_archive_path()).open("rb") as f:
checksum = hashlib.md5(f.read()).hexdigest() checksum = hashlib.md5(f.read()).hexdigest()
# I'm going to save first so that in case the file move # I'm going to save first so that in case the file move
# fails, the database is rolled back. # fails, the database is rolled back.

View File

@ -1,5 +1,5 @@
import json import json
import os from pathlib import Path
from django.contrib.auth.models import User from django.contrib.auth.models import User
from rest_framework import status from rest_framework import status
@ -136,10 +136,7 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
THEN: THEN:
- old app_logo file is deleted - old app_logo file is deleted
""" """
with open( with (Path(__file__).parent / "samples" / "simple.jpg").open("rb") as f:
os.path.join(os.path.dirname(__file__), "samples", "simple.jpg"),
"rb",
) as f:
self.client.patch( self.client.patch(
f"{self.ENDPOINT}1/", f"{self.ENDPOINT}1/",
{ {
@ -148,15 +145,12 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
) )
config = ApplicationConfiguration.objects.first() config = ApplicationConfiguration.objects.first()
old_logo = config.app_logo old_logo = config.app_logo
self.assertTrue(os.path.exists(old_logo.path)) self.assertTrue(Path(old_logo.path).exists())
with open( with (Path(__file__).parent / "samples" / "simple.png").open("rb") as f:
os.path.join(os.path.dirname(__file__), "samples", "simple.png"),
"rb",
) as f:
self.client.patch( self.client.patch(
f"{self.ENDPOINT}1/", f"{self.ENDPOINT}1/",
{ {
"app_logo": f, "app_logo": f,
}, },
) )
self.assertFalse(os.path.exists(old_logo.path)) self.assertFalse(Path(old_logo.path).exists())

View File

@ -1,4 +1,3 @@
import os
import re import re
import shutil import shutil
from pathlib import Path from pathlib import Path
@ -617,7 +616,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.assertListEqual(self.classifier.predict_tags(doc2.content), []) self.assertListEqual(self.classifier.predict_tags(doc2.content), [])
def test_load_classifier_not_exists(self): def test_load_classifier_not_exists(self):
self.assertFalse(os.path.exists(settings.MODEL_FILE)) self.assertFalse(Path(settings.MODEL_FILE).exists())
self.assertIsNone(load_classifier()) self.assertIsNone(load_classifier())
@mock.patch("documents.classifier.DocumentClassifier.load") @mock.patch("documents.classifier.DocumentClassifier.load")
@ -632,7 +631,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
}, },
) )
@override_settings( @override_settings(
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"), MODEL_FILE=(Path(__file__).parent / "data" / "model.pickle").as_posix(),
) )
@pytest.mark.skip( @pytest.mark.skip(
reason="Disabled caching due to high memory usage - need to investigate.", reason="Disabled caching due to high memory usage - need to investigate.",
@ -648,24 +647,24 @@ class TestClassifier(DirectoriesMixin, TestCase):
@mock.patch("documents.classifier.DocumentClassifier.load") @mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_incompatible_version(self, load): def test_load_classifier_incompatible_version(self, load):
Path(settings.MODEL_FILE).touch() Path(settings.MODEL_FILE).touch()
self.assertTrue(os.path.exists(settings.MODEL_FILE)) self.assertTrue(Path(settings.MODEL_FILE).exists())
load.side_effect = IncompatibleClassifierVersionError("Dummy Error") load.side_effect = IncompatibleClassifierVersionError("Dummy Error")
self.assertIsNone(load_classifier()) self.assertIsNone(load_classifier())
self.assertFalse(os.path.exists(settings.MODEL_FILE)) self.assertFalse(Path(settings.MODEL_FILE).exists())
@mock.patch("documents.classifier.DocumentClassifier.load") @mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_os_error(self, load): def test_load_classifier_os_error(self, load):
Path(settings.MODEL_FILE).touch() Path(settings.MODEL_FILE).touch()
self.assertTrue(os.path.exists(settings.MODEL_FILE)) self.assertTrue(Path(settings.MODEL_FILE).exists())
load.side_effect = OSError() load.side_effect = OSError()
self.assertIsNone(load_classifier()) self.assertIsNone(load_classifier())
self.assertTrue(os.path.exists(settings.MODEL_FILE)) self.assertTrue(Path(settings.MODEL_FILE).exists())
def test_load_old_classifier_version(self): def test_load_old_classifier_version(self):
shutil.copy( shutil.copy(
os.path.join(os.path.dirname(__file__), "data", "v1.17.4.model.pickle"), Path(__file__).parent / "data" / "v1.17.4.model.pickle",
self.dirs.scratch_dir, self.dirs.scratch_dir,
) )
with override_settings( with override_settings(

View File

@ -1,5 +1,5 @@
import os
import shutil import shutil
from pathlib import Path
from unittest import mock from unittest import mock
from django.core.management import call_command from django.core.management import call_command
@ -22,7 +22,7 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
filename="test.pdf", filename="test.pdf",
) )
shutil.copy( shutil.copy(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), Path(__file__).parent / "samples" / "simple.pdf",
self.d1.source_path, self.d1.source_path,
) )
@ -34,7 +34,7 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
filename="test2.pdf", filename="test2.pdf",
) )
shutil.copy( shutil.copy(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), Path(__file__).parent / "samples" / "simple.pdf",
self.d2.source_path, self.d2.source_path,
) )
@ -46,7 +46,7 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
filename="test3.pdf", filename="test3.pdf",
) )
shutil.copy( shutil.copy(
os.path.join(os.path.dirname(__file__), "samples", "password-is-test.pdf"), Path(__file__).parent / "samples" / "password-is-test.pdf",
self.d3.source_path, self.d3.source_path,
) )

View File

@ -1,4 +1,3 @@
import os
import shutil import shutil
from datetime import timedelta from datetime import timedelta
from pathlib import Path from pathlib import Path
@ -88,18 +87,18 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
tasks.train_classifier() tasks.train_classifier()
self.assertIsFile(settings.MODEL_FILE) self.assertIsFile(settings.MODEL_FILE)
mtime = os.stat(settings.MODEL_FILE).st_mtime mtime = Path(settings.MODEL_FILE).stat().st_mtime
tasks.train_classifier() tasks.train_classifier()
self.assertIsFile(settings.MODEL_FILE) self.assertIsFile(settings.MODEL_FILE)
mtime2 = os.stat(settings.MODEL_FILE).st_mtime mtime2 = Path(settings.MODEL_FILE).stat().st_mtime
self.assertEqual(mtime, mtime2) self.assertEqual(mtime, mtime2)
doc.content = "test2" doc.content = "test2"
doc.save() doc.save()
tasks.train_classifier() tasks.train_classifier()
self.assertIsFile(settings.MODEL_FILE) self.assertIsFile(settings.MODEL_FILE)
mtime3 = os.stat(settings.MODEL_FILE).st_mtime mtime3 = Path(settings.MODEL_FILE).stat().st_mtime
self.assertNotEqual(mtime2, mtime3) self.assertNotEqual(mtime2, mtime3)

View File

@ -1,6 +1,6 @@
import os
import tempfile import tempfile
from datetime import timedelta from datetime import timedelta
from pathlib import Path
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import Permission from django.contrib.auth.models import Permission
@ -107,12 +107,12 @@ class TestViews(DirectoriesMixin, TestCase):
content = b"This is a test" content = b"This is a test"
with open(filename, "wb") as f: with Path(filename).open("wb") as f:
f.write(content) f.write(content)
doc = Document.objects.create( doc = Document.objects.create(
title="none", title="none",
filename=os.path.basename(filename), filename=Path(filename).name,
mime_type="application/pdf", mime_type="application/pdf",
) )

View File

@ -38,9 +38,9 @@ class TestChecks(DirectoriesMixin, TestCase):
self.assertTrue(msg.msg.endswith("is set but doesn't exist.")) self.assertTrue(msg.msg.endswith("is set but doesn't exist."))
def test_paths_check_no_access(self): def test_paths_check_no_access(self):
os.chmod(self.dirs.data_dir, 0o000) Path(self.dirs.data_dir).chmod(0o000)
os.chmod(self.dirs.media_dir, 0o000) Path(self.dirs.media_dir).chmod(0o000)
os.chmod(self.dirs.consumption_dir, 0o000) Path(self.dirs.consumption_dir).chmod(0o000)
self.addCleanup(os.chmod, self.dirs.data_dir, 0o777) self.addCleanup(os.chmod, self.dirs.data_dir, 0o777)
self.addCleanup(os.chmod, self.dirs.media_dir, 0o777) self.addCleanup(os.chmod, self.dirs.media_dir, 0o777)

View File

@ -1,4 +1,4 @@
import os from pathlib import Path
from allauth.account import views as allauth_account_views from allauth.account import views as allauth_account_views
from allauth.mfa.base import views as allauth_mfa_views from allauth.mfa.base import views as allauth_mfa_views
@ -270,7 +270,7 @@ urlpatterns = [
re_path( re_path(
r"^logo(?P<path>.*)$", r"^logo(?P<path>.*)$",
serve, serve,
kwargs={"document_root": os.path.join(settings.MEDIA_ROOT, "logo")}, kwargs={"document_root": Path(settings.MEDIA_ROOT) / "logo"},
), ),
# allauth # allauth
path( path(

View File

@ -1,8 +1,8 @@
import abc import abc
import os
from email import message_from_bytes from email import message_from_bytes
from email import policy from email import policy
from email.message import Message from email.message import Message
from pathlib import Path
from django.conf import settings from django.conf import settings
from gnupg import GPG from gnupg import GPG
@ -50,7 +50,7 @@ class MailMessageDecryptor(MailMessagePreprocessor, LoggingMixin):
return False return False
if settings.EMAIL_GNUPG_HOME is None: if settings.EMAIL_GNUPG_HOME is None:
return True return True
return os.path.isdir(settings.EMAIL_GNUPG_HOME) return Path(settings.EMAIL_GNUPG_HOME).is_dir()
def run(self, message: MailMessage) -> MailMessage: def run(self, message: MailMessage) -> MailMessage:
if not hasattr(message, "obj"): if not hasattr(message, "obj"):

View File

@ -159,7 +159,7 @@ class RasterisedDocumentParser(DocumentParser):
# the whole text, so do not utilize it in that case # the whole text, so do not utilize it in that case
if ( if (
sidecar_file is not None sidecar_file is not None
and os.path.isfile(sidecar_file) and sidecar_file.is_file()
and self.settings.mode != "redo" and self.settings.mode != "redo"
): ):
text = self.read_file_handle_unicode_errors(sidecar_file) text = self.read_file_handle_unicode_errors(sidecar_file)
@ -174,7 +174,7 @@ class RasterisedDocumentParser(DocumentParser):
# no success with the sidecar file, try PDF # no success with the sidecar file, try PDF
if not os.path.isfile(pdf_file): if not Path(pdf_file).is_file():
return None return None
try: try:
@ -368,8 +368,8 @@ class RasterisedDocumentParser(DocumentParser):
from ocrmypdf import SubprocessOutputError from ocrmypdf import SubprocessOutputError
from ocrmypdf.exceptions import DigitalSignatureError from ocrmypdf.exceptions import DigitalSignatureError
archive_path = Path(os.path.join(self.tempdir, "archive.pdf")) archive_path = Path(self.tempdir) / "archive.pdf"
sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt")) sidecar_file = Path(self.tempdir) / "sidecar.txt"
args = self.construct_ocrmypdf_parameters( args = self.construct_ocrmypdf_parameters(
document_path, document_path,
@ -412,12 +412,8 @@ class RasterisedDocumentParser(DocumentParser):
f"Attempting force OCR to get the text.", f"Attempting force OCR to get the text.",
) )
archive_path_fallback = Path( archive_path_fallback = Path(self.tempdir) / "archive-fallback.pdf"
os.path.join(self.tempdir, "archive-fallback.pdf"), sidecar_file_fallback = Path(self.tempdir) / "sidecar-fallback.txt"
)
sidecar_file_fallback = Path(
os.path.join(self.tempdir, "sidecar-fallback.txt"),
)
# Attempt to run OCR with safe settings. # Attempt to run OCR with safe settings.

View File

@ -75,7 +75,7 @@ class TestTikaParserAgainstServer:
== "This is an DOCX test document, also made September 14, 2022" == "This is an DOCX test document, also made September 14, 2022"
) )
assert tika_parser.archive_path is not None assert tika_parser.archive_path is not None
with open(tika_parser.archive_path, "rb") as f: with Path(tika_parser.archive_path).open("rb") as f:
assert b"PDF-" in f.read()[:10] assert b"PDF-" in f.read()[:10]
# self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14)) # self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
@ -104,7 +104,7 @@ class TestTikaParserAgainstServer:
in tika_parser.text in tika_parser.text
) )
assert tika_parser.archive_path is not None assert tika_parser.archive_path is not None
with open(tika_parser.archive_path, "rb") as f: with Path(tika_parser.archive_path).open("rb") as f:
assert b"PDF-" in f.read()[:10] assert b"PDF-" in f.read()[:10]
def test_tika_fails_multi_part( def test_tika_fails_multi_part(
@ -130,5 +130,5 @@ class TestTikaParserAgainstServer:
) )
assert tika_parser.archive_path is not None assert tika_parser.archive_path is not None
with open(tika_parser.archive_path, "rb") as f: with Path(tika_parser.archive_path).open("rb") as f:
assert b"PDF-" in f.read()[:10] assert b"PDF-" in f.read()[:10]

View File

@ -38,7 +38,7 @@ class TestTikaParser:
assert tika_parser.text == "the content" assert tika_parser.text == "the content"
assert tika_parser.archive_path is not None assert tika_parser.archive_path is not None
with open(tika_parser.archive_path, "rb") as f: with Path(tika_parser.archive_path).open("rb") as f:
assert f.read() == b"PDF document" assert f.read() == b"PDF document"
assert tika_parser.date == datetime.datetime( assert tika_parser.date == datetime.datetime(