Chore: switch from os.path to pathlib.Path (#9060)

This commit is contained in:
Sebastian Steinbeißer 2025-03-05 22:06:01 +01:00 committed by GitHub
parent aaaa6c1393
commit 76d363f22d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 89 additions and 150 deletions

View File

@ -209,37 +209,18 @@ lint.per-file-ignores."src/documents/management/commands/document_consumer.py" =
lint.per-file-ignores."src/documents/management/commands/document_exporter.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/migrations/0012_auto_20160305_0040.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/migrations/0014_document_checksum.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/migrations/1003_mime_types.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/migrations/1012_fix_archive_files.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/models.py" = [
"PTH",
"SIM115",
] # TODO PTH Enable & remove
]
lint.per-file-ignores."src/documents/parsers.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/signals/handlers.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/tasks.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_api_app_config.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_classifier.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_consumer.py" = [
"PTH",
] # TODO Enable & remove
@ -255,9 +236,6 @@ lint.per-file-ignores."src/documents/tests/test_management_consumer.py" = [
lint.per-file-ignores."src/documents/tests/test_management_exporter.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_management_thumbnails.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_migration_archive_files.py" = [
"PTH",
] # TODO Enable & remove
@ -270,12 +248,6 @@ lint.per-file-ignores."src/documents/tests/test_migration_mime_type.py" = [
lint.per-file-ignores."src/documents/tests/test_sanity_check.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_tasks.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/tests/test_views.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/views.py" = [
"PTH",
] # TODO Enable & remove
@ -285,34 +257,16 @@ lint.per-file-ignores."src/paperless/checks.py" = [
lint.per-file-ignores."src/paperless/settings.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless/tests/test_checks.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless/urls.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless/views.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless_mail/mail.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless_mail/preprocessor.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless_tesseract/parsers.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
"PTH",
"RUF001",
] # TODO PTH Enable & remove
lint.per-file-ignores."src/paperless_tika/tests/test_live_tika.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless_tika/tests/test_tika_parser.py" = [
"PTH",
] # TODO Enable & remove
lint.isort.force-single-line = true
[tool.pytest.ini_options]

View File

@ -5,6 +5,7 @@ import re
import shutil
import subprocess
import tempfile
from pathlib import Path
import gnupg
from django.conf import settings
@ -34,16 +35,16 @@ class GnuPG:
def move_documents_and_create_thumbnails(apps, schema_editor):
os.makedirs(
os.path.join(settings.MEDIA_ROOT, "documents", "originals"),
(Path(settings.MEDIA_ROOT) / "documents" / "originals").mkdir(
parents=True,
exist_ok=True,
)
os.makedirs(
os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails"),
(Path(settings.MEDIA_ROOT) / "documents" / "thumbnails").mkdir(
parents=True,
exist_ok=True,
)
documents = os.listdir(os.path.join(settings.MEDIA_ROOT, "documents"))
documents: list[str] = os.listdir(Path(settings.MEDIA_ROOT) / "documents")
if set(documents) == {"originals", "thumbnails"}:
return
@ -60,10 +61,7 @@ def move_documents_and_create_thumbnails(apps, schema_editor):
),
)
try:
os.makedirs(settings.SCRATCH_DIR)
except FileExistsError:
pass
Path(settings.SCRATCH_DIR).mkdir(parents=True, exist_ok=True)
for f in sorted(documents):
if not f.endswith("gpg"):
@ -77,15 +75,14 @@ def move_documents_and_create_thumbnails(apps, schema_editor):
),
)
thumb_temp = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
orig_temp = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
thumb_temp: str = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
orig_temp: str = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
orig_source = os.path.join(settings.MEDIA_ROOT, "documents", f)
orig_target = os.path.join(orig_temp, f.replace(".gpg", ""))
orig_source: Path = Path(settings.MEDIA_ROOT) / "documents" / f
orig_target: Path = Path(orig_temp) / f.replace(".gpg", "")
with open(orig_source, "rb") as encrypted:
with open(orig_target, "wb") as unencrypted:
unencrypted.write(GnuPG.decrypted(encrypted))
with orig_source.open("rb") as encrypted, orig_target.open("wb") as unencrypted:
unencrypted.write(GnuPG.decrypted(encrypted))
subprocess.Popen(
(
@ -95,27 +92,29 @@ def move_documents_and_create_thumbnails(apps, schema_editor):
"-alpha",
"remove",
orig_target,
os.path.join(thumb_temp, "convert-%04d.png"),
Path(thumb_temp) / "convert-%04d.png",
),
).wait()
thumb_source = os.path.join(thumb_temp, "convert-0000.png")
thumb_target = os.path.join(
settings.MEDIA_ROOT,
"documents",
"thumbnails",
re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f),
thumb_source: Path = Path(thumb_temp) / "convert-0000.png"
thumb_target: Path = (
Path(settings.MEDIA_ROOT)
/ "documents"
/ "thumbnails"
/ re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f)
)
with open(thumb_source, "rb") as unencrypted:
with open(thumb_target, "wb") as encrypted:
encrypted.write(GnuPG.encrypted(unencrypted))
with (
thumb_source.open("rb") as unencrypted,
thumb_target.open("wb") as encrypted,
):
encrypted.write(GnuPG.encrypted(unencrypted))
shutil.rmtree(thumb_temp)
shutil.rmtree(orig_temp)
shutil.move(
os.path.join(settings.MEDIA_ROOT, "documents", f),
os.path.join(settings.MEDIA_ROOT, "documents", "originals", f),
Path(settings.MEDIA_ROOT) / "documents" / f,
Path(settings.MEDIA_ROOT) / "documents" / "originals" / f,
)

View File

@ -1,7 +1,7 @@
# Generated by Django 1.9.4 on 2016-03-28 19:09
import hashlib
import os
from pathlib import Path
import django.utils.timezone
import gnupg
@ -58,16 +58,16 @@ class Document:
@property
def source_path(self):
return os.path.join(
settings.MEDIA_ROOT,
"documents",
"originals",
f"{self.pk:07}.{self.file_type}.gpg",
)
return (
Path(settings.MEDIA_ROOT)
/ "documents"
/ "originals"
/ f"{self.pk:07}.{self.file_type}.gpg"
).as_posix()
@property
def source_file(self):
return open(self.source_path, "rb")
return Path(self.source_path).open("rb")
@property
def file_name(self):

View File

@ -1,5 +1,5 @@
# Generated by Django 3.1.3 on 2020-11-20 11:21
import os
from pathlib import Path
import magic
from django.conf import settings
@ -12,15 +12,15 @@ STORAGE_TYPE_UNENCRYPTED = "unencrypted"
STORAGE_TYPE_GPG = "gpg"
def source_path(self):
def source_path(self) -> Path:
if self.filename:
fname = str(self.filename)
fname: str = str(self.filename)
else:
fname = f"{self.pk:07}.{self.file_type}"
if self.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg"
return os.path.join(settings.ORIGINALS_DIR, fname)
return Path(settings.ORIGINALS_DIR) / fname
def add_mime_types(apps, schema_editor):
@ -28,24 +28,22 @@ def add_mime_types(apps, schema_editor):
documents = Document.objects.all()
for d in documents:
f = open(source_path(d), "rb")
if d.storage_type == STORAGE_TYPE_GPG:
data = GnuPG.decrypted(f)
else:
data = f.read(1024)
with Path(source_path(d)).open("rb") as f:
if d.storage_type == STORAGE_TYPE_GPG:
data = GnuPG.decrypted(f)
else:
data = f.read(1024)
d.mime_type = magic.from_buffer(data, mime=True)
d.save()
f.close()
def add_file_extensions(apps, schema_editor):
Document = apps.get_model("documents", "Document")
documents = Document.objects.all()
for d in documents:
d.file_type = os.path.splitext(d.filename)[1].strip(".")
d.file_type = Path(d.filename).suffix.lstrip(".")
d.save()

View File

@ -315,7 +315,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
@property
def source_file(self):
return open(self.source_path, "rb")
return Path(self.source_path).open("rb")
@property
def has_archive_version(self) -> bool:
@ -330,7 +330,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
@property
def archive_file(self):
return open(self.archive_path, "rb")
return Path(self.archive_path).open("rb")
def get_public_filename(self, *, archive=False, counter=0, suffix=None) -> str:
"""
@ -367,7 +367,7 @@ class Document(SoftDeleteModel, ModelWithOwner):
@property
def thumbnail_file(self):
return open(self.thumbnail_path, "rb")
return Path(self.thumbnail_path).open("rb")
@property
def created_date(self):

View File

@ -272,7 +272,7 @@ def update_document_content_maybe_archive_file(document_id):
with transaction.atomic():
oldDocument = Document.objects.get(pk=document.pk)
if parser.get_archive_path():
with open(parser.get_archive_path(), "rb") as f:
with Path(parser.get_archive_path()).open("rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
# I'm going to save first so that in case the file move
# fails, the database is rolled back.

View File

@ -1,5 +1,5 @@
import json
import os
from pathlib import Path
from django.contrib.auth.models import User
from rest_framework import status
@ -136,10 +136,7 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
THEN:
- old app_logo file is deleted
"""
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.jpg"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.jpg").open("rb") as f:
self.client.patch(
f"{self.ENDPOINT}1/",
{
@ -148,15 +145,12 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase):
)
config = ApplicationConfiguration.objects.first()
old_logo = config.app_logo
self.assertTrue(os.path.exists(old_logo.path))
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.png"),
"rb",
) as f:
self.assertTrue(Path(old_logo.path).exists())
with (Path(__file__).parent / "samples" / "simple.png").open("rb") as f:
self.client.patch(
f"{self.ENDPOINT}1/",
{
"app_logo": f,
},
)
self.assertFalse(os.path.exists(old_logo.path))
self.assertFalse(Path(old_logo.path).exists())

View File

@ -1,4 +1,3 @@
import os
import re
import shutil
from pathlib import Path
@ -617,7 +616,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
self.assertListEqual(self.classifier.predict_tags(doc2.content), [])
def test_load_classifier_not_exists(self):
self.assertFalse(os.path.exists(settings.MODEL_FILE))
self.assertFalse(Path(settings.MODEL_FILE).exists())
self.assertIsNone(load_classifier())
@mock.patch("documents.classifier.DocumentClassifier.load")
@ -632,7 +631,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
},
)
@override_settings(
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"),
MODEL_FILE=(Path(__file__).parent / "data" / "model.pickle").as_posix(),
)
@pytest.mark.skip(
reason="Disabled caching due to high memory usage - need to investigate.",
@ -648,24 +647,24 @@ class TestClassifier(DirectoriesMixin, TestCase):
@mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_incompatible_version(self, load):
Path(settings.MODEL_FILE).touch()
self.assertTrue(os.path.exists(settings.MODEL_FILE))
self.assertTrue(Path(settings.MODEL_FILE).exists())
load.side_effect = IncompatibleClassifierVersionError("Dummy Error")
self.assertIsNone(load_classifier())
self.assertFalse(os.path.exists(settings.MODEL_FILE))
self.assertFalse(Path(settings.MODEL_FILE).exists())
@mock.patch("documents.classifier.DocumentClassifier.load")
def test_load_classifier_os_error(self, load):
Path(settings.MODEL_FILE).touch()
self.assertTrue(os.path.exists(settings.MODEL_FILE))
self.assertTrue(Path(settings.MODEL_FILE).exists())
load.side_effect = OSError()
self.assertIsNone(load_classifier())
self.assertTrue(os.path.exists(settings.MODEL_FILE))
self.assertTrue(Path(settings.MODEL_FILE).exists())
def test_load_old_classifier_version(self):
shutil.copy(
os.path.join(os.path.dirname(__file__), "data", "v1.17.4.model.pickle"),
Path(__file__).parent / "data" / "v1.17.4.model.pickle",
self.dirs.scratch_dir,
)
with override_settings(

View File

@ -1,5 +1,5 @@
import os
import shutil
from pathlib import Path
from unittest import mock
from django.core.management import call_command
@ -22,7 +22,7 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
filename="test.pdf",
)
shutil.copy(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
Path(__file__).parent / "samples" / "simple.pdf",
self.d1.source_path,
)
@ -34,7 +34,7 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
filename="test2.pdf",
)
shutil.copy(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
Path(__file__).parent / "samples" / "simple.pdf",
self.d2.source_path,
)
@ -46,7 +46,7 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
filename="test3.pdf",
)
shutil.copy(
os.path.join(os.path.dirname(__file__), "samples", "password-is-test.pdf"),
Path(__file__).parent / "samples" / "password-is-test.pdf",
self.d3.source_path,
)

View File

@ -1,4 +1,3 @@
import os
import shutil
from datetime import timedelta
from pathlib import Path
@ -88,18 +87,18 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
tasks.train_classifier()
self.assertIsFile(settings.MODEL_FILE)
mtime = os.stat(settings.MODEL_FILE).st_mtime
mtime = Path(settings.MODEL_FILE).stat().st_mtime
tasks.train_classifier()
self.assertIsFile(settings.MODEL_FILE)
mtime2 = os.stat(settings.MODEL_FILE).st_mtime
mtime2 = Path(settings.MODEL_FILE).stat().st_mtime
self.assertEqual(mtime, mtime2)
doc.content = "test2"
doc.save()
tasks.train_classifier()
self.assertIsFile(settings.MODEL_FILE)
mtime3 = os.stat(settings.MODEL_FILE).st_mtime
mtime3 = Path(settings.MODEL_FILE).stat().st_mtime
self.assertNotEqual(mtime2, mtime3)

View File

@ -1,6 +1,6 @@
import os
import tempfile
from datetime import timedelta
from pathlib import Path
from django.conf import settings
from django.contrib.auth.models import Permission
@ -107,12 +107,12 @@ class TestViews(DirectoriesMixin, TestCase):
content = b"This is a test"
with open(filename, "wb") as f:
with Path(filename).open("wb") as f:
f.write(content)
doc = Document.objects.create(
title="none",
filename=os.path.basename(filename),
filename=Path(filename).name,
mime_type="application/pdf",
)

View File

@ -38,9 +38,9 @@ class TestChecks(DirectoriesMixin, TestCase):
self.assertTrue(msg.msg.endswith("is set but doesn't exist."))
def test_paths_check_no_access(self):
os.chmod(self.dirs.data_dir, 0o000)
os.chmod(self.dirs.media_dir, 0o000)
os.chmod(self.dirs.consumption_dir, 0o000)
Path(self.dirs.data_dir).chmod(0o000)
Path(self.dirs.media_dir).chmod(0o000)
Path(self.dirs.consumption_dir).chmod(0o000)
self.addCleanup(os.chmod, self.dirs.data_dir, 0o777)
self.addCleanup(os.chmod, self.dirs.media_dir, 0o777)

View File

@ -1,4 +1,4 @@
import os
from pathlib import Path
from allauth.account import views as allauth_account_views
from allauth.mfa.base import views as allauth_mfa_views
@ -270,7 +270,7 @@ urlpatterns = [
re_path(
r"^logo(?P<path>.*)$",
serve,
kwargs={"document_root": os.path.join(settings.MEDIA_ROOT, "logo")},
kwargs={"document_root": Path(settings.MEDIA_ROOT) / "logo"},
),
# allauth
path(

View File

@ -1,8 +1,8 @@
import abc
import os
from email import message_from_bytes
from email import policy
from email.message import Message
from pathlib import Path
from django.conf import settings
from gnupg import GPG
@ -50,7 +50,7 @@ class MailMessageDecryptor(MailMessagePreprocessor, LoggingMixin):
return False
if settings.EMAIL_GNUPG_HOME is None:
return True
return os.path.isdir(settings.EMAIL_GNUPG_HOME)
return Path(settings.EMAIL_GNUPG_HOME).is_dir()
def run(self, message: MailMessage) -> MailMessage:
if not hasattr(message, "obj"):

View File

@ -159,7 +159,7 @@ class RasterisedDocumentParser(DocumentParser):
# the whole text, so do not utilize it in that case
if (
sidecar_file is not None
and os.path.isfile(sidecar_file)
and sidecar_file.is_file()
and self.settings.mode != "redo"
):
text = self.read_file_handle_unicode_errors(sidecar_file)
@ -174,7 +174,7 @@ class RasterisedDocumentParser(DocumentParser):
# no success with the sidecar file, try PDF
if not os.path.isfile(pdf_file):
if not Path(pdf_file).is_file():
return None
try:
@ -368,8 +368,8 @@ class RasterisedDocumentParser(DocumentParser):
from ocrmypdf import SubprocessOutputError
from ocrmypdf.exceptions import DigitalSignatureError
archive_path = Path(os.path.join(self.tempdir, "archive.pdf"))
sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt"))
archive_path = Path(self.tempdir) / "archive.pdf"
sidecar_file = Path(self.tempdir) / "sidecar.txt"
args = self.construct_ocrmypdf_parameters(
document_path,
@ -412,12 +412,8 @@ class RasterisedDocumentParser(DocumentParser):
f"Attempting force OCR to get the text.",
)
archive_path_fallback = Path(
os.path.join(self.tempdir, "archive-fallback.pdf"),
)
sidecar_file_fallback = Path(
os.path.join(self.tempdir, "sidecar-fallback.txt"),
)
archive_path_fallback = Path(self.tempdir) / "archive-fallback.pdf"
sidecar_file_fallback = Path(self.tempdir) / "sidecar-fallback.txt"
# Attempt to run OCR with safe settings.

View File

@ -75,7 +75,7 @@ class TestTikaParserAgainstServer:
== "This is an DOCX test document, also made September 14, 2022"
)
assert tika_parser.archive_path is not None
with open(tika_parser.archive_path, "rb") as f:
with Path(tika_parser.archive_path).open("rb") as f:
assert b"PDF-" in f.read()[:10]
# self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
@ -104,7 +104,7 @@ class TestTikaParserAgainstServer:
in tika_parser.text
)
assert tika_parser.archive_path is not None
with open(tika_parser.archive_path, "rb") as f:
with Path(tika_parser.archive_path).open("rb") as f:
assert b"PDF-" in f.read()[:10]
def test_tika_fails_multi_part(
@ -130,5 +130,5 @@ class TestTikaParserAgainstServer:
)
assert tika_parser.archive_path is not None
with open(tika_parser.archive_path, "rb") as f:
with Path(tika_parser.archive_path).open("rb") as f:
assert b"PDF-" in f.read()[:10]

View File

@ -38,7 +38,7 @@ class TestTikaParser:
assert tika_parser.text == "the content"
assert tika_parser.archive_path is not None
with open(tika_parser.archive_path, "rb") as f:
with Path(tika_parser.archive_path).open("rb") as f:
assert f.read() == b"PDF document"
assert tika_parser.date == datetime.datetime(