Chore: Switch from os.path to pathlib.Path (#8644)

This commit is contained in:
Sebastian Steinbeißer 2025-01-29 19:58:53 +01:00 committed by GitHub
parent 79956d6a7b
commit fce7b03324
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 72 additions and 137 deletions

View File

@ -38,7 +38,6 @@ ignore = ["DJ001", "SIM105", "RUF012"]
[lint.per-file-ignores]
".github/scripts/*.py" = ["E501", "INP001", "SIM117"]
"docker/wait-for-redis.py" = ["INP001", "T201"]
"src/documents/consumer.py" = ["PTH"] # TODO Enable & remove
"src/documents/file_handling.py" = ["PTH"] # TODO Enable & remove
"src/documents/management/commands/document_consumer.py" = ["PTH"] # TODO Enable & remove
"src/documents/management/commands/document_exporter.py" = ["PTH"] # TODO Enable & remove
@ -51,8 +50,6 @@ ignore = ["DJ001", "SIM105", "RUF012"]
"src/documents/signals/handlers.py" = ["PTH"] # TODO Enable & remove
"src/documents/tasks.py" = ["PTH"] # TODO Enable & remove
"src/documents/tests/test_api_app_config.py" = ["PTH"] # TODO Enable & remove
"src/documents/tests/test_api_bulk_download.py" = ["PTH"] # TODO Enable & remove
"src/documents/tests/test_api_documents.py" = ["PTH"] # TODO Enable & remove
"src/documents/tests/test_classifier.py" = ["PTH"] # TODO Enable & remove
"src/documents/tests/test_consumer.py" = ["PTH"] # TODO Enable & remove
"src/documents/tests/test_file_handling.py" = ["PTH"] # TODO Enable & remove

View File

@ -4,6 +4,7 @@ import os
import tempfile
from enum import Enum
from pathlib import Path
from typing import TYPE_CHECKING
import magic
from django.conf import settings
@ -154,7 +155,11 @@ class ConsumerPlugin(
"""
Confirm the input file still exists where it should
"""
if not os.path.isfile(self.input_doc.original_file):
if TYPE_CHECKING:
assert isinstance(self.input_doc.original_file, Path), (
self.input_doc.original_file
)
if not self.input_doc.original_file.is_file():
self._fail(
ConsumerStatusShortMessage.FILE_NOT_FOUND,
f"Cannot consume {self.input_doc.original_file}: File not found.",
@ -164,7 +169,7 @@ class ConsumerPlugin(
"""
Using the MD5 of the file, check this exact file doesn't already exist
"""
with open(self.input_doc.original_file, "rb") as f:
with Path(self.input_doc.original_file).open("rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
existing_doc = Document.global_objects.filter(
Q(checksum=checksum) | Q(archive_checksum=checksum),
@ -178,7 +183,7 @@ class ConsumerPlugin(
log_msg += " Note: existing document is in the trash."
if settings.CONSUMER_DELETE_DUPLICATES:
os.unlink(self.input_doc.original_file)
Path(self.input_doc.original_file).unlink()
self._fail(
msg,
log_msg,
@ -237,7 +242,7 @@ class ConsumerPlugin(
if not settings.PRE_CONSUME_SCRIPT:
return
if not os.path.isfile(settings.PRE_CONSUME_SCRIPT):
if not Path(settings.PRE_CONSUME_SCRIPT).is_file():
self._fail(
ConsumerStatusShortMessage.PRE_CONSUME_SCRIPT_NOT_FOUND,
f"Configured pre-consume script "
@ -280,7 +285,7 @@ class ConsumerPlugin(
if not settings.POST_CONSUME_SCRIPT:
return
if not os.path.isfile(settings.POST_CONSUME_SCRIPT):
if not Path(settings.POST_CONSUME_SCRIPT).is_file():
self._fail(
ConsumerStatusShortMessage.POST_CONSUME_SCRIPT_NOT_FOUND,
f"Configured post-consume script "
@ -582,7 +587,7 @@ class ConsumerPlugin(
document.thumbnail_path,
)
if archive_path and os.path.isfile(archive_path):
if archive_path and Path(archive_path).is_file():
document.archive_filename = generate_unique_filename(
document,
archive_filename=True,
@ -594,7 +599,7 @@ class ConsumerPlugin(
document.archive_path,
)
with open(archive_path, "rb") as f:
with Path(archive_path).open("rb") as f:
document.archive_checksum = hashlib.md5(
f.read(),
).hexdigest()
@ -612,14 +617,14 @@ class ConsumerPlugin(
self.unmodified_original.unlink()
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
shadow_file = os.path.join(
os.path.dirname(self.input_doc.original_file),
"._" + os.path.basename(self.input_doc.original_file),
shadow_file = (
Path(self.input_doc.original_file).parent
/ f"._{Path(self.input_doc.original_file).name}"
)
if os.path.isfile(shadow_file):
if Path(shadow_file).is_file():
self.log.debug(f"Deleting file {shadow_file}")
os.unlink(shadow_file)
Path(shadow_file).unlink()
except Exception as e:
self._fail(
@ -704,7 +709,7 @@ class ConsumerPlugin(
create_date = date
self.log.debug(f"Creation date from parse_date: {create_date}")
else:
stats = os.stat(self.input_doc.original_file)
stats = Path(self.input_doc.original_file).stat()
create_date = timezone.make_aware(
datetime.datetime.fromtimestamp(stats.st_mtime),
)
@ -800,7 +805,10 @@ class ConsumerPlugin(
) # adds to document
def _write(self, storage_type, source, target):
with open(source, "rb") as read_file, open(target, "wb") as write_file:
with (
Path(source).open("rb") as read_file,
Path(target).open("wb") as write_file,
):
write_file.write(read_file.read())
# Attempt to copy file's original stats, but it's ok if we can't

View File

@ -353,7 +353,7 @@ def cleanup_document_deletion(sender, instance, **kwargs):
f"{filename} could not be deleted: {e}",
)
elif filename and not os.path.isfile(filename):
logger.warn(f"Expected {filename} tp exist, but it did not")
logger.warning(f"Expected {filename} to exist, but it did not")
delete_empty_directories(
os.path.dirname(instance.source_path),

View File

@ -1,7 +1,6 @@
import datetime
import io
import json
import os
import shutil
import zipfile
@ -15,9 +14,10 @@ from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import SampleDirMixin
class TestBulkDownload(DirectoriesMixin, APITestCase):
class TestBulkDownload(DirectoriesMixin, SampleDirMixin, APITestCase):
ENDPOINT = "/api/documents/bulk_download/"
def setUp(self):
@ -51,22 +51,10 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
archive_checksum="D",
)
shutil.copy(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
self.doc2.source_path,
)
shutil.copy(
os.path.join(os.path.dirname(__file__), "samples", "simple.png"),
self.doc2b.source_path,
)
shutil.copy(
os.path.join(os.path.dirname(__file__), "samples", "simple.jpg"),
self.doc3.source_path,
)
shutil.copy(
os.path.join(os.path.dirname(__file__), "samples", "test_with_bom.pdf"),
self.doc3.archive_path,
)
shutil.copy(self.SAMPLE_DIR / "simple.pdf", self.doc2.source_path)
shutil.copy(self.SAMPLE_DIR / "simple.png", self.doc2b.source_path)
shutil.copy(self.SAMPLE_DIR / "simple.jpg", self.doc3.source_path)
shutil.copy(self.SAMPLE_DIR / "test_with_bom.pdf", self.doc3.archive_path)
def test_download_originals(self):
response = self.client.post(

View File

@ -1,5 +1,4 @@
import datetime
import os
import shutil
import tempfile
import uuid
@ -8,6 +7,7 @@ from binascii import hexlify
from datetime import date
from datetime import timedelta
from pathlib import Path
from typing import TYPE_CHECKING
from unittest import mock
import celery
@ -171,19 +171,18 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
content = b"This is a test"
content_thumbnail = b"thumbnail content"
with open(filename, "wb") as f:
with Path(filename).open("wb") as f:
f.write(content)
doc = Document.objects.create(
title="none",
filename=os.path.basename(filename),
filename=Path(filename).name,
mime_type="application/pdf",
)
with open(
os.path.join(self.dirs.thumbnail_dir, f"{doc.pk:07d}.webp"),
"wb",
) as f:
if TYPE_CHECKING:
assert isinstance(self.dirs.thumbnail_dir, Path), self.dirs.thumbnail_dir
with (self.dirs.thumbnail_dir / f"{doc.pk:07d}.webp").open("wb") as f:
f.write(content_thumbnail)
response = self.client.get(f"/api/documents/{doc.pk}/download/")
@ -217,7 +216,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
content = b"This is a test"
content_thumbnail = b"thumbnail content"
with open(filename, "wb") as f:
with Path(filename).open("wb") as f:
f.write(content)
user1 = User.objects.create_user(username="test1")
@ -229,15 +228,12 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
doc = Document.objects.create(
title="none",
filename=os.path.basename(filename),
filename=Path(filename).name,
mime_type="application/pdf",
owner=user1,
)
with open(
os.path.join(self.dirs.thumbnail_dir, f"{doc.pk:07d}.webp"),
"wb",
) as f:
with (Path(self.dirs.thumbnail_dir) / f"{doc.pk:07d}.webp").open("wb") as f:
f.write(content_thumbnail)
response = self.client.get(f"/api/documents/{doc.pk}/download/")
@ -272,10 +268,10 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
mime_type="application/pdf",
)
with open(doc.source_path, "wb") as f:
with Path(doc.source_path).open("wb") as f:
f.write(content)
with open(doc.archive_path, "wb") as f:
with Path(doc.archive_path).open("wb") as f:
f.write(content_archive)
response = self.client.get(f"/api/documents/{doc.pk}/download/")
@ -305,7 +301,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
def test_document_actions_not_existing_file(self):
doc = Document.objects.create(
title="none",
filename=os.path.basename("asd"),
filename=Path("asd").name,
mime_type="application/pdf",
)
@ -1026,10 +1022,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{"document": f},
@ -1061,10 +1054,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{
@ -1095,10 +1085,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{"documenst": f},
@ -1111,10 +1098,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.zip"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.zip").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{"document": f},
@ -1127,10 +1111,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{"document": f, "title": "my custom title"},
@ -1152,10 +1133,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
)
c = Correspondent.objects.create(name="test-corres")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{"document": f, "correspondent": c.id},
@ -1176,10 +1154,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{"document": f, "correspondent": 3456},
@ -1194,10 +1169,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
)
dt = DocumentType.objects.create(name="invoice")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{"document": f, "document_type": dt.id},
@ -1218,10 +1190,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{"document": f, "document_type": 34578},
@ -1236,10 +1205,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
)
sp = StoragePath.objects.create(name="invoices")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{"document": f, "storage_path": sp.id},
@ -1260,10 +1226,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{"document": f, "storage_path": 34578},
@ -1279,10 +1242,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{"document": f, "tags": [t2.id, t1.id]},
@ -1305,10 +1265,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
t1 = Tag.objects.create(name="tag1")
t2 = Tag.objects.create(name="tag2")
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{"document": f, "tags": [t2.id, t1.id, 734563]},
@ -1332,10 +1289,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
0,
tzinfo=zoneinfo.ZoneInfo("America/Los_Angeles"),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{"document": f, "created": created},
@ -1353,10 +1307,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{"document": f, "archive_serial_number": 500},
@ -1385,10 +1336,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
data_type=CustomField.FieldDataType.STRING,
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{
@ -1417,10 +1365,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
id=str(uuid.uuid4()),
)
with open(
os.path.join(os.path.dirname(__file__), "samples", "invalid_pdf.pdf"),
"rb",
) as f:
with (Path(__file__).parent / "samples" / "invalid_pdf.pdf").open("rb") as f:
response = self.client.post(
"/api/documents/post_document/",
{"document": f},
@ -1437,14 +1382,14 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
archive_filename="archive.pdf",
)
source_file = os.path.join(
os.path.dirname(__file__),
"samples",
"documents",
"thumbnails",
"0000001.webp",
source_file: Path = (
Path(__file__).parent
/ "samples"
/ "documents"
/ "thumbnails"
/ "0000001.webp"
)
archive_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
archive_file: Path = Path(__file__).parent / "samples" / "simple.pdf"
shutil.copy(source_file, doc.source_path)
shutil.copy(archive_file, doc.archive_path)
@ -1460,8 +1405,8 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
self.assertGreater(len(meta["archive_metadata"]), 0)
self.assertEqual(meta["media_filename"], "file.pdf")
self.assertEqual(meta["archive_media_filename"], "archive.pdf")
self.assertEqual(meta["original_size"], os.stat(source_file).st_size)
self.assertEqual(meta["archive_size"], os.stat(archive_file).st_size)
self.assertEqual(meta["original_size"], Path(source_file).stat().st_size)
self.assertEqual(meta["archive_size"], Path(archive_file).stat().st_size)
response = self.client.get(f"/api/documents/{doc.pk}/metadata/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
@ -1477,10 +1422,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
mime_type="application/pdf",
)
shutil.copy(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
doc.source_path,
)
shutil.copy(Path(__file__).parent / "samples" / "simple.pdf", doc.source_path)
response = self.client.get(f"/api/documents/{doc.pk}/metadata/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
@ -1939,9 +1881,9 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
def test_get_logs(self):
log_data = "test\ntest2\n"
with open(os.path.join(settings.LOGGING_DIR, "mail.log"), "w") as f:
with (Path(settings.LOGGING_DIR) / "mail.log").open("w") as f:
f.write(log_data)
with open(os.path.join(settings.LOGGING_DIR, "paperless.log"), "w") as f:
with (Path(settings.LOGGING_DIR) / "paperless.log").open("w") as f:
f.write(log_data)
response = self.client.get("/api/logs/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
@ -1949,7 +1891,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
def test_get_logs_only_when_exist(self):
log_data = "test\ntest2\n"
with open(os.path.join(settings.LOGGING_DIR, "paperless.log"), "w") as f:
with (Path(settings.LOGGING_DIR) / "paperless.log").open("w") as f:
f.write(log_data)
response = self.client.get("/api/logs/")
self.assertEqual(response.status_code, status.HTTP_200_OK)
@ -1966,7 +1908,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
def test_get_log(self):
log_data = "test\ntest2\n"
with open(os.path.join(settings.LOGGING_DIR, "paperless.log"), "w") as f:
with (Path(settings.LOGGING_DIR) / "paperless.log").open("w") as f:
f.write(log_data)
response = self.client.get("/api/logs/paperless/")
self.assertEqual(response.status_code, status.HTTP_200_OK)