From fce7b033247ba7611fca3ec3f3afa2fdd72aa297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Steinbei=C3=9Fer?= <33968289+gothicVI@users.noreply.github.com> Date: Wed, 29 Jan 2025 19:58:53 +0100 Subject: [PATCH] Chore: Switch from os.path to pathlib.Path (#8644) --- .ruff.toml | 3 - src/documents/consumer.py | 36 +++-- src/documents/signals/handlers.py | 2 +- src/documents/tests/test_api_bulk_download.py | 24 +-- src/documents/tests/test_api_documents.py | 144 ++++++------------ 5 files changed, 72 insertions(+), 137 deletions(-) diff --git a/.ruff.toml b/.ruff.toml index 96ee7430b..d9ca6b321 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -38,7 +38,6 @@ ignore = ["DJ001", "SIM105", "RUF012"] [lint.per-file-ignores] ".github/scripts/*.py" = ["E501", "INP001", "SIM117"] "docker/wait-for-redis.py" = ["INP001", "T201"] -"src/documents/consumer.py" = ["PTH"] # TODO Enable & remove "src/documents/file_handling.py" = ["PTH"] # TODO Enable & remove "src/documents/management/commands/document_consumer.py" = ["PTH"] # TODO Enable & remove "src/documents/management/commands/document_exporter.py" = ["PTH"] # TODO Enable & remove @@ -51,8 +50,6 @@ ignore = ["DJ001", "SIM105", "RUF012"] "src/documents/signals/handlers.py" = ["PTH"] # TODO Enable & remove "src/documents/tasks.py" = ["PTH"] # TODO Enable & remove "src/documents/tests/test_api_app_config.py" = ["PTH"] # TODO Enable & remove -"src/documents/tests/test_api_bulk_download.py" = ["PTH"] # TODO Enable & remove -"src/documents/tests/test_api_documents.py" = ["PTH"] # TODO Enable & remove "src/documents/tests/test_classifier.py" = ["PTH"] # TODO Enable & remove "src/documents/tests/test_consumer.py" = ["PTH"] # TODO Enable & remove "src/documents/tests/test_file_handling.py" = ["PTH"] # TODO Enable & remove diff --git a/src/documents/consumer.py b/src/documents/consumer.py index ec92ddba8..35c18ac7b 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -4,6 +4,7 @@ import os import tempfile from enum import Enum from pathlib import Path +from typing import TYPE_CHECKING import magic from django.conf import settings @@ -154,7 +155,11 @@ class ConsumerPlugin( """ Confirm the input file still exists where it should """ - if not os.path.isfile(self.input_doc.original_file): + if TYPE_CHECKING: + assert isinstance(self.input_doc.original_file, Path), ( + self.input_doc.original_file + ) + if not self.input_doc.original_file.is_file(): self._fail( ConsumerStatusShortMessage.FILE_NOT_FOUND, f"Cannot consume {self.input_doc.original_file}: File not found.", @@ -164,7 +169,7 @@ class ConsumerPlugin( """ Using the MD5 of the file, check this exact file doesn't already exist """ - with open(self.input_doc.original_file, "rb") as f: + with Path(self.input_doc.original_file).open("rb") as f: checksum = hashlib.md5(f.read()).hexdigest() existing_doc = Document.global_objects.filter( Q(checksum=checksum) | Q(archive_checksum=checksum), @@ -178,7 +183,7 @@ class ConsumerPlugin( log_msg += " Note: existing document is in the trash." if settings.CONSUMER_DELETE_DUPLICATES: - os.unlink(self.input_doc.original_file) + Path(self.input_doc.original_file).unlink() self._fail( msg, log_msg, @@ -237,7 +242,7 @@ class ConsumerPlugin( if not settings.PRE_CONSUME_SCRIPT: return - if not os.path.isfile(settings.PRE_CONSUME_SCRIPT): + if not Path(settings.PRE_CONSUME_SCRIPT).is_file(): self._fail( ConsumerStatusShortMessage.PRE_CONSUME_SCRIPT_NOT_FOUND, f"Configured pre-consume script " @@ -280,7 +285,7 @@ class ConsumerPlugin( if not settings.POST_CONSUME_SCRIPT: return - if not os.path.isfile(settings.POST_CONSUME_SCRIPT): + if not Path(settings.POST_CONSUME_SCRIPT).is_file(): self._fail( ConsumerStatusShortMessage.POST_CONSUME_SCRIPT_NOT_FOUND, f"Configured post-consume script " @@ -582,7 +587,7 @@ class ConsumerPlugin( document.thumbnail_path, ) - if archive_path and os.path.isfile(archive_path): + if archive_path and Path(archive_path).is_file(): document.archive_filename = generate_unique_filename( document, archive_filename=True, @@ -594,7 +599,7 @@ class ConsumerPlugin( document.archive_path, ) - with open(archive_path, "rb") as f: + with Path(archive_path).open("rb") as f: document.archive_checksum = hashlib.md5( f.read(), ).hexdigest() @@ -612,14 +617,14 @@ class ConsumerPlugin( self.unmodified_original.unlink() # https://github.com/jonaswinkler/paperless-ng/discussions/1037 - shadow_file = os.path.join( - os.path.dirname(self.input_doc.original_file), - "._" + os.path.basename(self.input_doc.original_file), + shadow_file = ( + Path(self.input_doc.original_file).parent + / f"._{Path(self.input_doc.original_file).name}" ) - if os.path.isfile(shadow_file): + if Path(shadow_file).is_file(): self.log.debug(f"Deleting file {shadow_file}") - os.unlink(shadow_file) + Path(shadow_file).unlink() except Exception as e: self._fail( @@ -704,7 +709,7 @@ class ConsumerPlugin( create_date = date self.log.debug(f"Creation date from parse_date: {create_date}") else: - stats = os.stat(self.input_doc.original_file) + stats = Path(self.input_doc.original_file).stat() create_date = timezone.make_aware( datetime.datetime.fromtimestamp(stats.st_mtime), ) @@ -800,7 +805,10 @@ class ConsumerPlugin( ) # adds to document def _write(self, storage_type, source, target): - with open(source, "rb") as read_file, open(target, "wb") as write_file: + with ( + Path(source).open("rb") as read_file, + Path(target).open("wb") as write_file, + ): write_file.write(read_file.read()) # Attempt to copy file's original stats, but it's ok if we can't diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index e60585a37..4885910fd 100644 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -353,7 +353,7 @@ def cleanup_document_deletion(sender, instance, **kwargs): f"{filename} could not be deleted: {e}", ) elif filename and not os.path.isfile(filename): - logger.warn(f"Expected {filename} tp exist, but it did not") + logger.warning(f"Expected {filename} to exist, but it did not") delete_empty_directories( os.path.dirname(instance.source_path), diff --git a/src/documents/tests/test_api_bulk_download.py b/src/documents/tests/test_api_bulk_download.py index 31bf182b5..a7e8f5df3 100644 --- a/src/documents/tests/test_api_bulk_download.py +++ b/src/documents/tests/test_api_bulk_download.py @@ -1,7 +1,6 @@ import datetime import io import json -import os import shutil import zipfile @@ -15,9 +14,10 @@ from documents.models import Correspondent from documents.models import Document from documents.models import DocumentType from documents.tests.utils import DirectoriesMixin +from documents.tests.utils import SampleDirMixin -class TestBulkDownload(DirectoriesMixin, APITestCase): +class TestBulkDownload(DirectoriesMixin, SampleDirMixin, APITestCase): ENDPOINT = "/api/documents/bulk_download/" def setUp(self): @@ -51,22 +51,10 @@ class TestBulkDownload(DirectoriesMixin, APITestCase): archive_checksum="D", ) - shutil.copy( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - self.doc2.source_path, - ) - shutil.copy( - os.path.join(os.path.dirname(__file__), "samples", "simple.png"), - self.doc2b.source_path, - ) - shutil.copy( - os.path.join(os.path.dirname(__file__), "samples", "simple.jpg"), - self.doc3.source_path, - ) - shutil.copy( - os.path.join(os.path.dirname(__file__), "samples", "test_with_bom.pdf"), - self.doc3.archive_path, - ) + shutil.copy(self.SAMPLE_DIR / "simple.pdf", self.doc2.source_path) + shutil.copy(self.SAMPLE_DIR / "simple.png", self.doc2b.source_path) + shutil.copy(self.SAMPLE_DIR / "simple.jpg", self.doc3.source_path) + shutil.copy(self.SAMPLE_DIR / "test_with_bom.pdf", self.doc3.archive_path) def test_download_originals(self): response = self.client.post( diff --git a/src/documents/tests/test_api_documents.py b/src/documents/tests/test_api_documents.py index 70db15217..b7a4f4e2f 100644 --- a/src/documents/tests/test_api_documents.py +++ b/src/documents/tests/test_api_documents.py @@ -1,5 +1,4 @@ import datetime -import os import shutil import tempfile import uuid @@ -8,6 +7,7 @@ from binascii import hexlify from datetime import date from datetime import timedelta from pathlib import Path +from typing import TYPE_CHECKING from unittest import mock import celery @@ -171,19 +171,18 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): content = b"This is a test" content_thumbnail = b"thumbnail content" - with open(filename, "wb") as f: + with Path(filename).open("wb") as f: f.write(content) doc = Document.objects.create( title="none", - filename=os.path.basename(filename), + filename=Path(filename).name, mime_type="application/pdf", ) - with open( - os.path.join(self.dirs.thumbnail_dir, f"{doc.pk:07d}.webp"), - "wb", - ) as f: + if TYPE_CHECKING: + assert isinstance(self.dirs.thumbnail_dir, Path), self.dirs.thumbnail_dir + with (self.dirs.thumbnail_dir / f"{doc.pk:07d}.webp").open("wb") as f: f.write(content_thumbnail) response = self.client.get(f"/api/documents/{doc.pk}/download/") @@ -217,7 +216,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): content = b"This is a test" content_thumbnail = b"thumbnail content" - with open(filename, "wb") as f: + with Path(filename).open("wb") as f: f.write(content) user1 = User.objects.create_user(username="test1") @@ -229,15 +228,12 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): doc = Document.objects.create( title="none", - filename=os.path.basename(filename), + filename=Path(filename).name, mime_type="application/pdf", owner=user1, ) - with open( - os.path.join(self.dirs.thumbnail_dir, f"{doc.pk:07d}.webp"), - "wb", - ) as f: + with (Path(self.dirs.thumbnail_dir) / f"{doc.pk:07d}.webp").open("wb") as f: f.write(content_thumbnail) response = self.client.get(f"/api/documents/{doc.pk}/download/") @@ -272,10 +268,10 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): mime_type="application/pdf", ) - with open(doc.source_path, "wb") as f: + with Path(doc.source_path).open("wb") as f: f.write(content) - with open(doc.archive_path, "wb") as f: + with Path(doc.archive_path).open("wb") as f: f.write(content_archive) response = self.client.get(f"/api/documents/{doc.pk}/download/") @@ -305,7 +301,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): def test_document_actions_not_existing_file(self): doc = Document.objects.create( title="none", - filename=os.path.basename("asd"), + filename=Path("asd").name, mime_type="application/pdf", ) @@ -1026,10 +1022,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): id=str(uuid.uuid4()), ) - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", {"document": f}, @@ -1061,10 +1054,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): id=str(uuid.uuid4()), ) - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", { @@ -1095,10 +1085,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): id=str(uuid.uuid4()), ) - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", {"documenst": f}, @@ -1111,10 +1098,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): id=str(uuid.uuid4()), ) - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.zip").open("rb") as f: response = self.client.post( "/api/documents/post_document/", {"document": f}, @@ -1127,10 +1111,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): id=str(uuid.uuid4()), ) - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", {"document": f, "title": "my custom title"}, @@ -1152,10 +1133,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): ) c = Correspondent.objects.create(name="test-corres") - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", {"document": f, "correspondent": c.id}, @@ -1176,10 +1154,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): id=str(uuid.uuid4()), ) - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", {"document": f, "correspondent": 3456}, @@ -1194,10 +1169,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): ) dt = DocumentType.objects.create(name="invoice") - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", {"document": f, "document_type": dt.id}, @@ -1218,10 +1190,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): id=str(uuid.uuid4()), ) - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", {"document": f, "document_type": 34578}, @@ -1236,10 +1205,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): ) sp = StoragePath.objects.create(name="invoices") - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", {"document": f, "storage_path": sp.id}, @@ -1260,10 +1226,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): id=str(uuid.uuid4()), ) - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", {"document": f, "storage_path": 34578}, @@ -1279,10 +1242,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): t1 = Tag.objects.create(name="tag1") t2 = Tag.objects.create(name="tag2") - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", {"document": f, "tags": [t2.id, t1.id]}, @@ -1305,10 +1265,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): t1 = Tag.objects.create(name="tag1") t2 = Tag.objects.create(name="tag2") - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", {"document": f, "tags": [t2.id, t1.id, 734563]}, @@ -1332,10 +1289,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): 0, tzinfo=zoneinfo.ZoneInfo("America/Los_Angeles"), ) - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", {"document": f, "created": created}, @@ -1353,10 +1307,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): id=str(uuid.uuid4()), ) - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", {"document": f, "archive_serial_number": 500}, @@ -1385,10 +1336,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): data_type=CustomField.FieldDataType.STRING, ) - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", { @@ -1417,10 +1365,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): id=str(uuid.uuid4()), ) - with open( - os.path.join(os.path.dirname(__file__), "samples", "invalid_pdf.pdf"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "invalid_pdf.pdf").open("rb") as f: response = self.client.post( "/api/documents/post_document/", {"document": f}, @@ -1437,14 +1382,14 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): archive_filename="archive.pdf", ) - source_file = os.path.join( - os.path.dirname(__file__), - "samples", - "documents", - "thumbnails", - "0000001.webp", + source_file: Path = ( + Path(__file__).parent + / "samples" + / "documents" + / "thumbnails" + / "0000001.webp" ) - archive_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") + archive_file: Path = Path(__file__).parent / "samples" / "simple.pdf" shutil.copy(source_file, doc.source_path) shutil.copy(archive_file, doc.archive_path) @@ -1460,8 +1405,8 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): self.assertGreater(len(meta["archive_metadata"]), 0) self.assertEqual(meta["media_filename"], "file.pdf") self.assertEqual(meta["archive_media_filename"], "archive.pdf") - self.assertEqual(meta["original_size"], os.stat(source_file).st_size) - self.assertEqual(meta["archive_size"], os.stat(archive_file).st_size) + self.assertEqual(meta["original_size"], Path(source_file).stat().st_size) + self.assertEqual(meta["archive_size"], Path(archive_file).stat().st_size) response = self.client.get(f"/api/documents/{doc.pk}/metadata/") self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -1477,10 +1422,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): mime_type="application/pdf", ) - shutil.copy( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - doc.source_path, - ) + shutil.copy(Path(__file__).parent / "samples" / "simple.pdf", doc.source_path) response = self.client.get(f"/api/documents/{doc.pk}/metadata/") self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -1939,9 +1881,9 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): def test_get_logs(self): log_data = "test\ntest2\n" - with open(os.path.join(settings.LOGGING_DIR, "mail.log"), "w") as f: + with (Path(settings.LOGGING_DIR) / "mail.log").open("w") as f: f.write(log_data) - with open(os.path.join(settings.LOGGING_DIR, "paperless.log"), "w") as f: + with (Path(settings.LOGGING_DIR) / "paperless.log").open("w") as f: f.write(log_data) response = self.client.get("/api/logs/") self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -1949,7 +1891,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): def test_get_logs_only_when_exist(self): log_data = "test\ntest2\n" - with open(os.path.join(settings.LOGGING_DIR, "paperless.log"), "w") as f: + with (Path(settings.LOGGING_DIR) / "paperless.log").open("w") as f: f.write(log_data) response = self.client.get("/api/logs/") self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -1966,7 +1908,7 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase): def test_get_log(self): log_data = "test\ntest2\n" - with open(os.path.join(settings.LOGGING_DIR, "paperless.log"), "w") as f: + with (Path(settings.LOGGING_DIR) / "paperless.log").open("w") as f: f.write(log_data) response = self.client.get("/api/logs/paperless/") self.assertEqual(response.status_code, status.HTTP_200_OK)