From 76d363f22d8d7b9c86df3a58465b1defac86b294 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Steinbei=C3=9Fer?= <33968289+gothicVI@users.noreply.github.com> Date: Wed, 5 Mar 2025 22:06:01 +0100 Subject: [PATCH] Chore: switch from os.path to pathlib.Path (#9060) --- pyproject.toml | 48 +--------------- .../migrations/0012_auto_20160305_0040.py | 55 +++++++++---------- .../migrations/0014_document_checksum.py | 16 +++--- src/documents/migrations/1003_mime_types.py | 22 ++++---- src/documents/models.py | 6 +- src/documents/tasks.py | 2 +- src/documents/tests/test_api_app_config.py | 16 ++---- src/documents/tests/test_classifier.py | 15 +++-- .../tests/test_management_thumbnails.py | 8 +-- src/documents/tests/test_tasks.py | 7 +-- src/documents/tests/test_views.py | 6 +- src/paperless/tests/test_checks.py | 6 +- src/paperless/urls.py | 4 +- src/paperless_mail/preprocessor.py | 4 +- src/paperless_tesseract/parsers.py | 16 ++---- src/paperless_tika/tests/test_live_tika.py | 6 +- src/paperless_tika/tests/test_tika_parser.py | 2 +- 17 files changed, 89 insertions(+), 150 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d26d05aa3..a583cd1a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -209,37 +209,18 @@ lint.per-file-ignores."src/documents/management/commands/document_consumer.py" = lint.per-file-ignores."src/documents/management/commands/document_exporter.py" = [ "PTH", ] # TODO Enable & remove -lint.per-file-ignores."src/documents/migrations/0012_auto_20160305_0040.py" = [ - "PTH", -] # TODO Enable & remove -lint.per-file-ignores."src/documents/migrations/0014_document_checksum.py" = [ - "PTH", -] # TODO Enable & remove -lint.per-file-ignores."src/documents/migrations/1003_mime_types.py" = [ - "PTH", -] # TODO Enable & remove lint.per-file-ignores."src/documents/migrations/1012_fix_archive_files.py" = [ "PTH", ] # TODO Enable & remove lint.per-file-ignores."src/documents/models.py" = [ - "PTH", "SIM115", -] # TODO PTH Enable & remove +] 
lint.per-file-ignores."src/documents/parsers.py" = [ "PTH", ] # TODO Enable & remove lint.per-file-ignores."src/documents/signals/handlers.py" = [ "PTH", ] # TODO Enable & remove -lint.per-file-ignores."src/documents/tasks.py" = [ - "PTH", -] # TODO Enable & remove -lint.per-file-ignores."src/documents/tests/test_api_app_config.py" = [ - "PTH", -] # TODO Enable & remove -lint.per-file-ignores."src/documents/tests/test_classifier.py" = [ - "PTH", -] # TODO Enable & remove lint.per-file-ignores."src/documents/tests/test_consumer.py" = [ "PTH", ] # TODO Enable & remove @@ -255,9 +236,6 @@ lint.per-file-ignores."src/documents/tests/test_management_consumer.py" = [ lint.per-file-ignores."src/documents/tests/test_management_exporter.py" = [ "PTH", ] # TODO Enable & remove -lint.per-file-ignores."src/documents/tests/test_management_thumbnails.py" = [ - "PTH", -] # TODO Enable & remove lint.per-file-ignores."src/documents/tests/test_migration_archive_files.py" = [ "PTH", ] # TODO Enable & remove @@ -270,12 +248,6 @@ lint.per-file-ignores."src/documents/tests/test_migration_mime_type.py" = [ lint.per-file-ignores."src/documents/tests/test_sanity_check.py" = [ "PTH", ] # TODO Enable & remove -lint.per-file-ignores."src/documents/tests/test_tasks.py" = [ - "PTH", -] # TODO Enable & remove -lint.per-file-ignores."src/documents/tests/test_views.py" = [ - "PTH", -] # TODO Enable & remove lint.per-file-ignores."src/documents/views.py" = [ "PTH", ] # TODO Enable & remove @@ -285,34 +257,16 @@ lint.per-file-ignores."src/paperless/checks.py" = [ lint.per-file-ignores."src/paperless/settings.py" = [ "PTH", ] # TODO Enable & remove -lint.per-file-ignores."src/paperless/tests/test_checks.py" = [ - "PTH", -] # TODO Enable & remove -lint.per-file-ignores."src/paperless/urls.py" = [ - "PTH", -] # TODO Enable & remove lint.per-file-ignores."src/paperless/views.py" = [ "PTH", ] # TODO Enable & remove lint.per-file-ignores."src/paperless_mail/mail.py" = [ "PTH", ] # TODO Enable & remove 
-lint.per-file-ignores."src/paperless_mail/preprocessor.py" = [ - "PTH", -] # TODO Enable & remove -lint.per-file-ignores."src/paperless_tesseract/parsers.py" = [ - "PTH", -] # TODO Enable & remove lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [ "PTH", "RUF001", ] # TODO PTH Enable & remove -lint.per-file-ignores."src/paperless_tika/tests/test_live_tika.py" = [ - "PTH", -] # TODO Enable & remove -lint.per-file-ignores."src/paperless_tika/tests/test_tika_parser.py" = [ - "PTH", -] # TODO Enable & remove lint.isort.force-single-line = true [tool.pytest.ini_options] diff --git a/src/documents/migrations/0012_auto_20160305_0040.py b/src/documents/migrations/0012_auto_20160305_0040.py index b656ef70e..d7b2ab52b 100644 --- a/src/documents/migrations/0012_auto_20160305_0040.py +++ b/src/documents/migrations/0012_auto_20160305_0040.py @@ -5,6 +5,7 @@ import re import shutil import subprocess import tempfile +from pathlib import Path import gnupg from django.conf import settings @@ -34,16 +35,16 @@ class GnuPG: def move_documents_and_create_thumbnails(apps, schema_editor): - os.makedirs( - os.path.join(settings.MEDIA_ROOT, "documents", "originals"), + (Path(settings.MEDIA_ROOT) / "documents" / "originals").mkdir( + parents=True, exist_ok=True, ) - os.makedirs( - os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails"), + (Path(settings.MEDIA_ROOT) / "documents" / "thumbnails").mkdir( + parents=True, exist_ok=True, ) - documents = os.listdir(os.path.join(settings.MEDIA_ROOT, "documents")) + documents: list[str] = os.listdir(Path(settings.MEDIA_ROOT) / "documents") if set(documents) == {"originals", "thumbnails"}: return @@ -60,10 +61,7 @@ def move_documents_and_create_thumbnails(apps, schema_editor): ), ) - try: - os.makedirs(settings.SCRATCH_DIR) - except FileExistsError: - pass + Path(settings.SCRATCH_DIR).mkdir(parents=True, exist_ok=True) for f in sorted(documents): if not f.endswith("gpg"): @@ -77,15 +75,14 @@ def 
move_documents_and_create_thumbnails(apps, schema_editor): ), ) - thumb_temp = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR) - orig_temp = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR) + thumb_temp: str = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR) + orig_temp: str = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR) - orig_source = os.path.join(settings.MEDIA_ROOT, "documents", f) - orig_target = os.path.join(orig_temp, f.replace(".gpg", "")) + orig_source: Path = Path(settings.MEDIA_ROOT) / "documents" / f + orig_target: Path = Path(orig_temp) / f.replace(".gpg", "") - with open(orig_source, "rb") as encrypted: - with open(orig_target, "wb") as unencrypted: - unencrypted.write(GnuPG.decrypted(encrypted)) + with orig_source.open("rb") as encrypted, orig_target.open("wb") as unencrypted: + unencrypted.write(GnuPG.decrypted(encrypted)) subprocess.Popen( ( @@ -95,27 +92,29 @@ def move_documents_and_create_thumbnails(apps, schema_editor): "-alpha", "remove", orig_target, - os.path.join(thumb_temp, "convert-%04d.png"), + Path(thumb_temp) / "convert-%04d.png", ), ).wait() - thumb_source = os.path.join(thumb_temp, "convert-0000.png") - thumb_target = os.path.join( - settings.MEDIA_ROOT, - "documents", - "thumbnails", - re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f), + thumb_source: Path = Path(thumb_temp) / "convert-0000.png" + thumb_target: Path = ( + Path(settings.MEDIA_ROOT) + / "documents" + / "thumbnails" + / re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f) ) - with open(thumb_source, "rb") as unencrypted: - with open(thumb_target, "wb") as encrypted: - encrypted.write(GnuPG.encrypted(unencrypted)) + with ( + thumb_source.open("rb") as unencrypted, + thumb_target.open("wb") as encrypted, + ): + encrypted.write(GnuPG.encrypted(unencrypted)) shutil.rmtree(thumb_temp) shutil.rmtree(orig_temp) shutil.move( - os.path.join(settings.MEDIA_ROOT, "documents", f), - os.path.join(settings.MEDIA_ROOT, "documents", 
"originals", f), + Path(settings.MEDIA_ROOT) / "documents" / f, + Path(settings.MEDIA_ROOT) / "documents" / "originals" / f, ) diff --git a/src/documents/migrations/0014_document_checksum.py b/src/documents/migrations/0014_document_checksum.py index f3da30eb6..dac945e32 100644 --- a/src/documents/migrations/0014_document_checksum.py +++ b/src/documents/migrations/0014_document_checksum.py @@ -1,7 +1,7 @@ # Generated by Django 1.9.4 on 2016-03-28 19:09 import hashlib -import os +from pathlib import Path import django.utils.timezone import gnupg @@ -58,16 +58,16 @@ class Document: @property def source_path(self): - return os.path.join( - settings.MEDIA_ROOT, - "documents", - "originals", - f"{self.pk:07}.{self.file_type}.gpg", - ) + return ( + Path(settings.MEDIA_ROOT) + / "documents" + / "originals" + / f"{self.pk:07}.{self.file_type}.gpg" + ).as_posix() @property def source_file(self): - return open(self.source_path, "rb") + return Path(self.source_path).open("rb") @property def file_name(self): diff --git a/src/documents/migrations/1003_mime_types.py b/src/documents/migrations/1003_mime_types.py index 446657495..4c7ddb492 100644 --- a/src/documents/migrations/1003_mime_types.py +++ b/src/documents/migrations/1003_mime_types.py @@ -1,5 +1,5 @@ # Generated by Django 3.1.3 on 2020-11-20 11:21 -import os +from pathlib import Path import magic from django.conf import settings @@ -12,15 +12,15 @@ STORAGE_TYPE_UNENCRYPTED = "unencrypted" STORAGE_TYPE_GPG = "gpg" -def source_path(self): +def source_path(self) -> Path: if self.filename: - fname = str(self.filename) + fname: str = str(self.filename) else: fname = f"{self.pk:07}.{self.file_type}" if self.storage_type == STORAGE_TYPE_GPG: fname += ".gpg" - return os.path.join(settings.ORIGINALS_DIR, fname) + return Path(settings.ORIGINALS_DIR) / fname def add_mime_types(apps, schema_editor): @@ -28,24 +28,22 @@ def add_mime_types(apps, schema_editor): documents = Document.objects.all() for d in documents: - f = 
open(source_path(d), "rb") - if d.storage_type == STORAGE_TYPE_GPG: - data = GnuPG.decrypted(f) - else: - data = f.read(1024) + with Path(source_path(d)).open("rb") as f: + if d.storage_type == STORAGE_TYPE_GPG: + data = GnuPG.decrypted(f) + else: + data = f.read(1024) d.mime_type = magic.from_buffer(data, mime=True) d.save() - f.close() - def add_file_extensions(apps, schema_editor): Document = apps.get_model("documents", "Document") documents = Document.objects.all() for d in documents: - d.file_type = os.path.splitext(d.filename)[1].strip(".") + d.file_type = Path(d.filename).suffix.lstrip(".") d.save() diff --git a/src/documents/models.py b/src/documents/models.py index 7cff304ad..4b3f97e50 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -315,7 +315,7 @@ class Document(SoftDeleteModel, ModelWithOwner): @property def source_file(self): - return open(self.source_path, "rb") + return Path(self.source_path).open("rb") @property def has_archive_version(self) -> bool: @@ -330,7 +330,7 @@ class Document(SoftDeleteModel, ModelWithOwner): @property def archive_file(self): - return open(self.archive_path, "rb") + return Path(self.archive_path).open("rb") def get_public_filename(self, *, archive=False, counter=0, suffix=None) -> str: """ @@ -367,7 +367,7 @@ class Document(SoftDeleteModel, ModelWithOwner): @property def thumbnail_file(self): - return open(self.thumbnail_path, "rb") + return Path(self.thumbnail_path).open("rb") @property def created_date(self): diff --git a/src/documents/tasks.py b/src/documents/tasks.py index 8a504d28d..e60418c3b 100644 --- a/src/documents/tasks.py +++ b/src/documents/tasks.py @@ -272,7 +272,7 @@ def update_document_content_maybe_archive_file(document_id): with transaction.atomic(): oldDocument = Document.objects.get(pk=document.pk) if parser.get_archive_path(): - with open(parser.get_archive_path(), "rb") as f: + with Path(parser.get_archive_path()).open("rb") as f: checksum = hashlib.md5(f.read()).hexdigest() # I'm 
going to save first so that in case the file move # fails, the database is rolled back. diff --git a/src/documents/tests/test_api_app_config.py b/src/documents/tests/test_api_app_config.py index 0d7771c07..df5f9e2ad 100644 --- a/src/documents/tests/test_api_app_config.py +++ b/src/documents/tests/test_api_app_config.py @@ -1,5 +1,5 @@ import json -import os +from pathlib import Path from django.contrib.auth.models import User from rest_framework import status @@ -136,10 +136,7 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase): THEN: - old app_logo file is deleted """ - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.jpg"), - "rb", - ) as f: + with (Path(__file__).parent / "samples" / "simple.jpg").open("rb") as f: self.client.patch( f"{self.ENDPOINT}1/", { @@ -148,15 +145,12 @@ class TestApiAppConfig(DirectoriesMixin, APITestCase): ) config = ApplicationConfiguration.objects.first() old_logo = config.app_logo - self.assertTrue(os.path.exists(old_logo.path)) - with open( - os.path.join(os.path.dirname(__file__), "samples", "simple.png"), - "rb", - ) as f: + self.assertTrue(Path(old_logo.path).exists()) + with (Path(__file__).parent / "samples" / "simple.png").open("rb") as f: self.client.patch( f"{self.ENDPOINT}1/", { "app_logo": f, }, ) - self.assertFalse(os.path.exists(old_logo.path)) + self.assertFalse(Path(old_logo.path).exists()) diff --git a/src/documents/tests/test_classifier.py b/src/documents/tests/test_classifier.py index f90a88050..d1bc8e04f 100644 --- a/src/documents/tests/test_classifier.py +++ b/src/documents/tests/test_classifier.py @@ -1,4 +1,3 @@ -import os import re import shutil from pathlib import Path @@ -617,7 +616,7 @@ class TestClassifier(DirectoriesMixin, TestCase): self.assertListEqual(self.classifier.predict_tags(doc2.content), []) def test_load_classifier_not_exists(self): - self.assertFalse(os.path.exists(settings.MODEL_FILE)) + self.assertFalse(Path(settings.MODEL_FILE).exists()) 
self.assertIsNone(load_classifier()) @mock.patch("documents.classifier.DocumentClassifier.load") @@ -632,7 +631,7 @@ class TestClassifier(DirectoriesMixin, TestCase): }, ) @override_settings( - MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"), + MODEL_FILE=(Path(__file__).parent / "data" / "model.pickle").as_posix(), ) @pytest.mark.skip( reason="Disabled caching due to high memory usage - need to investigate.", @@ -648,24 +647,24 @@ class TestClassifier(DirectoriesMixin, TestCase): @mock.patch("documents.classifier.DocumentClassifier.load") def test_load_classifier_incompatible_version(self, load): Path(settings.MODEL_FILE).touch() - self.assertTrue(os.path.exists(settings.MODEL_FILE)) + self.assertTrue(Path(settings.MODEL_FILE).exists()) load.side_effect = IncompatibleClassifierVersionError("Dummy Error") self.assertIsNone(load_classifier()) - self.assertFalse(os.path.exists(settings.MODEL_FILE)) + self.assertFalse(Path(settings.MODEL_FILE).exists()) @mock.patch("documents.classifier.DocumentClassifier.load") def test_load_classifier_os_error(self, load): Path(settings.MODEL_FILE).touch() - self.assertTrue(os.path.exists(settings.MODEL_FILE)) + self.assertTrue(Path(settings.MODEL_FILE).exists()) load.side_effect = OSError() self.assertIsNone(load_classifier()) - self.assertTrue(os.path.exists(settings.MODEL_FILE)) + self.assertTrue(Path(settings.MODEL_FILE).exists()) def test_load_old_classifier_version(self): shutil.copy( - os.path.join(os.path.dirname(__file__), "data", "v1.17.4.model.pickle"), + Path(__file__).parent / "data" / "v1.17.4.model.pickle", self.dirs.scratch_dir, ) with override_settings( diff --git a/src/documents/tests/test_management_thumbnails.py b/src/documents/tests/test_management_thumbnails.py index 4056b65fe..cb80e6c70 100644 --- a/src/documents/tests/test_management_thumbnails.py +++ b/src/documents/tests/test_management_thumbnails.py @@ -1,5 +1,5 @@ -import os import shutil +from pathlib import Path from unittest 
import mock from django.core.management import call_command @@ -22,7 +22,7 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase): filename="test.pdf", ) shutil.copy( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), + Path(__file__).parent / "samples" / "simple.pdf", self.d1.source_path, ) @@ -34,7 +34,7 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase): filename="test2.pdf", ) shutil.copy( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), + Path(__file__).parent / "samples" / "simple.pdf", self.d2.source_path, ) @@ -46,7 +46,7 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase): filename="test3.pdf", ) shutil.copy( - os.path.join(os.path.dirname(__file__), "samples", "password-is-test.pdf"), + Path(__file__).parent / "samples" / "password-is-test.pdf", self.d3.source_path, ) diff --git a/src/documents/tests/test_tasks.py b/src/documents/tests/test_tasks.py index 348eb0db5..11712549a 100644 --- a/src/documents/tests/test_tasks.py +++ b/src/documents/tests/test_tasks.py @@ -1,4 +1,3 @@ -import os import shutil from datetime import timedelta from pathlib import Path @@ -88,18 +87,18 @@ class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase): tasks.train_classifier() self.assertIsFile(settings.MODEL_FILE) - mtime = os.stat(settings.MODEL_FILE).st_mtime + mtime = Path(settings.MODEL_FILE).stat().st_mtime tasks.train_classifier() self.assertIsFile(settings.MODEL_FILE) - mtime2 = os.stat(settings.MODEL_FILE).st_mtime + mtime2 = Path(settings.MODEL_FILE).stat().st_mtime self.assertEqual(mtime, mtime2) doc.content = "test2" doc.save() tasks.train_classifier() self.assertIsFile(settings.MODEL_FILE) - mtime3 = os.stat(settings.MODEL_FILE).st_mtime + mtime3 = Path(settings.MODEL_FILE).stat().st_mtime self.assertNotEqual(mtime2, mtime3) diff --git a/src/documents/tests/test_views.py b/src/documents/tests/test_views.py index 
9f52a6aa4..4c987e3af 100644 --- a/src/documents/tests/test_views.py +++ b/src/documents/tests/test_views.py @@ -1,6 +1,6 @@ -import os import tempfile from datetime import timedelta +from pathlib import Path from django.conf import settings from django.contrib.auth.models import Permission @@ -107,12 +107,12 @@ class TestViews(DirectoriesMixin, TestCase): content = b"This is a test" - with open(filename, "wb") as f: + with Path(filename).open("wb") as f: f.write(content) doc = Document.objects.create( title="none", - filename=os.path.basename(filename), + filename=Path(filename).name, mime_type="application/pdf", ) diff --git a/src/paperless/tests/test_checks.py b/src/paperless/tests/test_checks.py index d2ea9102b..ff6c25e43 100644 --- a/src/paperless/tests/test_checks.py +++ b/src/paperless/tests/test_checks.py @@ -38,9 +38,9 @@ class TestChecks(DirectoriesMixin, TestCase): self.assertTrue(msg.msg.endswith("is set but doesn't exist.")) def test_paths_check_no_access(self): - os.chmod(self.dirs.data_dir, 0o000) - os.chmod(self.dirs.media_dir, 0o000) - os.chmod(self.dirs.consumption_dir, 0o000) + Path(self.dirs.data_dir).chmod(0o000) + Path(self.dirs.media_dir).chmod(0o000) + Path(self.dirs.consumption_dir).chmod(0o000) self.addCleanup(os.chmod, self.dirs.data_dir, 0o777) self.addCleanup(os.chmod, self.dirs.media_dir, 0o777) diff --git a/src/paperless/urls.py b/src/paperless/urls.py index fa237fe5c..4c81a4e48 100644 --- a/src/paperless/urls.py +++ b/src/paperless/urls.py @@ -1,4 +1,4 @@ -import os +from pathlib import Path from allauth.account import views as allauth_account_views from allauth.mfa.base import views as allauth_mfa_views @@ -270,7 +270,7 @@ urlpatterns = [ re_path( r"^logo(?P<path>.*)$", serve, - kwargs={"document_root": os.path.join(settings.MEDIA_ROOT, "logo")}, + kwargs={"document_root": Path(settings.MEDIA_ROOT) / "logo"}, ), # allauth path( diff --git a/src/paperless_mail/preprocessor.py b/src/paperless_mail/preprocessor.py index 7e0c76780..d33f80c72 
100644 --- a/src/paperless_mail/preprocessor.py +++ b/src/paperless_mail/preprocessor.py @@ -1,8 +1,8 @@ import abc -import os from email import message_from_bytes from email import policy from email.message import Message +from pathlib import Path from django.conf import settings from gnupg import GPG @@ -50,7 +50,7 @@ class MailMessageDecryptor(MailMessagePreprocessor, LoggingMixin): return False if settings.EMAIL_GNUPG_HOME is None: return True - return os.path.isdir(settings.EMAIL_GNUPG_HOME) + return Path(settings.EMAIL_GNUPG_HOME).is_dir() def run(self, message: MailMessage) -> MailMessage: if not hasattr(message, "obj"): diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index a8be899f5..64c3030c7 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -159,7 +159,7 @@ class RasterisedDocumentParser(DocumentParser): # the whole text, so do not utilize it in that case if ( sidecar_file is not None - and os.path.isfile(sidecar_file) + and sidecar_file.is_file() and self.settings.mode != "redo" ): text = self.read_file_handle_unicode_errors(sidecar_file) @@ -174,7 +174,7 @@ class RasterisedDocumentParser(DocumentParser): # no success with the sidecar file, try PDF - if not os.path.isfile(pdf_file): + if not Path(pdf_file).is_file(): return None try: @@ -368,8 +368,8 @@ class RasterisedDocumentParser(DocumentParser): from ocrmypdf import SubprocessOutputError from ocrmypdf.exceptions import DigitalSignatureError - archive_path = Path(os.path.join(self.tempdir, "archive.pdf")) - sidecar_file = Path(os.path.join(self.tempdir, "sidecar.txt")) + archive_path = Path(self.tempdir) / "archive.pdf" + sidecar_file = Path(self.tempdir) / "sidecar.txt" args = self.construct_ocrmypdf_parameters( document_path, @@ -412,12 +412,8 @@ class RasterisedDocumentParser(DocumentParser): f"Attempting force OCR to get the text.", ) - archive_path_fallback = Path( - os.path.join(self.tempdir, "archive-fallback.pdf"), 
- ) - sidecar_file_fallback = Path( - os.path.join(self.tempdir, "sidecar-fallback.txt"), - ) + archive_path_fallback = Path(self.tempdir) / "archive-fallback.pdf" + sidecar_file_fallback = Path(self.tempdir) / "sidecar-fallback.txt" # Attempt to run OCR with safe settings. diff --git a/src/paperless_tika/tests/test_live_tika.py b/src/paperless_tika/tests/test_live_tika.py index 7d8cffffd..6bb4ff021 100644 --- a/src/paperless_tika/tests/test_live_tika.py +++ b/src/paperless_tika/tests/test_live_tika.py @@ -75,7 +75,7 @@ class TestTikaParserAgainstServer: == "This is an DOCX test document, also made September 14, 2022" ) assert tika_parser.archive_path is not None - with open(tika_parser.archive_path, "rb") as f: + with Path(tika_parser.archive_path).open("rb") as f: assert b"PDF-" in f.read()[:10] # self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14)) @@ -104,7 +104,7 @@ class TestTikaParserAgainstServer: in tika_parser.text ) assert tika_parser.archive_path is not None - with open(tika_parser.archive_path, "rb") as f: + with Path(tika_parser.archive_path).open("rb") as f: assert b"PDF-" in f.read()[:10] def test_tika_fails_multi_part( @@ -130,5 +130,5 @@ class TestTikaParserAgainstServer: ) assert tika_parser.archive_path is not None - with open(tika_parser.archive_path, "rb") as f: + with Path(tika_parser.archive_path).open("rb") as f: assert b"PDF-" in f.read()[:10] diff --git a/src/paperless_tika/tests/test_tika_parser.py b/src/paperless_tika/tests/test_tika_parser.py index cebae2486..05bc4fe2e 100644 --- a/src/paperless_tika/tests/test_tika_parser.py +++ b/src/paperless_tika/tests/test_tika_parser.py @@ -38,7 +38,7 @@ class TestTikaParser: assert tika_parser.text == "the content" assert tika_parser.archive_path is not None - with open(tika_parser.archive_path, "rb") as f: + with Path(tika_parser.archive_path).open("rb") as f: assert f.read() == b"PDF document" assert tika_parser.date == datetime.datetime(