Chore: switch from os.path to pathlib.Path (#10539)

This commit is contained in:
Sebastian Steinbeißer
2025-09-03 17:12:41 +02:00
committed by GitHub
parent cc621cf729
commit d2064a2535
11 changed files with 151 additions and 165 deletions

View File

@@ -205,18 +205,9 @@ lint.per-file-ignores."docker/wait-for-redis.py" = [
"INP001", "INP001",
"T201", "T201",
] ]
lint.per-file-ignores."src/documents/management/commands/document_consumer.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/migrations/1012_fix_archive_files.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/documents/models.py" = [ lint.per-file-ignores."src/documents/models.py" = [
"SIM115", "SIM115",
] ]
lint.per-file-ignores."src/documents/parsers.py" = [
"PTH",
] # TODO Enable & remove
lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [ lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
"RUF001", "RUF001",
] ]

View File

@@ -32,7 +32,7 @@ except ImportError: # pragma: no cover
logger = logging.getLogger("paperless.management.consumer") logger = logging.getLogger("paperless.management.consumer")
def _tags_from_path(filepath) -> list[int]: def _tags_from_path(filepath: Path) -> list[int]:
""" """
Walk up the directory tree from filepath to CONSUMPTION_DIR Walk up the directory tree from filepath to CONSUMPTION_DIR
and get or create Tag IDs for every directory. and get or create Tag IDs for every directory.
@@ -41,7 +41,7 @@ def _tags_from_path(filepath) -> list[int]:
""" """
db.close_old_connections() db.close_old_connections()
tag_ids = set() tag_ids = set()
path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts path_parts = filepath.relative_to(settings.CONSUMPTION_DIR).parent.parts
for part in path_parts: for part in path_parts:
tag_ids.add( tag_ids.add(
Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk, Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk,
@@ -50,17 +50,13 @@ def _tags_from_path(filepath) -> list[int]:
return list(tag_ids) return list(tag_ids)
def _is_ignored(filepath: str) -> bool: def _is_ignored(filepath: Path) -> bool:
""" """
Checks if the given file should be ignored, based on configured Checks if the given file should be ignored, based on configured
patterns. patterns.
Returns True if the file is ignored, False otherwise Returns True if the file is ignored, False otherwise
""" """
filepath = os.path.abspath(
os.path.normpath(filepath),
)
# Trim out the consume directory, leaving only filename and its # Trim out the consume directory, leaving only filename and its
# path relative to the consume directory # path relative to the consume directory
filepath_relative = PurePath(filepath).relative_to(settings.CONSUMPTION_DIR) filepath_relative = PurePath(filepath).relative_to(settings.CONSUMPTION_DIR)
@@ -85,15 +81,15 @@ def _is_ignored(filepath: str) -> bool:
return False return False
def _consume(filepath: str) -> None: def _consume(filepath: Path) -> None:
if os.path.isdir(filepath) or _is_ignored(filepath): if filepath.is_dir() or _is_ignored(filepath):
return return
if not os.path.isfile(filepath): if not filepath.is_file():
logger.debug(f"Not consuming file {filepath}: File has moved.") logger.debug(f"Not consuming file {filepath}: File has moved.")
return return
if not is_file_ext_supported(os.path.splitext(filepath)[1]): if not is_file_ext_supported(filepath.suffix):
logger.warning(f"Not consuming file {filepath}: Unknown file extension.") logger.warning(f"Not consuming file {filepath}: Unknown file extension.")
return return
@@ -107,7 +103,7 @@ def _consume(filepath: str) -> None:
while (read_try_count < os_error_retry_count) and not file_open_ok: while (read_try_count < os_error_retry_count) and not file_open_ok:
try: try:
with open(filepath, "rb"): with filepath.open("rb"):
file_open_ok = True file_open_ok = True
except OSError as e: except OSError as e:
read_try_count += 1 read_try_count += 1
@@ -141,7 +137,7 @@ def _consume(filepath: str) -> None:
logger.exception("Error while consuming document") logger.exception("Error while consuming document")
def _consume_wait_unmodified(file: str) -> None: def _consume_wait_unmodified(file: Path) -> None:
""" """
Waits for the given file to appear unmodified based on file size Waits for the given file to appear unmodified based on file size
and modification time. Will wait a configured number of seconds and modification time. Will wait a configured number of seconds
@@ -157,7 +153,7 @@ def _consume_wait_unmodified(file: str) -> None:
current_try = 0 current_try = 0
while current_try < settings.CONSUMER_POLLING_RETRY_COUNT: while current_try < settings.CONSUMER_POLLING_RETRY_COUNT:
try: try:
stat_data = os.stat(file) stat_data = file.stat()
new_mtime = stat_data.st_mtime new_mtime = stat_data.st_mtime
new_size = stat_data.st_size new_size = stat_data.st_size
except FileNotFoundError: except FileNotFoundError:
@@ -182,10 +178,10 @@ class Handler(FileSystemEventHandler):
self._pool = pool self._pool = pool
def on_created(self, event): def on_created(self, event):
self._pool.submit(_consume_wait_unmodified, event.src_path) self._pool.submit(_consume_wait_unmodified, Path(event.src_path))
def on_moved(self, event): def on_moved(self, event):
self._pool.submit(_consume_wait_unmodified, event.dest_path) self._pool.submit(_consume_wait_unmodified, Path(event.dest_path))
class Command(BaseCommand): class Command(BaseCommand):
@@ -227,9 +223,9 @@ class Command(BaseCommand):
if not directory: if not directory:
raise CommandError("CONSUMPTION_DIR does not appear to be set.") raise CommandError("CONSUMPTION_DIR does not appear to be set.")
directory = os.path.abspath(directory) directory = Path(directory).resolve()
if not os.path.isdir(directory): if not directory.is_dir():
raise CommandError(f"Consumption directory {directory} does not exist") raise CommandError(f"Consumption directory {directory} does not exist")
# Consumer will need this # Consumer will need this
@@ -238,11 +234,11 @@ class Command(BaseCommand):
if recursive: if recursive:
for dirpath, _, filenames in os.walk(directory): for dirpath, _, filenames in os.walk(directory):
for filename in filenames: for filename in filenames:
filepath = os.path.join(dirpath, filename) filepath = Path(dirpath) / filename
_consume(filepath) _consume(filepath)
else: else:
for entry in os.scandir(directory): for filepath in directory.iterdir():
_consume(entry.path) _consume(filepath)
if options["oneshot"]: if options["oneshot"]:
return return
@@ -310,7 +306,7 @@ class Command(BaseCommand):
try: try:
for event in inotify.read(timeout=timeout_ms): for event in inotify.read(timeout=timeout_ms):
path = inotify.get_path(event.wd) if recursive else directory path = inotify.get_path(event.wd) if recursive else directory
filepath = os.path.join(path, event.name) filepath = Path(path) / event.name
if flags.MODIFY in flags.from_mask(event.mask): if flags.MODIFY in flags.from_mask(event.mask):
notified_files.pop(filepath, None) notified_files.pop(filepath, None)
else: else:
@@ -327,9 +323,7 @@ class Command(BaseCommand):
# Also make sure the file exists still, some scanners might write a # Also make sure the file exists still, some scanners might write a
# temporary file first # temporary file first
file_still_exists = os.path.exists(filepath) and os.path.isfile( file_still_exists = filepath.exists() and filepath.is_file()
filepath,
)
if waited_long_enough and file_still_exists: if waited_long_enough and file_still_exists:
_consume(filepath) _consume(filepath)

View File

@@ -5,6 +5,7 @@ import logging
import os import os
import shutil import shutil
from collections import defaultdict from collections import defaultdict
from pathlib import Path
from time import sleep from time import sleep
import pathvalidate import pathvalidate
@@ -50,38 +51,38 @@ def many_to_dictionary(field): # pragma: no cover
return mydictionary return mydictionary
def archive_name_from_filename(filename): def archive_name_from_filename(filename: Path) -> Path:
return os.path.splitext(filename)[0] + ".pdf" return Path(filename.stem + ".pdf")
def archive_path_old(doc): def archive_path_old(doc) -> Path:
if doc.filename: if doc.filename:
fname = archive_name_from_filename(doc.filename) fname = archive_name_from_filename(Path(doc.filename))
else: else:
fname = f"{doc.pk:07}.pdf" fname = Path(f"{doc.pk:07}.pdf")
return os.path.join(settings.ARCHIVE_DIR, fname) return settings.ARCHIVE_DIR / fname
STORAGE_TYPE_GPG = "gpg" STORAGE_TYPE_GPG = "gpg"
def archive_path_new(doc): def archive_path_new(doc) -> Path | None:
if doc.archive_filename is not None: if doc.archive_filename is not None:
return os.path.join(settings.ARCHIVE_DIR, str(doc.archive_filename)) return settings.ARCHIVE_DIR / doc.archive_filename
else: else:
return None return None
def source_path(doc): def source_path(doc) -> Path:
if doc.filename: if doc.filename:
fname = str(doc.filename) fname = doc.filename
else: else:
fname = f"{doc.pk:07}{doc.file_type}" fname = f"{doc.pk:07}{doc.file_type}"
if doc.storage_type == STORAGE_TYPE_GPG: if doc.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg" # pragma: no cover fname = Path(str(fname) + ".gpg") # pragma: no cover
return os.path.join(settings.ORIGINALS_DIR, fname) return settings.ORIGINALS_DIR / fname
def generate_unique_filename(doc, *, archive_filename=False): def generate_unique_filename(doc, *, archive_filename=False):
@@ -104,7 +105,7 @@ def generate_unique_filename(doc, *, archive_filename=False):
# still the same as before. # still the same as before.
return new_filename return new_filename
if os.path.exists(os.path.join(root, new_filename)): if (root / new_filename).exists():
counter += 1 counter += 1
else: else:
return new_filename return new_filename
@@ -202,18 +203,18 @@ def create_archive_version(doc, retry_count=3):
parser, parser,
source_path(doc), source_path(doc),
doc.mime_type, doc.mime_type,
os.path.basename(doc.filename), Path(doc.filename).name,
) )
doc.content = parser.get_text() doc.content = parser.get_text()
if parser.get_archive_path() and os.path.isfile(parser.get_archive_path()): if parser.get_archive_path() and Path(parser.get_archive_path()).is_file():
doc.archive_filename = generate_unique_filename( doc.archive_filename = generate_unique_filename(
doc, doc,
archive_filename=True, archive_filename=True,
) )
with open(parser.get_archive_path(), "rb") as f: with Path(parser.get_archive_path()).open("rb") as f:
doc.archive_checksum = hashlib.md5(f.read()).hexdigest() doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
os.makedirs(os.path.dirname(archive_path_new(doc)), exist_ok=True) archive_path_new(doc).parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(parser.get_archive_path(), archive_path_new(doc)) shutil.copy2(parser.get_archive_path(), archive_path_new(doc))
else: else:
doc.archive_checksum = None doc.archive_checksum = None
@@ -264,7 +265,7 @@ def move_old_to_new_locations(apps, schema_editor):
# check that archive files of all unaffected documents are in place # check that archive files of all unaffected documents are in place
for doc in Document.objects.filter(archive_checksum__isnull=False): for doc in Document.objects.filter(archive_checksum__isnull=False):
old_path = archive_path_old(doc) old_path = archive_path_old(doc)
if doc.id not in affected_document_ids and not os.path.isfile(old_path): if doc.id not in affected_document_ids and not old_path.is_file():
raise ValueError( raise ValueError(
f"Archived document ID:{doc.id} does not exist at: {old_path}", f"Archived document ID:{doc.id} does not exist at: {old_path}",
) )
@@ -285,12 +286,12 @@ def move_old_to_new_locations(apps, schema_editor):
if doc.id in affected_document_ids: if doc.id in affected_document_ids:
old_path = archive_path_old(doc) old_path = archive_path_old(doc)
# remove affected archive versions # remove affected archive versions
if os.path.isfile(old_path): if old_path.is_file():
logger.debug(f"Removing {old_path}") logger.debug(f"Removing {old_path}")
os.unlink(old_path) old_path.unlink()
else: else:
# Set archive path for unaffected files # Set archive path for unaffected files
doc.archive_filename = archive_name_from_filename(doc.filename) doc.archive_filename = archive_name_from_filename(Path(doc.filename))
Document.objects.filter(id=doc.id).update( Document.objects.filter(id=doc.id).update(
archive_filename=doc.archive_filename, archive_filename=doc.archive_filename,
) )
@@ -316,7 +317,7 @@ def move_new_to_old_locations(apps, schema_editor):
f"filename.", f"filename.",
) )
old_archive_paths.add(old_archive_path) old_archive_paths.add(old_archive_path)
if new_archive_path != old_archive_path and os.path.isfile(old_archive_path): if new_archive_path != old_archive_path and old_archive_path.is_file():
raise ValueError( raise ValueError(
f"Cannot migrate: Cannot move {new_archive_path} to " f"Cannot migrate: Cannot move {new_archive_path} to "
f"{old_archive_path}: file already exists.", f"{old_archive_path}: file already exists.",

View File

@@ -169,7 +169,7 @@ def run_convert(
args += ["-depth", str(depth)] if depth else [] args += ["-depth", str(depth)] if depth else []
args += ["-auto-orient"] if auto_orient else [] args += ["-auto-orient"] if auto_orient else []
args += ["-define", "pdf:use-cropbox=true"] if use_cropbox else [] args += ["-define", "pdf:use-cropbox=true"] if use_cropbox else []
args += [input_file, output_file] args += [str(input_file), str(output_file)]
logger.debug("Execute: " + " ".join(args), extra={"group": logging_group}) logger.debug("Execute: " + " ".join(args), extra={"group": logging_group})
@@ -188,8 +188,8 @@ def get_default_thumbnail() -> Path:
return (Path(__file__).parent / "resources" / "document.webp").resolve() return (Path(__file__).parent / "resources" / "document.webp").resolve()
def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -> str: def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -> Path:
out_path = os.path.join(temp_dir, "convert_gs.webp") out_path: Path = Path(temp_dir) / "convert_gs.webp"
# if convert fails, fall back to extracting # if convert fails, fall back to extracting
# the first PDF page as a PNG using Ghostscript # the first PDF page as a PNG using Ghostscript
@@ -199,7 +199,7 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -
extra={"group": logging_group}, extra={"group": logging_group},
) )
# Ghostscript doesn't handle WebP outputs # Ghostscript doesn't handle WebP outputs
gs_out_path = os.path.join(temp_dir, "gs_out.png") gs_out_path: Path = Path(temp_dir) / "gs_out.png"
cmd = [settings.GS_BINARY, "-q", "-sDEVICE=pngalpha", "-o", gs_out_path, in_path] cmd = [settings.GS_BINARY, "-q", "-sDEVICE=pngalpha", "-o", gs_out_path, in_path]
try: try:
@@ -227,16 +227,16 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -
# The caller might expect a generated thumbnail that can be moved, # The caller might expect a generated thumbnail that can be moved,
# so we need to copy it before it gets moved. # so we need to copy it before it gets moved.
# https://github.com/paperless-ngx/paperless-ngx/issues/3631 # https://github.com/paperless-ngx/paperless-ngx/issues/3631
default_thumbnail_path = os.path.join(temp_dir, "document.webp") default_thumbnail_path: Path = Path(temp_dir) / "document.webp"
copy_file_with_basic_stats(get_default_thumbnail(), default_thumbnail_path) copy_file_with_basic_stats(get_default_thumbnail(), default_thumbnail_path)
return default_thumbnail_path return default_thumbnail_path
def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None) -> Path: def make_thumbnail_from_pdf(in_path: Path, temp_dir: Path, logging_group=None) -> Path:
""" """
The thumbnail of a PDF is just a 500px wide image of the first page. The thumbnail of a PDF is just a 500px wide image of the first page.
""" """
out_path = temp_dir / "convert.webp" out_path: Path = temp_dir / "convert.webp"
# Run convert to get a decent thumbnail # Run convert to get a decent thumbnail
try: try:

View File

@@ -654,7 +654,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
}, },
) )
@override_settings( @override_settings(
MODEL_FILE=(Path(__file__).parent / "data" / "model.pickle").as_posix(), MODEL_FILE=str(Path(__file__).parent / "data" / "model.pickle"),
) )
@pytest.mark.skip( @pytest.mark.skip(
reason="Disabled caching due to high memory usage - need to investigate.", reason="Disabled caching due to high memory usage - need to investigate.",

View File

@@ -254,7 +254,7 @@ class TestConsumer(
# https://github.com/jonaswinkler/paperless-ng/discussions/1037 # https://github.com/jonaswinkler/paperless-ng/discussions/1037
filename = self.get_test_file() filename = self.get_test_file()
shadow_file = Path(self.dirs.scratch_dir / "._sample.pdf") shadow_file = Path(self.dirs.scratch_dir) / "._sample.pdf"
shutil.copy(filename, shadow_file) shutil.copy(filename, shadow_file)

View File

@@ -258,66 +258,66 @@ class TestConsumer(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCase):
def test_is_ignored(self): def test_is_ignored(self):
test_paths = [ test_paths = [
{ {
"path": (Path(self.dirs.consumption_dir) / "foo.pdf").as_posix(), "path": str(Path(self.dirs.consumption_dir) / "foo.pdf"),
"ignore": False, "ignore": False,
}, },
{ {
"path": ( "path": str(
Path(self.dirs.consumption_dir) / "foo" / "bar.pdf" Path(self.dirs.consumption_dir) / "foo" / "bar.pdf",
).as_posix(), ),
"ignore": False, "ignore": False,
}, },
{ {
"path": (Path(self.dirs.consumption_dir) / ".DS_STORE").as_posix(), "path": str(Path(self.dirs.consumption_dir) / ".DS_STORE"),
"ignore": True, "ignore": True,
}, },
{ {
"path": (Path(self.dirs.consumption_dir) / ".DS_Store").as_posix(), "path": str(Path(self.dirs.consumption_dir) / ".DS_Store"),
"ignore": True, "ignore": True,
}, },
{ {
"path": ( "path": str(
Path(self.dirs.consumption_dir) / ".stfolder" / "foo.pdf" Path(self.dirs.consumption_dir) / ".stfolder" / "foo.pdf",
).as_posix(), ),
"ignore": True, "ignore": True,
}, },
{ {
"path": (Path(self.dirs.consumption_dir) / ".stfolder.pdf").as_posix(), "path": str(Path(self.dirs.consumption_dir) / ".stfolder.pdf"),
"ignore": False, "ignore": False,
}, },
{ {
"path": ( "path": str(
Path(self.dirs.consumption_dir) / ".stversions" / "foo.pdf" Path(self.dirs.consumption_dir) / ".stversions" / "foo.pdf",
).as_posix(), ),
"ignore": True, "ignore": True,
}, },
{ {
"path": ( "path": str(
Path(self.dirs.consumption_dir) / ".stversions.pdf" Path(self.dirs.consumption_dir) / ".stversions.pdf",
).as_posix(), ),
"ignore": False, "ignore": False,
}, },
{ {
"path": (Path(self.dirs.consumption_dir) / "._foo.pdf").as_posix(), "path": str(Path(self.dirs.consumption_dir) / "._foo.pdf"),
"ignore": True, "ignore": True,
}, },
{ {
"path": (Path(self.dirs.consumption_dir) / "my_foo.pdf").as_posix(), "path": str(Path(self.dirs.consumption_dir) / "my_foo.pdf"),
"ignore": False, "ignore": False,
}, },
{ {
"path": ( "path": str(
Path(self.dirs.consumption_dir) / "._foo" / "bar.pdf" Path(self.dirs.consumption_dir) / "._foo" / "bar.pdf",
).as_posix(), ),
"ignore": True, "ignore": True,
}, },
{ {
"path": ( "path": str(
Path(self.dirs.consumption_dir) Path(self.dirs.consumption_dir)
/ "@eaDir" / "@eaDir"
/ "SYNO@.fileindexdb" / "SYNO@.fileindexdb"
/ "_1jk.fnm" / "_1jk.fnm",
).as_posix(), ),
"ignore": True, "ignore": True,
}, },
] ]
@@ -330,7 +330,7 @@ class TestConsumer(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCase):
f'_is_ignored("{filepath}") != {expected_ignored_result}', f'_is_ignored("{filepath}") != {expected_ignored_result}',
) )
@mock.patch("documents.management.commands.document_consumer.open") @mock.patch("documents.management.commands.document_consumer.Path.open")
def test_consume_file_busy(self, open_mock): def test_consume_file_busy(self, open_mock):
# Calling this mock always raises this # Calling this mock always raises this
open_mock.side_effect = OSError open_mock.side_effect = OSError

View File

@@ -230,9 +230,9 @@ class TestExportImport(
for element in manifest: for element in manifest:
if element["model"] == "documents.document": if element["model"] == "documents.document":
fname = ( fname = str(
self.target / element[document_exporter.EXPORTER_FILE_NAME] self.target / element[document_exporter.EXPORTER_FILE_NAME],
).as_posix() )
self.assertIsFile(fname) self.assertIsFile(fname)
self.assertIsFile( self.assertIsFile(
self.target / element[document_exporter.EXPORTER_THUMBNAIL_NAME], self.target / element[document_exporter.EXPORTER_THUMBNAIL_NAME],
@@ -462,9 +462,9 @@ class TestExportImport(
call_command(*args) call_command(*args)
expected_file = ( expected_file = str(
self.target / f"export-{timezone.localdate().isoformat()}.zip" self.target / f"export-{timezone.localdate().isoformat()}.zip",
).as_posix() )
self.assertIsFile(expected_file) self.assertIsFile(expected_file)
@@ -498,9 +498,9 @@ class TestExportImport(
): ):
call_command(*args) call_command(*args)
expected_file = ( expected_file = str(
self.target / f"export-{timezone.localdate().isoformat()}.zip" self.target / f"export-{timezone.localdate().isoformat()}.zip",
).as_posix() )
self.assertIsFile(expected_file) self.assertIsFile(expected_file)
@@ -544,9 +544,9 @@ class TestExportImport(
call_command(*args) call_command(*args)
expected_file = ( expected_file = str(
self.target / f"export-{timezone.localdate().isoformat()}.zip" self.target / f"export-{timezone.localdate().isoformat()}.zip",
).as_posix() )
self.assertIsFile(expected_file) self.assertIsFile(expected_file)
self.assertIsNotFile(existing_file) self.assertIsNotFile(existing_file)

View File

@@ -19,15 +19,15 @@ migration_1012_obj = importlib.import_module(
) )
def archive_name_from_filename(filename): def archive_name_from_filename(filename: Path) -> Path:
return Path(filename).stem + ".pdf" return Path(filename.stem + ".pdf")
def archive_path_old(self): def archive_path_old(self) -> Path:
if self.filename: if self.filename:
fname = archive_name_from_filename(self.filename) fname = archive_name_from_filename(Path(self.filename))
else: else:
fname = f"{self.pk:07}.pdf" fname = Path(f"{self.pk:07}.pdf")
return Path(settings.ARCHIVE_DIR) / fname return Path(settings.ARCHIVE_DIR) / fname

View File

@@ -679,7 +679,7 @@ def _parse_db_settings() -> dict:
databases = { databases = {
"default": { "default": {
"ENGINE": "django.db.backends.sqlite3", "ENGINE": "django.db.backends.sqlite3",
"NAME": str(DATA_DIR / "db.sqlite3"), "NAME": DATA_DIR / "db.sqlite3",
"OPTIONS": {}, "OPTIONS": {},
}, },
} }
@@ -807,7 +807,7 @@ LANGUAGES = [
("zh-tw", _("Chinese Traditional")), ("zh-tw", _("Chinese Traditional")),
] ]
LOCALE_PATHS = [str(BASE_DIR / "locale")] LOCALE_PATHS = [BASE_DIR / "locale"]
TIME_ZONE = os.getenv("PAPERLESS_TIME_ZONE", "UTC") TIME_ZONE = os.getenv("PAPERLESS_TIME_ZONE", "UTC")
@@ -848,21 +848,21 @@ LOGGING = {
"file_paperless": { "file_paperless": {
"class": "concurrent_log_handler.ConcurrentRotatingFileHandler", "class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
"formatter": "verbose", "formatter": "verbose",
"filename": str(LOGGING_DIR / "paperless.log"), "filename": LOGGING_DIR / "paperless.log",
"maxBytes": LOGROTATE_MAX_SIZE, "maxBytes": LOGROTATE_MAX_SIZE,
"backupCount": LOGROTATE_MAX_BACKUPS, "backupCount": LOGROTATE_MAX_BACKUPS,
}, },
"file_mail": { "file_mail": {
"class": "concurrent_log_handler.ConcurrentRotatingFileHandler", "class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
"formatter": "verbose", "formatter": "verbose",
"filename": str(LOGGING_DIR / "mail.log"), "filename": LOGGING_DIR / "mail.log",
"maxBytes": LOGROTATE_MAX_SIZE, "maxBytes": LOGROTATE_MAX_SIZE,
"backupCount": LOGROTATE_MAX_BACKUPS, "backupCount": LOGROTATE_MAX_BACKUPS,
}, },
"file_celery": { "file_celery": {
"class": "concurrent_log_handler.ConcurrentRotatingFileHandler", "class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
"formatter": "verbose", "formatter": "verbose",
"filename": str(LOGGING_DIR / "celery.log"), "filename": LOGGING_DIR / "celery.log",
"maxBytes": LOGROTATE_MAX_SIZE, "maxBytes": LOGROTATE_MAX_SIZE,
"backupCount": LOGROTATE_MAX_BACKUPS, "backupCount": LOGROTATE_MAX_BACKUPS,
}, },
@@ -921,7 +921,7 @@ CELERY_ACCEPT_CONTENT = ["application/json", "application/x-python-serialize"]
CELERY_BEAT_SCHEDULE = _parse_beat_schedule() CELERY_BEAT_SCHEDULE = _parse_beat_schedule()
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename # https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db") CELERY_BEAT_SCHEDULE_FILENAME = DATA_DIR / "celerybeat-schedule.db"
# Cachalot: Database read cache. # Cachalot: Database read cache.

View File

@@ -69,13 +69,13 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
parser = RasterisedDocumentParser(uuid.uuid4()) parser = RasterisedDocumentParser(uuid.uuid4())
page_count = parser.get_page_count( page_count = parser.get_page_count(
(self.SAMPLE_FILES / "simple-digital.pdf").as_posix(), str(self.SAMPLE_FILES / "simple-digital.pdf"),
"application/pdf", "application/pdf",
) )
self.assertEqual(page_count, 1) self.assertEqual(page_count, 1)
page_count = parser.get_page_count( page_count = parser.get_page_count(
(self.SAMPLE_FILES / "multi-page-mixed.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-mixed.pdf"),
"application/pdf", "application/pdf",
) )
self.assertEqual(page_count, 6) self.assertEqual(page_count, 6)
@@ -92,7 +92,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
parser = RasterisedDocumentParser(uuid.uuid4()) parser = RasterisedDocumentParser(uuid.uuid4())
with self.assertLogs("paperless.parsing.tesseract", level="WARNING") as cm: with self.assertLogs("paperless.parsing.tesseract", level="WARNING") as cm:
page_count = parser.get_page_count( page_count = parser.get_page_count(
(self.SAMPLE_FILES / "password-protected.pdf").as_posix(), str(self.SAMPLE_FILES / "password-protected.pdf"),
"application/pdf", "application/pdf",
) )
self.assertEqual(page_count, None) self.assertEqual(page_count, None)
@@ -101,7 +101,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_thumbnail(self): def test_thumbnail(self):
parser = RasterisedDocumentParser(uuid.uuid4()) parser = RasterisedDocumentParser(uuid.uuid4())
thumb = parser.get_thumbnail( thumb = parser.get_thumbnail(
(self.SAMPLE_FILES / "simple-digital.pdf").as_posix(), str(self.SAMPLE_FILES / "simple-digital.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsFile(thumb) self.assertIsFile(thumb)
@@ -109,7 +109,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
@mock.patch("documents.parsers.run_convert") @mock.patch("documents.parsers.run_convert")
def test_thumbnail_fallback(self, m): def test_thumbnail_fallback(self, m):
def call_convert(input_file, output_file, **kwargs): def call_convert(input_file, output_file, **kwargs):
if ".pdf" in input_file: if ".pdf" in str(input_file):
raise ParseError("Does not compute.") raise ParseError("Does not compute.")
else: else:
run_convert(input_file=input_file, output_file=output_file, **kwargs) run_convert(input_file=input_file, output_file=output_file, **kwargs)
@@ -118,7 +118,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
parser = RasterisedDocumentParser(uuid.uuid4()) parser = RasterisedDocumentParser(uuid.uuid4())
thumb = parser.get_thumbnail( thumb = parser.get_thumbnail(
(self.SAMPLE_FILES / "simple-digital.pdf").as_posix(), str(self.SAMPLE_FILES / "simple-digital.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsFile(thumb) self.assertIsFile(thumb)
@@ -126,7 +126,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_thumbnail_encrypted(self): def test_thumbnail_encrypted(self):
parser = RasterisedDocumentParser(uuid.uuid4()) parser = RasterisedDocumentParser(uuid.uuid4())
thumb = parser.get_thumbnail( thumb = parser.get_thumbnail(
(self.SAMPLE_FILES / "encrypted.pdf").as_posix(), str(self.SAMPLE_FILES / "encrypted.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsFile(thumb) self.assertIsFile(thumb)
@@ -134,17 +134,17 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_get_dpi(self): def test_get_dpi(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
dpi = parser.get_dpi((self.SAMPLE_FILES / "simple-no-dpi.png").as_posix()) dpi = parser.get_dpi(str(self.SAMPLE_FILES / "simple-no-dpi.png"))
self.assertEqual(dpi, None) self.assertEqual(dpi, None)
dpi = parser.get_dpi((self.SAMPLE_FILES / "simple.png").as_posix()) dpi = parser.get_dpi(str(self.SAMPLE_FILES / "simple.png"))
self.assertEqual(dpi, 72) self.assertEqual(dpi, 72)
def test_simple_digital(self): def test_simple_digital(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "simple-digital.pdf").as_posix(), str(self.SAMPLE_FILES / "simple-digital.pdf"),
"application/pdf", "application/pdf",
) )
@@ -156,7 +156,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "with-form.pdf").as_posix(), str(self.SAMPLE_FILES / "with-form.pdf"),
"application/pdf", "application/pdf",
) )
@@ -172,7 +172,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "with-form.pdf").as_posix(), str(self.SAMPLE_FILES / "with-form.pdf"),
"application/pdf", "application/pdf",
) )
@@ -186,7 +186,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_signed(self): def test_signed(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse((self.SAMPLE_FILES / "signed.pdf").as_posix(), "application/pdf") parser.parse(str(self.SAMPLE_FILES / "signed.pdf"), "application/pdf")
self.assertIsNone(parser.archive_path) self.assertIsNone(parser.archive_path)
self.assertContainsStrings( self.assertContainsStrings(
@@ -202,7 +202,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "encrypted.pdf").as_posix(), str(self.SAMPLE_FILES / "encrypted.pdf"),
"application/pdf", "application/pdf",
) )
@@ -213,7 +213,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_with_form_error_notext(self): def test_with_form_error_notext(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "with-form.pdf").as_posix(), str(self.SAMPLE_FILES / "with-form.pdf"),
"application/pdf", "application/pdf",
) )
@@ -227,7 +227,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "with-form.pdf").as_posix(), str(self.SAMPLE_FILES / "with-form.pdf"),
"application/pdf", "application/pdf",
) )
@@ -239,7 +239,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_image_simple(self): def test_image_simple(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse((self.SAMPLE_FILES / "simple.png").as_posix(), "image/png") parser.parse(str(self.SAMPLE_FILES / "simple.png"), "image/png")
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
@@ -255,7 +255,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
dest_file = Path(tempdir) / "simple-alpha.png" dest_file = Path(tempdir) / "simple-alpha.png"
shutil.copy(sample_file, dest_file) shutil.copy(sample_file, dest_file)
parser.parse(dest_file.as_posix(), "image/png") parser.parse(str(dest_file), "image/png")
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
@@ -265,7 +265,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
dpi = parser.calculate_a4_dpi( dpi = parser.calculate_a4_dpi(
(self.SAMPLE_FILES / "simple-no-dpi.png").as_posix(), str(self.SAMPLE_FILES / "simple-no-dpi.png"),
) )
self.assertEqual(dpi, 62) self.assertEqual(dpi, 62)
@@ -277,7 +277,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def f(): def f():
parser.parse( parser.parse(
(self.SAMPLE_FILES / "simple-no-dpi.png").as_posix(), str(self.SAMPLE_FILES / "simple-no-dpi.png"),
"image/png", "image/png",
) )
@@ -287,7 +287,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_image_no_dpi_default(self): def test_image_no_dpi_default(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse((self.SAMPLE_FILES / "simple-no-dpi.png").as_posix(), "image/png") parser.parse(str(self.SAMPLE_FILES / "simple-no-dpi.png"), "image/png")
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
@@ -299,7 +299,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_multi_page(self): def test_multi_page(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
@@ -312,7 +312,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_multi_page_pages_skip(self): def test_multi_page_pages_skip(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
@@ -325,7 +325,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_multi_page_pages_redo(self): def test_multi_page_pages_redo(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
@@ -338,7 +338,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_multi_page_pages_force(self): def test_multi_page_pages_force(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
@@ -351,7 +351,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_multi_page_analog_pages_skip(self): def test_multi_page_analog_pages_skip(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-images.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
@@ -375,7 +375,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-images.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
@@ -397,7 +397,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-images.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
@@ -419,7 +419,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsNone(parser.archive_path) self.assertIsNone(parser.archive_path)
@@ -442,7 +442,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-images.pdf"),
"application/pdf", "application/pdf",
) )
@@ -467,7 +467,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsNotNone(parser.archive_path) self.assertIsNotNone(parser.archive_path)
@@ -490,7 +490,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-images.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsNotNone(parser.archive_path) self.assertIsNotNone(parser.archive_path)
@@ -513,7 +513,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsNone(parser.archive_path) self.assertIsNone(parser.archive_path)
@@ -536,7 +536,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-images.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsNotNone(parser.archive_path) self.assertIsNotNone(parser.archive_path)
@@ -559,7 +559,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsNone(parser.archive_path) self.assertIsNone(parser.archive_path)
@@ -582,7 +582,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-images.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsNone(parser.archive_path) self.assertIsNone(parser.archive_path)
@@ -605,7 +605,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-mixed.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-mixed.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsNotNone(parser.archive_path) self.assertIsNotNone(parser.archive_path)
@@ -636,7 +636,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "single-page-mixed.pdf").as_posix(), str(self.SAMPLE_FILES / "single-page-mixed.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsNotNone(parser.archive_path) self.assertIsNotNone(parser.archive_path)
@@ -673,7 +673,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-mixed.pdf").as_posix(), str(self.SAMPLE_FILES / "multi-page-mixed.pdf"),
"application/pdf", "application/pdf",
) )
self.assertIsNone(parser.archive_path) self.assertIsNone(parser.archive_path)
@@ -685,7 +685,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
@override_settings(OCR_MODE="skip", OCR_ROTATE_PAGES=True) @override_settings(OCR_MODE="skip", OCR_ROTATE_PAGES=True)
def test_rotate(self): def test_rotate(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse((self.SAMPLE_FILES / "rotated.pdf").as_posix(), "application/pdf") parser.parse(str(self.SAMPLE_FILES / "rotated.pdf"), "application/pdf")
self.assertContainsStrings( self.assertContainsStrings(
parser.get_text(), parser.get_text(),
[ [
@@ -707,7 +707,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
""" """
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "multi-page-images.tiff").as_posix(), str(self.SAMPLE_FILES / "multi-page-images.tiff"),
"image/tiff", "image/tiff",
) )
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
@@ -752,9 +752,9 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
- Text from all pages extracted - Text from all pages extracted
""" """
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
sample_file = ( sample_file = str(
self.SAMPLE_FILES / "multi-page-images-alpha-rgb.tiff" self.SAMPLE_FILES / "multi-page-images-alpha-rgb.tiff",
).as_posix() )
with tempfile.NamedTemporaryFile() as tmp_file: with tempfile.NamedTemporaryFile() as tmp_file:
shutil.copy(sample_file, tmp_file.name) shutil.copy(sample_file, tmp_file.name)
parser.parse( parser.parse(
@@ -843,7 +843,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "rtl-test.pdf").as_posix(), str(self.SAMPLE_FILES / "rtl-test.pdf"),
"application/pdf", "application/pdf",
) )
@@ -858,7 +858,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertRaises( self.assertRaises(
ParseError, ParseError,
parser.parse, parser.parse,
(self.SAMPLE_FILES / "simple-digital.pdf").as_posix(), str(self.SAMPLE_FILES / "simple-digital.pdf"),
"application/pdf", "application/pdf",
) )
@@ -868,32 +868,32 @@ class TestParserFileTypes(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_bmp(self): def test_bmp(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse((self.SAMPLE_FILES / "simple.bmp").as_posix(), "image/bmp") parser.parse(str(self.SAMPLE_FILES / "simple.bmp"), "image/bmp")
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
self.assertIn("this is a test document", parser.get_text().lower()) self.assertIn("this is a test document", parser.get_text().lower())
def test_jpg(self): def test_jpg(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse((self.SAMPLE_FILES / "simple.jpg").as_posix(), "image/jpeg") parser.parse(str(self.SAMPLE_FILES / "simple.jpg"), "image/jpeg")
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
self.assertIn("this is a test document", parser.get_text().lower()) self.assertIn("this is a test document", parser.get_text().lower())
def test_heic(self): def test_heic(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse((self.SAMPLE_FILES / "simple.heic").as_posix(), "image/heic") parser.parse(str(self.SAMPLE_FILES / "simple.heic"), "image/heic")
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
self.assertIn("pizza", parser.get_text().lower()) self.assertIn("pizza", parser.get_text().lower())
@override_settings(OCR_IMAGE_DPI=200) @override_settings(OCR_IMAGE_DPI=200)
def test_gif(self): def test_gif(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse((self.SAMPLE_FILES / "simple.gif").as_posix(), "image/gif") parser.parse(str(self.SAMPLE_FILES / "simple.gif"), "image/gif")
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
self.assertIn("this is a test document", parser.get_text().lower()) self.assertIn("this is a test document", parser.get_text().lower())
def test_tiff(self): def test_tiff(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse((self.SAMPLE_FILES / "simple.tif").as_posix(), "image/tiff") parser.parse(str(self.SAMPLE_FILES / "simple.tif"), "image/tiff")
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)
self.assertIn("this is a test document", parser.get_text().lower()) self.assertIn("this is a test document", parser.get_text().lower())
@@ -901,7 +901,7 @@ class TestParserFileTypes(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def test_webp(self): def test_webp(self):
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
parser.parse( parser.parse(
(self.SAMPLE_FILES / "document.webp").as_posix(), str(self.SAMPLE_FILES / "document.webp"),
"image/webp", "image/webp",
) )
self.assertIsFile(parser.archive_path) self.assertIsFile(parser.archive_path)