mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-09-04 21:06:20 -05:00
Chore: switch from os.path to pathlib.Path (#10539)
This commit is contained in:

committed by
GitHub

parent
cc621cf729
commit
d2064a2535
@@ -205,18 +205,9 @@ lint.per-file-ignores."docker/wait-for-redis.py" = [
|
|||||||
"INP001",
|
"INP001",
|
||||||
"T201",
|
"T201",
|
||||||
]
|
]
|
||||||
lint.per-file-ignores."src/documents/management/commands/document_consumer.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/documents/migrations/1012_fix_archive_files.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/documents/models.py" = [
|
lint.per-file-ignores."src/documents/models.py" = [
|
||||||
"SIM115",
|
"SIM115",
|
||||||
]
|
]
|
||||||
lint.per-file-ignores."src/documents/parsers.py" = [
|
|
||||||
"PTH",
|
|
||||||
] # TODO Enable & remove
|
|
||||||
lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
|
lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [
|
||||||
"RUF001",
|
"RUF001",
|
||||||
]
|
]
|
||||||
|
@@ -32,7 +32,7 @@ except ImportError: # pragma: no cover
|
|||||||
logger = logging.getLogger("paperless.management.consumer")
|
logger = logging.getLogger("paperless.management.consumer")
|
||||||
|
|
||||||
|
|
||||||
def _tags_from_path(filepath) -> list[int]:
|
def _tags_from_path(filepath: Path) -> list[int]:
|
||||||
"""
|
"""
|
||||||
Walk up the directory tree from filepath to CONSUMPTION_DIR
|
Walk up the directory tree from filepath to CONSUMPTION_DIR
|
||||||
and get or create Tag IDs for every directory.
|
and get or create Tag IDs for every directory.
|
||||||
@@ -41,7 +41,7 @@ def _tags_from_path(filepath) -> list[int]:
|
|||||||
"""
|
"""
|
||||||
db.close_old_connections()
|
db.close_old_connections()
|
||||||
tag_ids = set()
|
tag_ids = set()
|
||||||
path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts
|
path_parts = filepath.relative_to(settings.CONSUMPTION_DIR).parent.parts
|
||||||
for part in path_parts:
|
for part in path_parts:
|
||||||
tag_ids.add(
|
tag_ids.add(
|
||||||
Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk,
|
Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk,
|
||||||
@@ -50,17 +50,13 @@ def _tags_from_path(filepath) -> list[int]:
|
|||||||
return list(tag_ids)
|
return list(tag_ids)
|
||||||
|
|
||||||
|
|
||||||
def _is_ignored(filepath: str) -> bool:
|
def _is_ignored(filepath: Path) -> bool:
|
||||||
"""
|
"""
|
||||||
Checks if the given file should be ignored, based on configured
|
Checks if the given file should be ignored, based on configured
|
||||||
patterns.
|
patterns.
|
||||||
|
|
||||||
Returns True if the file is ignored, False otherwise
|
Returns True if the file is ignored, False otherwise
|
||||||
"""
|
"""
|
||||||
filepath = os.path.abspath(
|
|
||||||
os.path.normpath(filepath),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Trim out the consume directory, leaving only filename and its
|
# Trim out the consume directory, leaving only filename and its
|
||||||
# path relative to the consume directory
|
# path relative to the consume directory
|
||||||
filepath_relative = PurePath(filepath).relative_to(settings.CONSUMPTION_DIR)
|
filepath_relative = PurePath(filepath).relative_to(settings.CONSUMPTION_DIR)
|
||||||
@@ -85,15 +81,15 @@ def _is_ignored(filepath: str) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _consume(filepath: str) -> None:
|
def _consume(filepath: Path) -> None:
|
||||||
if os.path.isdir(filepath) or _is_ignored(filepath):
|
if filepath.is_dir() or _is_ignored(filepath):
|
||||||
return
|
return
|
||||||
|
|
||||||
if not os.path.isfile(filepath):
|
if not filepath.is_file():
|
||||||
logger.debug(f"Not consuming file {filepath}: File has moved.")
|
logger.debug(f"Not consuming file {filepath}: File has moved.")
|
||||||
return
|
return
|
||||||
|
|
||||||
if not is_file_ext_supported(os.path.splitext(filepath)[1]):
|
if not is_file_ext_supported(filepath.suffix):
|
||||||
logger.warning(f"Not consuming file {filepath}: Unknown file extension.")
|
logger.warning(f"Not consuming file {filepath}: Unknown file extension.")
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -107,7 +103,7 @@ def _consume(filepath: str) -> None:
|
|||||||
|
|
||||||
while (read_try_count < os_error_retry_count) and not file_open_ok:
|
while (read_try_count < os_error_retry_count) and not file_open_ok:
|
||||||
try:
|
try:
|
||||||
with open(filepath, "rb"):
|
with filepath.open("rb"):
|
||||||
file_open_ok = True
|
file_open_ok = True
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
read_try_count += 1
|
read_try_count += 1
|
||||||
@@ -141,7 +137,7 @@ def _consume(filepath: str) -> None:
|
|||||||
logger.exception("Error while consuming document")
|
logger.exception("Error while consuming document")
|
||||||
|
|
||||||
|
|
||||||
def _consume_wait_unmodified(file: str) -> None:
|
def _consume_wait_unmodified(file: Path) -> None:
|
||||||
"""
|
"""
|
||||||
Waits for the given file to appear unmodified based on file size
|
Waits for the given file to appear unmodified based on file size
|
||||||
and modification time. Will wait a configured number of seconds
|
and modification time. Will wait a configured number of seconds
|
||||||
@@ -157,7 +153,7 @@ def _consume_wait_unmodified(file: str) -> None:
|
|||||||
current_try = 0
|
current_try = 0
|
||||||
while current_try < settings.CONSUMER_POLLING_RETRY_COUNT:
|
while current_try < settings.CONSUMER_POLLING_RETRY_COUNT:
|
||||||
try:
|
try:
|
||||||
stat_data = os.stat(file)
|
stat_data = file.stat()
|
||||||
new_mtime = stat_data.st_mtime
|
new_mtime = stat_data.st_mtime
|
||||||
new_size = stat_data.st_size
|
new_size = stat_data.st_size
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
@@ -182,10 +178,10 @@ class Handler(FileSystemEventHandler):
|
|||||||
self._pool = pool
|
self._pool = pool
|
||||||
|
|
||||||
def on_created(self, event):
|
def on_created(self, event):
|
||||||
self._pool.submit(_consume_wait_unmodified, event.src_path)
|
self._pool.submit(_consume_wait_unmodified, Path(event.src_path))
|
||||||
|
|
||||||
def on_moved(self, event):
|
def on_moved(self, event):
|
||||||
self._pool.submit(_consume_wait_unmodified, event.dest_path)
|
self._pool.submit(_consume_wait_unmodified, Path(event.dest_path))
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
@@ -227,9 +223,9 @@ class Command(BaseCommand):
|
|||||||
if not directory:
|
if not directory:
|
||||||
raise CommandError("CONSUMPTION_DIR does not appear to be set.")
|
raise CommandError("CONSUMPTION_DIR does not appear to be set.")
|
||||||
|
|
||||||
directory = os.path.abspath(directory)
|
directory = Path(directory).resolve()
|
||||||
|
|
||||||
if not os.path.isdir(directory):
|
if not directory.is_dir():
|
||||||
raise CommandError(f"Consumption directory {directory} does not exist")
|
raise CommandError(f"Consumption directory {directory} does not exist")
|
||||||
|
|
||||||
# Consumer will need this
|
# Consumer will need this
|
||||||
@@ -238,11 +234,11 @@ class Command(BaseCommand):
|
|||||||
if recursive:
|
if recursive:
|
||||||
for dirpath, _, filenames in os.walk(directory):
|
for dirpath, _, filenames in os.walk(directory):
|
||||||
for filename in filenames:
|
for filename in filenames:
|
||||||
filepath = os.path.join(dirpath, filename)
|
filepath = Path(dirpath) / filename
|
||||||
_consume(filepath)
|
_consume(filepath)
|
||||||
else:
|
else:
|
||||||
for entry in os.scandir(directory):
|
for filepath in directory.iterdir():
|
||||||
_consume(entry.path)
|
_consume(filepath)
|
||||||
|
|
||||||
if options["oneshot"]:
|
if options["oneshot"]:
|
||||||
return
|
return
|
||||||
@@ -310,7 +306,7 @@ class Command(BaseCommand):
|
|||||||
try:
|
try:
|
||||||
for event in inotify.read(timeout=timeout_ms):
|
for event in inotify.read(timeout=timeout_ms):
|
||||||
path = inotify.get_path(event.wd) if recursive else directory
|
path = inotify.get_path(event.wd) if recursive else directory
|
||||||
filepath = os.path.join(path, event.name)
|
filepath = Path(path) / event.name
|
||||||
if flags.MODIFY in flags.from_mask(event.mask):
|
if flags.MODIFY in flags.from_mask(event.mask):
|
||||||
notified_files.pop(filepath, None)
|
notified_files.pop(filepath, None)
|
||||||
else:
|
else:
|
||||||
@@ -327,9 +323,7 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
# Also make sure the file exists still, some scanners might write a
|
# Also make sure the file exists still, some scanners might write a
|
||||||
# temporary file first
|
# temporary file first
|
||||||
file_still_exists = os.path.exists(filepath) and os.path.isfile(
|
file_still_exists = filepath.exists() and filepath.is_file()
|
||||||
filepath,
|
|
||||||
)
|
|
||||||
|
|
||||||
if waited_long_enough and file_still_exists:
|
if waited_long_enough and file_still_exists:
|
||||||
_consume(filepath)
|
_consume(filepath)
|
||||||
|
@@ -5,6 +5,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
from pathlib import Path
|
||||||
from time import sleep
|
from time import sleep
|
||||||
|
|
||||||
import pathvalidate
|
import pathvalidate
|
||||||
@@ -50,38 +51,38 @@ def many_to_dictionary(field): # pragma: no cover
|
|||||||
return mydictionary
|
return mydictionary
|
||||||
|
|
||||||
|
|
||||||
def archive_name_from_filename(filename):
|
def archive_name_from_filename(filename: Path) -> Path:
|
||||||
return os.path.splitext(filename)[0] + ".pdf"
|
return Path(filename.stem + ".pdf")
|
||||||
|
|
||||||
|
|
||||||
def archive_path_old(doc):
|
def archive_path_old(doc) -> Path:
|
||||||
if doc.filename:
|
if doc.filename:
|
||||||
fname = archive_name_from_filename(doc.filename)
|
fname = archive_name_from_filename(Path(doc.filename))
|
||||||
else:
|
else:
|
||||||
fname = f"{doc.pk:07}.pdf"
|
fname = Path(f"{doc.pk:07}.pdf")
|
||||||
|
|
||||||
return os.path.join(settings.ARCHIVE_DIR, fname)
|
return settings.ARCHIVE_DIR / fname
|
||||||
|
|
||||||
|
|
||||||
STORAGE_TYPE_GPG = "gpg"
|
STORAGE_TYPE_GPG = "gpg"
|
||||||
|
|
||||||
|
|
||||||
def archive_path_new(doc):
|
def archive_path_new(doc) -> Path | None:
|
||||||
if doc.archive_filename is not None:
|
if doc.archive_filename is not None:
|
||||||
return os.path.join(settings.ARCHIVE_DIR, str(doc.archive_filename))
|
return settings.ARCHIVE_DIR / doc.archive_filename
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def source_path(doc):
|
def source_path(doc) -> Path:
|
||||||
if doc.filename:
|
if doc.filename:
|
||||||
fname = str(doc.filename)
|
fname = doc.filename
|
||||||
else:
|
else:
|
||||||
fname = f"{doc.pk:07}{doc.file_type}"
|
fname = f"{doc.pk:07}{doc.file_type}"
|
||||||
if doc.storage_type == STORAGE_TYPE_GPG:
|
if doc.storage_type == STORAGE_TYPE_GPG:
|
||||||
fname += ".gpg" # pragma: no cover
|
fname = Path(str(fname) + ".gpg") # pragma: no cover
|
||||||
|
|
||||||
return os.path.join(settings.ORIGINALS_DIR, fname)
|
return settings.ORIGINALS_DIR / fname
|
||||||
|
|
||||||
|
|
||||||
def generate_unique_filename(doc, *, archive_filename=False):
|
def generate_unique_filename(doc, *, archive_filename=False):
|
||||||
@@ -104,7 +105,7 @@ def generate_unique_filename(doc, *, archive_filename=False):
|
|||||||
# still the same as before.
|
# still the same as before.
|
||||||
return new_filename
|
return new_filename
|
||||||
|
|
||||||
if os.path.exists(os.path.join(root, new_filename)):
|
if (root / new_filename).exists():
|
||||||
counter += 1
|
counter += 1
|
||||||
else:
|
else:
|
||||||
return new_filename
|
return new_filename
|
||||||
@@ -202,18 +203,18 @@ def create_archive_version(doc, retry_count=3):
|
|||||||
parser,
|
parser,
|
||||||
source_path(doc),
|
source_path(doc),
|
||||||
doc.mime_type,
|
doc.mime_type,
|
||||||
os.path.basename(doc.filename),
|
Path(doc.filename).name,
|
||||||
)
|
)
|
||||||
doc.content = parser.get_text()
|
doc.content = parser.get_text()
|
||||||
|
|
||||||
if parser.get_archive_path() and os.path.isfile(parser.get_archive_path()):
|
if parser.get_archive_path() and Path(parser.get_archive_path()).is_file():
|
||||||
doc.archive_filename = generate_unique_filename(
|
doc.archive_filename = generate_unique_filename(
|
||||||
doc,
|
doc,
|
||||||
archive_filename=True,
|
archive_filename=True,
|
||||||
)
|
)
|
||||||
with open(parser.get_archive_path(), "rb") as f:
|
with Path(parser.get_archive_path()).open("rb") as f:
|
||||||
doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
|
doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
|
||||||
os.makedirs(os.path.dirname(archive_path_new(doc)), exist_ok=True)
|
archive_path_new(doc).parent.mkdir(parents=True, exist_ok=True)
|
||||||
shutil.copy2(parser.get_archive_path(), archive_path_new(doc))
|
shutil.copy2(parser.get_archive_path(), archive_path_new(doc))
|
||||||
else:
|
else:
|
||||||
doc.archive_checksum = None
|
doc.archive_checksum = None
|
||||||
@@ -264,7 +265,7 @@ def move_old_to_new_locations(apps, schema_editor):
|
|||||||
# check that archive files of all unaffected documents are in place
|
# check that archive files of all unaffected documents are in place
|
||||||
for doc in Document.objects.filter(archive_checksum__isnull=False):
|
for doc in Document.objects.filter(archive_checksum__isnull=False):
|
||||||
old_path = archive_path_old(doc)
|
old_path = archive_path_old(doc)
|
||||||
if doc.id not in affected_document_ids and not os.path.isfile(old_path):
|
if doc.id not in affected_document_ids and not old_path.is_file():
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Archived document ID:{doc.id} does not exist at: {old_path}",
|
f"Archived document ID:{doc.id} does not exist at: {old_path}",
|
||||||
)
|
)
|
||||||
@@ -285,12 +286,12 @@ def move_old_to_new_locations(apps, schema_editor):
|
|||||||
if doc.id in affected_document_ids:
|
if doc.id in affected_document_ids:
|
||||||
old_path = archive_path_old(doc)
|
old_path = archive_path_old(doc)
|
||||||
# remove affected archive versions
|
# remove affected archive versions
|
||||||
if os.path.isfile(old_path):
|
if old_path.is_file():
|
||||||
logger.debug(f"Removing {old_path}")
|
logger.debug(f"Removing {old_path}")
|
||||||
os.unlink(old_path)
|
old_path.unlink()
|
||||||
else:
|
else:
|
||||||
# Set archive path for unaffected files
|
# Set archive path for unaffected files
|
||||||
doc.archive_filename = archive_name_from_filename(doc.filename)
|
doc.archive_filename = archive_name_from_filename(Path(doc.filename))
|
||||||
Document.objects.filter(id=doc.id).update(
|
Document.objects.filter(id=doc.id).update(
|
||||||
archive_filename=doc.archive_filename,
|
archive_filename=doc.archive_filename,
|
||||||
)
|
)
|
||||||
@@ -316,7 +317,7 @@ def move_new_to_old_locations(apps, schema_editor):
|
|||||||
f"filename.",
|
f"filename.",
|
||||||
)
|
)
|
||||||
old_archive_paths.add(old_archive_path)
|
old_archive_paths.add(old_archive_path)
|
||||||
if new_archive_path != old_archive_path and os.path.isfile(old_archive_path):
|
if new_archive_path != old_archive_path and old_archive_path.is_file():
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Cannot migrate: Cannot move {new_archive_path} to "
|
f"Cannot migrate: Cannot move {new_archive_path} to "
|
||||||
f"{old_archive_path}: file already exists.",
|
f"{old_archive_path}: file already exists.",
|
||||||
|
@@ -169,7 +169,7 @@ def run_convert(
|
|||||||
args += ["-depth", str(depth)] if depth else []
|
args += ["-depth", str(depth)] if depth else []
|
||||||
args += ["-auto-orient"] if auto_orient else []
|
args += ["-auto-orient"] if auto_orient else []
|
||||||
args += ["-define", "pdf:use-cropbox=true"] if use_cropbox else []
|
args += ["-define", "pdf:use-cropbox=true"] if use_cropbox else []
|
||||||
args += [input_file, output_file]
|
args += [str(input_file), str(output_file)]
|
||||||
|
|
||||||
logger.debug("Execute: " + " ".join(args), extra={"group": logging_group})
|
logger.debug("Execute: " + " ".join(args), extra={"group": logging_group})
|
||||||
|
|
||||||
@@ -188,8 +188,8 @@ def get_default_thumbnail() -> Path:
|
|||||||
return (Path(__file__).parent / "resources" / "document.webp").resolve()
|
return (Path(__file__).parent / "resources" / "document.webp").resolve()
|
||||||
|
|
||||||
|
|
||||||
def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -> str:
|
def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -> Path:
|
||||||
out_path = os.path.join(temp_dir, "convert_gs.webp")
|
out_path: Path = Path(temp_dir) / "convert_gs.webp"
|
||||||
|
|
||||||
# if convert fails, fall back to extracting
|
# if convert fails, fall back to extracting
|
||||||
# the first PDF page as a PNG using Ghostscript
|
# the first PDF page as a PNG using Ghostscript
|
||||||
@@ -199,7 +199,7 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -
|
|||||||
extra={"group": logging_group},
|
extra={"group": logging_group},
|
||||||
)
|
)
|
||||||
# Ghostscript doesn't handle WebP outputs
|
# Ghostscript doesn't handle WebP outputs
|
||||||
gs_out_path = os.path.join(temp_dir, "gs_out.png")
|
gs_out_path: Path = Path(temp_dir) / "gs_out.png"
|
||||||
cmd = [settings.GS_BINARY, "-q", "-sDEVICE=pngalpha", "-o", gs_out_path, in_path]
|
cmd = [settings.GS_BINARY, "-q", "-sDEVICE=pngalpha", "-o", gs_out_path, in_path]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -227,16 +227,16 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -
|
|||||||
# The caller might expect a generated thumbnail that can be moved,
|
# The caller might expect a generated thumbnail that can be moved,
|
||||||
# so we need to copy it before it gets moved.
|
# so we need to copy it before it gets moved.
|
||||||
# https://github.com/paperless-ngx/paperless-ngx/issues/3631
|
# https://github.com/paperless-ngx/paperless-ngx/issues/3631
|
||||||
default_thumbnail_path = os.path.join(temp_dir, "document.webp")
|
default_thumbnail_path: Path = Path(temp_dir) / "document.webp"
|
||||||
copy_file_with_basic_stats(get_default_thumbnail(), default_thumbnail_path)
|
copy_file_with_basic_stats(get_default_thumbnail(), default_thumbnail_path)
|
||||||
return default_thumbnail_path
|
return default_thumbnail_path
|
||||||
|
|
||||||
|
|
||||||
def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None) -> Path:
|
def make_thumbnail_from_pdf(in_path: Path, temp_dir: Path, logging_group=None) -> Path:
|
||||||
"""
|
"""
|
||||||
The thumbnail of a PDF is just a 500px wide image of the first page.
|
The thumbnail of a PDF is just a 500px wide image of the first page.
|
||||||
"""
|
"""
|
||||||
out_path = temp_dir / "convert.webp"
|
out_path: Path = temp_dir / "convert.webp"
|
||||||
|
|
||||||
# Run convert to get a decent thumbnail
|
# Run convert to get a decent thumbnail
|
||||||
try:
|
try:
|
||||||
|
@@ -654,7 +654,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
@override_settings(
|
@override_settings(
|
||||||
MODEL_FILE=(Path(__file__).parent / "data" / "model.pickle").as_posix(),
|
MODEL_FILE=str(Path(__file__).parent / "data" / "model.pickle"),
|
||||||
)
|
)
|
||||||
@pytest.mark.skip(
|
@pytest.mark.skip(
|
||||||
reason="Disabled caching due to high memory usage - need to investigate.",
|
reason="Disabled caching due to high memory usage - need to investigate.",
|
||||||
|
@@ -254,7 +254,7 @@ class TestConsumer(
|
|||||||
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
|
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
|
||||||
|
|
||||||
filename = self.get_test_file()
|
filename = self.get_test_file()
|
||||||
shadow_file = Path(self.dirs.scratch_dir / "._sample.pdf")
|
shadow_file = Path(self.dirs.scratch_dir) / "._sample.pdf"
|
||||||
|
|
||||||
shutil.copy(filename, shadow_file)
|
shutil.copy(filename, shadow_file)
|
||||||
|
|
||||||
|
@@ -258,66 +258,66 @@ class TestConsumer(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCase):
|
|||||||
def test_is_ignored(self):
|
def test_is_ignored(self):
|
||||||
test_paths = [
|
test_paths = [
|
||||||
{
|
{
|
||||||
"path": (Path(self.dirs.consumption_dir) / "foo.pdf").as_posix(),
|
"path": str(Path(self.dirs.consumption_dir) / "foo.pdf"),
|
||||||
"ignore": False,
|
"ignore": False,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"path": (
|
"path": str(
|
||||||
Path(self.dirs.consumption_dir) / "foo" / "bar.pdf"
|
Path(self.dirs.consumption_dir) / "foo" / "bar.pdf",
|
||||||
).as_posix(),
|
),
|
||||||
"ignore": False,
|
"ignore": False,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"path": (Path(self.dirs.consumption_dir) / ".DS_STORE").as_posix(),
|
"path": str(Path(self.dirs.consumption_dir) / ".DS_STORE"),
|
||||||
"ignore": True,
|
"ignore": True,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"path": (Path(self.dirs.consumption_dir) / ".DS_Store").as_posix(),
|
"path": str(Path(self.dirs.consumption_dir) / ".DS_Store"),
|
||||||
"ignore": True,
|
"ignore": True,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"path": (
|
"path": str(
|
||||||
Path(self.dirs.consumption_dir) / ".stfolder" / "foo.pdf"
|
Path(self.dirs.consumption_dir) / ".stfolder" / "foo.pdf",
|
||||||
).as_posix(),
|
),
|
||||||
"ignore": True,
|
"ignore": True,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"path": (Path(self.dirs.consumption_dir) / ".stfolder.pdf").as_posix(),
|
"path": str(Path(self.dirs.consumption_dir) / ".stfolder.pdf"),
|
||||||
"ignore": False,
|
"ignore": False,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"path": (
|
"path": str(
|
||||||
Path(self.dirs.consumption_dir) / ".stversions" / "foo.pdf"
|
Path(self.dirs.consumption_dir) / ".stversions" / "foo.pdf",
|
||||||
).as_posix(),
|
),
|
||||||
"ignore": True,
|
"ignore": True,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"path": (
|
"path": str(
|
||||||
Path(self.dirs.consumption_dir) / ".stversions.pdf"
|
Path(self.dirs.consumption_dir) / ".stversions.pdf",
|
||||||
).as_posix(),
|
),
|
||||||
"ignore": False,
|
"ignore": False,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"path": (Path(self.dirs.consumption_dir) / "._foo.pdf").as_posix(),
|
"path": str(Path(self.dirs.consumption_dir) / "._foo.pdf"),
|
||||||
"ignore": True,
|
"ignore": True,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"path": (Path(self.dirs.consumption_dir) / "my_foo.pdf").as_posix(),
|
"path": str(Path(self.dirs.consumption_dir) / "my_foo.pdf"),
|
||||||
"ignore": False,
|
"ignore": False,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"path": (
|
"path": str(
|
||||||
Path(self.dirs.consumption_dir) / "._foo" / "bar.pdf"
|
Path(self.dirs.consumption_dir) / "._foo" / "bar.pdf",
|
||||||
).as_posix(),
|
),
|
||||||
"ignore": True,
|
"ignore": True,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"path": (
|
"path": str(
|
||||||
Path(self.dirs.consumption_dir)
|
Path(self.dirs.consumption_dir)
|
||||||
/ "@eaDir"
|
/ "@eaDir"
|
||||||
/ "SYNO@.fileindexdb"
|
/ "SYNO@.fileindexdb"
|
||||||
/ "_1jk.fnm"
|
/ "_1jk.fnm",
|
||||||
).as_posix(),
|
),
|
||||||
"ignore": True,
|
"ignore": True,
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@@ -330,7 +330,7 @@ class TestConsumer(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCase):
|
|||||||
f'_is_ignored("{filepath}") != {expected_ignored_result}',
|
f'_is_ignored("{filepath}") != {expected_ignored_result}',
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch("documents.management.commands.document_consumer.open")
|
@mock.patch("documents.management.commands.document_consumer.Path.open")
|
||||||
def test_consume_file_busy(self, open_mock):
|
def test_consume_file_busy(self, open_mock):
|
||||||
# Calling this mock always raises this
|
# Calling this mock always raises this
|
||||||
open_mock.side_effect = OSError
|
open_mock.side_effect = OSError
|
||||||
|
@@ -230,9 +230,9 @@ class TestExportImport(
|
|||||||
|
|
||||||
for element in manifest:
|
for element in manifest:
|
||||||
if element["model"] == "documents.document":
|
if element["model"] == "documents.document":
|
||||||
fname = (
|
fname = str(
|
||||||
self.target / element[document_exporter.EXPORTER_FILE_NAME]
|
self.target / element[document_exporter.EXPORTER_FILE_NAME],
|
||||||
).as_posix()
|
)
|
||||||
self.assertIsFile(fname)
|
self.assertIsFile(fname)
|
||||||
self.assertIsFile(
|
self.assertIsFile(
|
||||||
self.target / element[document_exporter.EXPORTER_THUMBNAIL_NAME],
|
self.target / element[document_exporter.EXPORTER_THUMBNAIL_NAME],
|
||||||
@@ -462,9 +462,9 @@ class TestExportImport(
|
|||||||
|
|
||||||
call_command(*args)
|
call_command(*args)
|
||||||
|
|
||||||
expected_file = (
|
expected_file = str(
|
||||||
self.target / f"export-{timezone.localdate().isoformat()}.zip"
|
self.target / f"export-{timezone.localdate().isoformat()}.zip",
|
||||||
).as_posix()
|
)
|
||||||
|
|
||||||
self.assertIsFile(expected_file)
|
self.assertIsFile(expected_file)
|
||||||
|
|
||||||
@@ -498,9 +498,9 @@ class TestExportImport(
|
|||||||
):
|
):
|
||||||
call_command(*args)
|
call_command(*args)
|
||||||
|
|
||||||
expected_file = (
|
expected_file = str(
|
||||||
self.target / f"export-{timezone.localdate().isoformat()}.zip"
|
self.target / f"export-{timezone.localdate().isoformat()}.zip",
|
||||||
).as_posix()
|
)
|
||||||
|
|
||||||
self.assertIsFile(expected_file)
|
self.assertIsFile(expected_file)
|
||||||
|
|
||||||
@@ -544,9 +544,9 @@ class TestExportImport(
|
|||||||
|
|
||||||
call_command(*args)
|
call_command(*args)
|
||||||
|
|
||||||
expected_file = (
|
expected_file = str(
|
||||||
self.target / f"export-{timezone.localdate().isoformat()}.zip"
|
self.target / f"export-{timezone.localdate().isoformat()}.zip",
|
||||||
).as_posix()
|
)
|
||||||
|
|
||||||
self.assertIsFile(expected_file)
|
self.assertIsFile(expected_file)
|
||||||
self.assertIsNotFile(existing_file)
|
self.assertIsNotFile(existing_file)
|
||||||
|
@@ -19,15 +19,15 @@ migration_1012_obj = importlib.import_module(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def archive_name_from_filename(filename):
|
def archive_name_from_filename(filename: Path) -> Path:
|
||||||
return Path(filename).stem + ".pdf"
|
return Path(filename.stem + ".pdf")
|
||||||
|
|
||||||
|
|
||||||
def archive_path_old(self):
|
def archive_path_old(self) -> Path:
|
||||||
if self.filename:
|
if self.filename:
|
||||||
fname = archive_name_from_filename(self.filename)
|
fname = archive_name_from_filename(Path(self.filename))
|
||||||
else:
|
else:
|
||||||
fname = f"{self.pk:07}.pdf"
|
fname = Path(f"{self.pk:07}.pdf")
|
||||||
|
|
||||||
return Path(settings.ARCHIVE_DIR) / fname
|
return Path(settings.ARCHIVE_DIR) / fname
|
||||||
|
|
||||||
|
@@ -679,7 +679,7 @@ def _parse_db_settings() -> dict:
|
|||||||
databases = {
|
databases = {
|
||||||
"default": {
|
"default": {
|
||||||
"ENGINE": "django.db.backends.sqlite3",
|
"ENGINE": "django.db.backends.sqlite3",
|
||||||
"NAME": str(DATA_DIR / "db.sqlite3"),
|
"NAME": DATA_DIR / "db.sqlite3",
|
||||||
"OPTIONS": {},
|
"OPTIONS": {},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -807,7 +807,7 @@ LANGUAGES = [
|
|||||||
("zh-tw", _("Chinese Traditional")),
|
("zh-tw", _("Chinese Traditional")),
|
||||||
]
|
]
|
||||||
|
|
||||||
LOCALE_PATHS = [str(BASE_DIR / "locale")]
|
LOCALE_PATHS = [BASE_DIR / "locale"]
|
||||||
|
|
||||||
TIME_ZONE = os.getenv("PAPERLESS_TIME_ZONE", "UTC")
|
TIME_ZONE = os.getenv("PAPERLESS_TIME_ZONE", "UTC")
|
||||||
|
|
||||||
@@ -848,21 +848,21 @@ LOGGING = {
|
|||||||
"file_paperless": {
|
"file_paperless": {
|
||||||
"class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
|
"class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
|
||||||
"formatter": "verbose",
|
"formatter": "verbose",
|
||||||
"filename": str(LOGGING_DIR / "paperless.log"),
|
"filename": LOGGING_DIR / "paperless.log",
|
||||||
"maxBytes": LOGROTATE_MAX_SIZE,
|
"maxBytes": LOGROTATE_MAX_SIZE,
|
||||||
"backupCount": LOGROTATE_MAX_BACKUPS,
|
"backupCount": LOGROTATE_MAX_BACKUPS,
|
||||||
},
|
},
|
||||||
"file_mail": {
|
"file_mail": {
|
||||||
"class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
|
"class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
|
||||||
"formatter": "verbose",
|
"formatter": "verbose",
|
||||||
"filename": str(LOGGING_DIR / "mail.log"),
|
"filename": LOGGING_DIR / "mail.log",
|
||||||
"maxBytes": LOGROTATE_MAX_SIZE,
|
"maxBytes": LOGROTATE_MAX_SIZE,
|
||||||
"backupCount": LOGROTATE_MAX_BACKUPS,
|
"backupCount": LOGROTATE_MAX_BACKUPS,
|
||||||
},
|
},
|
||||||
"file_celery": {
|
"file_celery": {
|
||||||
"class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
|
"class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
|
||||||
"formatter": "verbose",
|
"formatter": "verbose",
|
||||||
"filename": str(LOGGING_DIR / "celery.log"),
|
"filename": LOGGING_DIR / "celery.log",
|
||||||
"maxBytes": LOGROTATE_MAX_SIZE,
|
"maxBytes": LOGROTATE_MAX_SIZE,
|
||||||
"backupCount": LOGROTATE_MAX_BACKUPS,
|
"backupCount": LOGROTATE_MAX_BACKUPS,
|
||||||
},
|
},
|
||||||
@@ -921,7 +921,7 @@ CELERY_ACCEPT_CONTENT = ["application/json", "application/x-python-serialize"]
|
|||||||
CELERY_BEAT_SCHEDULE = _parse_beat_schedule()
|
CELERY_BEAT_SCHEDULE = _parse_beat_schedule()
|
||||||
|
|
||||||
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
|
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
|
||||||
CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db")
|
CELERY_BEAT_SCHEDULE_FILENAME = DATA_DIR / "celerybeat-schedule.db"
|
||||||
|
|
||||||
|
|
||||||
# Cachalot: Database read cache.
|
# Cachalot: Database read cache.
|
||||||
|
@@ -69,13 +69,13 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(uuid.uuid4())
|
parser = RasterisedDocumentParser(uuid.uuid4())
|
||||||
page_count = parser.get_page_count(
|
page_count = parser.get_page_count(
|
||||||
(self.SAMPLE_FILES / "simple-digital.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "simple-digital.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertEqual(page_count, 1)
|
self.assertEqual(page_count, 1)
|
||||||
|
|
||||||
page_count = parser.get_page_count(
|
page_count = parser.get_page_count(
|
||||||
(self.SAMPLE_FILES / "multi-page-mixed.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-mixed.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertEqual(page_count, 6)
|
self.assertEqual(page_count, 6)
|
||||||
@@ -92,7 +92,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
parser = RasterisedDocumentParser(uuid.uuid4())
|
parser = RasterisedDocumentParser(uuid.uuid4())
|
||||||
with self.assertLogs("paperless.parsing.tesseract", level="WARNING") as cm:
|
with self.assertLogs("paperless.parsing.tesseract", level="WARNING") as cm:
|
||||||
page_count = parser.get_page_count(
|
page_count = parser.get_page_count(
|
||||||
(self.SAMPLE_FILES / "password-protected.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "password-protected.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertEqual(page_count, None)
|
self.assertEqual(page_count, None)
|
||||||
@@ -101,7 +101,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def test_thumbnail(self):
|
def test_thumbnail(self):
|
||||||
parser = RasterisedDocumentParser(uuid.uuid4())
|
parser = RasterisedDocumentParser(uuid.uuid4())
|
||||||
thumb = parser.get_thumbnail(
|
thumb = parser.get_thumbnail(
|
||||||
(self.SAMPLE_FILES / "simple-digital.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "simple-digital.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsFile(thumb)
|
self.assertIsFile(thumb)
|
||||||
@@ -109,7 +109,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
@mock.patch("documents.parsers.run_convert")
|
@mock.patch("documents.parsers.run_convert")
|
||||||
def test_thumbnail_fallback(self, m):
|
def test_thumbnail_fallback(self, m):
|
||||||
def call_convert(input_file, output_file, **kwargs):
|
def call_convert(input_file, output_file, **kwargs):
|
||||||
if ".pdf" in input_file:
|
if ".pdf" in str(input_file):
|
||||||
raise ParseError("Does not compute.")
|
raise ParseError("Does not compute.")
|
||||||
else:
|
else:
|
||||||
run_convert(input_file=input_file, output_file=output_file, **kwargs)
|
run_convert(input_file=input_file, output_file=output_file, **kwargs)
|
||||||
@@ -118,7 +118,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
|
|
||||||
parser = RasterisedDocumentParser(uuid.uuid4())
|
parser = RasterisedDocumentParser(uuid.uuid4())
|
||||||
thumb = parser.get_thumbnail(
|
thumb = parser.get_thumbnail(
|
||||||
(self.SAMPLE_FILES / "simple-digital.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "simple-digital.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsFile(thumb)
|
self.assertIsFile(thumb)
|
||||||
@@ -126,7 +126,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def test_thumbnail_encrypted(self):
|
def test_thumbnail_encrypted(self):
|
||||||
parser = RasterisedDocumentParser(uuid.uuid4())
|
parser = RasterisedDocumentParser(uuid.uuid4())
|
||||||
thumb = parser.get_thumbnail(
|
thumb = parser.get_thumbnail(
|
||||||
(self.SAMPLE_FILES / "encrypted.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "encrypted.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsFile(thumb)
|
self.assertIsFile(thumb)
|
||||||
@@ -134,17 +134,17 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def test_get_dpi(self):
|
def test_get_dpi(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
|
|
||||||
dpi = parser.get_dpi((self.SAMPLE_FILES / "simple-no-dpi.png").as_posix())
|
dpi = parser.get_dpi(str(self.SAMPLE_FILES / "simple-no-dpi.png"))
|
||||||
self.assertEqual(dpi, None)
|
self.assertEqual(dpi, None)
|
||||||
|
|
||||||
dpi = parser.get_dpi((self.SAMPLE_FILES / "simple.png").as_posix())
|
dpi = parser.get_dpi(str(self.SAMPLE_FILES / "simple.png"))
|
||||||
self.assertEqual(dpi, 72)
|
self.assertEqual(dpi, 72)
|
||||||
|
|
||||||
def test_simple_digital(self):
|
def test_simple_digital(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
|
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "simple-digital.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "simple-digital.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -156,7 +156,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
|
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "with-form.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "with-form.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -172,7 +172,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
|
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "with-form.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "with-form.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -186,7 +186,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def test_signed(self):
|
def test_signed(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
|
|
||||||
parser.parse((self.SAMPLE_FILES / "signed.pdf").as_posix(), "application/pdf")
|
parser.parse(str(self.SAMPLE_FILES / "signed.pdf"), "application/pdf")
|
||||||
|
|
||||||
self.assertIsNone(parser.archive_path)
|
self.assertIsNone(parser.archive_path)
|
||||||
self.assertContainsStrings(
|
self.assertContainsStrings(
|
||||||
@@ -202,7 +202,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
|
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "encrypted.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "encrypted.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -213,7 +213,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def test_with_form_error_notext(self):
|
def test_with_form_error_notext(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "with-form.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "with-form.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -227,7 +227,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
|
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "with-form.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "with-form.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -239,7 +239,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def test_image_simple(self):
|
def test_image_simple(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
|
|
||||||
parser.parse((self.SAMPLE_FILES / "simple.png").as_posix(), "image/png")
|
parser.parse(str(self.SAMPLE_FILES / "simple.png"), "image/png")
|
||||||
|
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
|
|
||||||
@@ -255,7 +255,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
dest_file = Path(tempdir) / "simple-alpha.png"
|
dest_file = Path(tempdir) / "simple-alpha.png"
|
||||||
shutil.copy(sample_file, dest_file)
|
shutil.copy(sample_file, dest_file)
|
||||||
|
|
||||||
parser.parse(dest_file.as_posix(), "image/png")
|
parser.parse(str(dest_file), "image/png")
|
||||||
|
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
|
|
||||||
@@ -265,7 +265,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
|
|
||||||
dpi = parser.calculate_a4_dpi(
|
dpi = parser.calculate_a4_dpi(
|
||||||
(self.SAMPLE_FILES / "simple-no-dpi.png").as_posix(),
|
str(self.SAMPLE_FILES / "simple-no-dpi.png"),
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(dpi, 62)
|
self.assertEqual(dpi, 62)
|
||||||
@@ -277,7 +277,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
|
|
||||||
def f():
|
def f():
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "simple-no-dpi.png").as_posix(),
|
str(self.SAMPLE_FILES / "simple-no-dpi.png"),
|
||||||
"image/png",
|
"image/png",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -287,7 +287,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def test_image_no_dpi_default(self):
|
def test_image_no_dpi_default(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
|
|
||||||
parser.parse((self.SAMPLE_FILES / "simple-no-dpi.png").as_posix(), "image/png")
|
parser.parse(str(self.SAMPLE_FILES / "simple-no-dpi.png"), "image/png")
|
||||||
|
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
|
|
||||||
@@ -299,7 +299,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def test_multi_page(self):
|
def test_multi_page(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
@@ -312,7 +312,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def test_multi_page_pages_skip(self):
|
def test_multi_page_pages_skip(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
@@ -325,7 +325,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def test_multi_page_pages_redo(self):
|
def test_multi_page_pages_redo(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
@@ -338,7 +338,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def test_multi_page_pages_force(self):
|
def test_multi_page_pages_force(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
@@ -351,7 +351,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def test_multi_page_analog_pages_skip(self):
|
def test_multi_page_analog_pages_skip(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-images.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
@@ -375,7 +375,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-images.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
@@ -397,7 +397,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-images.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
@@ -419,7 +419,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsNone(parser.archive_path)
|
self.assertIsNone(parser.archive_path)
|
||||||
@@ -442,7 +442,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-images.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -467,7 +467,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsNotNone(parser.archive_path)
|
self.assertIsNotNone(parser.archive_path)
|
||||||
@@ -490,7 +490,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-images.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsNotNone(parser.archive_path)
|
self.assertIsNotNone(parser.archive_path)
|
||||||
@@ -513,7 +513,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsNone(parser.archive_path)
|
self.assertIsNone(parser.archive_path)
|
||||||
@@ -536,7 +536,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-images.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsNotNone(parser.archive_path)
|
self.assertIsNotNone(parser.archive_path)
|
||||||
@@ -559,7 +559,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-digital.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsNone(parser.archive_path)
|
self.assertIsNone(parser.archive_path)
|
||||||
@@ -582,7 +582,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-images.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsNone(parser.archive_path)
|
self.assertIsNone(parser.archive_path)
|
||||||
@@ -605,7 +605,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-mixed.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-mixed.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsNotNone(parser.archive_path)
|
self.assertIsNotNone(parser.archive_path)
|
||||||
@@ -636,7 +636,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "single-page-mixed.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "single-page-mixed.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsNotNone(parser.archive_path)
|
self.assertIsNotNone(parser.archive_path)
|
||||||
@@ -673,7 +673,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-mixed.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-mixed.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
self.assertIsNone(parser.archive_path)
|
self.assertIsNone(parser.archive_path)
|
||||||
@@ -685,7 +685,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
@override_settings(OCR_MODE="skip", OCR_ROTATE_PAGES=True)
|
@override_settings(OCR_MODE="skip", OCR_ROTATE_PAGES=True)
|
||||||
def test_rotate(self):
|
def test_rotate(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse((self.SAMPLE_FILES / "rotated.pdf").as_posix(), "application/pdf")
|
parser.parse(str(self.SAMPLE_FILES / "rotated.pdf"), "application/pdf")
|
||||||
self.assertContainsStrings(
|
self.assertContainsStrings(
|
||||||
parser.get_text(),
|
parser.get_text(),
|
||||||
[
|
[
|
||||||
@@ -707,7 +707,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "multi-page-images.tiff").as_posix(),
|
str(self.SAMPLE_FILES / "multi-page-images.tiff"),
|
||||||
"image/tiff",
|
"image/tiff",
|
||||||
)
|
)
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
@@ -752,9 +752,9 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
- Text from all pages extracted
|
- Text from all pages extracted
|
||||||
"""
|
"""
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
sample_file = (
|
sample_file = str(
|
||||||
self.SAMPLE_FILES / "multi-page-images-alpha-rgb.tiff"
|
self.SAMPLE_FILES / "multi-page-images-alpha-rgb.tiff",
|
||||||
).as_posix()
|
)
|
||||||
with tempfile.NamedTemporaryFile() as tmp_file:
|
with tempfile.NamedTemporaryFile() as tmp_file:
|
||||||
shutil.copy(sample_file, tmp_file.name)
|
shutil.copy(sample_file, tmp_file.name)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
@@ -843,7 +843,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
|
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "rtl-test.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "rtl-test.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -858,7 +858,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
self.assertRaises(
|
self.assertRaises(
|
||||||
ParseError,
|
ParseError,
|
||||||
parser.parse,
|
parser.parse,
|
||||||
(self.SAMPLE_FILES / "simple-digital.pdf").as_posix(),
|
str(self.SAMPLE_FILES / "simple-digital.pdf"),
|
||||||
"application/pdf",
|
"application/pdf",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -868,32 +868,32 @@ class TestParserFileTypes(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
|
|
||||||
def test_bmp(self):
|
def test_bmp(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse((self.SAMPLE_FILES / "simple.bmp").as_posix(), "image/bmp")
|
parser.parse(str(self.SAMPLE_FILES / "simple.bmp"), "image/bmp")
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
self.assertIn("this is a test document", parser.get_text().lower())
|
self.assertIn("this is a test document", parser.get_text().lower())
|
||||||
|
|
||||||
def test_jpg(self):
|
def test_jpg(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse((self.SAMPLE_FILES / "simple.jpg").as_posix(), "image/jpeg")
|
parser.parse(str(self.SAMPLE_FILES / "simple.jpg"), "image/jpeg")
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
self.assertIn("this is a test document", parser.get_text().lower())
|
self.assertIn("this is a test document", parser.get_text().lower())
|
||||||
|
|
||||||
def test_heic(self):
|
def test_heic(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse((self.SAMPLE_FILES / "simple.heic").as_posix(), "image/heic")
|
parser.parse(str(self.SAMPLE_FILES / "simple.heic"), "image/heic")
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
self.assertIn("pizza", parser.get_text().lower())
|
self.assertIn("pizza", parser.get_text().lower())
|
||||||
|
|
||||||
@override_settings(OCR_IMAGE_DPI=200)
|
@override_settings(OCR_IMAGE_DPI=200)
|
||||||
def test_gif(self):
|
def test_gif(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse((self.SAMPLE_FILES / "simple.gif").as_posix(), "image/gif")
|
parser.parse(str(self.SAMPLE_FILES / "simple.gif"), "image/gif")
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
self.assertIn("this is a test document", parser.get_text().lower())
|
self.assertIn("this is a test document", parser.get_text().lower())
|
||||||
|
|
||||||
def test_tiff(self):
|
def test_tiff(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse((self.SAMPLE_FILES / "simple.tif").as_posix(), "image/tiff")
|
parser.parse(str(self.SAMPLE_FILES / "simple.tif"), "image/tiff")
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
self.assertIn("this is a test document", parser.get_text().lower())
|
self.assertIn("this is a test document", parser.get_text().lower())
|
||||||
|
|
||||||
@@ -901,7 +901,7 @@ class TestParserFileTypes(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def test_webp(self):
|
def test_webp(self):
|
||||||
parser = RasterisedDocumentParser(None)
|
parser = RasterisedDocumentParser(None)
|
||||||
parser.parse(
|
parser.parse(
|
||||||
(self.SAMPLE_FILES / "document.webp").as_posix(),
|
str(self.SAMPLE_FILES / "document.webp"),
|
||||||
"image/webp",
|
"image/webp",
|
||||||
)
|
)
|
||||||
self.assertIsFile(parser.archive_path)
|
self.assertIsFile(parser.archive_path)
|
||||||
|
Reference in New Issue
Block a user