mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-22 03:16:15 -05:00 
			
		
		
		
	Chore: switch from os.path to pathlib.Path (#10539)
This commit is contained in:
		 Sebastian Steinbeißer
					Sebastian Steinbeißer
				
			
				
					committed by
					
						 GitHub
						GitHub
					
				
			
			
				
	
			
			
			 GitHub
						GitHub
					
				
			
						parent
						
							cc621cf729
						
					
				
				
					commit
					d2064a2535
				
			| @@ -205,18 +205,9 @@ lint.per-file-ignores."docker/wait-for-redis.py" = [ | |||||||
|   "INP001", |   "INP001", | ||||||
|   "T201", |   "T201", | ||||||
| ] | ] | ||||||
| lint.per-file-ignores."src/documents/management/commands/document_consumer.py" = [ |  | ||||||
|   "PTH", |  | ||||||
| ] # TODO Enable & remove |  | ||||||
| lint.per-file-ignores."src/documents/migrations/1012_fix_archive_files.py" = [ |  | ||||||
|   "PTH", |  | ||||||
| ] # TODO Enable & remove |  | ||||||
| lint.per-file-ignores."src/documents/models.py" = [ | lint.per-file-ignores."src/documents/models.py" = [ | ||||||
|   "SIM115", |   "SIM115", | ||||||
| ] | ] | ||||||
| lint.per-file-ignores."src/documents/parsers.py" = [ |  | ||||||
|   "PTH", |  | ||||||
| ] # TODO Enable & remove |  | ||||||
| lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [ | lint.per-file-ignores."src/paperless_tesseract/tests/test_parser.py" = [ | ||||||
|   "RUF001", |   "RUF001", | ||||||
| ] | ] | ||||||
|   | |||||||
| @@ -32,7 +32,7 @@ except ImportError:  # pragma: no cover | |||||||
| logger = logging.getLogger("paperless.management.consumer") | logger = logging.getLogger("paperless.management.consumer") | ||||||
|  |  | ||||||
|  |  | ||||||
| def _tags_from_path(filepath) -> list[int]: | def _tags_from_path(filepath: Path) -> list[int]: | ||||||
|     """ |     """ | ||||||
|     Walk up the directory tree from filepath to CONSUMPTION_DIR |     Walk up the directory tree from filepath to CONSUMPTION_DIR | ||||||
|     and get or create Tag IDs for every directory. |     and get or create Tag IDs for every directory. | ||||||
| @@ -41,7 +41,7 @@ def _tags_from_path(filepath) -> list[int]: | |||||||
|     """ |     """ | ||||||
|     db.close_old_connections() |     db.close_old_connections() | ||||||
|     tag_ids = set() |     tag_ids = set() | ||||||
|     path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts |     path_parts = filepath.relative_to(settings.CONSUMPTION_DIR).parent.parts | ||||||
|     for part in path_parts: |     for part in path_parts: | ||||||
|         tag_ids.add( |         tag_ids.add( | ||||||
|             Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk, |             Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk, | ||||||
| @@ -50,17 +50,13 @@ def _tags_from_path(filepath) -> list[int]: | |||||||
|     return list(tag_ids) |     return list(tag_ids) | ||||||
|  |  | ||||||
|  |  | ||||||
| def _is_ignored(filepath: str) -> bool: | def _is_ignored(filepath: Path) -> bool: | ||||||
|     """ |     """ | ||||||
|     Checks if the given file should be ignored, based on configured |     Checks if the given file should be ignored, based on configured | ||||||
|     patterns. |     patterns. | ||||||
|  |  | ||||||
|     Returns True if the file is ignored, False otherwise |     Returns True if the file is ignored, False otherwise | ||||||
|     """ |     """ | ||||||
|     filepath = os.path.abspath( |  | ||||||
|         os.path.normpath(filepath), |  | ||||||
|     ) |  | ||||||
|  |  | ||||||
|     # Trim out the consume directory, leaving only filename and it's |     # Trim out the consume directory, leaving only filename and it's | ||||||
|     # path relative to the consume directory |     # path relative to the consume directory | ||||||
|     filepath_relative = PurePath(filepath).relative_to(settings.CONSUMPTION_DIR) |     filepath_relative = PurePath(filepath).relative_to(settings.CONSUMPTION_DIR) | ||||||
| @@ -85,15 +81,15 @@ def _is_ignored(filepath: str) -> bool: | |||||||
|     return False |     return False | ||||||
|  |  | ||||||
|  |  | ||||||
| def _consume(filepath: str) -> None: | def _consume(filepath: Path) -> None: | ||||||
|     if os.path.isdir(filepath) or _is_ignored(filepath): |     if filepath.is_dir() or _is_ignored(filepath): | ||||||
|         return |         return | ||||||
|  |  | ||||||
|     if not os.path.isfile(filepath): |     if not filepath.is_file(): | ||||||
|         logger.debug(f"Not consuming file {filepath}: File has moved.") |         logger.debug(f"Not consuming file {filepath}: File has moved.") | ||||||
|         return |         return | ||||||
|  |  | ||||||
|     if not is_file_ext_supported(os.path.splitext(filepath)[1]): |     if not is_file_ext_supported(filepath.suffix): | ||||||
|         logger.warning(f"Not consuming file {filepath}: Unknown file extension.") |         logger.warning(f"Not consuming file {filepath}: Unknown file extension.") | ||||||
|         return |         return | ||||||
|  |  | ||||||
| @@ -107,7 +103,7 @@ def _consume(filepath: str) -> None: | |||||||
|  |  | ||||||
|     while (read_try_count < os_error_retry_count) and not file_open_ok: |     while (read_try_count < os_error_retry_count) and not file_open_ok: | ||||||
|         try: |         try: | ||||||
|             with open(filepath, "rb"): |             with filepath.open("rb"): | ||||||
|                 file_open_ok = True |                 file_open_ok = True | ||||||
|         except OSError as e: |         except OSError as e: | ||||||
|             read_try_count += 1 |             read_try_count += 1 | ||||||
| @@ -141,7 +137,7 @@ def _consume(filepath: str) -> None: | |||||||
|         logger.exception("Error while consuming document") |         logger.exception("Error while consuming document") | ||||||
|  |  | ||||||
|  |  | ||||||
| def _consume_wait_unmodified(file: str) -> None: | def _consume_wait_unmodified(file: Path) -> None: | ||||||
|     """ |     """ | ||||||
|     Waits for the given file to appear unmodified based on file size |     Waits for the given file to appear unmodified based on file size | ||||||
|     and modification time.  Will wait a configured number of seconds |     and modification time.  Will wait a configured number of seconds | ||||||
| @@ -157,7 +153,7 @@ def _consume_wait_unmodified(file: str) -> None: | |||||||
|     current_try = 0 |     current_try = 0 | ||||||
|     while current_try < settings.CONSUMER_POLLING_RETRY_COUNT: |     while current_try < settings.CONSUMER_POLLING_RETRY_COUNT: | ||||||
|         try: |         try: | ||||||
|             stat_data = os.stat(file) |             stat_data = file.stat() | ||||||
|             new_mtime = stat_data.st_mtime |             new_mtime = stat_data.st_mtime | ||||||
|             new_size = stat_data.st_size |             new_size = stat_data.st_size | ||||||
|         except FileNotFoundError: |         except FileNotFoundError: | ||||||
| @@ -182,10 +178,10 @@ class Handler(FileSystemEventHandler): | |||||||
|         self._pool = pool |         self._pool = pool | ||||||
|  |  | ||||||
|     def on_created(self, event): |     def on_created(self, event): | ||||||
|         self._pool.submit(_consume_wait_unmodified, event.src_path) |         self._pool.submit(_consume_wait_unmodified, Path(event.src_path)) | ||||||
|  |  | ||||||
|     def on_moved(self, event): |     def on_moved(self, event): | ||||||
|         self._pool.submit(_consume_wait_unmodified, event.dest_path) |         self._pool.submit(_consume_wait_unmodified, Path(event.dest_path)) | ||||||
|  |  | ||||||
|  |  | ||||||
| class Command(BaseCommand): | class Command(BaseCommand): | ||||||
| @@ -227,9 +223,9 @@ class Command(BaseCommand): | |||||||
|         if not directory: |         if not directory: | ||||||
|             raise CommandError("CONSUMPTION_DIR does not appear to be set.") |             raise CommandError("CONSUMPTION_DIR does not appear to be set.") | ||||||
|  |  | ||||||
|         directory = os.path.abspath(directory) |         directory = Path(directory).resolve() | ||||||
|  |  | ||||||
|         if not os.path.isdir(directory): |         if not directory.is_dir(): | ||||||
|             raise CommandError(f"Consumption directory {directory} does not exist") |             raise CommandError(f"Consumption directory {directory} does not exist") | ||||||
|  |  | ||||||
|         # Consumer will need this |         # Consumer will need this | ||||||
| @@ -238,11 +234,11 @@ class Command(BaseCommand): | |||||||
|         if recursive: |         if recursive: | ||||||
|             for dirpath, _, filenames in os.walk(directory): |             for dirpath, _, filenames in os.walk(directory): | ||||||
|                 for filename in filenames: |                 for filename in filenames: | ||||||
|                     filepath = os.path.join(dirpath, filename) |                     filepath = Path(dirpath) / filename | ||||||
|                     _consume(filepath) |                     _consume(filepath) | ||||||
|         else: |         else: | ||||||
|             for entry in os.scandir(directory): |             for filepath in directory.iterdir(): | ||||||
|                 _consume(entry.path) |                 _consume(filepath) | ||||||
|  |  | ||||||
|         if options["oneshot"]: |         if options["oneshot"]: | ||||||
|             return |             return | ||||||
| @@ -310,7 +306,7 @@ class Command(BaseCommand): | |||||||
|                 try: |                 try: | ||||||
|                     for event in inotify.read(timeout=timeout_ms): |                     for event in inotify.read(timeout=timeout_ms): | ||||||
|                         path = inotify.get_path(event.wd) if recursive else directory |                         path = inotify.get_path(event.wd) if recursive else directory | ||||||
|                         filepath = os.path.join(path, event.name) |                         filepath = Path(path) / event.name | ||||||
|                         if flags.MODIFY in flags.from_mask(event.mask): |                         if flags.MODIFY in flags.from_mask(event.mask): | ||||||
|                             notified_files.pop(filepath, None) |                             notified_files.pop(filepath, None) | ||||||
|                         else: |                         else: | ||||||
| @@ -327,9 +323,7 @@ class Command(BaseCommand): | |||||||
|  |  | ||||||
|                         # Also make sure the file exists still, some scanners might write a |                         # Also make sure the file exists still, some scanners might write a | ||||||
|                         # temporary file first |                         # temporary file first | ||||||
|                         file_still_exists = os.path.exists(filepath) and os.path.isfile( |                         file_still_exists = filepath.exists() and filepath.is_file() | ||||||
|                             filepath, |  | ||||||
|                         ) |  | ||||||
|  |  | ||||||
|                         if waited_long_enough and file_still_exists: |                         if waited_long_enough and file_still_exists: | ||||||
|                             _consume(filepath) |                             _consume(filepath) | ||||||
|   | |||||||
| @@ -5,6 +5,7 @@ import logging | |||||||
| import os | import os | ||||||
| import shutil | import shutil | ||||||
| from collections import defaultdict | from collections import defaultdict | ||||||
|  | from pathlib import Path | ||||||
| from time import sleep | from time import sleep | ||||||
|  |  | ||||||
| import pathvalidate | import pathvalidate | ||||||
| @@ -50,38 +51,38 @@ def many_to_dictionary(field):  # pragma: no cover | |||||||
|     return mydictionary |     return mydictionary | ||||||
|  |  | ||||||
|  |  | ||||||
| def archive_name_from_filename(filename): | def archive_name_from_filename(filename: Path) -> Path: | ||||||
|     return os.path.splitext(filename)[0] + ".pdf" |     return Path(filename.stem + ".pdf") | ||||||
|  |  | ||||||
|  |  | ||||||
| def archive_path_old(doc): | def archive_path_old(doc) -> Path: | ||||||
|     if doc.filename: |     if doc.filename: | ||||||
|         fname = archive_name_from_filename(doc.filename) |         fname = archive_name_from_filename(Path(doc.filename)) | ||||||
|     else: |     else: | ||||||
|         fname = f"{doc.pk:07}.pdf" |         fname = Path(f"{doc.pk:07}.pdf") | ||||||
|  |  | ||||||
|     return os.path.join(settings.ARCHIVE_DIR, fname) |     return settings.ARCHIVE_DIR / fname | ||||||
|  |  | ||||||
|  |  | ||||||
| STORAGE_TYPE_GPG = "gpg" | STORAGE_TYPE_GPG = "gpg" | ||||||
|  |  | ||||||
|  |  | ||||||
| def archive_path_new(doc): | def archive_path_new(doc) -> Path | None: | ||||||
|     if doc.archive_filename is not None: |     if doc.archive_filename is not None: | ||||||
|         return os.path.join(settings.ARCHIVE_DIR, str(doc.archive_filename)) |         return settings.ARCHIVE_DIR / doc.archive_filename | ||||||
|     else: |     else: | ||||||
|         return None |         return None | ||||||
|  |  | ||||||
|  |  | ||||||
| def source_path(doc): | def source_path(doc) -> Path: | ||||||
|     if doc.filename: |     if doc.filename: | ||||||
|         fname = str(doc.filename) |         fname = doc.filename | ||||||
|     else: |     else: | ||||||
|         fname = f"{doc.pk:07}{doc.file_type}" |         fname = f"{doc.pk:07}{doc.file_type}" | ||||||
|         if doc.storage_type == STORAGE_TYPE_GPG: |         if doc.storage_type == STORAGE_TYPE_GPG: | ||||||
|             fname += ".gpg"  # pragma: no cover |             fname = Path(str(fname) + ".gpg")  # pragma: no cover | ||||||
|  |  | ||||||
|     return os.path.join(settings.ORIGINALS_DIR, fname) |     return settings.ORIGINALS_DIR / fname | ||||||
|  |  | ||||||
|  |  | ||||||
| def generate_unique_filename(doc, *, archive_filename=False): | def generate_unique_filename(doc, *, archive_filename=False): | ||||||
| @@ -104,7 +105,7 @@ def generate_unique_filename(doc, *, archive_filename=False): | |||||||
|             # still the same as before. |             # still the same as before. | ||||||
|             return new_filename |             return new_filename | ||||||
|  |  | ||||||
|         if os.path.exists(os.path.join(root, new_filename)): |         if (root / new_filename).exists(): | ||||||
|             counter += 1 |             counter += 1 | ||||||
|         else: |         else: | ||||||
|             return new_filename |             return new_filename | ||||||
| @@ -202,18 +203,18 @@ def create_archive_version(doc, retry_count=3): | |||||||
|                 parser, |                 parser, | ||||||
|                 source_path(doc), |                 source_path(doc), | ||||||
|                 doc.mime_type, |                 doc.mime_type, | ||||||
|                 os.path.basename(doc.filename), |                 Path(doc.filename).name, | ||||||
|             ) |             ) | ||||||
|             doc.content = parser.get_text() |             doc.content = parser.get_text() | ||||||
|  |  | ||||||
|             if parser.get_archive_path() and os.path.isfile(parser.get_archive_path()): |             if parser.get_archive_path() and Path(parser.get_archive_path()).is_file(): | ||||||
|                 doc.archive_filename = generate_unique_filename( |                 doc.archive_filename = generate_unique_filename( | ||||||
|                     doc, |                     doc, | ||||||
|                     archive_filename=True, |                     archive_filename=True, | ||||||
|                 ) |                 ) | ||||||
|                 with open(parser.get_archive_path(), "rb") as f: |                 with Path(parser.get_archive_path()).open("rb") as f: | ||||||
|                     doc.archive_checksum = hashlib.md5(f.read()).hexdigest() |                     doc.archive_checksum = hashlib.md5(f.read()).hexdigest() | ||||||
|                 os.makedirs(os.path.dirname(archive_path_new(doc)), exist_ok=True) |                 archive_path_new(doc).parent.mkdir(parents=True, exist_ok=True) | ||||||
|                 shutil.copy2(parser.get_archive_path(), archive_path_new(doc)) |                 shutil.copy2(parser.get_archive_path(), archive_path_new(doc)) | ||||||
|             else: |             else: | ||||||
|                 doc.archive_checksum = None |                 doc.archive_checksum = None | ||||||
| @@ -264,7 +265,7 @@ def move_old_to_new_locations(apps, schema_editor): | |||||||
|     # check that archive files of all unaffected documents are in place |     # check that archive files of all unaffected documents are in place | ||||||
|     for doc in Document.objects.filter(archive_checksum__isnull=False): |     for doc in Document.objects.filter(archive_checksum__isnull=False): | ||||||
|         old_path = archive_path_old(doc) |         old_path = archive_path_old(doc) | ||||||
|         if doc.id not in affected_document_ids and not os.path.isfile(old_path): |         if doc.id not in affected_document_ids and not old_path.is_file(): | ||||||
|             raise ValueError( |             raise ValueError( | ||||||
|                 f"Archived document ID:{doc.id} does not exist at: {old_path}", |                 f"Archived document ID:{doc.id} does not exist at: {old_path}", | ||||||
|             ) |             ) | ||||||
| @@ -285,12 +286,12 @@ def move_old_to_new_locations(apps, schema_editor): | |||||||
|         if doc.id in affected_document_ids: |         if doc.id in affected_document_ids: | ||||||
|             old_path = archive_path_old(doc) |             old_path = archive_path_old(doc) | ||||||
|             # remove affected archive versions |             # remove affected archive versions | ||||||
|             if os.path.isfile(old_path): |             if old_path.is_file(): | ||||||
|                 logger.debug(f"Removing {old_path}") |                 logger.debug(f"Removing {old_path}") | ||||||
|                 os.unlink(old_path) |                 old_path.unlink() | ||||||
|         else: |         else: | ||||||
|             # Set archive path for unaffected files |             # Set archive path for unaffected files | ||||||
|             doc.archive_filename = archive_name_from_filename(doc.filename) |             doc.archive_filename = archive_name_from_filename(Path(doc.filename)) | ||||||
|             Document.objects.filter(id=doc.id).update( |             Document.objects.filter(id=doc.id).update( | ||||||
|                 archive_filename=doc.archive_filename, |                 archive_filename=doc.archive_filename, | ||||||
|             ) |             ) | ||||||
| @@ -316,7 +317,7 @@ def move_new_to_old_locations(apps, schema_editor): | |||||||
|                 f"filename.", |                 f"filename.", | ||||||
|             ) |             ) | ||||||
|         old_archive_paths.add(old_archive_path) |         old_archive_paths.add(old_archive_path) | ||||||
|         if new_archive_path != old_archive_path and os.path.isfile(old_archive_path): |         if new_archive_path != old_archive_path and old_archive_path.is_file(): | ||||||
|             raise ValueError( |             raise ValueError( | ||||||
|                 f"Cannot migrate: Cannot move {new_archive_path} to " |                 f"Cannot migrate: Cannot move {new_archive_path} to " | ||||||
|                 f"{old_archive_path}: file already exists.", |                 f"{old_archive_path}: file already exists.", | ||||||
|   | |||||||
| @@ -169,7 +169,7 @@ def run_convert( | |||||||
|     args += ["-depth", str(depth)] if depth else [] |     args += ["-depth", str(depth)] if depth else [] | ||||||
|     args += ["-auto-orient"] if auto_orient else [] |     args += ["-auto-orient"] if auto_orient else [] | ||||||
|     args += ["-define", "pdf:use-cropbox=true"] if use_cropbox else [] |     args += ["-define", "pdf:use-cropbox=true"] if use_cropbox else [] | ||||||
|     args += [input_file, output_file] |     args += [str(input_file), str(output_file)] | ||||||
|  |  | ||||||
|     logger.debug("Execute: " + " ".join(args), extra={"group": logging_group}) |     logger.debug("Execute: " + " ".join(args), extra={"group": logging_group}) | ||||||
|  |  | ||||||
| @@ -188,8 +188,8 @@ def get_default_thumbnail() -> Path: | |||||||
|     return (Path(__file__).parent / "resources" / "document.webp").resolve() |     return (Path(__file__).parent / "resources" / "document.webp").resolve() | ||||||
|  |  | ||||||
|  |  | ||||||
| def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -> str: | def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -> Path: | ||||||
|     out_path = os.path.join(temp_dir, "convert_gs.webp") |     out_path: Path = Path(temp_dir) / "convert_gs.webp" | ||||||
|  |  | ||||||
|     # if convert fails, fall back to extracting |     # if convert fails, fall back to extracting | ||||||
|     # the first PDF page as a PNG using Ghostscript |     # the first PDF page as a PNG using Ghostscript | ||||||
| @@ -199,7 +199,7 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) - | |||||||
|         extra={"group": logging_group}, |         extra={"group": logging_group}, | ||||||
|     ) |     ) | ||||||
|     # Ghostscript doesn't handle WebP outputs |     # Ghostscript doesn't handle WebP outputs | ||||||
|     gs_out_path = os.path.join(temp_dir, "gs_out.png") |     gs_out_path: Path = Path(temp_dir) / "gs_out.png" | ||||||
|     cmd = [settings.GS_BINARY, "-q", "-sDEVICE=pngalpha", "-o", gs_out_path, in_path] |     cmd = [settings.GS_BINARY, "-q", "-sDEVICE=pngalpha", "-o", gs_out_path, in_path] | ||||||
|  |  | ||||||
|     try: |     try: | ||||||
| @@ -227,16 +227,16 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) - | |||||||
|         # The caller might expect a generated thumbnail that can be moved, |         # The caller might expect a generated thumbnail that can be moved, | ||||||
|         # so we need to copy it before it gets moved. |         # so we need to copy it before it gets moved. | ||||||
|         # https://github.com/paperless-ngx/paperless-ngx/issues/3631 |         # https://github.com/paperless-ngx/paperless-ngx/issues/3631 | ||||||
|         default_thumbnail_path = os.path.join(temp_dir, "document.webp") |         default_thumbnail_path: Path = Path(temp_dir) / "document.webp" | ||||||
|         copy_file_with_basic_stats(get_default_thumbnail(), default_thumbnail_path) |         copy_file_with_basic_stats(get_default_thumbnail(), default_thumbnail_path) | ||||||
|         return default_thumbnail_path |         return default_thumbnail_path | ||||||
|  |  | ||||||
|  |  | ||||||
| def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None) -> Path: | def make_thumbnail_from_pdf(in_path: Path, temp_dir: Path, logging_group=None) -> Path: | ||||||
|     """ |     """ | ||||||
|     The thumbnail of a PDF is just a 500px wide image of the first page. |     The thumbnail of a PDF is just a 500px wide image of the first page. | ||||||
|     """ |     """ | ||||||
|     out_path = temp_dir / "convert.webp" |     out_path: Path = temp_dir / "convert.webp" | ||||||
|  |  | ||||||
|     # Run convert to get a decent thumbnail |     # Run convert to get a decent thumbnail | ||||||
|     try: |     try: | ||||||
|   | |||||||
| @@ -654,7 +654,7 @@ class TestClassifier(DirectoriesMixin, TestCase): | |||||||
|         }, |         }, | ||||||
|     ) |     ) | ||||||
|     @override_settings( |     @override_settings( | ||||||
|         MODEL_FILE=(Path(__file__).parent / "data" / "model.pickle").as_posix(), |         MODEL_FILE=str(Path(__file__).parent / "data" / "model.pickle"), | ||||||
|     ) |     ) | ||||||
|     @pytest.mark.skip( |     @pytest.mark.skip( | ||||||
|         reason="Disabled caching due to high memory usage - need to investigate.", |         reason="Disabled caching due to high memory usage - need to investigate.", | ||||||
|   | |||||||
| @@ -254,7 +254,7 @@ class TestConsumer( | |||||||
|         # https://github.com/jonaswinkler/paperless-ng/discussions/1037 |         # https://github.com/jonaswinkler/paperless-ng/discussions/1037 | ||||||
|  |  | ||||||
|         filename = self.get_test_file() |         filename = self.get_test_file() | ||||||
|         shadow_file = Path(self.dirs.scratch_dir / "._sample.pdf") |         shadow_file = Path(self.dirs.scratch_dir) / "._sample.pdf" | ||||||
|  |  | ||||||
|         shutil.copy(filename, shadow_file) |         shutil.copy(filename, shadow_file) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -258,66 +258,66 @@ class TestConsumer(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCase): | |||||||
|     def test_is_ignored(self): |     def test_is_ignored(self): | ||||||
|         test_paths = [ |         test_paths = [ | ||||||
|             { |             { | ||||||
|                 "path": (Path(self.dirs.consumption_dir) / "foo.pdf").as_posix(), |                 "path": str(Path(self.dirs.consumption_dir) / "foo.pdf"), | ||||||
|                 "ignore": False, |                 "ignore": False, | ||||||
|             }, |             }, | ||||||
|             { |             { | ||||||
|                 "path": ( |                 "path": str( | ||||||
|                     Path(self.dirs.consumption_dir) / "foo" / "bar.pdf" |                     Path(self.dirs.consumption_dir) / "foo" / "bar.pdf", | ||||||
|                 ).as_posix(), |                 ), | ||||||
|                 "ignore": False, |                 "ignore": False, | ||||||
|             }, |             }, | ||||||
|             { |             { | ||||||
|                 "path": (Path(self.dirs.consumption_dir) / ".DS_STORE").as_posix(), |                 "path": str(Path(self.dirs.consumption_dir) / ".DS_STORE"), | ||||||
|                 "ignore": True, |                 "ignore": True, | ||||||
|             }, |             }, | ||||||
|             { |             { | ||||||
|                 "path": (Path(self.dirs.consumption_dir) / ".DS_Store").as_posix(), |                 "path": str(Path(self.dirs.consumption_dir) / ".DS_Store"), | ||||||
|                 "ignore": True, |                 "ignore": True, | ||||||
|             }, |             }, | ||||||
|             { |             { | ||||||
|                 "path": ( |                 "path": str( | ||||||
|                     Path(self.dirs.consumption_dir) / ".stfolder" / "foo.pdf" |                     Path(self.dirs.consumption_dir) / ".stfolder" / "foo.pdf", | ||||||
|                 ).as_posix(), |                 ), | ||||||
|                 "ignore": True, |                 "ignore": True, | ||||||
|             }, |             }, | ||||||
|             { |             { | ||||||
|                 "path": (Path(self.dirs.consumption_dir) / ".stfolder.pdf").as_posix(), |                 "path": str(Path(self.dirs.consumption_dir) / ".stfolder.pdf"), | ||||||
|                 "ignore": False, |                 "ignore": False, | ||||||
|             }, |             }, | ||||||
|             { |             { | ||||||
|                 "path": ( |                 "path": str( | ||||||
|                     Path(self.dirs.consumption_dir) / ".stversions" / "foo.pdf" |                     Path(self.dirs.consumption_dir) / ".stversions" / "foo.pdf", | ||||||
|                 ).as_posix(), |                 ), | ||||||
|                 "ignore": True, |                 "ignore": True, | ||||||
|             }, |             }, | ||||||
|             { |             { | ||||||
|                 "path": ( |                 "path": str( | ||||||
|                     Path(self.dirs.consumption_dir) / ".stversions.pdf" |                     Path(self.dirs.consumption_dir) / ".stversions.pdf", | ||||||
|                 ).as_posix(), |                 ), | ||||||
|                 "ignore": False, |                 "ignore": False, | ||||||
|             }, |             }, | ||||||
|             { |             { | ||||||
|                 "path": (Path(self.dirs.consumption_dir) / "._foo.pdf").as_posix(), |                 "path": str(Path(self.dirs.consumption_dir) / "._foo.pdf"), | ||||||
|                 "ignore": True, |                 "ignore": True, | ||||||
|             }, |             }, | ||||||
|             { |             { | ||||||
|                 "path": (Path(self.dirs.consumption_dir) / "my_foo.pdf").as_posix(), |                 "path": str(Path(self.dirs.consumption_dir) / "my_foo.pdf"), | ||||||
|                 "ignore": False, |                 "ignore": False, | ||||||
|             }, |             }, | ||||||
|             { |             { | ||||||
|                 "path": ( |                 "path": str( | ||||||
|                     Path(self.dirs.consumption_dir) / "._foo" / "bar.pdf" |                     Path(self.dirs.consumption_dir) / "._foo" / "bar.pdf", | ||||||
|                 ).as_posix(), |                 ), | ||||||
|                 "ignore": True, |                 "ignore": True, | ||||||
|             }, |             }, | ||||||
|             { |             { | ||||||
|                 "path": ( |                 "path": str( | ||||||
|                     Path(self.dirs.consumption_dir) |                     Path(self.dirs.consumption_dir) | ||||||
|                     / "@eaDir" |                     / "@eaDir" | ||||||
|                     / "SYNO@.fileindexdb" |                     / "SYNO@.fileindexdb" | ||||||
|                     / "_1jk.fnm" |                     / "_1jk.fnm", | ||||||
|                 ).as_posix(), |                 ), | ||||||
|                 "ignore": True, |                 "ignore": True, | ||||||
|             }, |             }, | ||||||
|         ] |         ] | ||||||
| @@ -330,7 +330,7 @@ class TestConsumer(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCase): | |||||||
|                 f'_is_ignored("{filepath}") != {expected_ignored_result}', |                 f'_is_ignored("{filepath}") != {expected_ignored_result}', | ||||||
|             ) |             ) | ||||||
|  |  | ||||||
|     @mock.patch("documents.management.commands.document_consumer.open") |     @mock.patch("documents.management.commands.document_consumer.Path.open") | ||||||
|     def test_consume_file_busy(self, open_mock): |     def test_consume_file_busy(self, open_mock): | ||||||
|         # Calling this mock always raises this |         # Calling this mock always raises this | ||||||
|         open_mock.side_effect = OSError |         open_mock.side_effect = OSError | ||||||
|   | |||||||
| @@ -230,9 +230,9 @@ class TestExportImport( | |||||||
|  |  | ||||||
|         for element in manifest: |         for element in manifest: | ||||||
|             if element["model"] == "documents.document": |             if element["model"] == "documents.document": | ||||||
|                 fname = ( |                 fname = str( | ||||||
|                     self.target / element[document_exporter.EXPORTER_FILE_NAME] |                     self.target / element[document_exporter.EXPORTER_FILE_NAME], | ||||||
|                 ).as_posix() |                 ) | ||||||
|                 self.assertIsFile(fname) |                 self.assertIsFile(fname) | ||||||
|                 self.assertIsFile( |                 self.assertIsFile( | ||||||
|                     self.target / element[document_exporter.EXPORTER_THUMBNAIL_NAME], |                     self.target / element[document_exporter.EXPORTER_THUMBNAIL_NAME], | ||||||
| @@ -462,9 +462,9 @@ class TestExportImport( | |||||||
|  |  | ||||||
|         call_command(*args) |         call_command(*args) | ||||||
|  |  | ||||||
|         expected_file = ( |         expected_file = str( | ||||||
|             self.target / f"export-{timezone.localdate().isoformat()}.zip" |             self.target / f"export-{timezone.localdate().isoformat()}.zip", | ||||||
|         ).as_posix() |         ) | ||||||
|  |  | ||||||
|         self.assertIsFile(expected_file) |         self.assertIsFile(expected_file) | ||||||
|  |  | ||||||
| @@ -498,9 +498,9 @@ class TestExportImport( | |||||||
|         ): |         ): | ||||||
|             call_command(*args) |             call_command(*args) | ||||||
|  |  | ||||||
|         expected_file = ( |         expected_file = str( | ||||||
|             self.target / f"export-{timezone.localdate().isoformat()}.zip" |             self.target / f"export-{timezone.localdate().isoformat()}.zip", | ||||||
|         ).as_posix() |         ) | ||||||
|  |  | ||||||
|         self.assertIsFile(expected_file) |         self.assertIsFile(expected_file) | ||||||
|  |  | ||||||
| @@ -544,9 +544,9 @@ class TestExportImport( | |||||||
|  |  | ||||||
|         call_command(*args) |         call_command(*args) | ||||||
|  |  | ||||||
|         expected_file = ( |         expected_file = str( | ||||||
|             self.target / f"export-{timezone.localdate().isoformat()}.zip" |             self.target / f"export-{timezone.localdate().isoformat()}.zip", | ||||||
|         ).as_posix() |         ) | ||||||
|  |  | ||||||
|         self.assertIsFile(expected_file) |         self.assertIsFile(expected_file) | ||||||
|         self.assertIsNotFile(existing_file) |         self.assertIsNotFile(existing_file) | ||||||
|   | |||||||
| @@ -19,15 +19,15 @@ migration_1012_obj = importlib.import_module( | |||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| def archive_name_from_filename(filename): | def archive_name_from_filename(filename: Path) -> Path: | ||||||
|     return Path(filename).stem + ".pdf" |     return Path(filename.stem + ".pdf") | ||||||
|  |  | ||||||
|  |  | ||||||
| def archive_path_old(self): | def archive_path_old(self) -> Path: | ||||||
|     if self.filename: |     if self.filename: | ||||||
|         fname = archive_name_from_filename(self.filename) |         fname = archive_name_from_filename(Path(self.filename)) | ||||||
|     else: |     else: | ||||||
|         fname = f"{self.pk:07}.pdf" |         fname = Path(f"{self.pk:07}.pdf") | ||||||
|  |  | ||||||
|     return Path(settings.ARCHIVE_DIR) / fname |     return Path(settings.ARCHIVE_DIR) / fname | ||||||
|  |  | ||||||
|   | |||||||
| @@ -679,7 +679,7 @@ def _parse_db_settings() -> dict: | |||||||
|     databases = { |     databases = { | ||||||
|         "default": { |         "default": { | ||||||
|             "ENGINE": "django.db.backends.sqlite3", |             "ENGINE": "django.db.backends.sqlite3", | ||||||
|             "NAME": str(DATA_DIR / "db.sqlite3"), |             "NAME": DATA_DIR / "db.sqlite3", | ||||||
|             "OPTIONS": {}, |             "OPTIONS": {}, | ||||||
|         }, |         }, | ||||||
|     } |     } | ||||||
| @@ -807,7 +807,7 @@ LANGUAGES = [ | |||||||
|     ("zh-tw", _("Chinese Traditional")), |     ("zh-tw", _("Chinese Traditional")), | ||||||
| ] | ] | ||||||
|  |  | ||||||
| LOCALE_PATHS = [str(BASE_DIR / "locale")] | LOCALE_PATHS = [BASE_DIR / "locale"] | ||||||
|  |  | ||||||
| TIME_ZONE = os.getenv("PAPERLESS_TIME_ZONE", "UTC") | TIME_ZONE = os.getenv("PAPERLESS_TIME_ZONE", "UTC") | ||||||
|  |  | ||||||
| @@ -848,21 +848,21 @@ LOGGING = { | |||||||
|         "file_paperless": { |         "file_paperless": { | ||||||
|             "class": "concurrent_log_handler.ConcurrentRotatingFileHandler", |             "class": "concurrent_log_handler.ConcurrentRotatingFileHandler", | ||||||
|             "formatter": "verbose", |             "formatter": "verbose", | ||||||
|             "filename": str(LOGGING_DIR / "paperless.log"), |             "filename": LOGGING_DIR / "paperless.log", | ||||||
|             "maxBytes": LOGROTATE_MAX_SIZE, |             "maxBytes": LOGROTATE_MAX_SIZE, | ||||||
|             "backupCount": LOGROTATE_MAX_BACKUPS, |             "backupCount": LOGROTATE_MAX_BACKUPS, | ||||||
|         }, |         }, | ||||||
|         "file_mail": { |         "file_mail": { | ||||||
|             "class": "concurrent_log_handler.ConcurrentRotatingFileHandler", |             "class": "concurrent_log_handler.ConcurrentRotatingFileHandler", | ||||||
|             "formatter": "verbose", |             "formatter": "verbose", | ||||||
|             "filename": str(LOGGING_DIR / "mail.log"), |             "filename": LOGGING_DIR / "mail.log", | ||||||
|             "maxBytes": LOGROTATE_MAX_SIZE, |             "maxBytes": LOGROTATE_MAX_SIZE, | ||||||
|             "backupCount": LOGROTATE_MAX_BACKUPS, |             "backupCount": LOGROTATE_MAX_BACKUPS, | ||||||
|         }, |         }, | ||||||
|         "file_celery": { |         "file_celery": { | ||||||
|             "class": "concurrent_log_handler.ConcurrentRotatingFileHandler", |             "class": "concurrent_log_handler.ConcurrentRotatingFileHandler", | ||||||
|             "formatter": "verbose", |             "formatter": "verbose", | ||||||
|             "filename": str(LOGGING_DIR / "celery.log"), |             "filename": LOGGING_DIR / "celery.log", | ||||||
|             "maxBytes": LOGROTATE_MAX_SIZE, |             "maxBytes": LOGROTATE_MAX_SIZE, | ||||||
|             "backupCount": LOGROTATE_MAX_BACKUPS, |             "backupCount": LOGROTATE_MAX_BACKUPS, | ||||||
|         }, |         }, | ||||||
| @@ -921,7 +921,7 @@ CELERY_ACCEPT_CONTENT = ["application/json", "application/x-python-serialize"] | |||||||
| CELERY_BEAT_SCHEDULE = _parse_beat_schedule() | CELERY_BEAT_SCHEDULE = _parse_beat_schedule() | ||||||
|  |  | ||||||
| # https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename | # https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename | ||||||
| CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db") | CELERY_BEAT_SCHEDULE_FILENAME = DATA_DIR / "celerybeat-schedule.db" | ||||||
|  |  | ||||||
|  |  | ||||||
| # Cachalot: Database read cache. | # Cachalot: Database read cache. | ||||||
|   | |||||||
| @@ -69,13 +69,13 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(uuid.uuid4()) |         parser = RasterisedDocumentParser(uuid.uuid4()) | ||||||
|         page_count = parser.get_page_count( |         page_count = parser.get_page_count( | ||||||
|             (self.SAMPLE_FILES / "simple-digital.pdf").as_posix(), |             str(self.SAMPLE_FILES / "simple-digital.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertEqual(page_count, 1) |         self.assertEqual(page_count, 1) | ||||||
|  |  | ||||||
|         page_count = parser.get_page_count( |         page_count = parser.get_page_count( | ||||||
|             (self.SAMPLE_FILES / "multi-page-mixed.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-mixed.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertEqual(page_count, 6) |         self.assertEqual(page_count, 6) | ||||||
| @@ -92,7 +92,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         parser = RasterisedDocumentParser(uuid.uuid4()) |         parser = RasterisedDocumentParser(uuid.uuid4()) | ||||||
|         with self.assertLogs("paperless.parsing.tesseract", level="WARNING") as cm: |         with self.assertLogs("paperless.parsing.tesseract", level="WARNING") as cm: | ||||||
|             page_count = parser.get_page_count( |             page_count = parser.get_page_count( | ||||||
|                 (self.SAMPLE_FILES / "password-protected.pdf").as_posix(), |                 str(self.SAMPLE_FILES / "password-protected.pdf"), | ||||||
|                 "application/pdf", |                 "application/pdf", | ||||||
|             ) |             ) | ||||||
|             self.assertEqual(page_count, None) |             self.assertEqual(page_count, None) | ||||||
| @@ -101,7 +101,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|     def test_thumbnail(self): |     def test_thumbnail(self): | ||||||
|         parser = RasterisedDocumentParser(uuid.uuid4()) |         parser = RasterisedDocumentParser(uuid.uuid4()) | ||||||
|         thumb = parser.get_thumbnail( |         thumb = parser.get_thumbnail( | ||||||
|             (self.SAMPLE_FILES / "simple-digital.pdf").as_posix(), |             str(self.SAMPLE_FILES / "simple-digital.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsFile(thumb) |         self.assertIsFile(thumb) | ||||||
| @@ -109,7 +109,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|     @mock.patch("documents.parsers.run_convert") |     @mock.patch("documents.parsers.run_convert") | ||||||
|     def test_thumbnail_fallback(self, m): |     def test_thumbnail_fallback(self, m): | ||||||
|         def call_convert(input_file, output_file, **kwargs): |         def call_convert(input_file, output_file, **kwargs): | ||||||
|             if ".pdf" in input_file: |             if ".pdf" in str(input_file): | ||||||
|                 raise ParseError("Does not compute.") |                 raise ParseError("Does not compute.") | ||||||
|             else: |             else: | ||||||
|                 run_convert(input_file=input_file, output_file=output_file, **kwargs) |                 run_convert(input_file=input_file, output_file=output_file, **kwargs) | ||||||
| @@ -118,7 +118,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|  |  | ||||||
|         parser = RasterisedDocumentParser(uuid.uuid4()) |         parser = RasterisedDocumentParser(uuid.uuid4()) | ||||||
|         thumb = parser.get_thumbnail( |         thumb = parser.get_thumbnail( | ||||||
|             (self.SAMPLE_FILES / "simple-digital.pdf").as_posix(), |             str(self.SAMPLE_FILES / "simple-digital.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsFile(thumb) |         self.assertIsFile(thumb) | ||||||
| @@ -126,7 +126,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|     def test_thumbnail_encrypted(self): |     def test_thumbnail_encrypted(self): | ||||||
|         parser = RasterisedDocumentParser(uuid.uuid4()) |         parser = RasterisedDocumentParser(uuid.uuid4()) | ||||||
|         thumb = parser.get_thumbnail( |         thumb = parser.get_thumbnail( | ||||||
|             (self.SAMPLE_FILES / "encrypted.pdf").as_posix(), |             str(self.SAMPLE_FILES / "encrypted.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsFile(thumb) |         self.assertIsFile(thumb) | ||||||
| @@ -134,17 +134,17 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|     def test_get_dpi(self): |     def test_get_dpi(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|  |  | ||||||
|         dpi = parser.get_dpi((self.SAMPLE_FILES / "simple-no-dpi.png").as_posix()) |         dpi = parser.get_dpi(str(self.SAMPLE_FILES / "simple-no-dpi.png")) | ||||||
|         self.assertEqual(dpi, None) |         self.assertEqual(dpi, None) | ||||||
|  |  | ||||||
|         dpi = parser.get_dpi((self.SAMPLE_FILES / "simple.png").as_posix()) |         dpi = parser.get_dpi(str(self.SAMPLE_FILES / "simple.png")) | ||||||
|         self.assertEqual(dpi, 72) |         self.assertEqual(dpi, 72) | ||||||
|  |  | ||||||
|     def test_simple_digital(self): |     def test_simple_digital(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|  |  | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "simple-digital.pdf").as_posix(), |             str(self.SAMPLE_FILES / "simple-digital.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
| @@ -156,7 +156,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|  |  | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "with-form.pdf").as_posix(), |             str(self.SAMPLE_FILES / "with-form.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
| @@ -172,7 +172,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|  |  | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "with-form.pdf").as_posix(), |             str(self.SAMPLE_FILES / "with-form.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
| @@ -186,7 +186,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|     def test_signed(self): |     def test_signed(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|  |  | ||||||
|         parser.parse((self.SAMPLE_FILES / "signed.pdf").as_posix(), "application/pdf") |         parser.parse(str(self.SAMPLE_FILES / "signed.pdf"), "application/pdf") | ||||||
|  |  | ||||||
|         self.assertIsNone(parser.archive_path) |         self.assertIsNone(parser.archive_path) | ||||||
|         self.assertContainsStrings( |         self.assertContainsStrings( | ||||||
| @@ -202,7 +202,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|  |  | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "encrypted.pdf").as_posix(), |             str(self.SAMPLE_FILES / "encrypted.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
| @@ -213,7 +213,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|     def test_with_form_error_notext(self): |     def test_with_form_error_notext(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "with-form.pdf").as_posix(), |             str(self.SAMPLE_FILES / "with-form.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
| @@ -227,7 +227,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|  |  | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "with-form.pdf").as_posix(), |             str(self.SAMPLE_FILES / "with-form.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
| @@ -239,7 +239,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|     def test_image_simple(self): |     def test_image_simple(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|  |  | ||||||
|         parser.parse((self.SAMPLE_FILES / "simple.png").as_posix(), "image/png") |         parser.parse(str(self.SAMPLE_FILES / "simple.png"), "image/png") | ||||||
|  |  | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
|  |  | ||||||
| @@ -255,7 +255,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|             dest_file = Path(tempdir) / "simple-alpha.png" |             dest_file = Path(tempdir) / "simple-alpha.png" | ||||||
|             shutil.copy(sample_file, dest_file) |             shutil.copy(sample_file, dest_file) | ||||||
|  |  | ||||||
|             parser.parse(dest_file.as_posix(), "image/png") |             parser.parse(str(dest_file), "image/png") | ||||||
|  |  | ||||||
|             self.assertIsFile(parser.archive_path) |             self.assertIsFile(parser.archive_path) | ||||||
|  |  | ||||||
| @@ -265,7 +265,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|  |  | ||||||
|         dpi = parser.calculate_a4_dpi( |         dpi = parser.calculate_a4_dpi( | ||||||
|             (self.SAMPLE_FILES / "simple-no-dpi.png").as_posix(), |             str(self.SAMPLE_FILES / "simple-no-dpi.png"), | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
|         self.assertEqual(dpi, 62) |         self.assertEqual(dpi, 62) | ||||||
| @@ -277,7 +277,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|  |  | ||||||
|         def f(): |         def f(): | ||||||
|             parser.parse( |             parser.parse( | ||||||
|                 (self.SAMPLE_FILES / "simple-no-dpi.png").as_posix(), |                 str(self.SAMPLE_FILES / "simple-no-dpi.png"), | ||||||
|                 "image/png", |                 "image/png", | ||||||
|             ) |             ) | ||||||
|  |  | ||||||
| @@ -287,7 +287,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|     def test_image_no_dpi_default(self): |     def test_image_no_dpi_default(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|  |  | ||||||
|         parser.parse((self.SAMPLE_FILES / "simple-no-dpi.png").as_posix(), "image/png") |         parser.parse(str(self.SAMPLE_FILES / "simple-no-dpi.png"), "image/png") | ||||||
|  |  | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
|  |  | ||||||
| @@ -299,7 +299,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|     def test_multi_page(self): |     def test_multi_page(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-digital.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
| @@ -312,7 +312,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|     def test_multi_page_pages_skip(self): |     def test_multi_page_pages_skip(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-digital.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
| @@ -325,7 +325,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|     def test_multi_page_pages_redo(self): |     def test_multi_page_pages_redo(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-digital.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
| @@ -338,7 +338,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|     def test_multi_page_pages_force(self): |     def test_multi_page_pages_force(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-digital.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
| @@ -351,7 +351,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|     def test_multi_page_analog_pages_skip(self): |     def test_multi_page_analog_pages_skip(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-images.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
| @@ -375,7 +375,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-images.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
| @@ -397,7 +397,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-images.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
| @@ -419,7 +419,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-digital.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsNone(parser.archive_path) |         self.assertIsNone(parser.archive_path) | ||||||
| @@ -442,7 +442,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-images.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
| @@ -467,7 +467,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-digital.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsNotNone(parser.archive_path) |         self.assertIsNotNone(parser.archive_path) | ||||||
| @@ -490,7 +490,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-images.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsNotNone(parser.archive_path) |         self.assertIsNotNone(parser.archive_path) | ||||||
| @@ -513,7 +513,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-digital.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsNone(parser.archive_path) |         self.assertIsNone(parser.archive_path) | ||||||
| @@ -536,7 +536,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-images.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsNotNone(parser.archive_path) |         self.assertIsNotNone(parser.archive_path) | ||||||
| @@ -559,7 +559,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-digital.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-digital.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsNone(parser.archive_path) |         self.assertIsNone(parser.archive_path) | ||||||
| @@ -582,7 +582,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-images.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-images.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsNone(parser.archive_path) |         self.assertIsNone(parser.archive_path) | ||||||
| @@ -605,7 +605,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-mixed.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-mixed.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsNotNone(parser.archive_path) |         self.assertIsNotNone(parser.archive_path) | ||||||
| @@ -636,7 +636,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "single-page-mixed.pdf").as_posix(), |             str(self.SAMPLE_FILES / "single-page-mixed.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsNotNone(parser.archive_path) |         self.assertIsNotNone(parser.archive_path) | ||||||
| @@ -673,7 +673,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-mixed.pdf").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-mixed.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|         self.assertIsNone(parser.archive_path) |         self.assertIsNone(parser.archive_path) | ||||||
| @@ -685,7 +685,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|     @override_settings(OCR_MODE="skip", OCR_ROTATE_PAGES=True) |     @override_settings(OCR_MODE="skip", OCR_ROTATE_PAGES=True) | ||||||
|     def test_rotate(self): |     def test_rotate(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse((self.SAMPLE_FILES / "rotated.pdf").as_posix(), "application/pdf") |         parser.parse(str(self.SAMPLE_FILES / "rotated.pdf"), "application/pdf") | ||||||
|         self.assertContainsStrings( |         self.assertContainsStrings( | ||||||
|             parser.get_text(), |             parser.get_text(), | ||||||
|             [ |             [ | ||||||
| @@ -707,7 +707,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "multi-page-images.tiff").as_posix(), |             str(self.SAMPLE_FILES / "multi-page-images.tiff"), | ||||||
|             "image/tiff", |             "image/tiff", | ||||||
|         ) |         ) | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
| @@ -752,9 +752,9 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|             - Text from all pages extracted |             - Text from all pages extracted | ||||||
|         """ |         """ | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         sample_file = ( |         sample_file = str( | ||||||
|             self.SAMPLE_FILES / "multi-page-images-alpha-rgb.tiff" |             self.SAMPLE_FILES / "multi-page-images-alpha-rgb.tiff", | ||||||
|         ).as_posix() |         ) | ||||||
|         with tempfile.NamedTemporaryFile() as tmp_file: |         with tempfile.NamedTemporaryFile() as tmp_file: | ||||||
|             shutil.copy(sample_file, tmp_file.name) |             shutil.copy(sample_file, tmp_file.name) | ||||||
|             parser.parse( |             parser.parse( | ||||||
| @@ -843,7 +843,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|  |  | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "rtl-test.pdf").as_posix(), |             str(self.SAMPLE_FILES / "rtl-test.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
| @@ -858,7 +858,7 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         self.assertRaises( |         self.assertRaises( | ||||||
|             ParseError, |             ParseError, | ||||||
|             parser.parse, |             parser.parse, | ||||||
|             (self.SAMPLE_FILES / "simple-digital.pdf").as_posix(), |             str(self.SAMPLE_FILES / "simple-digital.pdf"), | ||||||
|             "application/pdf", |             "application/pdf", | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
| @@ -868,32 +868,32 @@ class TestParserFileTypes(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|  |  | ||||||
|     def test_bmp(self): |     def test_bmp(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse((self.SAMPLE_FILES / "simple.bmp").as_posix(), "image/bmp") |         parser.parse(str(self.SAMPLE_FILES / "simple.bmp"), "image/bmp") | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
|         self.assertIn("this is a test document", parser.get_text().lower()) |         self.assertIn("this is a test document", parser.get_text().lower()) | ||||||
|  |  | ||||||
|     def test_jpg(self): |     def test_jpg(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse((self.SAMPLE_FILES / "simple.jpg").as_posix(), "image/jpeg") |         parser.parse(str(self.SAMPLE_FILES / "simple.jpg"), "image/jpeg") | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
|         self.assertIn("this is a test document", parser.get_text().lower()) |         self.assertIn("this is a test document", parser.get_text().lower()) | ||||||
|  |  | ||||||
|     def test_heic(self): |     def test_heic(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse((self.SAMPLE_FILES / "simple.heic").as_posix(), "image/heic") |         parser.parse(str(self.SAMPLE_FILES / "simple.heic"), "image/heic") | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
|         self.assertIn("pizza", parser.get_text().lower()) |         self.assertIn("pizza", parser.get_text().lower()) | ||||||
|  |  | ||||||
|     @override_settings(OCR_IMAGE_DPI=200) |     @override_settings(OCR_IMAGE_DPI=200) | ||||||
|     def test_gif(self): |     def test_gif(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse((self.SAMPLE_FILES / "simple.gif").as_posix(), "image/gif") |         parser.parse(str(self.SAMPLE_FILES / "simple.gif"), "image/gif") | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
|         self.assertIn("this is a test document", parser.get_text().lower()) |         self.assertIn("this is a test document", parser.get_text().lower()) | ||||||
|  |  | ||||||
|     def test_tiff(self): |     def test_tiff(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse((self.SAMPLE_FILES / "simple.tif").as_posix(), "image/tiff") |         parser.parse(str(self.SAMPLE_FILES / "simple.tif"), "image/tiff") | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
|         self.assertIn("this is a test document", parser.get_text().lower()) |         self.assertIn("this is a test document", parser.get_text().lower()) | ||||||
|  |  | ||||||
| @@ -901,7 +901,7 @@ class TestParserFileTypes(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|     def test_webp(self): |     def test_webp(self): | ||||||
|         parser = RasterisedDocumentParser(None) |         parser = RasterisedDocumentParser(None) | ||||||
|         parser.parse( |         parser.parse( | ||||||
|             (self.SAMPLE_FILES / "document.webp").as_posix(), |             str(self.SAMPLE_FILES / "document.webp"), | ||||||
|             "image/webp", |             "image/webp", | ||||||
|         ) |         ) | ||||||
|         self.assertIsFile(parser.archive_path) |         self.assertIsFile(parser.archive_path) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user