diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py index dd024c7c0..c07ccb655 100644 --- a/src/documents/management/commands/document_exporter.py +++ b/src/documents/management/commands/document_exporter.py @@ -327,48 +327,50 @@ class Command(CryptMixin, BaseCommand): for index, document_dict in enumerate(document_manifest): document = document_map[document_dict["pk"]] - # 3.1. generate a unique filename - base_name = self.generate_base_name(document) + # 3.1. generate a unique filename + base_name = self.generate_base_name(document) - # 3.2. write filenames into manifest - original_target, thumbnail_target, archive_target = ( - self.generate_document_targets(document, base_name, document_dict) - ) - - # 3.3. write files to target folder - if not self.data_only: - self.copy_document_files( - document, - original_target, - thumbnail_target, - archive_target, + # 3.2. write filenames into manifest + original_target, thumbnail_target, archive_target = ( + self.generate_document_targets(document, base_name, document_dict) ) - if self.split_manifest: - manifest_name = base_name.with_name(f"{base_name.stem}-manifest.json") - if self.use_folder_prefix: - manifest_name = Path("json") / manifest_name - manifest_name = (self.target / manifest_name).resolve() - manifest_name.parent.mkdir(parents=True, exist_ok=True) - content = [document_manifest[index]] - content += list( - filter( - lambda d: d["fields"]["document"] == document_dict["pk"], - manifest_dict["notes"], - ), - ) - content += list( - filter( - lambda d: d["fields"]["document"] == document_dict["pk"], - manifest_dict["custom_field_instances"], - ), - ) + # 3.3. write files to target folder + if not self.data_only: + self.copy_document_files( + document, + original_target, + thumbnail_target, + archive_target, + ) - self.check_and_write_json( - content, - manifest_name, - ) - progress.update(task, advance=1) + if self.split_manifest: + manifest_name = base_name.with_name( + f"{base_name.stem}-manifest.json", + ) + if self.use_folder_prefix: + manifest_name = Path("json") / manifest_name + manifest_name = (self.target / manifest_name).resolve() + manifest_name.parent.mkdir(parents=True, exist_ok=True) + content = [document_manifest[index]] + content += list( + filter( + lambda d: d["fields"]["document"] == document_dict["pk"], + manifest_dict["notes"], + ), + ) + content += list( + filter( + lambda d: d["fields"]["document"] == document_dict["pk"], + manifest_dict["custom_field_instances"], + ), + ) + + self.check_and_write_json( + content, + manifest_name, + ) + progress.update(task, advance=1) # These were exported already if self.split_manifest: diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py index d66f744f6..39168931e 100644 --- a/src/documents/management/commands/document_importer.py +++ b/src/documents/management/commands/document_importer.py @@ -383,56 +383,56 @@ class Command(CryptMixin, BaseCommand): for record in manifest_documents: document = Document.objects.get(pk=record["pk"]) - doc_file = record[EXPORTER_FILE_NAME] - document_path = self.source / doc_file + doc_file = record[EXPORTER_FILE_NAME] + document_path = self.source / doc_file - if EXPORTER_THUMBNAIL_NAME in record: - thumb_file = record[EXPORTER_THUMBNAIL_NAME] - thumbnail_path = (self.source / thumb_file).resolve() - else: - thumbnail_path = None + if EXPORTER_THUMBNAIL_NAME in record: + thumb_file = record[EXPORTER_THUMBNAIL_NAME] + thumbnail_path = (self.source / thumb_file).resolve() + else: + thumbnail_path = None - if EXPORTER_ARCHIVE_NAME in record: - archive_file = record[EXPORTER_ARCHIVE_NAME] - archive_path = self.source / archive_file - else: - archive_path = None + if EXPORTER_ARCHIVE_NAME in record: + archive_file = record[EXPORTER_ARCHIVE_NAME] + archive_path = self.source / archive_file + else: + archive_path = None - with FileLock(settings.MEDIA_LOCK): - if Path(document.source_path).is_file(): - raise FileExistsError(document.source_path) + with FileLock(settings.MEDIA_LOCK): + if Path(document.source_path).is_file(): + raise FileExistsError(document.source_path) - create_source_path_directory(document.source_path) + create_source_path_directory(document.source_path) - copy_file_with_basic_stats(document_path, document.source_path) + copy_file_with_basic_stats(document_path, document.source_path) - if thumbnail_path: - if thumbnail_path.suffix in {".png", ".PNG"}: - run_convert( - density=300, - scale="500x5000>", - alpha="remove", - strip=True, - trim=False, - auto_orient=True, - input_file=f"{thumbnail_path}[0]", - output_file=str(document.thumbnail_path), - ) - else: - copy_file_with_basic_stats( - thumbnail_path, - document.thumbnail_path, - ) + if thumbnail_path: + if thumbnail_path.suffix in {".png", ".PNG"}: + run_convert( + density=300, + scale="500x5000>", + alpha="remove", + strip=True, + trim=False, + auto_orient=True, + input_file=f"{thumbnail_path}[0]", + output_file=str(document.thumbnail_path), + ) + else: + copy_file_with_basic_stats( + thumbnail_path, + document.thumbnail_path, + ) - if archive_path: - create_source_path_directory(document.archive_path) - # TODO: this assumes that the export is valid and - # archive_filename is present on all documents with - # archived files - copy_file_with_basic_stats(archive_path, document.archive_path) + if archive_path: + create_source_path_directory(document.archive_path) + # TODO: this assumes that the export is valid and + # archive_filename is present on all documents with + # archived files + copy_file_with_basic_stats(archive_path, document.archive_path) - document.save() - progress.update(task, advance=1) + document.save() + progress.update(task, advance=1) def decrypt_secret_fields(self) -> None: """ diff --git a/src/paperless_ai/tests/test_ai_indexing.py b/src/paperless_ai/tests/test_ai_indexing.py index c36655f4d..01faec803 100644 --- a/src/paperless_ai/tests/test_ai_indexing.py +++ b/src/paperless_ai/tests/test_ai_indexing.py @@ -76,6 +76,7 @@ def test_update_llm_index( mock_queryset = MagicMock() mock_queryset.exists.return_value = True mock_queryset.__iter__.return_value = iter([real_document]) + mock_queryset.count.return_value = 1 mock_all.return_value = mock_queryset indexing.update_llm_index(rebuild=True) @@ -97,6 +98,7 @@ def test_update_llm_index_removes_meta( mock_queryset = MagicMock() mock_queryset.exists.return_value = True mock_queryset.__iter__.return_value = iter([real_document]) + mock_queryset.count.return_value = 1 mock_all.return_value = mock_queryset indexing.update_llm_index(rebuild=True) @@ -129,6 +131,7 @@ def test_update_llm_index_partial_update( mock_queryset = MagicMock() mock_queryset.exists.return_value = True mock_queryset.__iter__.return_value = iter([real_document, doc2]) + mock_queryset.count.return_value = 2 mock_all.return_value = mock_queryset indexing.update_llm_index(rebuild=True) @@ -149,6 +152,7 @@ def test_update_llm_index_partial_update( mock_queryset = MagicMock() mock_queryset.exists.return_value = True mock_queryset.__iter__.return_value = iter([updated_document, doc2, doc3]) + mock_queryset.count.return_value = 3 mock_all.return_value = mock_queryset # assert logs "Updating LLM index with %d new nodes and removing %d old nodes."