From 9f5d47c320970a2e467443fe95d94936ee27ae89 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Fri, 21 Jul 2023 14:19:26 -0700 Subject: [PATCH] Fixes issues with copy2 or copystat and SELinux see #3665 --- src/documents/barcodes.py | 9 ++-- src/documents/consumer.py | 8 ++-- .../management/commands/document_exporter.py | 3 +- .../management/commands/document_importer.py | 11 +++-- src/documents/parsers.py | 3 +- .../tests/test_management_exporter.py | 8 ++-- src/documents/utils.py | 43 +++++++++++++++++++ 7 files changed, 68 insertions(+), 17 deletions(-) create mode 100644 src/documents/utils.py diff --git a/src/documents/barcodes.py b/src/documents/barcodes.py index 3650593ae..cabc195b3 100644 --- a/src/documents/barcodes.py +++ b/src/documents/barcodes.py @@ -1,5 +1,4 @@ import logging -import shutil import tempfile from dataclasses import dataclass from pathlib import Path @@ -18,6 +17,8 @@ from pikepdf import Pdf from PIL import Image from documents.data_models import DocumentSource +from documents.utils import copy_basic_file_stats +from documents.utils import copy_file_with_basic_stats logger = logging.getLogger("paperless.barcodes") @@ -181,7 +182,7 @@ class BarcodeReader: pdf_file.write(img2pdf.convert(img_file)) # Copy what file stat is possible - shutil.copystat(self.file, self.pdf_file) + copy_basic_file_stats(self.file, self.pdf_file) def detect(self) -> None: """ @@ -306,7 +307,7 @@ class BarcodeReader: with open(savepath, "wb") as out: dst.save(out) - shutil.copystat(self.file, savepath) + copy_basic_file_stats(self.file, savepath) document_paths.append(savepath) @@ -363,5 +364,5 @@ class BarcodeReader: else: dest = save_to_dir logger.info(f"Saving {document_path} to {dest}") - shutil.copy2(document_path, dest) + copy_file_with_basic_stats(document_path, dest) return True diff --git a/src/documents/consumer.py b/src/documents/consumer.py index fde8e2d4c..c2669c00a 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -1,7 +1,6 @@ import datetime import hashlib import os -import shutil import tempfile import uuid from pathlib import Path @@ -21,6 +20,9 @@ from django.utils import timezone from filelock import FileLock from rest_framework.reverse import reverse +from documents.utils import copy_basic_file_stats +from documents.utils import copy_file_with_basic_stats + from .classifier import load_classifier from .file_handling import create_source_path_directory from .file_handling import generate_unique_filename @@ -326,7 +328,7 @@ class Consumer(LoggingMixin): dir=settings.SCRATCH_DIR, ) self.path = Path(tempdir.name) / Path(self.filename) - shutil.copy2(self.original_path, self.path) + copy_file_with_basic_stats(self.original_path, self.path) # Determine the parser class. @@ -585,7 +587,7 @@ class Consumer(LoggingMixin): # Attempt to copy file's original stats, but it's ok if we can't try: - shutil.copystat(source, target) + copy_basic_file_stats(source, target) except Exception: # pragma: no cover pass diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py index 22fb59308..9484d86bb 100644 --- a/src/documents/management/commands/document_exporter.py +++ b/src/documents/management/commands/document_exporter.py @@ -37,6 +37,7 @@ from documents.models import UiSettings from documents.settings import EXPORTER_ARCHIVE_NAME from documents.settings import EXPORTER_FILE_NAME from documents.settings import EXPORTER_THUMBNAIL_NAME +from documents.utils import copy_file_with_basic_stats from paperless import version from paperless.db import GnuPG from paperless_mail.models import MailAccount @@ -437,4 +438,4 @@ class Command(BaseCommand): if perform_copy: target.parent.mkdir(parents=True, exist_ok=True) - shutil.copy2(source, target) + copy_file_with_basic_stats(source, target) diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py index baf6d7528..eac967dde 100644 --- a/src/documents/management/commands/document_importer.py +++ b/src/documents/management/commands/document_importer.py @@ -1,7 +1,6 @@ import json import logging import os -import shutil from contextlib import contextmanager from pathlib import Path @@ -27,6 +26,7 @@ from documents.settings import EXPORTER_ARCHIVE_NAME from documents.settings import EXPORTER_FILE_NAME from documents.settings import EXPORTER_THUMBNAIL_NAME from documents.signals.handlers import update_filename_and_move_files +from documents.utils import copy_file_with_basic_stats from paperless import version @@ -246,7 +246,7 @@ class Command(BaseCommand): create_source_path_directory(document.source_path) - shutil.copy2(document_path, document.source_path) + copy_file_with_basic_stats(document_path, document.source_path) if thumbnail_path: if thumbnail_path.suffix in {".png", ".PNG"}: @@ -261,13 +261,16 @@ class Command(BaseCommand): output_file=str(document.thumbnail_path), ) else: - shutil.copy2(thumbnail_path, document.thumbnail_path) + copy_file_with_basic_stats( + thumbnail_path, + document.thumbnail_path, + ) if archive_path: create_source_path_directory(document.archive_path) # TODO: this assumes that the export is valid and # archive_filename is present on all documents with # archived files - shutil.copy2(archive_path, document.archive_path) + copy_file_with_basic_stats(archive_path, document.archive_path) document.save() diff --git a/src/documents/parsers.py b/src/documents/parsers.py index e1d7365fb..cdf681398 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -18,6 +18,7 @@ from django.utils import timezone from documents.loggers import LoggingMixin from documents.signals import document_consumer_declaration +from documents.utils import copy_file_with_basic_stats # This regular expression will try to find dates in the document at # hand and will match the following formats: @@ -206,7 +207,7 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) - # so we need to copy it before it gets moved. # https://github.com/paperless-ngx/paperless-ngx/issues/3631 default_thumbnail_path = os.path.join(temp_dir, "document.png") - shutil.copy2(get_default_thumbnail(), default_thumbnail_path) + copy_file_with_basic_stats(get_default_thumbnail(), default_thumbnail_path) return default_thumbnail_path diff --git a/src/documents/tests/test_management_exporter.py b/src/documents/tests/test_management_exporter.py index 421ae51fc..4da93ee50 100644 --- a/src/documents/tests/test_management_exporter.py +++ b/src/documents/tests/test_management_exporter.py @@ -277,7 +277,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase): st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime with mock.patch( - "documents.management.commands.document_exporter.shutil.copy2", + "documents.management.commands.document_exporter.copy_file_with_basic_stats", ) as m: self._do_export() m.assert_not_called() @@ -288,7 +288,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase): Path(self.d1.source_path).touch() with mock.patch( - "documents.management.commands.document_exporter.shutil.copy2", + "documents.management.commands.document_exporter.copy_file_with_basic_stats", ) as m: self._do_export() self.assertEqual(m.call_count, 1) @@ -311,7 +311,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.assertIsFile(os.path.join(self.target, "manifest.json")) with mock.patch( - "documents.management.commands.document_exporter.shutil.copy2", + "documents.management.commands.document_exporter.copy_file_with_basic_stats", ) as m: self._do_export() m.assert_not_called() @@ -322,7 +322,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.d2.save() with mock.patch( - "documents.management.commands.document_exporter.shutil.copy2", + "documents.management.commands.document_exporter.copy_file_with_basic_stats", ) as m: self._do_export(compare_checksums=True) self.assertEqual(m.call_count, 1) diff --git a/src/documents/utils.py b/src/documents/utils.py new file mode 100644 index 000000000..45496fc9b --- /dev/null +++ b/src/documents/utils.py @@ -0,0 +1,43 @@ +import shutil +from os import utime +from pathlib import Path +from typing import Tuple +from typing import Union + + +def _coerce_to_path( + source: Union[Path, str], + dest: Union[Path, str], +) -> Tuple[Path, Path]: + return Path(source).resolve(), Path(dest).resolve() + + +def copy_basic_file_stats(source: Union[Path, str], dest: Union[Path, str]) -> None: + """ + Copies only the m_time and a_time attributes from source to destination. + Both are expected to exist. + + The extended attribute copy does weird things with SELinux and files + copied from temporary directories and copystat doesn't allow disabling + these copies + """ + source, dest = _coerce_to_path(source, dest) + src_stat = source.stat() + utime(dest, ns=(src_stat.st_atime_ns, src_stat.st_mtime_ns)) + + +def copy_file_with_basic_stats( + source: Union[Path, str], + dest: Union[Path, str], +) -> None: + """ + A sort of simpler copy2 that doesn't copy extended file attributes, + only the access time and modified times from source to dest. + + The extended attribute copy does weird things with SELinux and files + copied from temporary directories. + """ + source, dest = _coerce_to_path(source, dest) + + shutil.copy(source, dest) + copy_basic_file_stats(source, dest)