Fixes issues with copy2 or copystat and SELinux; see #3665

This commit is contained in:
Trenton H 2023-07-21 14:19:26 -07:00
parent 4aa452ce63
commit 9f5d47c320
7 changed files with 68 additions and 17 deletions

View File

@ -1,5 +1,4 @@
import logging
import shutil
import tempfile
from dataclasses import dataclass
from pathlib import Path
@ -18,6 +17,8 @@ from pikepdf import Pdf
from PIL import Image
from documents.data_models import DocumentSource
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
logger = logging.getLogger("paperless.barcodes")
@ -181,7 +182,7 @@ class BarcodeReader:
pdf_file.write(img2pdf.convert(img_file))
# Copy what file stat is possible
shutil.copystat(self.file, self.pdf_file)
copy_basic_file_stats(self.file, self.pdf_file)
def detect(self) -> None:
"""
@ -306,7 +307,7 @@ class BarcodeReader:
with open(savepath, "wb") as out:
dst.save(out)
shutil.copystat(self.file, savepath)
copy_basic_file_stats(self.file, savepath)
document_paths.append(savepath)
@ -363,5 +364,5 @@ class BarcodeReader:
else:
dest = save_to_dir
logger.info(f"Saving {document_path} to {dest}")
shutil.copy2(document_path, dest)
copy_file_with_basic_stats(document_path, dest)
return True

View File

@ -1,7 +1,6 @@
import datetime
import hashlib
import os
import shutil
import tempfile
import uuid
from pathlib import Path
@ -21,6 +20,9 @@ from django.utils import timezone
from filelock import FileLock
from rest_framework.reverse import reverse
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
from .classifier import load_classifier
from .file_handling import create_source_path_directory
from .file_handling import generate_unique_filename
@ -326,7 +328,7 @@ class Consumer(LoggingMixin):
dir=settings.SCRATCH_DIR,
)
self.path = Path(tempdir.name) / Path(self.filename)
shutil.copy2(self.original_path, self.path)
copy_file_with_basic_stats(self.original_path, self.path)
# Determine the parser class.
@ -585,7 +587,7 @@ class Consumer(LoggingMixin):
# Attempt to copy file's original stats, but it's ok if we can't
try:
shutil.copystat(source, target)
copy_basic_file_stats(source, target)
except Exception: # pragma: no cover
pass

View File

@ -37,6 +37,7 @@ from documents.models import UiSettings
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.utils import copy_file_with_basic_stats
from paperless import version
from paperless.db import GnuPG
from paperless_mail.models import MailAccount
@ -437,4 +438,4 @@ class Command(BaseCommand):
if perform_copy:
target.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(source, target)
copy_file_with_basic_stats(source, target)

View File

@ -1,7 +1,6 @@
import json
import logging
import os
import shutil
from contextlib import contextmanager
from pathlib import Path
@ -27,6 +26,7 @@ from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.signals.handlers import update_filename_and_move_files
from documents.utils import copy_file_with_basic_stats
from paperless import version
@ -246,7 +246,7 @@ class Command(BaseCommand):
create_source_path_directory(document.source_path)
shutil.copy2(document_path, document.source_path)
copy_file_with_basic_stats(document_path, document.source_path)
if thumbnail_path:
if thumbnail_path.suffix in {".png", ".PNG"}:
@ -261,13 +261,16 @@ class Command(BaseCommand):
output_file=str(document.thumbnail_path),
)
else:
shutil.copy2(thumbnail_path, document.thumbnail_path)
copy_file_with_basic_stats(
thumbnail_path,
document.thumbnail_path,
)
if archive_path:
create_source_path_directory(document.archive_path)
# TODO: this assumes that the export is valid and
# archive_filename is present on all documents with
# archived files
shutil.copy2(archive_path, document.archive_path)
copy_file_with_basic_stats(archive_path, document.archive_path)
document.save()

View File

@ -18,6 +18,7 @@ from django.utils import timezone
from documents.loggers import LoggingMixin
from documents.signals import document_consumer_declaration
from documents.utils import copy_file_with_basic_stats
# This regular expression will try to find dates in the document at
# hand and will match the following formats:
@ -206,7 +207,7 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -
# so we need to copy it before it gets moved.
# https://github.com/paperless-ngx/paperless-ngx/issues/3631
default_thumbnail_path = os.path.join(temp_dir, "document.png")
shutil.copy2(get_default_thumbnail(), default_thumbnail_path)
copy_file_with_basic_stats(get_default_thumbnail(), default_thumbnail_path)
return default_thumbnail_path

View File

@ -277,7 +277,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2",
"documents.management.commands.document_exporter.copy_file_with_basic_stats",
) as m:
self._do_export()
m.assert_not_called()
@ -288,7 +288,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
Path(self.d1.source_path).touch()
with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2",
"documents.management.commands.document_exporter.copy_file_with_basic_stats",
) as m:
self._do_export()
self.assertEqual(m.call_count, 1)
@ -311,7 +311,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertIsFile(os.path.join(self.target, "manifest.json"))
with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2",
"documents.management.commands.document_exporter.copy_file_with_basic_stats",
) as m:
self._do_export()
m.assert_not_called()
@ -322,7 +322,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.d2.save()
with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2",
"documents.management.commands.document_exporter.copy_file_with_basic_stats",
) as m:
self._do_export(compare_checksums=True)
self.assertEqual(m.call_count, 1)

43
src/documents/utils.py Normal file
View File

@ -0,0 +1,43 @@
import shutil
from os import utime
from pathlib import Path
from typing import Tuple
from typing import Union
def _coerce_to_path(
    source: Union[Path, str],
    dest: Union[Path, str],
) -> Tuple[Path, Path]:
    """
    Converts both arguments to resolved Path objects and hands them back
    as a (source, dest) pair, so callers may pass either strings or Paths.
    """
    resolved_source = Path(source).resolve()
    resolved_dest = Path(dest).resolve()
    return resolved_source, resolved_dest
def copy_basic_file_stats(source: Union[Path, str], dest: Union[Path, str]) -> None:
    """
    Applies only the access time and modification time of source to dest.
    Both paths are expected to exist already.

    shutil.copystat is not used here because it also copies extended
    attributes, with no way to turn that off, and that copy does weird
    things with SELinux for files copied from temporary directories.
    """
    source_path, dest_path = _coerce_to_path(source, dest)
    source_stat = source_path.stat()
    # Nanosecond values are passed through as-is: (atime, mtime)
    timestamps_ns = (source_stat.st_atime_ns, source_stat.st_mtime_ns)
    utime(dest_path, ns=timestamps_ns)
def copy_file_with_basic_stats(
    source: Union[Path, str],
    dest: Union[Path, str],
) -> None:
    """
    A sort of simpler copy2 that doesn't copy extended file attributes,
    only the access time and modified times from source to dest.

    dest may be the target file path or an existing directory.  For a
    directory, the file is copied into it under the base name of source
    (the behaviour of shutil.copy) and the times are applied to that
    copied file.

    The extended attribute copy does weird things with SELinux and files
    copied from temporary directories.
    """
    source, dest = _coerce_to_path(source, dest)

    # shutil.copy returns the path of the file it actually wrote.  When dest
    # is a directory that is a file inside it, so the times must be applied
    # to the returned path; applying them to dest would touch the directory
    # and leave the copied file with fresh timestamps.
    copied_file = shutil.copy(source, dest)
    copy_basic_file_stats(source, copied_file)