Fixes issues with copy2 or copystat and SELinux see #3665

This commit is contained in:
Trenton H 2023-07-21 14:19:26 -07:00
parent 4aa452ce63
commit 9f5d47c320
7 changed files with 68 additions and 17 deletions

View File

@ -1,5 +1,4 @@
import logging import logging
import shutil
import tempfile import tempfile
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
@ -18,6 +17,8 @@ from pikepdf import Pdf
from PIL import Image from PIL import Image
from documents.data_models import DocumentSource from documents.data_models import DocumentSource
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
logger = logging.getLogger("paperless.barcodes") logger = logging.getLogger("paperless.barcodes")
@ -181,7 +182,7 @@ class BarcodeReader:
pdf_file.write(img2pdf.convert(img_file)) pdf_file.write(img2pdf.convert(img_file))
# Copy what file stat is possible # Copy what file stat is possible
shutil.copystat(self.file, self.pdf_file) copy_basic_file_stats(self.file, self.pdf_file)
def detect(self) -> None: def detect(self) -> None:
""" """
@ -306,7 +307,7 @@ class BarcodeReader:
with open(savepath, "wb") as out: with open(savepath, "wb") as out:
dst.save(out) dst.save(out)
shutil.copystat(self.file, savepath) copy_basic_file_stats(self.file, savepath)
document_paths.append(savepath) document_paths.append(savepath)
@ -363,5 +364,5 @@ class BarcodeReader:
else: else:
dest = save_to_dir dest = save_to_dir
logger.info(f"Saving {document_path} to {dest}") logger.info(f"Saving {document_path} to {dest}")
shutil.copy2(document_path, dest) copy_file_with_basic_stats(document_path, dest)
return True return True

View File

@ -1,7 +1,6 @@
import datetime import datetime
import hashlib import hashlib
import os import os
import shutil
import tempfile import tempfile
import uuid import uuid
from pathlib import Path from pathlib import Path
@ -21,6 +20,9 @@ from django.utils import timezone
from filelock import FileLock from filelock import FileLock
from rest_framework.reverse import reverse from rest_framework.reverse import reverse
from documents.utils import copy_basic_file_stats
from documents.utils import copy_file_with_basic_stats
from .classifier import load_classifier from .classifier import load_classifier
from .file_handling import create_source_path_directory from .file_handling import create_source_path_directory
from .file_handling import generate_unique_filename from .file_handling import generate_unique_filename
@ -326,7 +328,7 @@ class Consumer(LoggingMixin):
dir=settings.SCRATCH_DIR, dir=settings.SCRATCH_DIR,
) )
self.path = Path(tempdir.name) / Path(self.filename) self.path = Path(tempdir.name) / Path(self.filename)
shutil.copy2(self.original_path, self.path) copy_file_with_basic_stats(self.original_path, self.path)
# Determine the parser class. # Determine the parser class.
@ -585,7 +587,7 @@ class Consumer(LoggingMixin):
# Attempt to copy file's original stats, but it's ok if we can't # Attempt to copy file's original stats, but it's ok if we can't
try: try:
shutil.copystat(source, target) copy_basic_file_stats(source, target)
except Exception: # pragma: no cover except Exception: # pragma: no cover
pass pass

View File

@ -37,6 +37,7 @@ from documents.models import UiSettings
from documents.settings import EXPORTER_ARCHIVE_NAME from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.utils import copy_file_with_basic_stats
from paperless import version from paperless import version
from paperless.db import GnuPG from paperless.db import GnuPG
from paperless_mail.models import MailAccount from paperless_mail.models import MailAccount
@ -437,4 +438,4 @@ class Command(BaseCommand):
if perform_copy: if perform_copy:
target.parent.mkdir(parents=True, exist_ok=True) target.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(source, target) copy_file_with_basic_stats(source, target)

View File

@ -1,7 +1,6 @@
import json import json
import logging import logging
import os import os
import shutil
from contextlib import contextmanager from contextlib import contextmanager
from pathlib import Path from pathlib import Path
@ -27,6 +26,7 @@ from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_FILE_NAME from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.signals.handlers import update_filename_and_move_files from documents.signals.handlers import update_filename_and_move_files
from documents.utils import copy_file_with_basic_stats
from paperless import version from paperless import version
@ -246,7 +246,7 @@ class Command(BaseCommand):
create_source_path_directory(document.source_path) create_source_path_directory(document.source_path)
shutil.copy2(document_path, document.source_path) copy_file_with_basic_stats(document_path, document.source_path)
if thumbnail_path: if thumbnail_path:
if thumbnail_path.suffix in {".png", ".PNG"}: if thumbnail_path.suffix in {".png", ".PNG"}:
@ -261,13 +261,16 @@ class Command(BaseCommand):
output_file=str(document.thumbnail_path), output_file=str(document.thumbnail_path),
) )
else: else:
shutil.copy2(thumbnail_path, document.thumbnail_path) copy_file_with_basic_stats(
thumbnail_path,
document.thumbnail_path,
)
if archive_path: if archive_path:
create_source_path_directory(document.archive_path) create_source_path_directory(document.archive_path)
# TODO: this assumes that the export is valid and # TODO: this assumes that the export is valid and
# archive_filename is present on all documents with # archive_filename is present on all documents with
# archived files # archived files
shutil.copy2(archive_path, document.archive_path) copy_file_with_basic_stats(archive_path, document.archive_path)
document.save() document.save()

View File

@ -18,6 +18,7 @@ from django.utils import timezone
from documents.loggers import LoggingMixin from documents.loggers import LoggingMixin
from documents.signals import document_consumer_declaration from documents.signals import document_consumer_declaration
from documents.utils import copy_file_with_basic_stats
# This regular expression will try to find dates in the document at # This regular expression will try to find dates in the document at
# hand and will match the following formats: # hand and will match the following formats:
@ -206,7 +207,7 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -
# so we need to copy it before it gets moved. # so we need to copy it before it gets moved.
# https://github.com/paperless-ngx/paperless-ngx/issues/3631 # https://github.com/paperless-ngx/paperless-ngx/issues/3631
default_thumbnail_path = os.path.join(temp_dir, "document.png") default_thumbnail_path = os.path.join(temp_dir, "document.png")
shutil.copy2(get_default_thumbnail(), default_thumbnail_path) copy_file_with_basic_stats(get_default_thumbnail(), default_thumbnail_path)
return default_thumbnail_path return default_thumbnail_path

View File

@ -277,7 +277,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
with mock.patch( with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2", "documents.management.commands.document_exporter.copy_file_with_basic_stats",
) as m: ) as m:
self._do_export() self._do_export()
m.assert_not_called() m.assert_not_called()
@ -288,7 +288,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
Path(self.d1.source_path).touch() Path(self.d1.source_path).touch()
with mock.patch( with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2", "documents.management.commands.document_exporter.copy_file_with_basic_stats",
) as m: ) as m:
self._do_export() self._do_export()
self.assertEqual(m.call_count, 1) self.assertEqual(m.call_count, 1)
@ -311,7 +311,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertIsFile(os.path.join(self.target, "manifest.json")) self.assertIsFile(os.path.join(self.target, "manifest.json"))
with mock.patch( with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2", "documents.management.commands.document_exporter.copy_file_with_basic_stats",
) as m: ) as m:
self._do_export() self._do_export()
m.assert_not_called() m.assert_not_called()
@ -322,7 +322,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.d2.save() self.d2.save()
with mock.patch( with mock.patch(
"documents.management.commands.document_exporter.shutil.copy2", "documents.management.commands.document_exporter.copy_file_with_basic_stats",
) as m: ) as m:
self._do_export(compare_checksums=True) self._do_export(compare_checksums=True)
self.assertEqual(m.call_count, 1) self.assertEqual(m.call_count, 1)

43
src/documents/utils.py Normal file
View File

@ -0,0 +1,43 @@
import shutil
from os import utime
from pathlib import Path
from typing import Tuple
from typing import Union
def _coerce_to_path(
source: Union[Path, str],
dest: Union[Path, str],
) -> Tuple[Path, Path]:
return Path(source).resolve(), Path(dest).resolve()
def copy_basic_file_stats(source: Union[Path, str], dest: Union[Path, str]) -> None:
"""
Copies only the m_time and a_time attributes from source to destination.
Both are expected to exist.
The extended attribute copy does weird things with SELinux and files
copied from temporary directories and copystat doesn't allow disabling
these copies
"""
source, dest = _coerce_to_path(source, dest)
src_stat = source.stat()
utime(dest, ns=(src_stat.st_atime_ns, src_stat.st_mtime_ns))
def copy_file_with_basic_stats(
source: Union[Path, str],
dest: Union[Path, str],
) -> None:
"""
A sort of simpler copy2 that doesn't copy extended file attributes,
only the access time and modified times from source to dest.
The extended attribute copy does weird things with SELinux and files
copied from temporary directories.
"""
source, dest = _coerce_to_path(source, dest)
shutil.copy(source, dest)
copy_basic_file_stats(source, dest)