mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Fixes issues with copy2 or copystat and SELinux see #3665
This commit is contained in:
parent
4aa452ce63
commit
9f5d47c320
@ -1,5 +1,4 @@
|
||||
import logging
|
||||
import shutil
|
||||
import tempfile
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
@ -18,6 +17,8 @@ from pikepdf import Pdf
|
||||
from PIL import Image
|
||||
|
||||
from documents.data_models import DocumentSource
|
||||
from documents.utils import copy_basic_file_stats
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
|
||||
logger = logging.getLogger("paperless.barcodes")
|
||||
|
||||
@ -181,7 +182,7 @@ class BarcodeReader:
|
||||
pdf_file.write(img2pdf.convert(img_file))
|
||||
|
||||
# Copy what file stat is possible
|
||||
shutil.copystat(self.file, self.pdf_file)
|
||||
copy_basic_file_stats(self.file, self.pdf_file)
|
||||
|
||||
def detect(self) -> None:
|
||||
"""
|
||||
@ -306,7 +307,7 @@ class BarcodeReader:
|
||||
with open(savepath, "wb") as out:
|
||||
dst.save(out)
|
||||
|
||||
shutil.copystat(self.file, savepath)
|
||||
copy_basic_file_stats(self.file, savepath)
|
||||
|
||||
document_paths.append(savepath)
|
||||
|
||||
@ -363,5 +364,5 @@ class BarcodeReader:
|
||||
else:
|
||||
dest = save_to_dir
|
||||
logger.info(f"Saving {document_path} to {dest}")
|
||||
shutil.copy2(document_path, dest)
|
||||
copy_file_with_basic_stats(document_path, dest)
|
||||
return True
|
||||
|
@ -1,7 +1,6 @@
|
||||
import datetime
|
||||
import hashlib
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
@ -21,6 +20,9 @@ from django.utils import timezone
|
||||
from filelock import FileLock
|
||||
from rest_framework.reverse import reverse
|
||||
|
||||
from documents.utils import copy_basic_file_stats
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
|
||||
from .classifier import load_classifier
|
||||
from .file_handling import create_source_path_directory
|
||||
from .file_handling import generate_unique_filename
|
||||
@ -326,7 +328,7 @@ class Consumer(LoggingMixin):
|
||||
dir=settings.SCRATCH_DIR,
|
||||
)
|
||||
self.path = Path(tempdir.name) / Path(self.filename)
|
||||
shutil.copy2(self.original_path, self.path)
|
||||
copy_file_with_basic_stats(self.original_path, self.path)
|
||||
|
||||
# Determine the parser class.
|
||||
|
||||
@ -585,7 +587,7 @@ class Consumer(LoggingMixin):
|
||||
|
||||
# Attempt to copy file's original stats, but it's ok if we can't
|
||||
try:
|
||||
shutil.copystat(source, target)
|
||||
copy_basic_file_stats(source, target)
|
||||
except Exception: # pragma: no cover
|
||||
pass
|
||||
|
||||
|
@ -37,6 +37,7 @@ from documents.models import UiSettings
|
||||
from documents.settings import EXPORTER_ARCHIVE_NAME
|
||||
from documents.settings import EXPORTER_FILE_NAME
|
||||
from documents.settings import EXPORTER_THUMBNAIL_NAME
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
from paperless import version
|
||||
from paperless.db import GnuPG
|
||||
from paperless_mail.models import MailAccount
|
||||
@ -437,4 +438,4 @@ class Command(BaseCommand):
|
||||
|
||||
if perform_copy:
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy2(source, target)
|
||||
copy_file_with_basic_stats(source, target)
|
||||
|
@ -1,7 +1,6 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
|
||||
@ -27,6 +26,7 @@ from documents.settings import EXPORTER_ARCHIVE_NAME
|
||||
from documents.settings import EXPORTER_FILE_NAME
|
||||
from documents.settings import EXPORTER_THUMBNAIL_NAME
|
||||
from documents.signals.handlers import update_filename_and_move_files
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
from paperless import version
|
||||
|
||||
|
||||
@ -246,7 +246,7 @@ class Command(BaseCommand):
|
||||
|
||||
create_source_path_directory(document.source_path)
|
||||
|
||||
shutil.copy2(document_path, document.source_path)
|
||||
copy_file_with_basic_stats(document_path, document.source_path)
|
||||
|
||||
if thumbnail_path:
|
||||
if thumbnail_path.suffix in {".png", ".PNG"}:
|
||||
@ -261,13 +261,16 @@ class Command(BaseCommand):
|
||||
output_file=str(document.thumbnail_path),
|
||||
)
|
||||
else:
|
||||
shutil.copy2(thumbnail_path, document.thumbnail_path)
|
||||
copy_file_with_basic_stats(
|
||||
thumbnail_path,
|
||||
document.thumbnail_path,
|
||||
)
|
||||
|
||||
if archive_path:
|
||||
create_source_path_directory(document.archive_path)
|
||||
# TODO: this assumes that the export is valid and
|
||||
# archive_filename is present on all documents with
|
||||
# archived files
|
||||
shutil.copy2(archive_path, document.archive_path)
|
||||
copy_file_with_basic_stats(archive_path, document.archive_path)
|
||||
|
||||
document.save()
|
||||
|
@ -18,6 +18,7 @@ from django.utils import timezone
|
||||
|
||||
from documents.loggers import LoggingMixin
|
||||
from documents.signals import document_consumer_declaration
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
|
||||
# This regular expression will try to find dates in the document at
|
||||
# hand and will match the following formats:
|
||||
@ -206,7 +207,7 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -
|
||||
# so we need to copy it before it gets moved.
|
||||
# https://github.com/paperless-ngx/paperless-ngx/issues/3631
|
||||
default_thumbnail_path = os.path.join(temp_dir, "document.png")
|
||||
shutil.copy2(get_default_thumbnail(), default_thumbnail_path)
|
||||
copy_file_with_basic_stats(get_default_thumbnail(), default_thumbnail_path)
|
||||
return default_thumbnail_path
|
||||
|
||||
|
||||
|
@ -277,7 +277,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
|
||||
|
||||
with mock.patch(
|
||||
"documents.management.commands.document_exporter.shutil.copy2",
|
||||
"documents.management.commands.document_exporter.copy_file_with_basic_stats",
|
||||
) as m:
|
||||
self._do_export()
|
||||
m.assert_not_called()
|
||||
@ -288,7 +288,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
Path(self.d1.source_path).touch()
|
||||
|
||||
with mock.patch(
|
||||
"documents.management.commands.document_exporter.shutil.copy2",
|
||||
"documents.management.commands.document_exporter.copy_file_with_basic_stats",
|
||||
) as m:
|
||||
self._do_export()
|
||||
self.assertEqual(m.call_count, 1)
|
||||
@ -311,7 +311,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
self.assertIsFile(os.path.join(self.target, "manifest.json"))
|
||||
|
||||
with mock.patch(
|
||||
"documents.management.commands.document_exporter.shutil.copy2",
|
||||
"documents.management.commands.document_exporter.copy_file_with_basic_stats",
|
||||
) as m:
|
||||
self._do_export()
|
||||
m.assert_not_called()
|
||||
@ -322,7 +322,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
self.d2.save()
|
||||
|
||||
with mock.patch(
|
||||
"documents.management.commands.document_exporter.shutil.copy2",
|
||||
"documents.management.commands.document_exporter.copy_file_with_basic_stats",
|
||||
) as m:
|
||||
self._do_export(compare_checksums=True)
|
||||
self.assertEqual(m.call_count, 1)
|
||||
|
43
src/documents/utils.py
Normal file
43
src/documents/utils.py
Normal file
@ -0,0 +1,43 @@
|
||||
import shutil
|
||||
from os import utime
|
||||
from pathlib import Path
|
||||
from typing import Tuple
|
||||
from typing import Union
|
||||
|
||||
|
||||
def _coerce_to_path(
|
||||
source: Union[Path, str],
|
||||
dest: Union[Path, str],
|
||||
) -> Tuple[Path, Path]:
|
||||
return Path(source).resolve(), Path(dest).resolve()
|
||||
|
||||
|
||||
def copy_basic_file_stats(source: Union[Path, str], dest: Union[Path, str]) -> None:
|
||||
"""
|
||||
Copies only the m_time and a_time attributes from source to destination.
|
||||
Both are expected to exist.
|
||||
|
||||
The extended attribute copy does weird things with SELinux and files
|
||||
copied from temporary directories and copystat doesn't allow disabling
|
||||
these copies
|
||||
"""
|
||||
source, dest = _coerce_to_path(source, dest)
|
||||
src_stat = source.stat()
|
||||
utime(dest, ns=(src_stat.st_atime_ns, src_stat.st_mtime_ns))
|
||||
|
||||
|
||||
def copy_file_with_basic_stats(
|
||||
source: Union[Path, str],
|
||||
dest: Union[Path, str],
|
||||
) -> None:
|
||||
"""
|
||||
A sort of simpler copy2 that doesn't copy extended file attributes,
|
||||
only the access time and modified times from source to dest.
|
||||
|
||||
The extended attribute copy does weird things with SELinux and files
|
||||
copied from temporary directories.
|
||||
"""
|
||||
source, dest = _coerce_to_path(source, dest)
|
||||
|
||||
shutil.copy(source, dest)
|
||||
copy_basic_file_stats(source, dest)
|
Loading…
x
Reference in New Issue
Block a user