mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Fixes issues with copy2 or copystat and SELinux (see #3665)
This commit is contained in:
parent
4aa452ce63
commit
9f5d47c320
@ -1,5 +1,4 @@
|
|||||||
import logging
|
import logging
|
||||||
import shutil
|
|
||||||
import tempfile
|
import tempfile
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -18,6 +17,8 @@ from pikepdf import Pdf
|
|||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
from documents.data_models import DocumentSource
|
from documents.data_models import DocumentSource
|
||||||
|
from documents.utils import copy_basic_file_stats
|
||||||
|
from documents.utils import copy_file_with_basic_stats
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.barcodes")
|
logger = logging.getLogger("paperless.barcodes")
|
||||||
|
|
||||||
@ -181,7 +182,7 @@ class BarcodeReader:
|
|||||||
pdf_file.write(img2pdf.convert(img_file))
|
pdf_file.write(img2pdf.convert(img_file))
|
||||||
|
|
||||||
# Copy what file stat is possible
|
# Copy what file stat is possible
|
||||||
shutil.copystat(self.file, self.pdf_file)
|
copy_basic_file_stats(self.file, self.pdf_file)
|
||||||
|
|
||||||
def detect(self) -> None:
|
def detect(self) -> None:
|
||||||
"""
|
"""
|
||||||
@ -306,7 +307,7 @@ class BarcodeReader:
|
|||||||
with open(savepath, "wb") as out:
|
with open(savepath, "wb") as out:
|
||||||
dst.save(out)
|
dst.save(out)
|
||||||
|
|
||||||
shutil.copystat(self.file, savepath)
|
copy_basic_file_stats(self.file, savepath)
|
||||||
|
|
||||||
document_paths.append(savepath)
|
document_paths.append(savepath)
|
||||||
|
|
||||||
@ -363,5 +364,5 @@ class BarcodeReader:
|
|||||||
else:
|
else:
|
||||||
dest = save_to_dir
|
dest = save_to_dir
|
||||||
logger.info(f"Saving {document_path} to {dest}")
|
logger.info(f"Saving {document_path} to {dest}")
|
||||||
shutil.copy2(document_path, dest)
|
copy_file_with_basic_stats(document_path, dest)
|
||||||
return True
|
return True
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import shutil
|
|
||||||
import tempfile
|
import tempfile
|
||||||
import uuid
|
import uuid
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -21,6 +20,9 @@ from django.utils import timezone
|
|||||||
from filelock import FileLock
|
from filelock import FileLock
|
||||||
from rest_framework.reverse import reverse
|
from rest_framework.reverse import reverse
|
||||||
|
|
||||||
|
from documents.utils import copy_basic_file_stats
|
||||||
|
from documents.utils import copy_file_with_basic_stats
|
||||||
|
|
||||||
from .classifier import load_classifier
|
from .classifier import load_classifier
|
||||||
from .file_handling import create_source_path_directory
|
from .file_handling import create_source_path_directory
|
||||||
from .file_handling import generate_unique_filename
|
from .file_handling import generate_unique_filename
|
||||||
@ -326,7 +328,7 @@ class Consumer(LoggingMixin):
|
|||||||
dir=settings.SCRATCH_DIR,
|
dir=settings.SCRATCH_DIR,
|
||||||
)
|
)
|
||||||
self.path = Path(tempdir.name) / Path(self.filename)
|
self.path = Path(tempdir.name) / Path(self.filename)
|
||||||
shutil.copy2(self.original_path, self.path)
|
copy_file_with_basic_stats(self.original_path, self.path)
|
||||||
|
|
||||||
# Determine the parser class.
|
# Determine the parser class.
|
||||||
|
|
||||||
@ -585,7 +587,7 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
# Attempt to copy file's original stats, but it's ok if we can't
|
# Attempt to copy file's original stats, but it's ok if we can't
|
||||||
try:
|
try:
|
||||||
shutil.copystat(source, target)
|
copy_basic_file_stats(source, target)
|
||||||
except Exception: # pragma: no cover
|
except Exception: # pragma: no cover
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -37,6 +37,7 @@ from documents.models import UiSettings
|
|||||||
from documents.settings import EXPORTER_ARCHIVE_NAME
|
from documents.settings import EXPORTER_ARCHIVE_NAME
|
||||||
from documents.settings import EXPORTER_FILE_NAME
|
from documents.settings import EXPORTER_FILE_NAME
|
||||||
from documents.settings import EXPORTER_THUMBNAIL_NAME
|
from documents.settings import EXPORTER_THUMBNAIL_NAME
|
||||||
|
from documents.utils import copy_file_with_basic_stats
|
||||||
from paperless import version
|
from paperless import version
|
||||||
from paperless.db import GnuPG
|
from paperless.db import GnuPG
|
||||||
from paperless_mail.models import MailAccount
|
from paperless_mail.models import MailAccount
|
||||||
@ -437,4 +438,4 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
if perform_copy:
|
if perform_copy:
|
||||||
target.parent.mkdir(parents=True, exist_ok=True)
|
target.parent.mkdir(parents=True, exist_ok=True)
|
||||||
shutil.copy2(source, target)
|
copy_file_with_basic_stats(source, target)
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import shutil
|
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@ -27,6 +26,7 @@ from documents.settings import EXPORTER_ARCHIVE_NAME
|
|||||||
from documents.settings import EXPORTER_FILE_NAME
|
from documents.settings import EXPORTER_FILE_NAME
|
||||||
from documents.settings import EXPORTER_THUMBNAIL_NAME
|
from documents.settings import EXPORTER_THUMBNAIL_NAME
|
||||||
from documents.signals.handlers import update_filename_and_move_files
|
from documents.signals.handlers import update_filename_and_move_files
|
||||||
|
from documents.utils import copy_file_with_basic_stats
|
||||||
from paperless import version
|
from paperless import version
|
||||||
|
|
||||||
|
|
||||||
@ -246,7 +246,7 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
create_source_path_directory(document.source_path)
|
create_source_path_directory(document.source_path)
|
||||||
|
|
||||||
shutil.copy2(document_path, document.source_path)
|
copy_file_with_basic_stats(document_path, document.source_path)
|
||||||
|
|
||||||
if thumbnail_path:
|
if thumbnail_path:
|
||||||
if thumbnail_path.suffix in {".png", ".PNG"}:
|
if thumbnail_path.suffix in {".png", ".PNG"}:
|
||||||
@ -261,13 +261,16 @@ class Command(BaseCommand):
|
|||||||
output_file=str(document.thumbnail_path),
|
output_file=str(document.thumbnail_path),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
shutil.copy2(thumbnail_path, document.thumbnail_path)
|
copy_file_with_basic_stats(
|
||||||
|
thumbnail_path,
|
||||||
|
document.thumbnail_path,
|
||||||
|
)
|
||||||
|
|
||||||
if archive_path:
|
if archive_path:
|
||||||
create_source_path_directory(document.archive_path)
|
create_source_path_directory(document.archive_path)
|
||||||
# TODO: this assumes that the export is valid and
|
# TODO: this assumes that the export is valid and
|
||||||
# archive_filename is present on all documents with
|
# archive_filename is present on all documents with
|
||||||
# archived files
|
# archived files
|
||||||
shutil.copy2(archive_path, document.archive_path)
|
copy_file_with_basic_stats(archive_path, document.archive_path)
|
||||||
|
|
||||||
document.save()
|
document.save()
|
||||||
|
@ -18,6 +18,7 @@ from django.utils import timezone
|
|||||||
|
|
||||||
from documents.loggers import LoggingMixin
|
from documents.loggers import LoggingMixin
|
||||||
from documents.signals import document_consumer_declaration
|
from documents.signals import document_consumer_declaration
|
||||||
|
from documents.utils import copy_file_with_basic_stats
|
||||||
|
|
||||||
# This regular expression will try to find dates in the document at
|
# This regular expression will try to find dates in the document at
|
||||||
# hand and will match the following formats:
|
# hand and will match the following formats:
|
||||||
@ -206,7 +207,7 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -
|
|||||||
# so we need to copy it before it gets moved.
|
# so we need to copy it before it gets moved.
|
||||||
# https://github.com/paperless-ngx/paperless-ngx/issues/3631
|
# https://github.com/paperless-ngx/paperless-ngx/issues/3631
|
||||||
default_thumbnail_path = os.path.join(temp_dir, "document.png")
|
default_thumbnail_path = os.path.join(temp_dir, "document.png")
|
||||||
shutil.copy2(get_default_thumbnail(), default_thumbnail_path)
|
copy_file_with_basic_stats(get_default_thumbnail(), default_thumbnail_path)
|
||||||
return default_thumbnail_path
|
return default_thumbnail_path
|
||||||
|
|
||||||
|
|
||||||
|
@ -277,7 +277,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
|
st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime
|
||||||
|
|
||||||
with mock.patch(
|
with mock.patch(
|
||||||
"documents.management.commands.document_exporter.shutil.copy2",
|
"documents.management.commands.document_exporter.copy_file_with_basic_stats",
|
||||||
) as m:
|
) as m:
|
||||||
self._do_export()
|
self._do_export()
|
||||||
m.assert_not_called()
|
m.assert_not_called()
|
||||||
@ -288,7 +288,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
Path(self.d1.source_path).touch()
|
Path(self.d1.source_path).touch()
|
||||||
|
|
||||||
with mock.patch(
|
with mock.patch(
|
||||||
"documents.management.commands.document_exporter.shutil.copy2",
|
"documents.management.commands.document_exporter.copy_file_with_basic_stats",
|
||||||
) as m:
|
) as m:
|
||||||
self._do_export()
|
self._do_export()
|
||||||
self.assertEqual(m.call_count, 1)
|
self.assertEqual(m.call_count, 1)
|
||||||
@ -311,7 +311,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
self.assertIsFile(os.path.join(self.target, "manifest.json"))
|
self.assertIsFile(os.path.join(self.target, "manifest.json"))
|
||||||
|
|
||||||
with mock.patch(
|
with mock.patch(
|
||||||
"documents.management.commands.document_exporter.shutil.copy2",
|
"documents.management.commands.document_exporter.copy_file_with_basic_stats",
|
||||||
) as m:
|
) as m:
|
||||||
self._do_export()
|
self._do_export()
|
||||||
m.assert_not_called()
|
m.assert_not_called()
|
||||||
@ -322,7 +322,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
self.d2.save()
|
self.d2.save()
|
||||||
|
|
||||||
with mock.patch(
|
with mock.patch(
|
||||||
"documents.management.commands.document_exporter.shutil.copy2",
|
"documents.management.commands.document_exporter.copy_file_with_basic_stats",
|
||||||
) as m:
|
) as m:
|
||||||
self._do_export(compare_checksums=True)
|
self._do_export(compare_checksums=True)
|
||||||
self.assertEqual(m.call_count, 1)
|
self.assertEqual(m.call_count, 1)
|
||||||
|
43
src/documents/utils.py
Normal file
43
src/documents/utils.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
import shutil
|
||||||
|
from os import utime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Tuple
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_to_path(
|
||||||
|
source: Union[Path, str],
|
||||||
|
dest: Union[Path, str],
|
||||||
|
) -> Tuple[Path, Path]:
|
||||||
|
return Path(source).resolve(), Path(dest).resolve()
|
||||||
|
|
||||||
|
|
||||||
|
def copy_basic_file_stats(source: Union[Path, str], dest: Union[Path, str]) -> None:
    """
    Transfers just the access time and the modification time of source
    onto dest.  Both files are expected to exist already.

    shutil.copystat is deliberately not used: its extended attribute copy
    does weird things with SELinux and files copied from temporary
    directories, and copystat offers no way to disable that copy.
    """
    source_path, dest_path = _coerce_to_path(source, dest)
    source_times = source_path.stat()
    # Nanosecond values, passed to utime as (access time, modified time)
    timestamps_ns = (source_times.st_atime_ns, source_times.st_mtime_ns)
    utime(dest_path, ns=timestamps_ns)
|
||||||
|
|
||||||
|
|
||||||
|
def copy_file_with_basic_stats(
    source: Union[Path, str],
    dest: Union[Path, str],
) -> None:
    """
    A sort of simpler copy2 that doesn't copy extended file attributes,
    only the access time and modified times from source to dest.

    The extended attribute copy does weird things with SELinux and files
    copied from temporary directories.

    As with shutil.copy, dest may be either the target file or an existing
    directory.  When it is a directory, the file is copied into it under
    the source's base name and the times are applied to that new file,
    not to the directory itself.
    """
    source, dest = _coerce_to_path(source, dest)

    # shutil.copy returns the path of the file it actually wrote, which
    # differs from dest when dest is a directory.  The times must go onto
    # that file, otherwise the directory's times would be changed and the
    # copied file would keep its fresh timestamps.
    copied_file = shutil.copy(source, dest)
    copy_basic_file_stats(source, copied_file)
|
Loading…
x
Reference in New Issue
Block a user