paperless-ngx (mirror of https://github.com/paperless-ngx/paperless-ngx.git)

commit fca8576d80 (parent 05f59e7d5e)

archive filenames are now stored in the database and checked for collisions
just like original filenames; unified method for archive version checking
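
In short: the archive filename becomes a database column (unique and nullable,
just like filename), and both the original and the archive name are produced by
the same collision-checked helper. A minimal sketch of the resulting call-site
pattern, with the helper stubbed out (the real implementation is in the
file_handling.py hunks below):

    # Stub of the call-site pattern only; the real helper also probes the
    # target directory and appends _01, _02, ... until the name is free.
    def generate_unique_filename(doc, archive_filename=False):
        # Archive versions are always PDFs; originals keep their own type.
        suffix = ".pdf" if archive_filename else doc.file_type
        return f"{doc.pk:07}{suffix}"

    class Doc:
        pk, file_type = 1, ".jpg"

    doc = Doc()
    doc.filename = generate_unique_filename(doc)          # "0000001.jpg"
    doc.archive_filename = generate_unique_filename(
        doc, archive_filename=True)                       # "0000001.pdf"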
@@ -292,8 +292,7 @@ class Consumer(LoggingMixin):
             # After everything is in the database, copy the files into
             # place. If this fails, we'll also rollback the transaction.
             with FileLock(settings.MEDIA_LOCK):
-                document.filename = generate_unique_filename(
-                    document, settings.ORIGINALS_DIR)
+                document.filename = generate_unique_filename(document)
                 create_source_path_directory(document.source_path)

                 self._write(document.storage_type,
@@ -303,6 +302,10 @@ class Consumer(LoggingMixin):
                             thumbnail, document.thumbnail_path)

                 if archive_path and os.path.isfile(archive_path):
+                    document.archive_filename = generate_unique_filename(
+                        document,
+                        archive_filename=True
+                    )
                     create_source_path_directory(document.archive_path)
                     self._write(document.storage_type,
                                 archive_path, document.archive_path)
@@ -79,12 +79,20 @@ def many_to_dictionary(field):
     return mydictionary


-def generate_unique_filename(doc, root):
+def generate_unique_filename(doc, archive_filename=False):
+    if archive_filename:
+        old_filename = doc.archive_filename
+        root = settings.ARCHIVE_DIR
+    else:
+        old_filename = doc.filename
+        root = settings.ORIGINALS_DIR
+
     counter = 0

     while True:
-        new_filename = generate_filename(doc, counter)
-        if new_filename == doc.filename:
+        new_filename = generate_filename(
+            doc, counter, archive_filename=archive_filename)
+        if new_filename == old_filename:
             # still the same as before.
             return new_filename

@@ -94,7 +102,7 @@ def generate_unique_filename(doc, root):
         return new_filename


-def generate_filename(doc, counter=0, append_gpg=True):
+def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
     path = ""

     try:
@@ -148,21 +156,16 @@ def generate_filename(doc, counter=0, append_gpg=True):
             f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")

     counter_str = f"_{counter:02}" if counter else ""

+    filetype_str = ".pdf" if archive_filename else doc.file_type
+
     if len(path) > 0:
-        filename = f"{path}{counter_str}{doc.file_type}"
+        filename = f"{path}{counter_str}{filetype_str}"
     else:
-        filename = f"{doc.pk:07}{counter_str}{doc.file_type}"
+        filename = f"{doc.pk:07}{counter_str}{filetype_str}"

     # Append .gpg for encrypted files
     if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
         filename += ".gpg"

     return filename
-
-
-def archive_name_from_filename(filename):
-    name, ext = os.path.splitext(filename)
-    if ext == ".pdf":
-        return filename
-    else:
-        return filename + ".pdf"
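
Taken together, the two changes above route archive names through the same
counter loop that original names use, with the extension forced to ".pdf" for
archive versions. A self-contained toy run of that loop (the existing set and
the ".jpg" file type are invented stand-ins for the filesystem probe and
doc.file_type):

    existing = {"0000001.pdf", "0000001_01.pdf"}  # pretend these are on disk

    def generate_filename(pk, counter=0, archive_filename=False):
        filetype_str = ".pdf" if archive_filename else ".jpg"
        counter_str = f"_{counter:02}" if counter else ""
        return f"{pk:07}{counter_str}{filetype_str}"

    def generate_unique_filename(pk, archive_filename=False):
        # The real loop also returns early when the candidate equals the
        # document's current name, so unchanged documents keep their files.
        counter = 0
        while True:
            new_filename = generate_filename(pk, counter, archive_filename)
            if new_filename not in existing:
                return new_filename
            counter += 1

    print(generate_unique_filename(1, archive_filename=True))  # 0000001_02.pdf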
@@ -16,7 +16,8 @@ from whoosh.writing import AsyncWriter

 from documents.models import Document
 from ... import index
-from ...file_handling import create_source_path_directory
+from ...file_handling import create_source_path_directory, \
+    generate_unique_filename
 from ...parsers import get_parser_class_for_mime_type


@@ -39,13 +40,16 @@ def handle_document(document_id):
        with transaction.atomic():
            with open(parser.get_archive_path(), 'rb') as f:
                checksum = hashlib.md5(f.read()).hexdigest()
-           # i'm going to save first so that in case the file move
+           # I'm going to save first so that in case the file move
            # fails, the database is rolled back.
-           # we also don't use save() since that triggers the filehandling
+           # We also don't use save() since that triggers the filehandling
            # logic, and we don't want that yet (file not yet in place)
+           document.archive_filename = generate_unique_filename(
+               document, archive_filename=True)
            Document.objects.filter(pk=document.pk).update(
                archive_checksum=checksum,
-               content=parser.get_text()
+               content=parser.get_text(),
+               archive_filename=document.archive_filename
            )
            with FileLock(settings.MEDIA_LOCK):
                create_source_path_directory(document.archive_path)
@@ -101,7 +105,7 @@ class Command(BaseCommand):
        document_ids = list(map(
            lambda doc: doc.id,
            filter(
-               lambda d: overwrite or not d.archive_checksum,
+               lambda d: overwrite or not d.has_archive_version,
                documents
            )
        ))
@@ -139,7 +139,7 @@ class Command(BaseCommand):
            thumbnail_target = os.path.join(self.target, thumbnail_name)
            document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name

-           if os.path.exists(document.archive_path):
+           if document.has_archive_version:
                archive_name = base_name + "-archive.pdf"
                archive_target = os.path.join(self.target, archive_name)
                document_dict[EXPORTER_ARCHIVE_NAME] = archive_name
@@ -1,43 +1,27 @@
 # Generated by Django 3.1.6 on 2021-02-07 22:26

+import datetime
 import hashlib
 import logging
 import os
 import shutil

+import pathvalidate
 from django.conf import settings
-from django.db import migrations
+from django.db import migrations, models
+from django.template.defaultfilters import slugify
+
+from documents.file_handling import defaultdictNoStr, many_to_dictionary

 logger = logging.getLogger("paperless.migrations")


-def archive_name_from_filename_old(filename):
+def archive_name_from_filename(filename):
     return os.path.splitext(filename)[0] + ".pdf"


 def archive_path_old(doc):
     if doc.filename:
-        fname = archive_name_from_filename_old(doc.filename)
-    else:
-        fname = "{:07}.pdf".format(doc.pk)
-
-    return os.path.join(
-        settings.ARCHIVE_DIR,
-        fname
-    )
-
-
-def archive_name_from_filename_new(filename):
-    name, ext = os.path.splitext(filename)
-    if ext == ".pdf":
-        return filename
-    else:
-        return filename + ".pdf"
-
-
-def archive_path_new(doc):
-    if doc.filename:
-        fname = archive_name_from_filename_new(doc.filename)
+        fname = archive_name_from_filename(doc.filename)
     else:
         fname = "{:07}.pdf".format(doc.pk)

@@ -50,6 +34,16 @@ def archive_path_new(doc):
 STORAGE_TYPE_GPG = "gpg"


+def archive_path_new(doc):
+    if doc.archive_filename is not None:
+        return os.path.join(
+            settings.ARCHIVE_DIR,
+            str(doc.archive_filename)
+        )
+    else:
+        return None
+
+
 def source_path(doc):
     if doc.filename:
         fname = str(doc.filename)
@@ -64,6 +58,98 @@ def source_path(doc):
     )


+def generate_unique_filename(doc, archive_filename=False):
+    if archive_filename:
+        old_filename = doc.archive_filename
+        root = settings.ARCHIVE_DIR
+    else:
+        old_filename = doc.filename
+        root = settings.ORIGINALS_DIR
+
+    counter = 0
+
+    while True:
+        new_filename = generate_filename(
+            doc, counter, archive_filename=archive_filename)
+        if new_filename == old_filename:
+            # still the same as before.
+            return new_filename
+
+        if os.path.exists(os.path.join(root, new_filename)):
+            counter += 1
+        else:
+            return new_filename
+
+
+def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
+    path = ""
+
+    try:
+        if settings.PAPERLESS_FILENAME_FORMAT is not None:
+            tags = defaultdictNoStr(lambda: slugify(None),
+                                    many_to_dictionary(doc.tags))
+
+            tag_list = pathvalidate.sanitize_filename(
+                ",".join(sorted(
+                    [tag.name for tag in doc.tags.all()]
+                )),
+                replacement_text="-"
+            )
+
+            if doc.correspondent:
+                correspondent = pathvalidate.sanitize_filename(
+                    doc.correspondent.name, replacement_text="-"
+                )
+            else:
+                correspondent = "none"
+
+            if doc.document_type:
+                document_type = pathvalidate.sanitize_filename(
+                    doc.document_type.name, replacement_text="-"
+                )
+            else:
+                document_type = "none"
+
+            path = settings.PAPERLESS_FILENAME_FORMAT.format(
+                title=pathvalidate.sanitize_filename(
+                    doc.title, replacement_text="-"),
+                correspondent=correspondent,
+                document_type=document_type,
+                created=datetime.date.isoformat(doc.created),
+                created_year=doc.created.year if doc.created else "none",
+                created_month=f"{doc.created.month:02}" if doc.created else "none",  # NOQA: E501
+                created_day=f"{doc.created.day:02}" if doc.created else "none",
+                added=datetime.date.isoformat(doc.added),
+                added_year=doc.added.year if doc.added else "none",
+                added_month=f"{doc.added.month:02}" if doc.added else "none",
+                added_day=f"{doc.added.day:02}" if doc.added else "none",
+                tags=tags,
+                tag_list=tag_list
+            ).strip()
+
+            path = path.strip(os.sep)
+
+    except (ValueError, KeyError, IndexError):
+        logger.warning(
+            f"Invalid PAPERLESS_FILENAME_FORMAT: "
+            f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
+
+    counter_str = f"_{counter:02}" if counter else ""
+
+    filetype_str = ".pdf" if archive_filename else doc.file_type
+
+    if len(path) > 0:
+        filename = f"{path}{counter_str}{filetype_str}"
+    else:
+        filename = f"{doc.pk:07}{counter_str}{filetype_str}"
+
+    # Append .gpg for encrypted files
+    if append_gpg and doc.storage_type == STORAGE_TYPE_GPG:
+        filename += ".gpg"
+
+    return filename
+
+
 def move_old_to_new_locations(apps, schema_editor):
     Document = apps.get_model("documents", "Document")

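
Note that the migration inlines its own copies of generate_unique_filename and
generate_filename rather than relying on the live versions in
documents.file_handling (it still imports small helpers such as
many_to_dictionary), so later refactors of the application code cannot silently
change what this historical migration does. A generic skeleton of that
data-migration pattern, with hypothetical names:

    from django.db import migrations

    def forwards(apps, schema_editor):
        # Always use the historical model state, never the live model class.
        Document = apps.get_model("documents", "Document")
        for doc in Document.objects.all():
            pass  # transform each row here

    class Migration(migrations.Migration):
        dependencies = [("documents", "1011_previous_migration")]  # hypothetical
        operations = [
            migrations.RunPython(forwards, migrations.RunPython.noop),
        ]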
@@ -74,18 +160,12 @@ def move_old_to_new_locations(apps, schema_editor):
     # check for documents that have incorrect archive versions
     for doc in Document.objects.filter(archive_checksum__isnull=False):
         old_path = archive_path_old(doc)
-        new_path = archive_path_new(doc)

         if not os.path.isfile(old_path):
             raise ValueError(
                 f"Archived document of {doc.filename} does not exist at: "
                 f"{old_path}")

-        if old_path != new_path and os.path.isfile(new_path):
-            raise ValueError(
-                f"Need to move {old_path} to {new_path}, but target file "
-                f"already exists")
-
         if old_path in old_archive_path_to_id:
             affected_document_ids.add(doc.id)
             affected_document_ids.add(old_archive_path_to_id[old_path])
@@ -103,22 +183,19 @@ def move_old_to_new_locations(apps, schema_editor):
                 f"document {doc.filename} has an invalid archived document, "
                 f"but no parsers are available. Cannot migrate.")

-    # move files
     for doc in Document.objects.filter(archive_checksum__isnull=False):
-        old_path = archive_path_old(doc)
-        new_path = archive_path_new(doc)

         if doc.id in affected_document_ids:
+            old_path = archive_path_old(doc)
             # remove affected archive versions
             if os.path.isfile(old_path):
                 os.unlink(old_path)
         else:
-            # move unaffected archive versions
-            if old_path != new_path and os.path.isfile(old_path) and not os.path.isfile(new_path):
-                logger.debug(
-                    f"Moving {old_path} to {new_path}"
-                )
-                shutil.move(old_path, new_path)
+            # Set archive path for unaffected files
+            doc.archive_filename = archive_path_old(doc)
+            Document.objects.filter(id=doc.id).update(
+                archive_filename=doc.archive_filename
+            )

     # regenerate archive documents
     for doc_id in affected_document_ids:
@@ -135,14 +212,16 @@ def move_old_to_new_locations(apps, schema_editor):
         try:
             parser.parse(source_path(doc), doc.mime_type, os.path.basename(doc.filename))
             doc.content = parser.get_text()
-            if parser.archive_path and os.path.isfile(parser.archive_path):
-                with open(parser.archive_path, "rb") as f:
+
+            if parser.get_archive_path() and os.path.isfile(parser.get_archive_path()):
+                doc.archive_filename = generate_unique_filename(
+                    doc, archive_filename=True)
+                with open(parser.get_archive_path(), "rb") as f:
                     doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
-                shutil.copy2(parser.archive_path, archive_path_new(doc))
+                os.makedirs(os.path.dirname(archive_path_new(doc)), exist_ok=True)
+                shutil.copy2(parser.get_archive_path(), archive_path_new(doc))
             else:
                 doc.archive_checksum = None
-                if os.path.isfile(archive_path_new(doc)):
-                    os.unlink(archive_path_new(doc))
             doc.save()
         except ParseError:
             logger.exception(
@@ -187,8 +266,18 @@ class Migration(migrations.Migration):
     ]

     operations = [
+        migrations.AddField(
+            model_name='document',
+            name='archive_filename',
+            field=models.FilePathField(default=None, editable=False, help_text='Current archive filename in storage', max_length=1024, null=True, unique=True, verbose_name='archive filename'),
+        ),
+        migrations.AlterField(
+            model_name='document',
+            name='filename',
+            field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True, unique=True, verbose_name='filename'),
+        ),
         migrations.RunPython(
             move_old_to_new_locations,
             move_new_to_old_locations
-        )
+        ),
     ]
@@ -16,7 +16,6 @@ from django.utils.timezone import is_aware

 from django.utils.translation import gettext_lazy as _

-from documents.file_handling import archive_name_from_filename
 from documents.parsers import get_default_file_extension


@@ -208,10 +207,21 @@ class Document(models.Model):
         max_length=1024,
         editable=False,
         default=None,
+        unique=True,
         null=True,
         help_text=_("Current filename in storage")
     )

+    archive_filename = models.FilePathField(
+        _("archive filename"),
+        max_length=1024,
+        editable=False,
+        default=None,
+        unique=True,
+        null=True,
+        help_text=_("Current archive filename in storage")
+    )
+
     archive_serial_number = models.IntegerField(
         _("archive serial number"),
         blank=True,
@@ -256,16 +266,19 @@ class Document(models.Model):
         return open(self.source_path, "rb")

     @property
-    def archive_path(self):
-        if self.filename:
-            fname = archive_name_from_filename(self.filename)
-        else:
-            fname = "{:07}.pdf".format(self.pk)
-
-        return os.path.join(
-            settings.ARCHIVE_DIR,
-            fname
-        )
+    def has_archive_version(self):
+        return self.archive_filename is not None
+
+    @property
+    def archive_path(self):
+        if self.has_archive_version:
+            return os.path.join(
+                settings.ARCHIVE_DIR,
+                str(self.archive_filename)
+            )
+        else:
+            return None

     @property
     def archive_file(self):
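
The property pair above is the "unified method for archive version checking"
from the commit message: callers ask has_archive_version instead of probing the
filesystem or the checksum, and archive_path is None when there is no archive
version. A stripped-down sketch of the behaviour (plain Python, no Django;
ARCHIVE_DIR stands in for settings.ARCHIVE_DIR):

    import os

    ARCHIVE_DIR = "/data/archive"

    class Doc:
        def __init__(self, archive_filename=None):
            self.archive_filename = archive_filename

        @property
        def has_archive_version(self):
            return self.archive_filename is not None

        @property
        def archive_path(self):
            if self.has_archive_version:
                return os.path.join(ARCHIVE_DIR, str(self.archive_filename))
            return None

    print(Doc("0000001.pdf").archive_path)  # /data/archive/0000001.pdf
    print(Doc().archive_path)               # None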
@@ -88,7 +88,7 @@ def check_sanity():
            ))

        # Check sanity of the archive file.
-       if doc.archive_checksum:
+       if doc.has_archive_version:
            if not os.path.isfile(doc.archive_path):
                messages.append(SanityError(
                    f"Archived version of document {doc.pk} does not exist."
@@ -14,7 +14,7 @@ from filelock import FileLock

 from .. import index, matching
 from ..file_handling import delete_empty_directories, \
-    create_source_path_directory, archive_name_from_filename, \
+    create_source_path_directory, \
     generate_unique_filename
 from ..models import Document, Tag

@@ -148,18 +148,18 @@ def set_tags(sender,
 @receiver(models.signals.post_delete, sender=Document)
 def cleanup_document_deletion(sender, instance, using, **kwargs):
     with FileLock(settings.MEDIA_LOCK):
-        for f in (instance.source_path,
+        for filename in (instance.source_path,
                   instance.archive_path,
                   instance.thumbnail_path):
-            if os.path.isfile(f):
+            if filename and os.path.isfile(filename):
                 try:
-                    os.unlink(f)
+                    os.unlink(filename)
                     logger.debug(
-                        f"Deleted file {f}.")
+                        f"Deleted file {filename}.")
                 except OSError as e:
                     logger.warning(
                         f"While deleting document {str(instance)}, the file "
-                        f"{f} could not be deleted: {e}"
+                        f"{filename} could not be deleted: {e}"
                     )

         delete_empty_directories(
@@ -167,10 +167,11 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
            root=settings.ORIGINALS_DIR
        )

-       delete_empty_directories(
-           os.path.dirname(instance.archive_path),
-           root=settings.ARCHIVE_DIR
-       )
+       if instance.has_archive_version:
+           delete_empty_directories(
+               os.path.dirname(instance.archive_path),
+               root=settings.ARCHIVE_DIR
+           )


 def validate_move(instance, old_path, new_path):
@@ -207,8 +208,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):

     with FileLock(settings.MEDIA_LOCK):
         old_filename = instance.filename
-        new_filename = generate_unique_filename(
-            instance, settings.ORIGINALS_DIR)
+        new_filename = generate_unique_filename(instance)

         if new_filename == instance.filename:
             # Don't do anything if its the same.
@@ -222,8 +222,11 @@ def update_filename_and_move_files(sender, instance, **kwargs):

        # archive files are optional, archive checksum tells us if we have one,
        # since this is None for documents without archived files.
-       if instance.archive_checksum:
-           new_archive_filename = archive_name_from_filename(new_filename)
+       if instance.has_archive_version:
+           old_archive_filename = instance.archive_filename
+           new_archive_filename = generate_unique_filename(
+               instance, archive_filename=True
+           )
            old_archive_path = instance.archive_path
            new_archive_path = os.path.join(settings.ARCHIVE_DIR,
                                            new_archive_filename)
@@ -233,6 +236,8 @@ def update_filename_and_move_files(sender, instance, **kwargs):

            create_source_path_directory(new_archive_path)
        else:
+           old_archive_filename = None
+           new_archive_filename = None
            old_archive_path = None
            new_archive_path = None

@@ -240,22 +245,28 @@ def update_filename_and_move_files(sender, instance, **kwargs):

        try:
            os.rename(old_source_path, new_source_path)
-           if instance.archive_checksum:
-               os.rename(old_archive_path, new_archive_path)
            instance.filename = new_filename
+
+           if instance.has_archive_version:
+               os.rename(old_archive_path, new_archive_path)
+               instance.archive_filename = new_archive_filename
+
            # Don't save() here to prevent infinite recursion.
            Document.objects.filter(pk=instance.pk).update(
-               filename=new_filename)
+               filename=instance.filename,
+               archive_filename=instance.archive_filename,
+           )

        except OSError as e:
            instance.filename = old_filename
+           instance.archive_filename = old_archive_filename
            # this happens when we can't move a file. If that's the case for
            # the archive file, we try our best to revert the changes.
            # no need to save the instance, the update() has not happened yet.
            try:
                os.rename(new_source_path, old_source_path)
-               os.rename(new_archive_path, old_archive_path)
+               if instance.has_archive_version:
+                   os.rename(new_archive_path, old_archive_path)
            except Exception as e:
                # This is fine, since:
                # A: if we managed to move source from A to B, we will also
@@ -271,9 +282,10 @@ def update_filename_and_move_files(sender, instance, **kwargs):
                # since moving them once succeeded, it's very likely going to
                # succeed again.
                os.rename(new_source_path, old_source_path)
-               if instance.archive_checksum:
+               if instance.has_archive_version:
                    os.rename(new_archive_path, old_archive_path)
                instance.filename = old_filename
+               instance.archive_filename = old_archive_filename
                # again, no need to save the instance, since the actual update()
                # operation failed.

@@ -283,7 +295,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
        delete_empty_directories(os.path.dirname(old_source_path),
                                 root=settings.ORIGINALS_DIR)

-       if old_archive_path and not os.path.isfile(old_archive_path):
+       if instance.has_archive_version and not os.path.isfile(old_archive_path):  # NOQA: E501
           delete_empty_directories(os.path.dirname(old_archive_path),
                                    root=settings.ARCHIVE_DIR)

@@ -192,7 +192,7 @@ class DocumentViewSet(RetrieveModelMixin,

    def file_response(self, pk, request, disposition):
        doc = Document.objects.get(id=pk)
-       if not self.original_requested(request) and os.path.isfile(doc.archive_path):  # NOQA: E501
+       if not self.original_requested(request) and doc.has_archive_version:  # NOQA: E501
           file_handle = doc.archive_file
           filename = doc.get_public_filename(archive=True)
           mime_type = 'application/pdf'
@@ -237,18 +237,18 @@ class DocumentViewSet(RetrieveModelMixin,
            "original_size": os.stat(doc.source_path).st_size,
            "original_mime_type": doc.mime_type,
            "media_filename": doc.filename,
-           "has_archive_version": os.path.isfile(doc.archive_path),
+           "has_archive_version": doc.has_archive_version,
            "original_metadata": self.get_metadata(
-               doc.source_path, doc.mime_type)
+               doc.source_path, doc.mime_type),
+           "archive_checksum": doc.archive_checksum,
+           "archive_media_filename": doc.archive_filename
        }

-       if doc.archive_checksum and os.path.isfile(doc.archive_path):
-           meta['archive_checksum'] = doc.archive_checksum
+       if doc.has_archive_version:
            meta['archive_size'] = os.stat(doc.archive_path).st_size,
            meta['archive_metadata'] = self.get_metadata(
                doc.archive_path, "application/pdf")
        else:
-           meta['archive_checksum'] = None
            meta['archive_size'] = None
            meta['archive_metadata'] = None

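
With the last hunk, the document metadata response always carries the archive
fields instead of adding archive_checksum only when an archive file exists; for
documents without an archive version they are simply None. A sketch of the
resulting payload shape (all values are invented examples):

    meta = {
        "original_size": 123456,
        "original_mime_type": "image/jpeg",
        "media_filename": "0000001.jpg",
        "has_archive_version": True,
        "original_metadata": [],
        "archive_checksum": "d41d8cd98f00b204e9800998ecf8427e",
        "archive_media_filename": "0000001.pdf",
        # filled by the if/else above; None when has_archive_version is False
        "archive_size": 123000,
        "archive_metadata": [],
    }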