archive filenames are now stored in the database and checked for collisions just like original filenames; unified method for archive version checking

jonaswinkler 2021-02-09 19:46:19 +01:00
parent 05f59e7d5e
commit fca8576d80
9 changed files with 229 additions and 105 deletions
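The motivation, in short: archive filenames used to be derived from the original filename at access time, so two documents whose originals differed only in extension could map to the same archive file. A minimal sketch of the old derivation (mirroring the migration's archive_name_from_filename; the filenames are illustrative):

    import os

    def derived_archive_name(filename):
        # Old scheme: the archive name was computed from the original's name.
        return os.path.splitext(filename)[0] + ".pdf"

    # Two distinct originals can collide on the derived archive name:
    print(derived_archive_name("scan.pdf"))  # scan.pdf
    print(derived_archive_name("scan.png"))  # scan.pdf -- same archive target

Storing the archive filename in its own unique database column, generated through the same collision-checked path as original filenames, removes this failure mode.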

View File

@@ -292,8 +292,7 @@ class Consumer(LoggingMixin):
             # After everything is in the database, copy the files into
             # place. If this fails, we'll also rollback the transaction.
             with FileLock(settings.MEDIA_LOCK):
-                document.filename = generate_unique_filename(
-                    document, settings.ORIGINALS_DIR)
+                document.filename = generate_unique_filename(document)
                 create_source_path_directory(document.source_path)
                 self._write(document.storage_type,
@@ -303,6 +302,10 @@ class Consumer(LoggingMixin):
                             thumbnail, document.thumbnail_path)

                 if archive_path and os.path.isfile(archive_path):
+                    document.archive_filename = generate_unique_filename(
+                        document,
+                        archive_filename=True
+                    )
                     create_source_path_directory(document.archive_path)
                     self._write(document.storage_type,
                                 archive_path, document.archive_path)

View File

@@ -79,12 +79,20 @@ def many_to_dictionary(field):
     return mydictionary


-def generate_unique_filename(doc, root):
+def generate_unique_filename(doc, archive_filename=False):
+    if archive_filename:
+        old_filename = doc.archive_filename
+        root = settings.ARCHIVE_DIR
+    else:
+        old_filename = doc.filename
+        root = settings.ORIGINALS_DIR
+
     counter = 0

     while True:
-        new_filename = generate_filename(doc, counter)
-        if new_filename == doc.filename:
+        new_filename = generate_filename(
+            doc, counter, archive_filename=archive_filename)
+        if new_filename == old_filename:
             # still the same as before.
             return new_filename
@@ -94,7 +102,7 @@ def generate_unique_filename(doc, root):
             return new_filename


-def generate_filename(doc, counter=0, append_gpg=True):
+def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
     path = ""

     try:
@@ -148,21 +156,16 @@ def generate_filename(doc, counter=0, append_gpg=True):
             f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")

     counter_str = f"_{counter:02}" if counter else ""
+
+    filetype_str = ".pdf" if archive_filename else doc.file_type
+
     if len(path) > 0:
-        filename = f"{path}{counter_str}{doc.file_type}"
+        filename = f"{path}{counter_str}{filetype_str}"
     else:
-        filename = f"{doc.pk:07}{counter_str}{doc.file_type}"
+        filename = f"{doc.pk:07}{counter_str}{filetype_str}"

     # Append .gpg for encrypted files
     if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
         filename += ".gpg"

     return filename
-
-
-def archive_name_from_filename(filename):
-    name, ext = os.path.splitext(filename)
-    if ext == ".pdf":
-        return filename
-    else:
-        return filename + ".pdf"
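The collision handling is easiest to see in isolation. A standalone sketch of the same counter loop, decoupled from Django (the names here are illustrative, not part of the codebase):

    import os

    def unique_name(stem, ext, root):
        # Mirrors generate_unique_filename: try "stem.ext", then "stem_01.ext", ...
        counter = 0
        while True:
            suffix = f"_{counter:02}" if counter else ""
            candidate = f"{stem}{suffix}{ext}"
            if not os.path.exists(os.path.join(root, candidate)):
                return candidate
            counter += 1

    # With "scan.pdf" already present in the archive directory, the next
    # document rendering to "scan" gets "scan_01.pdf" instead of overwriting it.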

View File

@@ -16,7 +16,8 @@ from whoosh.writing import AsyncWriter
 from documents.models import Document

 from ... import index
-from ...file_handling import create_source_path_directory
+from ...file_handling import create_source_path_directory, \
+    generate_unique_filename
 from ...parsers import get_parser_class_for_mime_type
@@ -39,13 +40,16 @@ def handle_document(document_id):
         with transaction.atomic():
             with open(parser.get_archive_path(), 'rb') as f:
                 checksum = hashlib.md5(f.read()).hexdigest()
-            # i'm going to save first so that in case the file move
+            # I'm going to save first so that in case the file move
             # fails, the database is rolled back.
-            # we also don't use save() since that triggers the filehandling
+            # We also don't use save() since that triggers the filehandling
             # logic, and we don't want that yet (file not yet in place)
+            document.archive_filename = generate_unique_filename(
+                document, archive_filename=True)
             Document.objects.filter(pk=document.pk).update(
                 archive_checksum=checksum,
-                content=parser.get_text()
+                content=parser.get_text(),
+                archive_filename=document.archive_filename
             )
             with FileLock(settings.MEDIA_LOCK):
                 create_source_path_directory(document.archive_path)
@@ -101,7 +105,7 @@ class Command(BaseCommand):
         document_ids = list(map(
             lambda doc: doc.id,
             filter(
-                lambda d: overwrite or not d.archive_checksum,
+                lambda d: overwrite or not d.has_archive_version,
                 documents
             )
         ))

View File

@@ -139,7 +139,7 @@ class Command(BaseCommand):
             thumbnail_target = os.path.join(self.target, thumbnail_name)
             document_dict[EXPORTER_THUMBNAIL_NAME] = thumbnail_name

-            if os.path.exists(document.archive_path):
+            if document.has_archive_version:
                 archive_name = base_name + "-archive.pdf"
                 archive_target = os.path.join(self.target, archive_name)
                 document_dict[EXPORTER_ARCHIVE_NAME] = archive_name

View File

@@ -1,43 +1,27 @@
 # Generated by Django 3.1.6 on 2021-02-07 22:26
+import datetime
 import hashlib
 import logging
 import os
 import shutil

+import pathvalidate
 from django.conf import settings
-from django.db import migrations
+from django.db import migrations, models
+from django.template.defaultfilters import slugify
+
+from documents.file_handling import defaultdictNoStr, many_to_dictionary

 logger = logging.getLogger("paperless.migrations")


-def archive_name_from_filename_old(filename):
+def archive_name_from_filename(filename):
     return os.path.splitext(filename)[0] + ".pdf"


 def archive_path_old(doc):
     if doc.filename:
-        fname = archive_name_from_filename_old(doc.filename)
-    else:
-        fname = "{:07}.pdf".format(doc.pk)
-
-    return os.path.join(
-        settings.ARCHIVE_DIR,
-        fname
-    )
-
-
-def archive_name_from_filename_new(filename):
-    name, ext = os.path.splitext(filename)
-    if ext == ".pdf":
-        return filename
-    else:
-        return filename + ".pdf"
-
-
-def archive_path_new(doc):
-    if doc.filename:
-        fname = archive_name_from_filename_new(doc.filename)
+        fname = archive_name_from_filename(doc.filename)
     else:
         fname = "{:07}.pdf".format(doc.pk)
@@ -50,6 +34,16 @@ def archive_path_new(doc):
 STORAGE_TYPE_GPG = "gpg"


+def archive_path_new(doc):
+    if doc.archive_filename is not None:
+        return os.path.join(
+            settings.ARCHIVE_DIR,
+            str(doc.archive_filename)
+        )
+    else:
+        return None
+
+
 def source_path(doc):
     if doc.filename:
         fname = str(doc.filename)
@@ -64,6 +58,98 @@ def source_path(doc):
     )


+def generate_unique_filename(doc, archive_filename=False):
+    if archive_filename:
+        old_filename = doc.archive_filename
+        root = settings.ARCHIVE_DIR
+    else:
+        old_filename = doc.filename
+        root = settings.ORIGINALS_DIR
+
+    counter = 0
+
+    while True:
+        new_filename = generate_filename(
+            doc, counter, archive_filename=archive_filename)
+        if new_filename == old_filename:
+            # still the same as before.
+            return new_filename
+
+        if os.path.exists(os.path.join(root, new_filename)):
+            counter += 1
+        else:
+            return new_filename
+
+
+def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
+    path = ""
+
+    try:
+        if settings.PAPERLESS_FILENAME_FORMAT is not None:
+            tags = defaultdictNoStr(lambda: slugify(None),
+                                    many_to_dictionary(doc.tags))
+
+            tag_list = pathvalidate.sanitize_filename(
+                ",".join(sorted(
+                    [tag.name for tag in doc.tags.all()]
+                )),
+                replacement_text="-"
+            )
+
+            if doc.correspondent:
+                correspondent = pathvalidate.sanitize_filename(
+                    doc.correspondent.name, replacement_text="-"
+                )
+            else:
+                correspondent = "none"
+
+            if doc.document_type:
+                document_type = pathvalidate.sanitize_filename(
+                    doc.document_type.name, replacement_text="-"
+                )
+            else:
+                document_type = "none"
+
+            path = settings.PAPERLESS_FILENAME_FORMAT.format(
+                title=pathvalidate.sanitize_filename(
+                    doc.title, replacement_text="-"),
+                correspondent=correspondent,
+                document_type=document_type,
+                created=datetime.date.isoformat(doc.created),
+                created_year=doc.created.year if doc.created else "none",
+                created_month=f"{doc.created.month:02}" if doc.created else "none",  # NOQA: E501
+                created_day=f"{doc.created.day:02}" if doc.created else "none",
+                added=datetime.date.isoformat(doc.added),
+                added_year=doc.added.year if doc.added else "none",
+                added_month=f"{doc.added.month:02}" if doc.added else "none",
+                added_day=f"{doc.added.day:02}" if doc.added else "none",
+                tags=tags,
+                tag_list=tag_list
+            ).strip()
+
+        path = path.strip(os.sep)
+
+    except (ValueError, KeyError, IndexError):
+        logger.warning(
+            f"Invalid PAPERLESS_FILENAME_FORMAT: "
+            f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
+
+    counter_str = f"_{counter:02}" if counter else ""
+
+    filetype_str = ".pdf" if archive_filename else doc.file_type
+
+    if len(path) > 0:
+        filename = f"{path}{counter_str}{filetype_str}"
+    else:
+        filename = f"{doc.pk:07}{counter_str}{filetype_str}"
+
+    # Append .gpg for encrypted files
+    if append_gpg and doc.storage_type == STORAGE_TYPE_GPG:
+        filename += ".gpg"
+
+    return filename
+
+
 def move_old_to_new_locations(apps, schema_editor):
     Document = apps.get_model("documents", "Document")
@@ -74,18 +160,12 @@ def move_old_to_new_locations(apps, schema_editor):
     # check for documents that have incorrect archive versions
     for doc in Document.objects.filter(archive_checksum__isnull=False):
         old_path = archive_path_old(doc)
-        new_path = archive_path_new(doc)

         if not os.path.isfile(old_path):
             raise ValueError(
                 f"Archived document of {doc.filename} does not exist at: "
                 f"{old_path}")

-        if old_path != new_path and os.path.isfile(new_path):
-            raise ValueError(
-                f"Need to move {old_path} to {new_path}, but target file "
-                f"already exists")
-
         if old_path in old_archive_path_to_id:
             affected_document_ids.add(doc.id)
             affected_document_ids.add(old_archive_path_to_id[old_path])
@@ -103,22 +183,19 @@ def move_old_to_new_locations(apps, schema_editor):
                 f"document {doc.filename} has an invalid archived document, "
                 f"but no parsers are available. Cannot migrate.")

-    # move files
     for doc in Document.objects.filter(archive_checksum__isnull=False):
-        old_path = archive_path_old(doc)
-        new_path = archive_path_new(doc)

         if doc.id in affected_document_ids:
+            old_path = archive_path_old(doc)
             # remove affected archive versions
             if os.path.isfile(old_path):
                 os.unlink(old_path)
         else:
-            # move unaffected archive versions
-            if old_path != new_path and os.path.isfile(old_path) and not os.path.isfile(new_path):
-                logger.debug(
-                    f"Moving {old_path} to {new_path}"
-                )
-                shutil.move(old_path, new_path)
+            # Set archive path for unaffected files
+            doc.archive_filename = archive_path_old(doc)
+            Document.objects.filter(id=doc.id).update(
+                archive_filename=doc.archive_filename
+            )

     # regenerate archive documents
     for doc_id in affected_document_ids:
@@ -135,14 +212,16 @@ def move_old_to_new_locations(apps, schema_editor):
         try:
             parser.parse(source_path(doc), doc.mime_type, os.path.basename(doc.filename))
             doc.content = parser.get_text()
-            if parser.archive_path and os.path.isfile(parser.archive_path):
-                with open(parser.archive_path, "rb") as f:
+
+            if parser.get_archive_path() and os.path.isfile(parser.get_archive_path()):
+                doc.archive_filename = generate_unique_filename(
+                    doc, archive_filename=True)
+                with open(parser.get_archive_path(), "rb") as f:
                     doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
-                shutil.copy2(parser.archive_path, archive_path_new(doc))
+                os.makedirs(os.path.dirname(archive_path_new(doc)), exist_ok=True)
+                shutil.copy2(parser.get_archive_path(), archive_path_new(doc))
             else:
                 doc.archive_checksum = None
-                if os.path.isfile(archive_path_new(doc)):
-                    os.unlink(archive_path_new(doc))
             doc.save()
         except ParseError:
             logger.exception(
@@ -187,8 +266,18 @@ class Migration(migrations.Migration):
     ]

     operations = [
+        migrations.AddField(
+            model_name='document',
+            name='archive_filename',
+            field=models.FilePathField(default=None, editable=False, help_text='Current archive filename in storage', max_length=1024, null=True, unique=True, verbose_name='archive filename'),
+        ),
+        migrations.AlterField(
+            model_name='document',
+            name='filename',
+            field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True, unique=True, verbose_name='filename'),
+        ),
         migrations.RunPython(
             move_old_to_new_locations,
             move_new_to_old_locations
-        )
+        ),
     ]
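The collision check in this migration builds a reverse map from derived archive paths to document ids; any path seen twice marks both documents as affected, and their archives are deleted and regenerated under fresh unique names. A reduced sketch of that detection, with illustrative data:

    old_archive_path_to_id = {}
    affected_document_ids = set()

    for doc_id, archive_path in [(1, "scan.pdf"), (2, "scan.pdf"), (3, "letter.pdf")]:
        if archive_path in old_archive_path_to_id:
            affected_document_ids.add(doc_id)
            affected_document_ids.add(old_archive_path_to_id[archive_path])
        else:
            old_archive_path_to_id[archive_path] = doc_id

    print(affected_document_ids)  # {1, 2} -- both colliding documents get regenerated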

View File

@@ -16,7 +16,6 @@ from django.utils.timezone import is_aware
 from django.utils.translation import gettext_lazy as _

-from documents.file_handling import archive_name_from_filename
 from documents.parsers import get_default_file_extension
@@ -208,10 +207,21 @@ class Document(models.Model):
         max_length=1024,
         editable=False,
         default=None,
+        unique=True,
         null=True,
         help_text=_("Current filename in storage")
     )

+    archive_filename = models.FilePathField(
+        _("archive filename"),
+        max_length=1024,
+        editable=False,
+        default=None,
+        unique=True,
+        null=True,
+        help_text=_("Current archive filename in storage")
+    )
+
     archive_serial_number = models.IntegerField(
         _("archive serial number"),
         blank=True,
@@ -256,16 +266,19 @@ class Document(models.Model):
         return open(self.source_path, "rb")

     @property
-    def archive_path(self):
-        if self.filename:
-            fname = archive_name_from_filename(self.filename)
-        else:
-            fname = "{:07}.pdf".format(self.pk)
+    def has_archive_version(self):
+        return self.archive_filename is not None

-        return os.path.join(
-            settings.ARCHIVE_DIR,
-            fname
-        )
+    @property
+    def archive_path(self):
+        if self.has_archive_version:
+            return os.path.join(
+                settings.ARCHIVE_DIR,
+                str(self.archive_filename)
+            )
+        else:
+            return None

     @property
     def archive_file(self):
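Note that archive_path can now be None for documents without an archive version, so callers have to guard before touching the filesystem, as the sanity checker and signal handlers below do. A sketch of the pattern (archive_size_or_none is a hypothetical helper, not part of the commit):

    import os

    def archive_size_or_none(doc):
        # archive_path is None whenever archive_filename is NULL in the database
        if doc.has_archive_version and os.path.isfile(doc.archive_path):
            return os.stat(doc.archive_path).st_size
        return None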

View File

@@ -88,7 +88,7 @@ def check_sanity():
             ))

     # Check sanity of the archive file.
-    if doc.archive_checksum:
+    if doc.has_archive_version:
         if not os.path.isfile(doc.archive_path):
             messages.append(SanityError(
                 f"Archived version of document {doc.pk} does not exist."

View File

@@ -14,7 +14,7 @@ from filelock import FileLock
 from .. import index, matching
 from ..file_handling import delete_empty_directories, \
-    create_source_path_directory, archive_name_from_filename, \
+    create_source_path_directory, \
     generate_unique_filename
 from ..models import Document, Tag
@@ -148,18 +148,18 @@ def set_tags(sender,
 @receiver(models.signals.post_delete, sender=Document)
 def cleanup_document_deletion(sender, instance, using, **kwargs):
     with FileLock(settings.MEDIA_LOCK):
-        for f in (instance.source_path,
-                  instance.archive_path,
-                  instance.thumbnail_path):
-            if os.path.isfile(f):
+        for filename in (instance.source_path,
+                         instance.archive_path,
+                         instance.thumbnail_path):
+            if filename and os.path.isfile(filename):
                 try:
-                    os.unlink(f)
+                    os.unlink(filename)
                     logger.debug(
-                        f"Deleted file {f}.")
+                        f"Deleted file {filename}.")
                 except OSError as e:
                     logger.warning(
                         f"While deleting document {str(instance)}, the file "
-                        f"{f} could not be deleted: {e}"
+                        f"{filename} could not be deleted: {e}"
                     )

         delete_empty_directories(
@@ -167,6 +167,7 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
             root=settings.ORIGINALS_DIR
         )

+        if instance.has_archive_version:
             delete_empty_directories(
                 os.path.dirname(instance.archive_path),
                 root=settings.ARCHIVE_DIR
@@ -207,8 +208,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
     with FileLock(settings.MEDIA_LOCK):
         old_filename = instance.filename
-        new_filename = generate_unique_filename(
-            instance, settings.ORIGINALS_DIR)
+        new_filename = generate_unique_filename(instance)

         if new_filename == instance.filename:
             # Don't do anything if its the same.
@@ -222,8 +222,11 @@ def update_filename_and_move_files(sender, instance, **kwargs):
         # archive files are optional, archive checksum tells us if we have one,
         # since this is None for documents without archived files.
-        if instance.archive_checksum:
-            new_archive_filename = archive_name_from_filename(new_filename)
+        if instance.has_archive_version:
+            old_archive_filename = instance.archive_filename
+            new_archive_filename = generate_unique_filename(
+                instance, archive_filename=True
+            )
             old_archive_path = instance.archive_path
             new_archive_path = os.path.join(settings.ARCHIVE_DIR,
                                             new_archive_filename)
@@ -233,6 +236,8 @@ def update_filename_and_move_files(sender, instance, **kwargs):
             create_source_path_directory(new_archive_path)
         else:
+            old_archive_filename = None
+            new_archive_filename = None
             old_archive_path = None
             new_archive_path = None
@@ -240,21 +245,27 @@ def update_filename_and_move_files(sender, instance, **kwargs):
         try:
             os.rename(old_source_path, new_source_path)
-            if instance.archive_checksum:
-                os.rename(old_archive_path, new_archive_path)
             instance.filename = new_filename
+
+            if instance.has_archive_version:
+                os.rename(old_archive_path, new_archive_path)
+                instance.archive_filename = new_archive_filename
+
             # Don't save() here to prevent infinite recursion.
             Document.objects.filter(pk=instance.pk).update(
-                filename=new_filename)
+                filename=instance.filename,
+                archive_filename=instance.archive_filename,
+            )

         except OSError as e:
             instance.filename = old_filename
+            instance.archive_filename = old_archive_filename
             # this happens when we can't move a file. If that's the case for
             # the archive file, we try our best to revert the changes.
             # no need to save the instance, the update() has not happened yet.
             try:
                 os.rename(new_source_path, old_source_path)
+                if instance.has_archive_version:
                     os.rename(new_archive_path, old_archive_path)
             except Exception as e:
                 # This is fine, since:
@@ -271,9 +282,10 @@ def update_filename_and_move_files(sender, instance, **kwargs):
             # since moving them once succeeded, it's very likely going to
             # succeed again.
             os.rename(new_source_path, old_source_path)
-            if instance.archive_checksum:
+            if instance.has_archive_version:
                 os.rename(new_archive_path, old_archive_path)
             instance.filename = old_filename
+            instance.archive_filename = old_archive_filename
             # again, no need to save the instance, since the actual update()
             # operation failed.
@@ -283,7 +295,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
         delete_empty_directories(os.path.dirname(old_source_path),
                                  root=settings.ORIGINALS_DIR)

-        if old_archive_path and not os.path.isfile(old_archive_path):
+        if instance.has_archive_version and not os.path.isfile(old_archive_path):  # NOQA: E501
             delete_empty_directories(os.path.dirname(old_archive_path),
                                      root=settings.ARCHIVE_DIR)
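The rename handler now keeps two files and two database columns consistent. Reduced to its core, the pattern is: move the original, move the archive, and undo the first move if the second fails. A sketch under that assumption (not the actual handler):

    import os

    def rename_pair(old_source, new_source, old_archive, new_archive):
        os.rename(old_source, new_source)
        try:
            if old_archive is not None:
                os.rename(old_archive, new_archive)
        except OSError:
            # Roll the original back so files and database stay in sync.
            os.rename(new_source, old_source)
            raise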

View File

@@ -192,7 +192,7 @@ class DocumentViewSet(RetrieveModelMixin,
     def file_response(self, pk, request, disposition):
         doc = Document.objects.get(id=pk)
-        if not self.original_requested(request) and os.path.isfile(doc.archive_path):  # NOQA: E501
+        if not self.original_requested(request) and doc.has_archive_version:  # NOQA: E501
             file_handle = doc.archive_file
             filename = doc.get_public_filename(archive=True)
             mime_type = 'application/pdf'
@@ -237,18 +237,18 @@ class DocumentViewSet(RetrieveModelMixin,
             "original_size": os.stat(doc.source_path).st_size,
             "original_mime_type": doc.mime_type,
             "media_filename": doc.filename,
-            "has_archive_version": os.path.isfile(doc.archive_path),
+            "has_archive_version": doc.has_archive_version,
             "original_metadata": self.get_metadata(
-                doc.source_path, doc.mime_type)
+                doc.source_path, doc.mime_type),
+            "archive_checksum": doc.archive_checksum,
+            "archive_media_filename": doc.archive_filename
         }

-        if doc.archive_checksum and os.path.isfile(doc.archive_path):
-            meta['archive_checksum'] = doc.archive_checksum
+        if doc.has_archive_version:
             meta['archive_size'] = os.stat(doc.archive_path).st_size,
             meta['archive_metadata'] = self.get_metadata(
                 doc.archive_path, "application/pdf")
         else:
-            meta['archive_checksum'] = None
             meta['archive_size'] = None
             meta['archive_metadata'] = None
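With has_archive_version in place, the metadata response always carries the archive fields, populated straight from the database; only size and parsed metadata still require the file on disk. For a document without an archive version, the resulting dict is roughly the following (values are illustrative):

    meta = {
        "original_size": 12345,
        "original_mime_type": "image/png",
        "media_filename": "scan.png",
        "has_archive_version": False,
        "original_metadata": [],
        "archive_checksum": None,
        "archive_media_filename": None,
        "archive_size": None,
        "archive_metadata": None,
    }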