Add support for archive files.

This commit is contained in:
Jonas Winkler 2020-11-25 14:47:01 +01:00
parent 9a33f191a7
commit 8069c2eb6a
4 changed files with 40 additions and 9 deletions

View File

@ -134,6 +134,7 @@ class Consumer(LoggingMixin):
self.log("debug", "Parsing {}...".format(self.filename))
text = document_parser.get_text()
date = document_parser.get_date()
archive_path = document_parser.get_archive_path()
except ParseError as e:
document_parser.cleanup()
raise ConsumerError(e)
@ -178,8 +179,16 @@ class Consumer(LoggingMixin):
# place. If this fails, we'll also rollback the transaction.
create_source_path_directory(document.source_path)
self._write(document, self.path, document.source_path)
self._write(document, thumbnail, document.thumbnail_path)
self._write(document.storage_type,
self.path, document.source_path)
self._write(document.storage_type,
thumbnail, document.thumbnail_path)
if archive_path and os.path.isfile(archive_path):
self._write(Document.STORAGE_TYPE_UNENCRYPTED,
archive_path, document.archive_path)
# Delete the file only if it was successfully consumed
self.log("debug", "Deleting file {}".format(self.path))
@ -258,10 +267,10 @@ class Consumer(LoggingMixin):
for tag_id in self.override_tag_ids:
document.tags.add(Tag.objects.get(pk=tag_id))
def _write(self, document, source, target):
def _write(self, storage_type, source, target):
with open(source, "rb") as read_file:
with open(target, "wb") as write_file:
if document.storage_type == Document.STORAGE_TYPE_UNENCRYPTED:
if storage_type == Document.STORAGE_TYPE_UNENCRYPTED:
write_file.write(read_file.read())
return
self.log("debug", "Encrypting")

View File

@ -224,6 +224,19 @@ class Document(models.Model):
def source_file(self):
return open(self.source_path, "rb")
@property
def archive_path(self):
    """
    Path of this document's archived PDF inside ``settings.ARCHIVE_DIR``.

    The file name is the primary key padded to a width of seven with
    zeros, followed by the ``.pdf`` extension.
    """
    archive_name = "{:07}{}".format(self.pk, ".pdf")
    return os.path.join(settings.ARCHIVE_DIR, archive_name)
@property
def archive_file(self):
    """
    Open the file at ``archive_path`` for binary reading.

    The open file object is returned as-is; this property does not close
    it, so that is left to the caller.
    """
    archive_handle = open(self.archive_path, "rb")
    return archive_handle
@property
def file_name(self):
    """
    File name for this document: the slugified string form of the
    document with ``file_type`` appended.
    """
    slug = slugify(str(self))
    return slug + self.file_type

View File

@ -141,6 +141,9 @@ class DocumentParser(LoggingMixin):
self.tempdir = tempfile.mkdtemp(
prefix="paperless-", dir=settings.SCRATCH_DIR)
def get_archive_path(self):
    """
    Return the path to an archive version of the parsed document.

    This implementation has no archive file to offer and always returns
    ``None``.
    """
    return None
def get_thumbnail(self):
"""
Returns the path to a file we can use as a thumbnail for this document.

View File

@ -168,11 +168,17 @@ def run_post_consume_script(sender, document, **kwargs):
@receiver(models.signals.post_delete, sender=Document)
def cleanup_document_deletion(sender, instance, using, **kwargs):
    """
    Remove the files belonging to a deleted document from disk.

    Connected to the ``post_delete`` signal of ``Document``. The files at
    ``instance.source_path``, ``instance.archive_path`` and
    ``instance.thumbnail_path`` are unlinked when they exist. Afterwards
    ``delete_empty_directories`` is called with the directory of the
    source file.

    Deletion is best-effort: a file that cannot be removed is reported as
    a warning instead of raising.
    """
    logger = logging.getLogger(__name__)

    for f in (instance.source_path,
              instance.archive_path,
              instance.thumbnail_path):
        # Only regular files are removed; missing paths (e.g. a document
        # without an archive version) are skipped silently.
        if not os.path.isfile(f):
            continue
        try:
            os.unlink(f)
        except FileNotFoundError:
            pass  # The file's already gone, so we're cool with it.
        except OSError as e:
            logger.warning(
                f"While deleting document {instance.file_name}, the file "
                f"{f} could not be deleted: {e}"
            )

    delete_empty_directories(os.path.dirname(instance.source_path))