diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index b273d331d..b6a0a5912 100755
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -134,6 +134,7 @@ class Consumer(LoggingMixin):
             self.log("debug", "Parsing {}...".format(self.filename))
             text = document_parser.get_text()
             date = document_parser.get_date()
+            archive_path = document_parser.get_archive_path()
         except ParseError as e:
             document_parser.cleanup()
             raise ConsumerError(e)
@@ -178,8 +179,16 @@ class Consumer(LoggingMixin):
                 # place. If this fails, we'll also rollback the transaction.
 
                 create_source_path_directory(document.source_path)
-                self._write(document, self.path, document.source_path)
-                self._write(document, thumbnail, document.thumbnail_path)
+
+                self._write(document.storage_type,
+                            self.path, document.source_path)
+
+                self._write(document.storage_type,
+                            thumbnail, document.thumbnail_path)
+
+                if archive_path and os.path.isfile(archive_path):
+                    self._write(Document.STORAGE_TYPE_UNENCRYPTED,
+                                archive_path, document.archive_path)
 
                 # Delete the file only if it was successfully consumed
                 self.log("debug", "Deleting file {}".format(self.path))
@@ -258,10 +267,10 @@ class Consumer(LoggingMixin):
             for tag_id in self.override_tag_ids:
                 document.tags.add(Tag.objects.get(pk=tag_id))
 
-    def _write(self, document, source, target):
+    def _write(self, storage_type, source, target):
         with open(source, "rb") as read_file:
             with open(target, "wb") as write_file:
-                if document.storage_type == Document.STORAGE_TYPE_UNENCRYPTED:
+                if storage_type == Document.STORAGE_TYPE_UNENCRYPTED:
                     write_file.write(read_file.read())
                     return
                 self.log("debug", "Encrypting")
diff --git a/src/documents/models.py b/src/documents/models.py
index 8e0435647..c1ab9a44d 100755
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -224,6 +224,19 @@ class Document(models.Model):
     def source_file(self):
         return open(self.source_path, "rb")
 
+    @property
+    def archive_path(self):
+        fname = "{:07}{}".format(self.pk, ".pdf")
+
+        return os.path.join(
+            settings.ARCHIVE_DIR,
+            fname
+        )
+
+    @property
+    def archive_file(self):
+        return open(self.archive_path, "rb")
+
     @property
     def file_name(self):
         return slugify(str(self)) + self.file_type
diff --git a/src/documents/parsers.py b/src/documents/parsers.py
index eb8ccf45e..3ad60dccd 100644
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -141,6 +141,9 @@ class DocumentParser(LoggingMixin):
         self.tempdir = tempfile.mkdtemp(
             prefix="paperless-", dir=settings.SCRATCH_DIR)
 
+    def get_archive_path(self):
+        return None
+
     def get_thumbnail(self):
         """
         Returns the path to a file we can use as a thumbnail for this document.
diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py
index f83f88783..9672b884b 100755
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -168,11 +168,17 @@ def run_post_consume_script(sender, document, **kwargs):
 
 @receiver(models.signals.post_delete, sender=Document)
 def cleanup_document_deletion(sender, instance, using, **kwargs):
-    for f in (instance.source_path, instance.thumbnail_path):
-        try:
-            os.unlink(f)
-        except FileNotFoundError:
-            pass  # The file's already gone, so we're cool with it.
+    for f in (instance.source_path,
+              instance.archive_path,
+              instance.thumbnail_path):
+        if os.path.isfile(f):
+            try:
+                os.unlink(f)
+            except OSError as e:
+                logging.getLogger(__name__).warning(
+                    f"While deleting document {instance.file_name}, the file "
+                    f"{f} could not be deleted: {e}"
+                )
 
     delete_empty_directories(os.path.dirname(instance.source_path))
 