From 6cedbb3307f4a2a70e98938f57a1ee341017aa8e Mon Sep 17 00:00:00 2001 From: Mike Cronce Date: Sun, 6 Aug 2017 13:07:54 -0400 Subject: [PATCH 01/30] src/paperless/settings.py: Added DISABLE_ENCRYPTION environment variable --- src/paperless/settings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 1c88c6bb8..450bd30d8 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -259,6 +259,7 @@ CONSUMER_LOOP_TIME = int(os.getenv("PAPERLESS_CONSUMER_LOOP_TIME", 10)) # with GPG, including an interesting case where it may "encrypt" zero-byte # files. PASSPHRASE = os.getenv("PAPERLESS_PASSPHRASE") +ENABLE_ENCRYPTION = os.getenv('DISABLE_ENCRYPTION') != 'true' # Trigger a script after every successful document consumption? PRE_CONSUME_SCRIPT = os.getenv("PAPERLESS_PRE_CONSUME_SCRIPT") From 8783c2af884aec01c6605e0293c9f9d44548fd48 Mon Sep 17 00:00:00 2001 From: Mike Cronce Date: Mon, 7 Aug 2017 18:32:03 -0400 Subject: [PATCH 02/30] src/manage.py: Added check to see whether or not encryption is enabled before prompting for passphrase if it's empty --- src/manage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/manage.py b/src/manage.py index db13d9d62..040af1249 100755 --- a/src/manage.py +++ b/src/manage.py @@ -11,7 +11,7 @@ if __name__ == "__main__": # The runserver and consumer need to have access to the passphrase, so it # must be entered at start time to keep it safe. if "runserver" in sys.argv or "document_consumer" in sys.argv: - if not settings.PASSPHRASE: + if(settings.ENABLE_ENCRYPTION and not settings.PASSPHRASE): settings.PASSPHRASE = input( "settings.PASSPHRASE is unset. Input passphrase: ") From 3b6a3219f59e389386349f80f613e9ead2b992b0 Mon Sep 17 00:00:00 2001 From: Mike Cronce Date: Mon, 7 Aug 2017 18:33:56 -0400 Subject: [PATCH 03/30] src/paperless/db.py: If encryption is disabled, just directly read the file contents --- src/paperless/db.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/paperless/db.py b/src/paperless/db.py index 49e4fea4b..07cef0024 100644 --- a/src/paperless/db.py +++ b/src/paperless/db.py @@ -12,11 +12,15 @@ class GnuPG(object): @classmethod def decrypted(cls, file_handle): + if(not settings.ENABLE_ENCRYPTION): + return file_handle.read() return cls.gpg.decrypt_file( file_handle, passphrase=settings.PASSPHRASE).data @classmethod def encrypted(cls, file_handle): + if(not settings.ENABLE_ENCRYPTION): + return file_handle.read() return cls.gpg.encrypt_file( file_handle, recipients=None, From 02b40a54e051e06372895d33aa88b1153fe0eff4 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 4 Feb 2018 13:11:10 +0000 Subject: [PATCH 04/30] Try to be more pep8 in the settings file --- src/paperless/settings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 450bd30d8..a019ac7fb 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -221,12 +221,12 @@ OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng") OCR_THREADS = os.getenv("PAPERLESS_OCR_THREADS") # OCR all documents? -OCR_ALWAYS = bool(os.getenv("PAPERLESS_OCR_ALWAYS", "NO").lower() in ("yes", "y", "1", "t", "true")) +OCR_ALWAYS = bool(os.getenv("PAPERLESS_OCR_ALWAYS", "NO").lower() in ("yes", "y", "1", "t", "true")) # NOQA # If this is true, any failed attempts to OCR a PDF will result in the PDF # being indexed anyway, with whatever we could get. If it's False, the file # will simply be left in the CONSUMPTION_DIR. -FORGIVING_OCR = bool(os.getenv("PAPERLESS_FORGIVING_OCR", "YES").lower() in ("yes", "y", "1", "t", "true")) +FORGIVING_OCR = bool(os.getenv("PAPERLESS_FORGIVING_OCR", "YES").lower() in ("yes", "y", "1", "t", "true")) # NOQA # GNUPG needs a home directory for some reason GNUPG_HOME = os.getenv("HOME", "/tmp") From 885dbf67d560328ba8346da9c5674fc609f23938 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 4 Feb 2018 13:11:35 +0000 Subject: [PATCH 05/30] Set STORAGE_TYPE instead of ENABLE_ENCRYPTION boolean This allows for future decisions around the types of encryption used (if any). Ideally, I want to replace GPG one day with something elegant out of the cryptography module. --- paperless.conf.example | 8 ++++++++ src/paperless/settings.py | 9 ++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/paperless.conf.example b/paperless.conf.example index 45c532fe1..72adf9d48 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -59,6 +59,14 @@ PAPERLESS_EMAIL_SECRET="" #### Security #### ############################################################################### +# By default, Paperless will attempt to GPG encrypt your PDF files using the +# PAPERLESS_PASSPHRASE specified below. If however you're not concerned about +# encrypting these files (for example if you have disk encryption locally) then +# you don't need this and can safely turn it off by setting +# PAPERLESS_STORAGE_TYPE="unencrypted" here. In such a case, the PASSPHRASE +# value set below will be ignored. +#PAPERLESS_STORAGE_TYPE="gpg" + # You must have a passphrase in order for Paperless to work at all. If you set # this to "", GNUGPG will "encrypt" your PDF by writing it out as a zero-byte # file. diff --git a/src/paperless/settings.py b/src/paperless/settings.py index a019ac7fb..33765c748 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -251,6 +251,14 @@ CONSUMPTION_DIR = os.getenv("PAPERLESS_CONSUMPTION_DIR") # slowly, you may want to use a higher value than the default. CONSUMER_LOOP_TIME = int(os.getenv("PAPERLESS_CONSUMER_LOOP_TIME", 10)) +# By default, Paperless will attempt to GPG encrypt your PDF files using the +# PASSPHRASE specified below. If however you're not concerned about encrypting +# these files (for example if you have disk encryption locally) then +# you don't need this and can safely turn it off by setting STORAGE_TYPE to +# "unencrypted" here. In such a case, the PASSPHRASE value set below will be +# ignored. +STORAGE_TYPE = os.getenv("PAPERLESS_STORAGE_TYPE", "gpg") + # This is used to encrypt the original documents and decrypt them later when # you want to download them. Set it and change the permissions on this file to # 0600, or set it to `None` and you'll be prompted for the passphrase at @@ -259,7 +267,6 @@ CONSUMER_LOOP_TIME = int(os.getenv("PAPERLESS_CONSUMER_LOOP_TIME", 10)) # with GPG, including an interesting case where it may "encrypt" zero-byte # files. PASSPHRASE = os.getenv("PAPERLESS_PASSPHRASE") -ENABLE_ENCRYPTION = os.getenv('DISABLE_ENCRYPTION') != 'true' # Trigger a script after every successful document consumption? PRE_CONSUME_SCRIPT = os.getenv("PAPERLESS_PRE_CONSUME_SCRIPT") From da6dc2ad5babc289375fef64680b1f7f8a76c741 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 4 Feb 2018 13:13:24 +0000 Subject: [PATCH 06/30] Attach storage_type to Documents --- .../migrations/0019_document_storage_type.py | 20 ++++++++++++++ src/documents/models.py | 27 +++++++++++++++++-- 2 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 src/documents/migrations/0019_document_storage_type.py diff --git a/src/documents/migrations/0019_document_storage_type.py b/src/documents/migrations/0019_document_storage_type.py new file mode 100644 index 000000000..bd3595643 --- /dev/null +++ b/src/documents/migrations/0019_document_storage_type.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.10 on 2018-02-04 13:07 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0018_auto_20170715_1712'), + ] + + operations = [ + migrations.AddField( + model_name='document', + name='storage_type', + field=models.CharField(choices=[('unencrypted', 'Unencrypted'), ('gpg', 'Encrypted with GNU Privacy Guard')], default='gpg', editable=False, max_length=11), + ), + ] diff --git a/src/documents/models.py b/src/documents/models.py index 420afa426..8e072a1cc 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -190,6 +190,13 @@ class Document(models.Model): TYPE_TIF = "tiff" TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,) + STORAGE_TYPE_UNENCRYPTED = "unencrypted" + STORAGE_TYPE_GPG = "gpg" + STORAGE_TYPES = ( + (STORAGE_TYPE_UNENCRYPTED, "Unencrypted"), + (STORAGE_TYPE_GPG, "Encrypted with GNU Privacy Guard") + ) + correspondent = models.ForeignKey( Correspondent, blank=True, @@ -229,6 +236,12 @@ class Document(models.Model): default=timezone.now, db_index=True) modified = models.DateTimeField( auto_now=True, editable=False, db_index=True) + storage_type = models.CharField( + max_length=11, + choices=STORAGE_TYPES, + default=STORAGE_TYPE_GPG, + editable=False + ) class Meta(object): ordering = ("correspondent", "title") @@ -244,11 +257,16 @@ class Document(models.Model): @property def source_path(self): + + file_name = "{:07}.{}".format(self.pk, self.file_type) + if self.storage_type == self.STORAGE_TYPE_GPG: + file_name += ".gpg" + return os.path.join( settings.MEDIA_ROOT, "documents", "originals", - "{:07}.{}.gpg".format(self.pk, self.file_type) + file_name ) @property @@ -265,11 +283,16 @@ class Document(models.Model): @property def thumbnail_path(self): + + file_name = "{:07}.png".format(self.pk) + if self.storage_type == self.STORAGE_TYPE_GPG: + file_name += ".gpg" + return os.path.join( settings.MEDIA_ROOT, "documents", "thumbnails", - "{:07}.png.gpg".format(self.pk) + file_name ) @property From cdc07cf15352aea632141b81ff9dc6fb3dc3ef8e Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 4 Feb 2018 13:14:23 +0000 Subject: [PATCH 07/30] Move the encrypt/decrypt decision out of db and into the view --- src/documents/views.py | 9 +++++++-- src/paperless/db.py | 4 ---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/documents/views.py b/src/documents/views.py index da9a45e49..e297e0984 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -52,12 +52,12 @@ class FetchView(SessionOrBasicAuthMixin, DetailView): if self.kwargs["kind"] == "thumb": return HttpResponse( - GnuPG.decrypted(self.object.thumbnail_file), + self._get_raw_data(self.object.thumbnail_file), content_type=content_types[Document.TYPE_PNG] ) response = HttpResponse( - GnuPG.decrypted(self.object.source_file), + self._get_raw_data(self.object.source_file), content_type=content_types[self.object.file_type] ) response["Content-Disposition"] = 'attachment; filename="{}"'.format( @@ -65,6 +65,11 @@ class FetchView(SessionOrBasicAuthMixin, DetailView): return response + def _get_raw_data(self, file_handle): + if self.object.storage_type == Document.STORAGE_TYPE_UNENCRYPTED: + return file_handle + return GnuPG.decrypted(file_handle) + class PushView(SessionOrBasicAuthMixin, FormView): """ diff --git a/src/paperless/db.py b/src/paperless/db.py index 07cef0024..49e4fea4b 100644 --- a/src/paperless/db.py +++ b/src/paperless/db.py @@ -12,15 +12,11 @@ class GnuPG(object): @classmethod def decrypted(cls, file_handle): - if(not settings.ENABLE_ENCRYPTION): - return file_handle.read() return cls.gpg.decrypt_file( file_handle, passphrase=settings.PASSPHRASE).data @classmethod def encrypted(cls, file_handle): - if(not settings.ENABLE_ENCRYPTION): - return file_handle.read() return cls.gpg.encrypt_file( file_handle, recipients=None, From d8740ee5ca18ab6e887148ac1de1d47560c8cdb3 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 4 Feb 2018 13:14:47 +0000 Subject: [PATCH 08/30] Make the consumer aware of the different storage types --- src/documents/consumer.py | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 886b0dd69..5abba9862 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -48,6 +48,19 @@ class Consumer: except FileExistsError: pass + acceptable_storage_types = [_[0] for _ in Document.STORAGE_TYPES] + if settings.STORAGE_TYPE not in acceptable_storage_types: + raise ConsumerError( + 'Invalid STORAGE_TYPE "{}" defined. It must be one of {}. ' + 'Exiting.'.format( + settings.STORAGE_TYPE, + ", ".join(acceptable_storage_types) + ) + ) + + self.stats = {} + self._ignore = [] + if not self.consume: raise ConsumerError( "The CONSUMPTION_DIR settings variable does not appear to be " @@ -195,7 +208,8 @@ class Consumer: file_type=file_info.extension, checksum=hashlib.md5(f.read()).hexdigest(), created=created, - modified=created + modified=created, + storage_type=settings.STORAGE_TYPE ) relevant_tags = set(list(Tag.match_all(text)) + list(file_info.tags)) @@ -204,22 +218,22 @@ class Consumer: self.log("debug", "Tagging with {}".format(tag_names)) document.tags.add(*relevant_tags) - # Encrypt and store the actual document - with open(doc, "rb") as unencrypted: - with open(document.source_path, "wb") as encrypted: - self.log("debug", "Encrypting the document") - encrypted.write(GnuPG.encrypted(unencrypted)) - - # Encrypt and store the thumbnail - with open(thumbnail, "rb") as unencrypted: - with open(document.thumbnail_path, "wb") as encrypted: - self.log("debug", "Encrypting the thumbnail") - encrypted.write(GnuPG.encrypted(unencrypted)) + self._write(document, doc, document.source_path) + self._write(document, thumbnail, document.thumbnail_path) self.log("info", "Completed") return document + def _write(self, document, source, target): + with open(source, "rb") as read_file: + with open(target, "wb") as write_file: + if document.storage_type == Document.STORAGE_TYPE_UNENCRYPTED: + write_file.write(read_file.read()) + return + self.log("debug", "Encrypting the thumbnail") + write_file.write(GnuPG.encrypted(read_file)) + def _cleanup_doc(self, doc): self.log("debug", "Deleting document {}".format(doc)) os.unlink(doc) From e5b7e93eff2c27c825bea86d18a6efdf8b488a2d Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 4 Feb 2018 13:15:08 +0000 Subject: [PATCH 09/30] Only require a passphrase if STORAGE_TYPE is not "unencrypted" --- src/manage.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/manage.py b/src/manage.py index 040af1249..823f41541 100755 --- a/src/manage.py +++ b/src/manage.py @@ -11,8 +11,10 @@ if __name__ == "__main__": # The runserver and consumer need to have access to the passphrase, so it # must be entered at start time to keep it safe. if "runserver" in sys.argv or "document_consumer" in sys.argv: - if(settings.ENABLE_ENCRYPTION and not settings.PASSPHRASE): - settings.PASSPHRASE = input( - "settings.PASSPHRASE is unset. Input passphrase: ") + if not settings.STORAGE_TYPE == "unencrypted": + if not settings.PASSPHRASE: + settings.PASSPHRASE = input( + "settings.PASSPHRASE is unset. Input passphrase: " + ) execute_from_command_line(sys.argv) From b79caa64d0f864500e266b726b503b4edf1f98a7 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 18 Mar 2018 15:42:19 +0000 Subject: [PATCH 10/30] Remove checks we weren't using --- src/paperless/checks.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/src/paperless/checks.py b/src/paperless/checks.py index 3a9408a71..666425f9c 100644 --- a/src/paperless/checks.py +++ b/src/paperless/checks.py @@ -2,7 +2,7 @@ import os import shutil from django.conf import settings -from django.core.checks import Error, register, Warning +from django.core.checks import Error, Warning, register @register() @@ -84,20 +84,3 @@ def binaries_check(app_configs, **kwargs): check_messages.append(Warning(error.format(binary), hint)) return check_messages - - -@register() -def config_check(app_configs, **kwargs): - warning = ( - "It looks like you have PAPERLESS_SHARED_SECRET defined. Note that " - "in the \npast, this variable was used for both API authentication " - "and as the mail \nkeyword. As the API no no longer uses it, this " - "variable has been renamed to \nPAPERLESS_EMAIL_SECRET, so if you're " - "using the mail feature, you'd best update \nyour variable name.\n\n" - "The old variable will stop working in a few months." - ) - - if os.getenv("PAPERLESS_SHARED_SECRET"): - return [Warning(warning)] - - return [] From c0ad6cd58abb04bbc9b3aae2ae7d140a4b2ef206 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 18 Mar 2018 15:42:32 +0000 Subject: [PATCH 11/30] Add "fat finger" check to password status --- src/manage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/manage.py b/src/manage.py index 823f41541..782981628 100755 --- a/src/manage.py +++ b/src/manage.py @@ -3,6 +3,7 @@ import os import sys if __name__ == "__main__": + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings") from django.conf import settings @@ -12,7 +13,7 @@ if __name__ == "__main__": # must be entered at start time to keep it safe. if "runserver" in sys.argv or "document_consumer" in sys.argv: if not settings.STORAGE_TYPE == "unencrypted": - if not settings.PASSPHRASE: + while not settings.PASSPHRASE: settings.PASSPHRASE = input( "settings.PASSPHRASE is unset. Input passphrase: " ) From f72fa43e8691b3708f4ad9d6ec340ead3eca2fcb Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 18 Mar 2018 15:42:51 +0000 Subject: [PATCH 12/30] Add check for changed password These tests are incomplete, but I have no idea how to write the other half. --- src/documents/__init__.py | 1 + src/documents/checks.py | 26 ++++++++++++++++++++++++++ src/documents/tests/test_checks.py | 25 +++++++++++++++++++++++++ 3 files changed, 52 insertions(+) create mode 100644 src/documents/checks.py create mode 100644 src/documents/tests/test_checks.py diff --git a/src/documents/__init__.py b/src/documents/__init__.py index e69de29bb..864b5f5fe 100644 --- a/src/documents/__init__.py +++ b/src/documents/__init__.py @@ -0,0 +1 @@ +from .checks import changed_password_check diff --git a/src/documents/checks.py b/src/documents/checks.py new file mode 100644 index 000000000..b3b8729c4 --- /dev/null +++ b/src/documents/checks.py @@ -0,0 +1,26 @@ +from django.core.checks import Warning, register + + +@register() +def changed_password_check(app_configs, **kwargs): + + from documents.models import Document + from paperless.db import GnuPG + + warning = ( + "At least one document:\n\n {}\n\nin your data store was encrypted " + "with a password other than the one currently\nin use. This means " + "that this file, and others encrypted with the other\npassword are no " + "longer acessible, which is probably not what you want. If\nyou " + "intend to change your Paperless password, you must first export all " + "of\nthe old documents, start fresh with the new password and then " + "re-import them." + ) + + document = Document.objects.order_by("-pk").filter( + storage_type=Document.STORAGE_TYPE_GPG + ).first() + + if document and not GnuPG.decrypted(document.source_file): + return [Warning(warning.format(document))] + return [] diff --git a/src/documents/tests/test_checks.py b/src/documents/tests/test_checks.py new file mode 100644 index 000000000..da3a4adf0 --- /dev/null +++ b/src/documents/tests/test_checks.py @@ -0,0 +1,25 @@ +import unittest + +from django.test import TestCase + +from ..checks import changed_password_check +from ..models import Document +from .factories import DocumentFactory + + +class ChecksTestCase(TestCase): + + def test_changed_password_check_empty_db(self): + self.assertEqual(changed_password_check(None), []) + + def test_changed_password_check_no_encryption(self): + DocumentFactory.create(storage_type=Document.STORAGE_TYPE_UNENCRYPTED) + self.assertEqual(changed_password_check(None), []) + + @unittest.skip("I don't know how to test this") + def test_changed_password_check_gpg_encryption_with_good_password(self): + pass + + @unittest.skip("I don't know how to test this") + def test_changed_password_check_fail(self): + pass From 2ab2c37f5a0788cbf587602d56b90092783fe55c Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 27 May 2018 14:28:33 +0100 Subject: [PATCH 13/30] Fix migration conflict --- ...9_document_storage_type.py => 0020_document_storage_type.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/documents/migrations/{0019_document_storage_type.py => 0020_document_storage_type.py} (91%) diff --git a/src/documents/migrations/0019_document_storage_type.py b/src/documents/migrations/0020_document_storage_type.py similarity index 91% rename from src/documents/migrations/0019_document_storage_type.py rename to src/documents/migrations/0020_document_storage_type.py index bd3595643..83a3a2b5d 100644 --- a/src/documents/migrations/0019_document_storage_type.py +++ b/src/documents/migrations/0020_document_storage_type.py @@ -8,7 +8,7 @@ from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('documents', '0018_auto_20170715_1712'), + ('documents', '0019_add_consumer_user'), ] operations = [ From 52b0249d711812a1b4d656cbcadebb9b71c42117 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 27 May 2018 14:28:41 +0100 Subject: [PATCH 14/30] Don't run document checks if table doesn't exist yet --- src/documents/checks.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/documents/checks.py b/src/documents/checks.py index b3b8729c4..2f7ebd89e 100644 --- a/src/documents/checks.py +++ b/src/documents/checks.py @@ -1,4 +1,5 @@ from django.core.checks import Warning, register +from django.db.utils import OperationalError @register() @@ -17,10 +18,12 @@ def changed_password_check(app_configs, **kwargs): "re-import them." ) - document = Document.objects.order_by("-pk").filter( - storage_type=Document.STORAGE_TYPE_GPG - ).first() + try: + document = Document.objects.order_by("-pk").filter( + storage_type=Document.STORAGE_TYPE_GPG).first() + if document and not GnuPG.decrypted(document.source_file): + return [Warning(warning.format(document))] + except OperationalError: + pass # No documents table yet - if document and not GnuPG.decrypted(document.source_file): - return [Warning(warning.format(document))] return [] From 5643d89270fe21f821006fc977e31e36c879d346 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 27 May 2018 23:17:21 +0100 Subject: [PATCH 15/30] Change default storage_type to unencrypted --- .../migrations/0021_auto_20180527_1653.py | 20 +++++++++++++++++++ src/documents/models.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 src/documents/migrations/0021_auto_20180527_1653.py diff --git a/src/documents/migrations/0021_auto_20180527_1653.py b/src/documents/migrations/0021_auto_20180527_1653.py new file mode 100644 index 000000000..61d120406 --- /dev/null +++ b/src/documents/migrations/0021_auto_20180527_1653.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.13 on 2018-05-27 16:53 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0020_document_storage_type'), + ] + + operations = [ + migrations.AlterField( + model_name='document', + name='storage_type', + field=models.CharField(choices=[('unencrypted', 'Unencrypted'), ('gpg', 'Encrypted with GNU Privacy Guard')], default='unencrypted', editable=False, max_length=11), + ), + ] diff --git a/src/documents/models.py b/src/documents/models.py index 8e072a1cc..5c8c804a4 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -239,7 +239,7 @@ class Document(models.Model): storage_type = models.CharField( max_length=11, choices=STORAGE_TYPES, - default=STORAGE_TYPE_GPG, + default=STORAGE_TYPE_UNENCRYPTED, editable=False ) From 6e1f2b3f03bcf3ffee956ea64486fd014de6ca63 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 27 May 2018 23:20:04 +0100 Subject: [PATCH 16/30] Drop STORAGE_TYPE in favour of just using PAPERLESS_PASSPHRASE --- src/documents/consumer.py | 16 +++++----------- src/manage.py | 10 ---------- src/paperless/settings.py | 26 +++++++++++--------------- 3 files changed, 16 insertions(+), 36 deletions(-) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 5abba9862..84a1ff3ca 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -48,15 +48,9 @@ class Consumer: except FileExistsError: pass - acceptable_storage_types = [_[0] for _ in Document.STORAGE_TYPES] - if settings.STORAGE_TYPE not in acceptable_storage_types: - raise ConsumerError( - 'Invalid STORAGE_TYPE "{}" defined. It must be one of {}. ' - 'Exiting.'.format( - settings.STORAGE_TYPE, - ", ".join(acceptable_storage_types) - ) - ) + self.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + if settings.PASSPHRASE: + self.storage_type = Document.STORAGE_TYPE_GPG self.stats = {} self._ignore = [] @@ -209,7 +203,7 @@ class Consumer: checksum=hashlib.md5(f.read()).hexdigest(), created=created, modified=created, - storage_type=settings.STORAGE_TYPE + storage_type=self.storage_type ) relevant_tags = set(list(Tag.match_all(text)) + list(file_info.tags)) @@ -231,7 +225,7 @@ class Consumer: if document.storage_type == Document.STORAGE_TYPE_UNENCRYPTED: write_file.write(read_file.read()) return - self.log("debug", "Encrypting the thumbnail") + self.log("debug", "Encrypting") write_file.write(GnuPG.encrypted(read_file)) def _cleanup_doc(self, doc): diff --git a/src/manage.py b/src/manage.py index 782981628..e708eaba6 100755 --- a/src/manage.py +++ b/src/manage.py @@ -6,16 +6,6 @@ if __name__ == "__main__": os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings") - from django.conf import settings from django.core.management import execute_from_command_line - # The runserver and consumer need to have access to the passphrase, so it - # must be entered at start time to keep it safe. - if "runserver" in sys.argv or "document_consumer" in sys.argv: - if not settings.STORAGE_TYPE == "unencrypted": - while not settings.PASSPHRASE: - settings.PASSPHRASE = input( - "settings.PASSPHRASE is unset. Input passphrase: " - ) - execute_from_command_line(sys.argv) diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 33765c748..ccafe956d 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -251,21 +251,17 @@ CONSUMPTION_DIR = os.getenv("PAPERLESS_CONSUMPTION_DIR") # slowly, you may want to use a higher value than the default. CONSUMER_LOOP_TIME = int(os.getenv("PAPERLESS_CONSUMER_LOOP_TIME", 10)) -# By default, Paperless will attempt to GPG encrypt your PDF files using the -# PASSPHRASE specified below. If however you're not concerned about encrypting -# these files (for example if you have disk encryption locally) then -# you don't need this and can safely turn it off by setting STORAGE_TYPE to -# "unencrypted" here. In such a case, the PASSPHRASE value set below will be -# ignored. -STORAGE_TYPE = os.getenv("PAPERLESS_STORAGE_TYPE", "gpg") - -# This is used to encrypt the original documents and decrypt them later when -# you want to download them. Set it and change the permissions on this file to -# 0600, or set it to `None` and you'll be prompted for the passphrase at -# runtime. The default looks for an environment variable. -# DON'T FORGET TO SET THIS as leaving it blank may cause some strange things -# with GPG, including an interesting case where it may "encrypt" zero-byte -# files. +# Pre-2.x versions of Paperless stored your documents locally with GPG +# encryption, but that is no longer the default. This behaviour is still +# available, but it must be explicitly enabled by setting +# `PAPERLESS_PASSPHRASE` in your environment or config file. The default is to +# store these files unencrypted. +# +# Translation: +# * If you're a new user, you can safely ignore this setting. +# * If you're upgrading from 1.x, this must be set, OR you can run +# `./manage.py change_storage_type gpg unencrypted` to decrypt your files, +# after which you can unset this value. PASSPHRASE = os.getenv("PAPERLESS_PASSPHRASE") # Trigger a script after every successful document consumption? From 27a936f9bf3b173af0915696e65febeabfcf7273 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 27 May 2018 23:20:33 +0100 Subject: [PATCH 17/30] Add script to (de|en)crypt all documents --- .../commands/change_storage_type.py | 119 ++++++++++++++++++ src/paperless/db.py | 19 ++- 2 files changed, 132 insertions(+), 6 deletions(-) create mode 100644 src/documents/management/commands/change_storage_type.py diff --git a/src/documents/management/commands/change_storage_type.py b/src/documents/management/commands/change_storage_type.py new file mode 100644 index 000000000..010b0900c --- /dev/null +++ b/src/documents/management/commands/change_storage_type.py @@ -0,0 +1,119 @@ +import os + +from django.conf import settings +from django.core.management.base import BaseCommand, CommandError +from termcolor import colored as coloured + +from documents.models import Document +from paperless.db import GnuPG + + +class Command(BaseCommand): + + help = ( + "This is how you migrate your stored documents from an encrypted " + "state to an unencrypted one (or vice-versa)" + ) + + def add_arguments(self, parser): + + parser.add_argument( + "from", + choices=("gpg", "unencrypted"), + help="The state you want to change your documents from" + ) + parser.add_argument( + "to", + choices=("gpg", "unencrypted"), + help="The state you want to change your documents to" + ) + parser.add_argument( + "--passphrase", + help="If PAPERLESS_PASSPHRASE isn't set already, you need to " + "specify it here" + ) + + def handle(self, *args, **options): + + try: + print(coloured( + "\n\nWARNING: This script is going to work directly on your " + "document originals, so\nWARNING: you probably shouldn't run " + "this unless you've got a recent backup\nWARNING: handy. It " + "*should* work without a hitch, but be safe and backup your\n" + "WARNING: stuff first.\n\nHit Ctrl+C to exit now, or Enter to " + "continue.\n\n", + "yellow", + attrs=("bold",) + )) + __ = input() + except KeyboardInterrupt: + return + + if options["from"] == options["to"]: + raise CommandError( + 'The "from" and "to" values can\'t be the same.' + ) + + passphrase = options["passphrase"] or settings.PASSPHRASE + if not passphrase: + raise CommandError( + "Passphrase not defined. Please set it with --passphrase or " + "by declaring it in your environment or your config." + ) + + if options["from"] == "gpg" and options["to"] == "unencrypted": + self.__gpg_to_unencrypted(passphrase) + elif options["from"] == "unencrypted" and options["to"] == "gpg": + self.__unencrypted_to_gpg(passphrase) + + @staticmethod + def __gpg_to_unencrypted(passphrase): + + encrypted_files = Document.objects.filter( + storage_type=Document.STORAGE_TYPE_GPG) + + for document in encrypted_files: + + print(coloured("🔓 Decrypting {}".format(document), "green")) + + old_paths = [document.source_path, document.thumbnail_path] + raw_document = GnuPG.decrypted(document.source_file, passphrase) + raw_thumb = GnuPG.decrypted(document.thumbnail_file, passphrase) + + document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED + + with open(document.source_path, "wb") as f: + f.write(raw_document) + + with open(document.thumbnail_path, "wb") as f: + f.write(raw_thumb) + + document.save(update_fields=("storage_type",)) + + for path in old_paths: + os.unlink(path) + + @staticmethod + def __unencrypted_to_gpg(passphrase): + + unencrypted_files = Document.objects.filter( + storage_type=Document.STORAGE_TYPE_UNENCRYPTED) + + for document in unencrypted_files: + + print(coloured("🔒 Encrypting {}".format(document), "green")) + + old_paths = [document.source_path, document.thumbnail_path] + with open(document.source_path, "rb") as raw_document: + with open(document.thumbnail_path, "rb") as raw_thumb: + document.storage_type = Document.STORAGE_TYPE_GPG + with open(document.source_path, "wb") as f: + f.write(GnuPG.encrypted(raw_document, passphrase)) + with open(document.thumbnail_path, "wb") as f: + f.write(GnuPG.encrypted(raw_thumb, passphrase)) + + document.save(update_fields=("storage_type",)) + + for path in old_paths: + os.unlink(path) diff --git a/src/paperless/db.py b/src/paperless/db.py index 49e4fea4b..92275808b 100644 --- a/src/paperless/db.py +++ b/src/paperless/db.py @@ -3,7 +3,7 @@ import gnupg from django.conf import settings -class GnuPG(object): +class GnuPG: """ A handy singleton to use when handling encrypted files. """ @@ -11,15 +11,22 @@ class GnuPG(object): gpg = gnupg.GPG(gnupghome=settings.GNUPG_HOME) @classmethod - def decrypted(cls, file_handle): - return cls.gpg.decrypt_file( - file_handle, passphrase=settings.PASSPHRASE).data + def decrypted(cls, file_handle, passphrase=None): + + if not passphrase: + passphrase = settings.PASSPHRASE + + return cls.gpg.decrypt_file(file_handle, passphrase=passphrase).data @classmethod - def encrypted(cls, file_handle): + def encrypted(cls, file_handle, passphrase=None): + + if not passphrase: + passphrase = settings.PASSPHRASE + return cls.gpg.encrypt_file( file_handle, recipients=None, - passphrase=settings.PASSPHRASE, + passphrase=passphrase, symmetric=True ).data From 0abf637c67d53fb42e1ec077db0894d6e3e4113c Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 27 May 2018 23:20:55 +0100 Subject: [PATCH 18/30] Exclude unencrypted documents & thumbnails --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 6ca9671fb..617cf507a 100644 --- a/.gitignore +++ b/.gitignore @@ -59,8 +59,8 @@ target/ # Stored PDFs media/documents/*.gpg -media/documents/thumbnails/*.gpg -media/documents/originals/*.gpg +media/documents/thumbnails/* +media/documents/originals/* # Sqlite database db.sqlite3 From 9df06fbb12eecfd25d34cacb17f4b4b16ded6abb Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 27 May 2018 23:21:20 +0100 Subject: [PATCH 19/30] Document the big changes for 2.0 --- docker-compose.env.example | 5 +++-- docs/changelog.rst | 29 +++++++++++++++++++++++++++++ paperless.conf.example | 24 ++++++++---------------- 3 files changed, 40 insertions(+), 18 deletions(-) diff --git a/docker-compose.env.example b/docker-compose.env.example index 13c74b6ab..d1c4a2887 100644 --- a/docker-compose.env.example +++ b/docker-compose.env.example @@ -1,8 +1,9 @@ # Environment variables to set for Paperless # Commented out variables will be replaced by a default within Paperless. -# Passphrase Paperless uses to encrypt and decrypt your documents -PAPERLESS_PASSPHRASE=CHANGE_ME +# Passphrase Paperless uses to encrypt and decrypt your documents, if you want +# encryption at all. +# PAPERLESS_PASSPHRASE=CHANGE_ME # The amount of threads to use for text recognition # PAPERLESS_OCR_THREADS=4 diff --git a/docs/changelog.rst b/docs/changelog.rst index 6945b90f6..64e78d6c9 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,35 @@ Changelog ######### +2.0.0 +===== + +This is a big release as we've changed a core-functionality of Paperless: we no +longer encrypt files with GPG by default. + +The reasons for this are many, but it boils down to that the encryption wasn't +really all that useful, as files on-disk were still accessible so long as you +had the key, and the key was most typically stored in the config file. In +other words, your files are only as safe as the ``paperless`` user is. In +addition to that, *the contents of the documents were never encrypted*, so +important numbers etc. were always accessible simply by querying the database. +Still, it was better than nothing, but the consensus from users appears to be +that it was more an annoyance than anything else, so this feature is now turned +off unless you explicitly set a passphrase in your config file. + +Migrating from 1.x +------------------ + +Encryption isn't gone, it's just off for new users. So long as you have +``PAPERLESS_PASSPHRASE`` set in your config or your environment, Paperless +should continue to operate as it always has. If however, you want to drop +encryption too, you only need to do two things: + +1. Run ``./manage.py migrate && ./manage.py change_storage_type gpg unencrypted``. + This will go through your entire database and Decrypt All The Things. +2. Remove ``PAPERLESS_PASSPHRASE`` from your ``paperless.conf`` file, or simply + stop declaring it in your environment. + 1.4.0 ===== diff --git a/paperless.conf.example b/paperless.conf.example index 72adf9d48..6acba5f25 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -59,27 +59,19 @@ PAPERLESS_EMAIL_SECRET="" #### Security #### ############################################################################### -# By default, Paperless will attempt to GPG encrypt your PDF files using the -# PAPERLESS_PASSPHRASE specified below. If however you're not concerned about -# encrypting these files (for example if you have disk encryption locally) then -# you don't need this and can safely turn it off by setting -# PAPERLESS_STORAGE_TYPE="unencrypted" here. In such a case, the PASSPHRASE -# value set below will be ignored. -#PAPERLESS_STORAGE_TYPE="gpg" - -# You must have a passphrase in order for Paperless to work at all. If you set -# this to "", GNUGPG will "encrypt" your PDF by writing it out as a zero-byte -# file. -# -# The passphrase you use here will be used when storing your documents in -# Paperless, but you can always export them in an unencrypted format by using -# document exporter. See the documentation for more information. +# Paperless can be instructed to attempt to encrypt your PDF files with GPG +# using the PAPERLESS_PASSPHRASE specified below. If however you're not +# concerned about encrypting these files (for example if you have disk +# encryption locally) then you don't need this and can safely leave this value +# un-set. # # One final note about the passphrase. Once you've consumed a document with # one passphrase, DON'T CHANGE IT. Paperless assumes this to be a constant and # can't properly export documents that were encrypted with an old passphrase if # you've since changed it to a new one. -PAPERLESS_PASSPHRASE="secret" +# +# The default is to not use encryption at all. +#PAPERLESS_PASSPHRASE="secret" # The secret key has a default that should be fine so long as you're hosting From c37f642cff731e6b8eaedf18cfe42241b74d9a70 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 27 May 2018 23:21:36 +0100 Subject: [PATCH 20/30] Remove old Python2.7-style code --- src/documents/models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/documents/models.py b/src/documents/models.py index 5c8c804a4..245655117 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -57,7 +57,7 @@ class MatchingModel(models.Model): is_insensitive = models.BooleanField(default=True) - class Meta(object): + class Meta: abstract = True def __str__(self): @@ -156,7 +156,7 @@ class Correspondent(MatchingModel): # better safe than sorry. SAFE_REGEX = re.compile(r"^[\w\- ,.']+$") - class Meta(object): + class Meta: ordering = ("name",) @@ -243,7 +243,7 @@ class Document(models.Model): editable=False ) - class Meta(object): + class Meta: ordering = ("correspondent", "title") def __str__(self): @@ -322,7 +322,7 @@ class Log(models.Model): objects = LogManager() - class Meta(object): + class Meta: ordering = ("-modified",) def __str__(self): From a1cb67c4ce53d1053a337630bea52870063baf73 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 27 May 2018 23:32:18 +0100 Subject: [PATCH 21/30] Don't check changed passphrase if no passphrase set --- src/documents/checks.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/documents/checks.py b/src/documents/checks.py index 2f7ebd89e..37386f37c 100644 --- a/src/documents/checks.py +++ b/src/documents/checks.py @@ -1,3 +1,4 @@ +from django.conf import settings from django.core.checks import Warning, register from django.db.utils import OperationalError @@ -8,6 +9,9 @@ def changed_password_check(app_configs, **kwargs): from documents.models import Document from paperless.db import GnuPG + if not settings.PASSPHRASE: + return [] + warning = ( "At least one document:\n\n {}\n\nin your data store was encrypted " "with a password other than the one currently\nin use. This means " From 3d188ec623411f2daa7b0fe6341eb94ae0912a6b Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 17 Jun 2018 16:47:38 +0100 Subject: [PATCH 22/30] Fix migrations --- ...0_document_storage_type.py => 0021_document_storage_type.py} | 2 +- .../{0021_auto_20180527_1653.py => 0022_auto_20180527_1653.py} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename src/documents/migrations/{0020_document_storage_type.py => 0021_document_storage_type.py} (91%) rename src/documents/migrations/{0021_auto_20180527_1653.py => 0022_auto_20180527_1653.py} (91%) diff --git a/src/documents/migrations/0020_document_storage_type.py b/src/documents/migrations/0021_document_storage_type.py similarity index 91% rename from src/documents/migrations/0020_document_storage_type.py rename to src/documents/migrations/0021_document_storage_type.py index 83a3a2b5d..52187ea9b 100644 --- a/src/documents/migrations/0020_document_storage_type.py +++ b/src/documents/migrations/0021_document_storage_type.py @@ -8,7 +8,7 @@ from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('documents', '0019_add_consumer_user'), + ('documents', '0020_document_added'), ] operations = [ diff --git a/src/documents/migrations/0021_auto_20180527_1653.py b/src/documents/migrations/0022_auto_20180527_1653.py similarity index 91% rename from src/documents/migrations/0021_auto_20180527_1653.py rename to src/documents/migrations/0022_auto_20180527_1653.py index 61d120406..2d382da7b 100644 --- a/src/documents/migrations/0021_auto_20180527_1653.py +++ b/src/documents/migrations/0022_auto_20180527_1653.py @@ -8,7 +8,7 @@ from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('documents', '0020_document_storage_type'), + ('documents', '0021_document_storage_type'), ] operations = [ From 988adf963a9c200e8fc8dd9bb48e6e8eb1d8abb2 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 17 Jun 2018 17:06:22 +0100 Subject: [PATCH 23/30] Update import & export to handle encryption toggle --- .../management/commands/document_exporter.py | 24 +++++++------ .../management/commands/document_importer.py | 34 ++++++++++--------- 2 files changed, 32 insertions(+), 26 deletions(-) diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py index e7b7a8639..fce09092c 100644 --- a/src/documents/management/commands/document_exporter.py +++ b/src/documents/management/commands/document_exporter.py @@ -1,8 +1,8 @@ import json import os import time +import shutil -from django.conf import settings from django.core.management.base import BaseCommand, CommandError from django.core import serializers @@ -45,9 +45,6 @@ class Command(Renderable, BaseCommand): if not os.access(self.target, os.W_OK): raise CommandError("That path doesn't appear to be writable") - if not settings.PASSPHRASE: - settings.PASSPHRASE = input("Please enter the passphrase: ") - if options["legacy"]: self.dump_legacy() else: @@ -73,13 +70,20 @@ class Command(Renderable, BaseCommand): print("Exporting: {}".format(file_target)) t = int(time.mktime(document.created.timetuple())) - with open(file_target, "wb") as f: - f.write(GnuPG.decrypted(document.source_file)) - os.utime(file_target, times=(t, t)) + if document.storage_type == Document.STORAGE_TYPE_GPG: - with open(thumbnail_target, "wb") as f: - f.write(GnuPG.decrypted(document.thumbnail_file)) - os.utime(thumbnail_target, times=(t, t)) + with open(file_target, "wb") as f: + f.write(GnuPG.decrypted(document.source_file)) + os.utime(file_target, times=(t, t)) + + with open(thumbnail_target, "wb") as f: + f.write(GnuPG.decrypted(document.thumbnail_file)) + os.utime(thumbnail_target, times=(t, t)) + + else: + + shutil.copy(document.source_path, file_target) + shutil.copy(document.thumbnail_path, thumbnail_target) manifest += json.loads( serializers.serialize("json", Correspondent.objects.all())) diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py index a89f0d4ef..15401722c 100644 --- a/src/documents/management/commands/document_importer.py +++ b/src/documents/management/commands/document_importer.py @@ -1,5 +1,6 @@ import json import os +import shutil from django.conf import settings from django.core.management.base import BaseCommand, CommandError @@ -46,12 +47,6 @@ class Command(Renderable, BaseCommand): self._check_manifest() - if not settings.PASSPHRASE: - raise CommandError( - "You need to define a passphrase before continuing. Please " - "consult the documentation for setting up Paperless." - ) - # Fill up the database with whatever is in the manifest call_command("loaddata", manifest_path) @@ -99,14 +94,21 @@ class Command(Renderable, BaseCommand): document_path = os.path.join(self.source, doc_file) thumbnail_path = os.path.join(self.source, thumb_file) - with open(document_path, "rb") as unencrypted: - with open(document.source_path, "wb") as encrypted: - print("Encrypting {} and saving it to {}".format( - doc_file, document.source_path)) - encrypted.write(GnuPG.encrypted(unencrypted)) + if document.storage_type == Document.STORAGE_TYPE_GPG: - with open(thumbnail_path, "rb") as unencrypted: - with open(document.thumbnail_path, "wb") as encrypted: - print("Encrypting {} and saving it to {}".format( - thumb_file, document.thumbnail_path)) - encrypted.write(GnuPG.encrypted(unencrypted)) + with open(document_path, "rb") as unencrypted: + with open(document.source_path, "wb") as encrypted: + print("Encrypting {} and saving it to {}".format( + doc_file, document.source_path)) + encrypted.write(GnuPG.encrypted(unencrypted)) + + with open(thumbnail_path, "rb") as unencrypted: + with open(document.thumbnail_path, "wb") as encrypted: + print("Encrypting {} and saving it to {}".format( + thumb_file, document.thumbnail_path)) + encrypted.write(GnuPG.encrypted(unencrypted)) + + else: + + shutil.copy(document_path, document.source_path) + shutil.copy(thumbnail_path, document.thumbnail_path) From a0240cace3e3eeaeb565ab77dae08aab3e17e86f Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 17 Jun 2018 17:08:24 +0100 Subject: [PATCH 24/30] Update docs for new encryption toggle --- docs/consumption.rst | 3 ++- docs/migrating.rst | 2 +- docs/setup.rst | 23 +++++++++++++---------- docs/utilities.rst | 4 ++-- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/docs/consumption.rst b/docs/consumption.rst index 61517b5bf..bf62ed0a2 100644 --- a/docs/consumption.rst +++ b/docs/consumption.rst @@ -17,7 +17,8 @@ The primary method of getting documents into your database is by putting them in the consumption directory. The ``document_consumer`` script runs in an infinite loop looking for new additions to this directory and when it finds them, it goes about the process of parsing them with the OCR, indexing what it finds, and -encrypting the PDF, storing it in the media directory. +encrypting the PDF (if ``PAPERLESS_PASSPHRASE`` is set), storing it in the +media directory. Getting stuff into this directory is up to you. If you're running Paperless on your local computer, you might just want to drag and drop files there, but if diff --git a/docs/migrating.rst b/docs/migrating.rst index 4d20f4714..d97d3d4bf 100644 --- a/docs/migrating.rst +++ b/docs/migrating.rst @@ -16,7 +16,7 @@ Backing Up ---------- So you're bored of this whole project, or you want to make a remote backup of -the unencrypted files for whatever reason. This is easy to do, simply use the +your files for whatever reason. This is easy to do, simply use the :ref:`exporter ` to dump your documents and database out into an arbitrary directory. diff --git a/docs/setup.rst b/docs/setup.rst index 1467fb2c9..046589c40 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -63,17 +63,18 @@ Standard (Bare Metal) 1. Install the requirements as per the :ref:`requirements ` page. 2. Within the extract of master.zip go to the ``src`` directory. -3. Copy ``../paperless.conf.example`` to ``/etc/paperless.conf`` also the virtual - envrionment look there for it and open it in your favourite editor. - Because this file contains passwords it should only be readable by user root - and paperless ! Set the values for: +3. Copy ``../paperless.conf.example`` to ``/etc/paperless.conf`` and open it in + your favourite editor. Because this file contains passwords it should only + be readable by user root and paperless! Set the values for: * ``PAPERLESS_CONSUMPTION_DIR``: this is where your documents will be dumped to be consumed by Paperless. - * ``PAPERLESS_PASSPHRASE``: this is the passphrase Paperless uses to - encrypt/decrypt the original document. * ``PAPERLESS_OCR_THREADS``: this is the number of threads the OCR process will spawn to process document pages in parallel. + * ``PAPERLESS_PASSPHRASE``: this is only required if you want to use GPG to + encrypt your document files. This is the passphrase Paperless uses to + encrypt/decrypt the original documents. Don't worry about defining this + if you don't want to use encryption (the default). 4. Initialise the SQLite database with ``./manage.py migrate``. 5. Create a user for your Paperless instance with @@ -139,7 +140,8 @@ Docker Method ``PAPERLESS_PASSPHRASE`` This is the passphrase Paperless uses to encrypt/decrypt the original - document. + document. If you aren't planning on using GPG encryption, you can just + leave this undefined. ``PAPERLESS_OCR_THREADS`` This is the number of threads the OCR process will spawn to process @@ -265,10 +267,11 @@ Vagrant Method 3. Run ``vagrant ssh`` and once inside your new vagrant box, edit ``/etc/paperless.conf`` and set the values for: - * ``PAPERLESS_CONSUMPTION_DIR``: this is where your documents will be + * ``PAPERLESS_CONSUMPTION_DIR``: This is where your documents will be dumped to be consumed by Paperless. - * ``PAPERLESS_PASSPHRASE``: this is the passphrase Paperless uses to - encrypt/decrypt the original document. + * ``PAPERLESS_PASSPHRASE``: This is the passphrase Paperless uses to + encrypt/decrypt the original document. It's only required if you want + your original files to be encrypted, otherwise, just leave it unset. * ``PAPERLESS_EMAIL_SECRET``: this is the "magic word" used when consuming documents from mail or via the API. If you don't use either, leaving it blank is just fine. diff --git a/docs/utilities.rst b/docs/utilities.rst index b9ded25fc..782472c3a 100644 --- a/docs/utilities.rst +++ b/docs/utilities.rst @@ -59,8 +59,8 @@ for documents to parse and index. The process is pretty straightforward: 4. Attempt to automatically assign document attributes by doing some guesswork. Read up on the :ref:`guesswork documentation` for more information about this process. -5. Encrypt the document and store it in the ``media`` directory under - ``documents/originals``. +5. Encrypt the document (if you have a passphrase set) and store it in the + ``media`` directory under ``documents/originals``. 6. Go to #1. From 90cd9f3eb707b96a0767eb216dd5f7d6d1f73c75 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 17 Jun 2018 17:10:45 +0100 Subject: [PATCH 25/30] Drop lines thanks to @erikarvstedt's eagle-eye --- src/documents/consumer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index b390f6800..2b1d09e49 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -54,9 +54,6 @@ class Consumer: if settings.PASSPHRASE: self.storage_type = Document.STORAGE_TYPE_GPG - self.stats = {} - self._ignore = [] - if not self.consume: raise ConsumerError( "The CONSUMPTION_DIR settings variable does not appear to be " From d6d8537b690d03f82aa5f7ebe9468839d5bb0839 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 17 Jun 2018 17:23:50 +0100 Subject: [PATCH 26/30] Remove emoji from storage-type changer --- src/documents/management/commands/change_storage_type.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/documents/management/commands/change_storage_type.py b/src/documents/management/commands/change_storage_type.py index 010b0900c..668f534a9 100644 --- a/src/documents/management/commands/change_storage_type.py +++ b/src/documents/management/commands/change_storage_type.py @@ -75,7 +75,7 @@ class Command(BaseCommand): for document in encrypted_files: - print(coloured("🔓 Decrypting {}".format(document), "green")) + print(coloured("Decrypting {}".format(document), "green")) old_paths = [document.source_path, document.thumbnail_path] raw_document = GnuPG.decrypted(document.source_file, passphrase) @@ -102,7 +102,7 @@ class Command(BaseCommand): for document in unencrypted_files: - print(coloured("🔒 Encrypting {}".format(document), "green")) + print(coloured("Encrypting {}".format(document), "green")) old_paths = [document.source_path, document.thumbnail_path] with open(document.source_path, "rb") as raw_document: From b3624f637582e1b06f1ccad1810280b4012342fe Mon Sep 17 00:00:00 2001 From: Erik Arvstedt Date: Tue, 29 May 2018 23:59:30 +0200 Subject: [PATCH 27/30] Improve password check 1. Fail when the db contains encrypted docs and no password is set. Previously, this case wasn't detected. 2. Exit with an error instead of showing warnings. This ensures that we never store docs with different encryption passwords. --- src/documents/checks.py | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/documents/checks.py b/src/documents/checks.py index 37386f37c..d564d8e8e 100644 --- a/src/documents/checks.py +++ b/src/documents/checks.py @@ -1,5 +1,5 @@ from django.conf import settings -from django.core.checks import Warning, register +from django.core.checks import Error, register from django.db.utils import OperationalError @@ -9,25 +9,28 @@ def changed_password_check(app_configs, **kwargs): from documents.models import Document from paperless.db import GnuPG - if not settings.PASSPHRASE: - return [] - - warning = ( - "At least one document:\n\n {}\n\nin your data store was encrypted " - "with a password other than the one currently\nin use. This means " - "that this file, and others encrypted with the other\npassword are no " - "longer acessible, which is probably not what you want. If\nyou " - "intend to change your Paperless password, you must first export all " - "of\nthe old documents, start fresh with the new password and then " - "re-import them." - ) - try: - document = Document.objects.order_by("-pk").filter( + encrypted_doc = Document.objects.filter( storage_type=Document.STORAGE_TYPE_GPG).first() - if document and not GnuPG.decrypted(document.source_file): - return [Warning(warning.format(document))] except OperationalError: - pass # No documents table yet + return [] # No documents table yet + + if encrypted_doc: + if not settings.PASSPHRASE: + return [Error( + "The database contains encrypted documents but no password " + "is set." + )] + elif not GnuPG.decrypted(encrypted_doc.source_file): + import textwrap + return [Error(textwrap.dedent( + """ + The current password doesn't match the password of the + existing documents. + + If you intend to change your password, you must first export + all of the old documents, start fresh with the new password + and then re-import them." + """))] return [] From d37aabfb067a7326ea14ff453a70f19c4189c93d Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 17 Jun 2018 20:14:46 +0100 Subject: [PATCH 28/30] Put imports at the top --- src/documents/checks.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/documents/checks.py b/src/documents/checks.py index d564d8e8e..c80b63863 100644 --- a/src/documents/checks.py +++ b/src/documents/checks.py @@ -1,3 +1,5 @@ +import textwrap + from django.conf import settings from django.core.checks import Error, register from django.db.utils import OperationalError @@ -16,13 +18,14 @@ def changed_password_check(app_configs, **kwargs): return [] # No documents table yet if encrypted_doc: + if not settings.PASSPHRASE: return [Error( "The database contains encrypted documents but no password " "is set." )] - elif not GnuPG.decrypted(encrypted_doc.source_file): - import textwrap + + if not GnuPG.decrypted(encrypted_doc.source_file): return [Error(textwrap.dedent( """ The current password doesn't match the password of the From 742b01d1f5a905114f66201d554393b8e4ad8963 Mon Sep 17 00:00:00 2001 From: Erik Arvstedt Date: Tue, 29 May 2018 23:59:33 +0200 Subject: [PATCH 29/30] Update Consumer class documentation --- src/documents/consumer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 2b1d09e49..28fc28f9e 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -29,7 +29,7 @@ class Consumer: Loop over every file found in CONSUMPTION_DIR and: 1. Convert it to a greyscale pnm 2. Use tesseract on the pnm - 3. Encrypt and store the document in the MEDIA_ROOT + 3. Store the document in the MEDIA_ROOT with optional encryption 4. Store the OCR'd text in the database 5. Delete the document and image(s) """ From 631d3169851cdb817c0c5021a43eceffb2d68d15 Mon Sep 17 00:00:00 2001 From: Daniel Quinn Date: Sun, 17 Jun 2018 20:23:54 +0100 Subject: [PATCH 30/30] Merge the storage_type migrations --- .../migrations/0021_document_storage_type.py | 10 ++++++++++ .../migrations/0022_auto_20180527_1653.py | 20 ------------------- 2 files changed, 10 insertions(+), 20 deletions(-) delete mode 100644 src/documents/migrations/0022_auto_20180527_1653.py diff --git a/src/documents/migrations/0021_document_storage_type.py b/src/documents/migrations/0021_document_storage_type.py index 52187ea9b..cec172b93 100644 --- a/src/documents/migrations/0021_document_storage_type.py +++ b/src/documents/migrations/0021_document_storage_type.py @@ -12,9 +12,19 @@ class Migration(migrations.Migration): ] operations = [ + + # Add the field with the default GPG-encrypted value migrations.AddField( model_name='document', name='storage_type', field=models.CharField(choices=[('unencrypted', 'Unencrypted'), ('gpg', 'Encrypted with GNU Privacy Guard')], default='gpg', editable=False, max_length=11), ), + + # Now that the field is added, change the default to unencrypted + migrations.AlterField( + model_name='document', + name='storage_type', + field=models.CharField(choices=[('unencrypted', 'Unencrypted'), ('gpg', 'Encrypted with GNU Privacy Guard')], default='unencrypted', editable=False, max_length=11), + ), + ] diff --git a/src/documents/migrations/0022_auto_20180527_1653.py b/src/documents/migrations/0022_auto_20180527_1653.py deleted file mode 100644 index 2d382da7b..000000000 --- a/src/documents/migrations/0022_auto_20180527_1653.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by Django 1.11.13 on 2018-05-27 16:53 -from __future__ import unicode_literals - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('documents', '0021_document_storage_type'), - ] - - operations = [ - migrations.AlterField( - model_name='document', - name='storage_type', - field=models.CharField(choices=[('unencrypted', 'Unencrypted'), ('gpg', 'Encrypted with GNU Privacy Guard')], default='unencrypted', editable=False, max_length=11), - ), - ]