mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Added GPG encryption for the PDFs
This commit is contained in:
		| @@ -1,5 +1,5 @@ | |||||||
| from django.conf import settings |  | ||||||
| from django.contrib import admin | from django.contrib import admin | ||||||
|  | from django.core.urlresolvers import reverse | ||||||
| from django.templatetags.static import static | from django.templatetags.static import static | ||||||
|  |  | ||||||
| from .models import Document | from .models import Document | ||||||
| @@ -8,27 +8,20 @@ from .models import Document | |||||||
| class DocumentAdmin(admin.ModelAdmin): | class DocumentAdmin(admin.ModelAdmin): | ||||||
|  |  | ||||||
|     search_fields = ("sender", "title", "content",) |     search_fields = ("sender", "title", "content",) | ||||||
|     list_display = ("edit", "created", "sender", "title", "thumbnail", "pdf") |     list_display = ("edit", "created", "sender", "title", "pdf") | ||||||
|     list_filter = ("created", "sender") |     list_filter = ("created", "sender") | ||||||
|     save_on_top = True |     save_on_top = True | ||||||
|  |  | ||||||
|     def edit(self, obj): |     def edit(self, obj): | ||||||
|         return '<img src="{}" width="64" height="64" alt="Edit icon" />'.format( |         return '<img src="{}" width="22" height="22" alt="Edit icon" />'.format( | ||||||
|             static("documents/img/edit.png")) |             static("documents/img/edit.png")) | ||||||
|     edit.allow_tags = True |     edit.allow_tags = True | ||||||
|  |  | ||||||
|     def thumbnail(self, obj): |  | ||||||
|         return '<a href="{media}documents/img/{pk:07}.jpg" target="_blank">' \ |  | ||||||
|                  '<img src="{media}documents/img/{pk:07}.jpg" width="100" />' \ |  | ||||||
|                '</a>'.format(media=settings.MEDIA_URL, pk=obj.pk) |  | ||||||
|     thumbnail.allow_tags = True |  | ||||||
|  |  | ||||||
|     def pdf(self, obj): |     def pdf(self, obj): | ||||||
|         return '<a href="{}documents/pdf/{:07}.pdf">' \ |         return '<a href="{}">' \ | ||||||
|                  '<img src="{}" width="64" height="64" alt="PDF icon">' \ |                  '<img src="{}" width="22" height="22" alt="PDF icon">' \ | ||||||
|                '</a>'.format( |                '</a>'.format( | ||||||
|                     settings.MEDIA_URL, |                     reverse("fetch", kwargs={"pk": obj.pk}), | ||||||
|                     obj.pk, |  | ||||||
|                     static("documents/img/application-pdf.png") |                     static("documents/img/application-pdf.png") | ||||||
|                 ) |                 ) | ||||||
|     pdf.allow_tags = True |     pdf.allow_tags = True | ||||||
|   | |||||||
| @@ -1,9 +1,9 @@ | |||||||
| import datetime | import datetime | ||||||
| import glob | import glob | ||||||
|  | import gnupg | ||||||
| import os | import os | ||||||
| import random | import random | ||||||
| import re | import re | ||||||
| import shutil |  | ||||||
| import subprocess | import subprocess | ||||||
| import time | import time | ||||||
|  |  | ||||||
| @@ -36,8 +36,6 @@ class Command(BaseCommand): | |||||||
|     CONSUME = settings.CONSUMPTION_DIR |     CONSUME = settings.CONSUMPTION_DIR | ||||||
|  |  | ||||||
|     OCR = pyocr.get_available_tools()[0] |     OCR = pyocr.get_available_tools()[0] | ||||||
|  |  | ||||||
|     MEDIA_IMG = os.path.join(settings.MEDIA_ROOT, "documents", "img") |  | ||||||
|     MEDIA_PDF = os.path.join(settings.MEDIA_ROOT, "documents", "pdf") |     MEDIA_PDF = os.path.join(settings.MEDIA_ROOT, "documents", "pdf") | ||||||
|  |  | ||||||
|     PARSER_REGEX = re.compile(r"^.*/(.*) - (.*)\.pdf$") |     PARSER_REGEX = re.compile(r"^.*/(.*) - (.*)\.pdf$") | ||||||
| @@ -45,6 +43,7 @@ class Command(BaseCommand): | |||||||
|     def __init__(self, *args, **kwargs): |     def __init__(self, *args, **kwargs): | ||||||
|         self.verbosity = 0 |         self.verbosity = 0 | ||||||
|         self.stats = {} |         self.stats = {} | ||||||
|  |         self.gpg = gnupg.GPG(gnupghome=settings.GNUPG_HOME) | ||||||
|         BaseCommand.__init__(self, *args, **kwargs) |         BaseCommand.__init__(self, *args, **kwargs) | ||||||
|  |  | ||||||
|     def handle(self, *args, **options): |     def handle(self, *args, **options): | ||||||
| @@ -77,18 +76,16 @@ class Command(BaseCommand): | |||||||
|             if self._is_ready(pdf): |             if self._is_ready(pdf): | ||||||
|                 continue |                 continue | ||||||
|  |  | ||||||
|             if self.verbosity > 1: |             self._render("Consuming {}".format(pdf), 1) | ||||||
|                 print("Consuming {}".format(pdf)) |  | ||||||
|  |  | ||||||
|             pngs = self._get_greyscale(pdf) |             pngs = self._get_greyscale(pdf) | ||||||
|             jpgs = self._get_colour(pdf) |  | ||||||
|             text = self._get_ocr(pngs) |             text = self._get_ocr(pngs) | ||||||
|  |  | ||||||
|             self._store(text, jpgs, pdf) |             self._store(text, pdf) | ||||||
|             self._cleanup(pngs, jpgs) |             self._cleanup(pngs, pdf) | ||||||
|  |  | ||||||
|     def _setup(self): |     def _setup(self): | ||||||
|         for d in (self.SCRATCH, self.MEDIA_IMG, self.MEDIA_PDF): |         for d in (self.SCRATCH, self.MEDIA_PDF): | ||||||
|             try: |             try: | ||||||
|                 os.makedirs(d) |                 os.makedirs(d) | ||||||
|             except FileExistsError: |             except FileExistsError: | ||||||
| @@ -112,7 +109,9 @@ class Command(BaseCommand): | |||||||
|  |  | ||||||
|     def _get_greyscale(self, pdf): |     def _get_greyscale(self, pdf): | ||||||
|  |  | ||||||
|         i = random.randint(1000000, 4999999) |         self._render("  Generating greyscale image", 2) | ||||||
|  |  | ||||||
|  |         i = random.randint(1000000, 9999999) | ||||||
|         png = os.path.join(self.SCRATCH, "{}.png".format(i)) |         png = os.path.join(self.SCRATCH, "{}.png".format(i)) | ||||||
|  |  | ||||||
|         subprocess.Popen(( |         subprocess.Popen(( | ||||||
| @@ -122,45 +121,46 @@ class Command(BaseCommand): | |||||||
|  |  | ||||||
|         return sorted(glob.glob(os.path.join(self.SCRATCH, "{}*".format(i)))) |         return sorted(glob.glob(os.path.join(self.SCRATCH, "{}*".format(i)))) | ||||||
|  |  | ||||||
|     def _get_colour(self, pdf): |  | ||||||
|  |  | ||||||
|         i = random.randint(5000000, 9999999) |  | ||||||
|         jpg = os.path.join(self.SCRATCH, "{}.jpg".format(i)) |  | ||||||
|  |  | ||||||
|         subprocess.Popen((self.CONVERT, pdf, jpg)).wait() |  | ||||||
|  |  | ||||||
|         return sorted(glob.glob(os.path.join(self.SCRATCH, "{}*".format(i)))) |  | ||||||
|  |  | ||||||
|     def _get_ocr(self, pngs): |     def _get_ocr(self, pngs): | ||||||
|  |  | ||||||
|  |         self._render("  OCRing the PDF", 2) | ||||||
|  |  | ||||||
|         r = "" |         r = "" | ||||||
|         for png in pngs: |         for png in pngs: | ||||||
|             with Image.open(os.path.join(self.SCRATCH, png)) as f: |             with Image.open(os.path.join(self.SCRATCH, png)) as f: | ||||||
|  |                 self._render("    {}".format(f.filename), 3) | ||||||
|                 r += self.OCR.image_to_string(f) |                 r += self.OCR.image_to_string(f) | ||||||
|                 r += "\n\n\n\n\n\n\n\n" |                 r += "\n\n\n\n\n\n\n\n" | ||||||
|  |  | ||||||
|         return r |         return r | ||||||
|  |  | ||||||
|     def _store(self, text, jpgs, pdf): |     def _store(self, text, pdf): | ||||||
|  |  | ||||||
|         sender, title = self._parse_file_name(pdf) |         sender, title = self._parse_file_name(pdf) | ||||||
|  |  | ||||||
|         stats = os.stat(pdf) |         stats = os.stat(pdf) | ||||||
|  |  | ||||||
|  |         self._render("  Saving record to database", 2) | ||||||
|  |  | ||||||
|         doc = Document.objects.create( |         doc = Document.objects.create( | ||||||
|                 sender=sender, |             sender=sender, | ||||||
|                 title=title, |             title=title, | ||||||
|                 content=text, |             content=text, | ||||||
|                 created=timezone.make_aware( |             created=timezone.make_aware( | ||||||
|                     datetime.datetime.fromtimestamp(stats.st_ctime)), |                 datetime.datetime.fromtimestamp(stats.st_mtime)), | ||||||
|                 modified=timezone.make_aware( |             modified=timezone.make_aware( | ||||||
|                     datetime.datetime.fromtimestamp(stats.st_mtime)), |                 datetime.datetime.fromtimestamp(stats.st_mtime)) | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
|         shutil.move(jpgs[0], os.path.join( |         with open(pdf, "rb") as unencrypted: | ||||||
|             self.MEDIA_IMG, "{:07}.jpg".format(doc.pk))) |             with open(doc.pdf_path, "wb") as encrypted: | ||||||
|         shutil.move(pdf, os.path.join( |                 self._render("  Encrypting", 3) | ||||||
|             self.MEDIA_PDF, "{:07}.pdf".format(doc.pk))) |                 encrypted.write(self.gpg.encrypt_file( | ||||||
|  |                     unencrypted, | ||||||
|  |                     recipients=None, | ||||||
|  |                     passphrase=settings.PASSPHRASE, | ||||||
|  |                     symmetric=True | ||||||
|  |                 ).data) | ||||||
|  |  | ||||||
|     def _parse_file_name(self, pdf): |     def _parse_file_name(self, pdf): | ||||||
|         """ |         """ | ||||||
| @@ -175,12 +175,15 @@ class Command(BaseCommand): | |||||||
|  |  | ||||||
|         return "", "" |         return "", "" | ||||||
|  |  | ||||||
|     def _cleanup(self, pngs, jpgs): |     def _cleanup(self, pngs, pdf): | ||||||
|  |  | ||||||
|         jpg_glob = os.path.join( |  | ||||||
|             self.SCRATCH, re.sub(r"^.*/(\d+)-\d+.jpg$", "\\1*", jpgs[0])) |  | ||||||
|         png_glob = os.path.join( |         png_glob = os.path.join( | ||||||
|             self.SCRATCH, re.sub(r"^.*/(\d+)-\d+.png$", "\\1*", pngs[0])) |             self.SCRATCH, re.sub(r"^.*/(\d+)-\d+.png$", "\\1*", pngs[0])) | ||||||
|  |  | ||||||
|         for f in list(glob.glob(jpg_glob)) + list(glob.glob(png_glob)): |         for f in list(glob.glob(png_glob)) + [pdf]: | ||||||
|  |             self._render("  Deleting {}".format(f), 2) | ||||||
|             os.unlink(f) |             os.unlink(f) | ||||||
|  |  | ||||||
|  |     def _render(self, text, verbosity): | ||||||
|  |         if self.verbosity >= verbosity: | ||||||
|  |             print(text) | ||||||
|   | |||||||
| @@ -1,3 +1,6 @@ | |||||||
|  | import os | ||||||
|  |  | ||||||
|  | from django.conf import settings | ||||||
| from django.db import models | from django.db import models | ||||||
| from django.utils import timezone | from django.utils import timezone | ||||||
|  |  | ||||||
| @@ -20,3 +23,16 @@ class Document(models.Model): | |||||||
|         if self.sender or self.title: |         if self.sender or self.title: | ||||||
|             return "{}: {}, {}".format(created, self.sender or self.title) |             return "{}: {}, {}".format(created, self.sender or self.title) | ||||||
|         return str(created) |         return str(created) | ||||||
|  |  | ||||||
|  |     @property | ||||||
|  |     def pdf_path(self): | ||||||
|  |         return os.path.join( | ||||||
|  |             settings.MEDIA_ROOT, | ||||||
|  |             "documents", | ||||||
|  |             "pdf", | ||||||
|  |             "{:07}.pdf.gpg".format(self.pk) | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |     @property | ||||||
|  |     def pdf(self): | ||||||
|  |         return open(self.pdf_path, "rb") | ||||||
|   | |||||||
| @@ -1,3 +1,29 @@ | |||||||
| from django.shortcuts import render | import gnupg | ||||||
|  |  | ||||||
| # Create your views here. | from django.conf import settings | ||||||
|  | from django.http import HttpResponse | ||||||
|  | from django.template.defaultfilters import slugify | ||||||
|  | from django.views.generic.detail import DetailView | ||||||
|  |  | ||||||
|  | from .models import Document | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class PdfView(DetailView): | ||||||
|  |  | ||||||
|  |     model = Document | ||||||
|  |  | ||||||
|  |     def render_to_response(self, context, **response_kwargs): | ||||||
|  |         """ | ||||||
|  |         Override the default to return the unencrypted PDF as raw data. | ||||||
|  |         """ | ||||||
|  |  | ||||||
|  |         gpg = gnupg.GPG(gnupghome=settings.GNUPG_HOME) | ||||||
|  |  | ||||||
|  |         response = HttpResponse(gpg.decrypt_file( | ||||||
|  |             self.object.pdf, | ||||||
|  |             passphrase=settings.PASSPHRASE, | ||||||
|  |         ).data, content_type="application/pdf") | ||||||
|  |         response["Content-Disposition"] = 'attachment; filename="{}"'.format( | ||||||
|  |             slugify(str(self.object)) + ".pdf") | ||||||
|  |  | ||||||
|  |         return response | ||||||
|   | |||||||
| @@ -5,6 +5,15 @@ import sys | |||||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||||
|     os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings") |     os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings") | ||||||
|  |  | ||||||
|  |     from django.conf import settings | ||||||
|     from django.core.management import execute_from_command_line |     from django.core.management import execute_from_command_line | ||||||
|  |  | ||||||
|  |     # The runserver and consumer need to have access to the passphrase, so it | ||||||
|  |     # must be entered at start time to keep it safe. | ||||||
|  |     if "runserver" in sys.argv or "consume" in sys.argv: | ||||||
|  |         settings.PASSPHRASE = "asdf" | ||||||
|  |         if not settings.DEBUG: | ||||||
|  |             settings.PASSPHRASE = input( | ||||||
|  |                 "Production environment.  Input passphrase: ") | ||||||
|  |  | ||||||
|     execute_from_command_line(sys.argv) |     execute_from_command_line(sys.argv) | ||||||
|   | |||||||
							
								
								
									
										4
									
								
								src/paperless/requirements.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								src/paperless/requirements.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | |||||||
|  | Django==1.9 | ||||||
|  | Pillow==3.0.0 | ||||||
|  | pyocr==0.3.1 | ||||||
|  | python-gnupg==0.3.8 | ||||||
| @@ -135,3 +135,5 @@ MEDIA_URL = "/media/" | |||||||
| CONVERT_BINARY = "/usr/bin/convert" | CONVERT_BINARY = "/usr/bin/convert" | ||||||
| SCRATCH_DIR = "/tmp/paperless"  # Will be created if it doesn't exist | SCRATCH_DIR = "/tmp/paperless"  # Will be created if it doesn't exist | ||||||
| CONSUMPTION_DIR = "/tmp/paperless/consume" | CONSUMPTION_DIR = "/tmp/paperless/consume" | ||||||
|  | GNUPG_HOME = os.environ.get("HOME", "/dev/null") | ||||||
|  | PASSPHRASE = None  # Set via manage.py | ||||||
|   | |||||||
| @@ -18,6 +18,9 @@ from django.conf import settings | |||||||
| from django.conf.urls import url, static | from django.conf.urls import url, static | ||||||
| from django.contrib import admin | from django.contrib import admin | ||||||
|  |  | ||||||
|  | from documents.views import PdfView | ||||||
|  |  | ||||||
| urlpatterns = [ | urlpatterns = [ | ||||||
|  |     url(r"^fetch/(?P<pk>\d+)$", PdfView.as_view(), name="fetch"), | ||||||
|     url(r'', admin.site.urls), |     url(r'', admin.site.urls), | ||||||
| ] + static.static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) | ] + static.static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Daniel Quinn
					Daniel Quinn