mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-09 09:58:20 -05:00
Added GPG encryption for the PDFs
This commit is contained in:
parent
6956376d71
commit
f72c515742
@ -1,5 +1,5 @@
|
|||||||
from django.conf import settings
|
|
||||||
from django.contrib import admin
|
from django.contrib import admin
|
||||||
|
from django.core.urlresolvers import reverse
|
||||||
from django.templatetags.static import static
|
from django.templatetags.static import static
|
||||||
|
|
||||||
from .models import Document
|
from .models import Document
|
||||||
@ -8,27 +8,20 @@ from .models import Document
|
|||||||
class DocumentAdmin(admin.ModelAdmin):
|
class DocumentAdmin(admin.ModelAdmin):
|
||||||
|
|
||||||
search_fields = ("sender", "title", "content",)
|
search_fields = ("sender", "title", "content",)
|
||||||
list_display = ("edit", "created", "sender", "title", "thumbnail", "pdf")
|
list_display = ("edit", "created", "sender", "title", "pdf")
|
||||||
list_filter = ("created", "sender")
|
list_filter = ("created", "sender")
|
||||||
save_on_top = True
|
save_on_top = True
|
||||||
|
|
||||||
def edit(self, obj):
|
def edit(self, obj):
|
||||||
return '<img src="{}" width="64" height="64" alt="Edit icon" />'.format(
|
return '<img src="{}" width="22" height="22" alt="Edit icon" />'.format(
|
||||||
static("documents/img/edit.png"))
|
static("documents/img/edit.png"))
|
||||||
edit.allow_tags = True
|
edit.allow_tags = True
|
||||||
|
|
||||||
def thumbnail(self, obj):
|
|
||||||
return '<a href="{media}documents/img/{pk:07}.jpg" target="_blank">' \
|
|
||||||
'<img src="{media}documents/img/{pk:07}.jpg" width="100" />' \
|
|
||||||
'</a>'.format(media=settings.MEDIA_URL, pk=obj.pk)
|
|
||||||
thumbnail.allow_tags = True
|
|
||||||
|
|
||||||
def pdf(self, obj):
|
def pdf(self, obj):
|
||||||
return '<a href="{}documents/pdf/{:07}.pdf">' \
|
return '<a href="{}">' \
|
||||||
'<img src="{}" width="64" height="64" alt="PDF icon">' \
|
'<img src="{}" width="22" height="22" alt="PDF icon">' \
|
||||||
'</a>'.format(
|
'</a>'.format(
|
||||||
settings.MEDIA_URL,
|
reverse("fetch", kwargs={"pk": obj.pk}),
|
||||||
obj.pk,
|
|
||||||
static("documents/img/application-pdf.png")
|
static("documents/img/application-pdf.png")
|
||||||
)
|
)
|
||||||
pdf.allow_tags = True
|
pdf.allow_tags = True
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import glob
|
import glob
|
||||||
|
import gnupg
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import shutil
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
|
|
||||||
@ -36,8 +36,6 @@ class Command(BaseCommand):
|
|||||||
CONSUME = settings.CONSUMPTION_DIR
|
CONSUME = settings.CONSUMPTION_DIR
|
||||||
|
|
||||||
OCR = pyocr.get_available_tools()[0]
|
OCR = pyocr.get_available_tools()[0]
|
||||||
|
|
||||||
MEDIA_IMG = os.path.join(settings.MEDIA_ROOT, "documents", "img")
|
|
||||||
MEDIA_PDF = os.path.join(settings.MEDIA_ROOT, "documents", "pdf")
|
MEDIA_PDF = os.path.join(settings.MEDIA_ROOT, "documents", "pdf")
|
||||||
|
|
||||||
PARSER_REGEX = re.compile(r"^.*/(.*) - (.*)\.pdf$")
|
PARSER_REGEX = re.compile(r"^.*/(.*) - (.*)\.pdf$")
|
||||||
@ -45,6 +43,7 @@ class Command(BaseCommand):
|
|||||||
def __init__(self, *args, **kwargs):
    """
    Prepare per-run state, then defer to ``BaseCommand`` for the rest.

    All positional and keyword arguments are forwarded to
    ``BaseCommand.__init__`` untouched.
    """

    # GPG handle used when encrypting consumed PDFs; its keyring directory
    # comes from the GNUPG_HOME setting.
    self.gpg = gnupg.GPG(gnupghome=settings.GNUPG_HOME)

    self.stats = {}

    # Quiet by default: _render() compares its threshold against this.
    self.verbosity = 0

    BaseCommand.__init__(self, *args, **kwargs)
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
@ -77,18 +76,16 @@ class Command(BaseCommand):
|
|||||||
if self._is_ready(pdf):
|
if self._is_ready(pdf):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if self.verbosity > 1:
|
self._render("Consuming {}".format(pdf), 1)
|
||||||
print("Consuming {}".format(pdf))
|
|
||||||
|
|
||||||
pngs = self._get_greyscale(pdf)
|
pngs = self._get_greyscale(pdf)
|
||||||
jpgs = self._get_colour(pdf)
|
|
||||||
text = self._get_ocr(pngs)
|
text = self._get_ocr(pngs)
|
||||||
|
|
||||||
self._store(text, jpgs, pdf)
|
self._store(text, pdf)
|
||||||
self._cleanup(pngs, jpgs)
|
self._cleanup(pngs, pdf)
|
||||||
|
|
||||||
def _setup(self):
|
def _setup(self):
|
||||||
for d in (self.SCRATCH, self.MEDIA_IMG, self.MEDIA_PDF):
|
for d in (self.SCRATCH, self.MEDIA_PDF):
|
||||||
try:
|
try:
|
||||||
os.makedirs(d)
|
os.makedirs(d)
|
||||||
except FileExistsError:
|
except FileExistsError:
|
||||||
@ -112,7 +109,9 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
def _get_greyscale(self, pdf):
|
def _get_greyscale(self, pdf):
|
||||||
|
|
||||||
i = random.randint(1000000, 4999999)
|
self._render(" Generating greyscale image", 2)
|
||||||
|
|
||||||
|
i = random.randint(1000000, 9999999)
|
||||||
png = os.path.join(self.SCRATCH, "{}.png".format(i))
|
png = os.path.join(self.SCRATCH, "{}.png".format(i))
|
||||||
|
|
||||||
subprocess.Popen((
|
subprocess.Popen((
|
||||||
@ -122,45 +121,46 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
return sorted(glob.glob(os.path.join(self.SCRATCH, "{}*".format(i))))
|
return sorted(glob.glob(os.path.join(self.SCRATCH, "{}*".format(i))))
|
||||||
|
|
||||||
def _get_colour(self, pdf):
|
|
||||||
|
|
||||||
i = random.randint(5000000, 9999999)
|
|
||||||
jpg = os.path.join(self.SCRATCH, "{}.jpg".format(i))
|
|
||||||
|
|
||||||
subprocess.Popen((self.CONVERT, pdf, jpg)).wait()
|
|
||||||
|
|
||||||
return sorted(glob.glob(os.path.join(self.SCRATCH, "{}*".format(i))))
|
|
||||||
|
|
||||||
def _get_ocr(self, pngs):
|
def _get_ocr(self, pngs):
|
||||||
|
|
||||||
|
self._render(" OCRing the PDF", 2)
|
||||||
|
|
||||||
r = ""
|
r = ""
|
||||||
for png in pngs:
|
for png in pngs:
|
||||||
with Image.open(os.path.join(self.SCRATCH, png)) as f:
|
with Image.open(os.path.join(self.SCRATCH, png)) as f:
|
||||||
|
self._render(" {}".format(f.filename), 3)
|
||||||
r += self.OCR.image_to_string(f)
|
r += self.OCR.image_to_string(f)
|
||||||
r += "\n\n\n\n\n\n\n\n"
|
r += "\n\n\n\n\n\n\n\n"
|
||||||
|
|
||||||
return r
|
return r
|
||||||
|
|
||||||
def _store(self, text, jpgs, pdf):
|
def _store(self, text, pdf):
|
||||||
|
|
||||||
sender, title = self._parse_file_name(pdf)
|
sender, title = self._parse_file_name(pdf)
|
||||||
|
|
||||||
stats = os.stat(pdf)
|
stats = os.stat(pdf)
|
||||||
|
|
||||||
|
self._render(" Saving record to database", 2)
|
||||||
|
|
||||||
doc = Document.objects.create(
|
doc = Document.objects.create(
|
||||||
sender=sender,
|
sender=sender,
|
||||||
title=title,
|
title=title,
|
||||||
content=text,
|
content=text,
|
||||||
created=timezone.make_aware(
|
created=timezone.make_aware(
|
||||||
datetime.datetime.fromtimestamp(stats.st_ctime)),
|
|
||||||
modified=timezone.make_aware(
|
|
||||||
datetime.datetime.fromtimestamp(stats.st_mtime)),
|
datetime.datetime.fromtimestamp(stats.st_mtime)),
|
||||||
|
modified=timezone.make_aware(
|
||||||
|
datetime.datetime.fromtimestamp(stats.st_mtime))
|
||||||
)
|
)
|
||||||
|
|
||||||
shutil.move(jpgs[0], os.path.join(
|
with open(pdf, "rb") as unencrypted:
|
||||||
self.MEDIA_IMG, "{:07}.jpg".format(doc.pk)))
|
with open(doc.pdf_path, "wb") as encrypted:
|
||||||
shutil.move(pdf, os.path.join(
|
self._render(" Encrypting", 3)
|
||||||
self.MEDIA_PDF, "{:07}.pdf".format(doc.pk)))
|
encrypted.write(self.gpg.encrypt_file(
|
||||||
|
unencrypted,
|
||||||
|
recipients=None,
|
||||||
|
passphrase=settings.PASSPHRASE,
|
||||||
|
symmetric=True
|
||||||
|
).data)
|
||||||
|
|
||||||
def _parse_file_name(self, pdf):
|
def _parse_file_name(self, pdf):
|
||||||
"""
|
"""
|
||||||
@ -175,12 +175,15 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
return "", ""
|
return "", ""
|
||||||
|
|
||||||
def _cleanup(self, pngs, jpgs):
|
def _cleanup(self, pngs, pdf):
|
||||||
|
|
||||||
jpg_glob = os.path.join(
|
|
||||||
self.SCRATCH, re.sub(r"^.*/(\d+)-\d+.jpg$", "\\1*", jpgs[0]))
|
|
||||||
png_glob = os.path.join(
|
png_glob = os.path.join(
|
||||||
self.SCRATCH, re.sub(r"^.*/(\d+)-\d+.png$", "\\1*", pngs[0]))
|
self.SCRATCH, re.sub(r"^.*/(\d+)-\d+.png$", "\\1*", pngs[0]))
|
||||||
|
|
||||||
for f in list(glob.glob(jpg_glob)) + list(glob.glob(png_glob)):
|
for f in list(glob.glob(png_glob)) + [pdf]:
|
||||||
|
self._render(" Deleting {}".format(f), 2)
|
||||||
os.unlink(f)
|
os.unlink(f)
|
||||||
|
|
||||||
|
def _render(self, text, verbosity):
|
||||||
|
if self.verbosity >= verbosity:
|
||||||
|
print(text)
|
||||||
|
@ -1,3 +1,6 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
|
||||||
@ -20,3 +23,16 @@ class Document(models.Model):
|
|||||||
if self.sender or self.title:
|
if self.sender or self.title:
|
||||||
return "{}: {}, {}".format(created, self.sender or self.title)
|
return "{}: {}, {}".format(created, self.sender or self.title)
|
||||||
return str(created)
|
return str(created)
|
||||||
|
|
||||||
|
@property
def pdf_path(self):
    """
    Filesystem path of this document's encrypted PDF.

    The file lives under ``<MEDIA_ROOT>/documents/pdf/`` and is named after
    the primary key, zero-padded to seven digits, with a ``.pdf.gpg``
    suffix.
    """
    pdf_dir = os.path.join(settings.MEDIA_ROOT, "documents", "pdf")
    file_name = "{:07}.pdf.gpg".format(self.pk)
    return os.path.join(pdf_dir, file_name)
|
||||||
|
|
||||||
|
@property
def pdf(self):
    """
    Open the encrypted PDF at ``pdf_path`` for binary reading.

    A new file object is opened on every access and handed to the caller,
    who is responsible for closing it.
    """
    handle = open(self.pdf_path, "rb")
    return handle
|
||||||
|
@ -1,3 +1,29 @@
|
|||||||
from django.shortcuts import render
|
import gnupg
|
||||||
|
|
||||||
# Create your views here.
|
from django.conf import settings
|
||||||
|
from django.http import HttpResponse
|
||||||
|
from django.template.defaultfilters import slugify
|
||||||
|
from django.views.generic.detail import DetailView
|
||||||
|
|
||||||
|
from .models import Document
|
||||||
|
|
||||||
|
|
||||||
|
class PdfView(DetailView):
    """
    Detail view that serves a ``Document`` as a decrypted PDF download.
    """

    model = Document

    def render_to_response(self, context, **response_kwargs):
        """
        Override the default to return the unencrypted PDF as raw data.

        ``context`` and ``response_kwargs`` are accepted for compatibility
        with the base-class signature but are not used: the response body is
        the decrypted file content, sent as an ``application/pdf``
        attachment whose filename is the slugified string form of the
        document.
        """

        gpg = gnupg.GPG(gnupghome=settings.GNUPG_HOME)

        # Document.pdf opens the encrypted file and returns the handle, so
        # it is closed here as soon as decryption has consumed it rather
        # than being left open for every request served.
        with self.object.pdf as encrypted:
            decrypted = gpg.decrypt_file(
                encrypted,
                passphrase=settings.PASSPHRASE,
            )

        response = HttpResponse(
            decrypted.data, content_type="application/pdf")
        response["Content-Disposition"] = 'attachment; filename="{}"'.format(
            slugify(str(self.object)) + ".pdf")

        return response
|
||||||
|
@ -5,6 +5,15 @@ import sys
|
|||||||
if __name__ == "__main__":
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")

    # Imported here, after DJANGO_SETTINGS_MODULE has been given a default.
    import getpass

    from django.conf import settings
    from django.core.management import execute_from_command_line

    # The runserver and consumer need to have access to the passphrase, so it
    # must be entered at start time to keep it safe.
    if "runserver" in sys.argv or "consume" in sys.argv:
        if settings.DEBUG:
            # Fixed placeholder passphrase for DEBUG runs.
            settings.PASSPHRASE = "asdf"
        else:
            # getpass() reads without echoing, so the passphrase does not
            # appear on screen or in terminal scrollback as it is typed.
            settings.PASSPHRASE = getpass.getpass(
                "Production environment. Input passphrase: ")

    execute_from_command_line(sys.argv)
|
||||||
|
4
src/paperless/requirements.txt
Normal file
4
src/paperless/requirements.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
Django==1.9
|
||||||
|
Pillow==3.0.0
|
||||||
|
pyocr==0.3.1
|
||||||
|
python-gnupg==0.3.8
|
@ -135,3 +135,5 @@ MEDIA_URL = "/media/"
|
|||||||
CONVERT_BINARY = "/usr/bin/convert"
|
CONVERT_BINARY = "/usr/bin/convert"
|
||||||
SCRATCH_DIR = "/tmp/paperless" # Will be created if it doesn't exist
|
SCRATCH_DIR = "/tmp/paperless" # Will be created if it doesn't exist
|
||||||
CONSUMPTION_DIR = "/tmp/paperless/consume"
|
CONSUMPTION_DIR = "/tmp/paperless/consume"
|
||||||
|
GNUPG_HOME = os.environ.get("HOME", "/dev/null")
|
||||||
|
PASSPHRASE = None # Set via manage.py
|
||||||
|
@ -18,6 +18,9 @@ from django.conf import settings
|
|||||||
from django.conf.urls import url, static
|
from django.conf.urls import url, static
|
||||||
from django.contrib import admin
|
from django.contrib import admin
|
||||||
|
|
||||||
|
from documents.views import PdfView
|
||||||
|
|
||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
|
url(r"^fetch/(?P<pk>\d+)$", PdfView.as_view(), name="fetch"),
|
||||||
url(r'', admin.site.urls),
|
url(r'', admin.site.urls),
|
||||||
] + static.static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
|
] + static.static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user