Fixed a few consumer bugs and added an exporter

Rename exporter to export and fix some debugging

Account for files not matching the sender/title pattern

Added a safety note

Wrong regex on the name parser

Renamed the command to something slightly less ambiguous
This commit is contained in:
Daniel Quinn
2016-01-14 19:47:57 +00:00
parent 2e48036f92
commit 17615d43cb
9 changed files with 141 additions and 24 deletions

View File

@@ -12,11 +12,12 @@ import pyocr
from PIL import Image
from django.conf import settings
from django.core.management.base import BaseCommand
from django.core.management.base import BaseCommand, CommandError
from django.template.defaultfilters import slugify
from django.utils import timezone
from documents.models import Document, Sender
from paperless.db import GnuPG
class Command(BaseCommand):
@@ -38,7 +39,8 @@ class Command(BaseCommand):
OCR = pyocr.get_available_tools()[0]
MEDIA_PDF = os.path.join(settings.MEDIA_ROOT, "documents", "pdf")
PARSER_REGEX = re.compile(r"^.*/(.*) - (.*)\.pdf$")
PARSER_REGEX_TITLE = re.compile(r"^.*/(.*)\.pdf$")
PARSER_REGEX_SENDER_TITLE = re.compile(r"^.*/(.*) - (.*)\.pdf$")
def __init__(self, *args, **kwargs):
self.verbosity = 0
@@ -50,6 +52,10 @@ class Command(BaseCommand):
self.verbosity = options["verbosity"]
if not os.path.exists(self.CONSUME):
raise CommandError("Consumption directory {} does not exist".format(
self.CONSUME))
self._setup()
try:
@@ -70,7 +76,7 @@ class Command(BaseCommand):
if not os.path.isfile(pdf):
continue
if not pdf.endswith(".pdf"):
if not re.match(self.PARSER_REGEX_TITLE, pdf):
continue
if self._is_ready(pdf):
@@ -155,12 +161,7 @@ class Command(BaseCommand):
with open(pdf, "rb") as unencrypted:
with open(doc.pdf_path, "wb") as encrypted:
self._render(" Encrypting", 3)
encrypted.write(self.gpg.encrypt_file(
unencrypted,
recipients=None,
passphrase=settings.PASSPHRASE,
symmetric=True
).data)
encrypted.write(GnuPG.encrypted(unencrypted))
def _parse_file_name(self, pdf):
"""
@@ -169,14 +170,17 @@ class Command(BaseCommand):
"sender - title.pdf"
"""
m = re.match(self.PARSER_REGEX, pdf)
# First we attempt "sender - title.pdf"
m = re.match(self.PARSER_REGEX_SENDER_TITLE, pdf)
if m:
sender_name, title = m.group(1), m.group(2)
sender, __ = Sender.objects.get_or_create(
name=sender_name, defaults={"slug": slugify(sender_name)})
return sender, title
return "", ""
# That didn't work, so we assume sender is None
m = re.match(self.PARSER_REGEX_TITLE, pdf)
return None, m.group(1)
def _cleanup(self, pngs, pdf):
@@ -187,6 +191,8 @@ class Command(BaseCommand):
self._render(" Deleting {}".format(f), 2)
os.unlink(f)
self._render("", 2)
def _render(self, text, verbosity):
if self.verbosity >= verbosity:
print(text)

View File

@@ -0,0 +1,53 @@
import gnupg
import os
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from documents.models import Document
from paperless.db import GnuPG
class Command(BaseCommand):
    """Management command that exports every stored document to a directory.

    For each ``Document`` the command writes the result of
    ``GnuPG.decrypted(document.pdf)`` into the target directory, using the
    document's ``parseable_file_name`` as the file name.
    """

    # Written as a plain concatenated literal so the text does not depend on
    # stripping source indentation at runtime.
    help = (
        "Decrypt and rename all files in our collection into a given target "
        "directory. Note that we don't export any of the parsed data since "
        "that can always be re-collected via the consumer."
    )

    def add_arguments(self, parser):
        """Register the single positional ``target`` argument."""
        parser.add_argument("target")

    def __init__(self, *args, **kwargs):
        """Initialise per-run state, then defer to ``BaseCommand``."""
        self.verbosity = 0
        self.target = None
        # NOTE(review): nothing in this class reads self.gpg (decryption goes
        # through GnuPG.decrypted); kept in case external code relies on it.
        self.gpg = gnupg.GPG(gnupghome=settings.GNUPG_HOME)
        super().__init__(*args, **kwargs)

    def handle(self, *args, **options):
        """Export all documents into ``options["target"]``.

        Raises:
            CommandError: if the target path does not exist, is not a
                directory, or does not appear to be writable.
        """
        # Local import: keeps this block self-contained without touching the
        # file-level import list.
        import getpass

        self.verbosity = options["verbosity"]
        self.target = options["target"]

        if not os.path.exists(self.target):
            raise CommandError("That path doesn't exist")

        # An existing regular file would otherwise pass the checks and fail
        # later with a raw NotADirectoryError inside the export loop.
        if not os.path.isdir(self.target):
            raise CommandError("That path isn't a directory")

        # Creating files in a directory needs search (execute) permission in
        # addition to write permission.
        if not os.access(self.target, os.W_OK | os.X_OK):
            raise CommandError("That path doesn't appear to be writable")

        if not settings.PASSPHRASE:
            # getpass avoids echoing the passphrase to the terminal.
            # Presumably GnuPG.decrypted reads settings.PASSPHRASE, which is
            # why it is stored back on settings -- confirm in paperless.db.
            settings.PASSPHRASE = getpass.getpass(
                "Please enter the passphrase: ")

        for document in Document.objects.all():

            target = os.path.join(self.target, document.parseable_file_name)

            self._render("Exporting: {}".format(target), 1)

            with open(target, "wb") as f:
                f.write(GnuPG.decrypted(document.pdf))

    def _render(self, text, verbosity):
        """Print ``text`` when the command's verbosity is at least ``verbosity``."""
        if self.verbosity >= verbosity:
            print(text)