s/Sender/Correspondent & reworked the (im|ex)porter

This commit is contained in:
Daniel Quinn
2016-03-03 20:52:42 +00:00
parent fad466477b
commit 070463b85a
14 changed files with 342 additions and 184 deletions

View File

@@ -3,7 +3,7 @@ from django.contrib.auth.models import User, Group
from django.core.urlresolvers import reverse
from django.templatetags.static import static
from .models import Sender, Tag, Document, Log
from .models import Correspondent, Tag, Document, Log
class MonthListFilter(admin.SimpleListFilter):
@@ -107,7 +107,7 @@ class LogAdmin(admin.ModelAdmin):
list_filter = ("level", "component",)
admin.site.register(Sender)
admin.site.register(Correspondent)
admin.site.register(Tag, TagAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(Log, LogAdmin)

View File

@@ -24,7 +24,7 @@ from pyocr.tesseract import TesseractError
from paperless.db import GnuPG
from .models import Sender, Tag, Document, Log
from .models import Correspondent, Tag, Document, Log
from .languages import ISO639
@@ -246,7 +246,7 @@ class Consumer(object):
"""
def get_sender(sender_name):
return Sender.objects.get_or_create(
return Correspondent.objects.get_or_create(
name=sender_name, defaults={"slug": slugify(sender_name)})[0]
def get_tags(tags):

View File

@@ -8,7 +8,7 @@ from time import mktime
from django import forms
from django.conf import settings
from .models import Document, Sender
from .models import Document, Correspondent
from .consumer import Consumer
@@ -24,7 +24,9 @@ class UploadForm(forms.Form):
}
sender = forms.CharField(
max_length=Sender._meta.get_field("name").max_length, required=False)
max_length=Correspondent._meta.get_field("name").max_length,
required=False
)
title = forms.CharField(
max_length=Document._meta.get_field("title").max_length,
required=False
@@ -41,7 +43,7 @@ class UploadForm(forms.Form):
sender = self.cleaned_data.get("sender")
if not sender:
return None
if not Sender.SAFE_REGEX.match(sender) or " - " in sender:
if not Correspondent.SAFE_REGEX.match(sender) or " - " in sender:
raise forms.ValidationError("That sender name is suspicious.")
return sender
@@ -49,7 +51,7 @@ class UploadForm(forms.Form):
title = self.cleaned_data.get("title")
if not title:
return None
if not Sender.SAFE_REGEX.match(title) or " - " in title:
if not Correspondent.SAFE_REGEX.match(title) or " - " in title:
raise forms.ValidationError("That title is suspicious.")
def clean_document(self):

View File

@@ -14,7 +14,7 @@ from dateutil import parser
from django.conf import settings
from .consumer import Consumer
from .models import Sender, Log
from .models import Correspondent, Log
class MailFetcherError(Exception):
@@ -103,7 +103,7 @@ class Message(Loggable):
def check_subject(self):
if self.subject is None:
raise InvalidMessageError("Message does not have a subject")
if not Sender.SAFE_REGEX.match(self.subject):
if not Correspondent.SAFE_REGEX.match(self.subject):
raise InvalidMessageError("Message subject is unsafe: {}".format(
self.subject))

View File

@@ -1,10 +1,12 @@
import json
import os
import time
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.core import serializers
from documents.models import Document
from documents.models import Document, Correspondent, Tag
from paperless.db import GnuPG
from ...mixins import Renderable
@@ -14,21 +16,19 @@ class Command(Renderable, BaseCommand):
help = """
Decrypt and rename all files in our collection into a given target
directory. Note that we don't export any of the parsed data since
that can always be re-collected via the consumer.
directory. And include a manifest file containing document data for
easy import.
""".replace(" ", "")
def add_arguments(self, parser):
parser.add_argument("target")
def __init__(self, *args, **kwargs):
self.verbosity = 0
self.target = None
BaseCommand.__init__(self, *args, **kwargs)
self.target = None
def handle(self, *args, **options):
self.verbosity = options["verbosity"]
self.target = options["target"]
if not os.path.exists(self.target):
@@ -40,9 +40,15 @@ class Command(Renderable, BaseCommand):
if not settings.PASSPHRASE:
settings.PASSPHRASE = input("Please enter the passphrase: ")
for document in Document.objects.all():
documents = Document.objects.all()
document_map = {d.pk: d for d in documents}
manifest = json.loads(serializers.serialize("json", documents))
for document_dict in manifest:
document = document_map[document_dict["pk"]]
target = os.path.join(self.target, document.file_name)
document_dict["__exported_file_name__"] = target
print("Exporting: {}".format(target))
@@ -50,3 +56,12 @@ class Command(Renderable, BaseCommand):
f.write(GnuPG.decrypted(document.source_file))
t = int(time.mktime(document.created.timetuple()))
os.utime(target, times=(t, t))
manifest += json.loads(
serializers.serialize("json", Correspondent.objects.all()))
manifest += json.loads(serializers.serialize(
"json", Tag.objects.all()))
with open(os.path.join(self.target, "manifest.json"), "w") as f:
json.dump(manifest, f, indent=2)

View File

@@ -0,0 +1,110 @@
import json
import os
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.core.management import call_command
from documents.models import Document
from paperless.db import GnuPG
from ...mixins import Renderable
class Command(Renderable, BaseCommand):
    """
    Management command that imports a previously exported document set.

    The ``source`` directory must contain a ``manifest.json`` file.  The
    manifest is loaded into the database with ``loaddata`` and then every
    ``documents.document`` record in it has its exported (unencrypted) file
    encrypted and written to the document's ``source_path``.

    With ``--ignore-absent`` set, manifest records whose exported file is
    not named or cannot be found are skipped instead of aborting the run.
    """

    help = """
        Using a manifest.json file, load the data from there, and import the
        documents it refers to.
    """.replace("    ", "")

    def add_arguments(self, parser):
        """Register the positional ``source`` and the ``--ignore-absent``
        flag."""
        parser.add_argument("source")
        parser.add_argument(
            '--ignore-absent',
            action='store_true',
            default=False,
            help="If the manifest refers to a document that doesn't exist, "
                 "ignore it and attempt to import what it can"
        )

    def __init__(self, *args, **kwargs):
        BaseCommand.__init__(self, *args, **kwargs)
        self.source = None
        self.manifest = None
        self.ignore_absent = False

    def handle(self, *args, **options):
        """
        Validate the source directory and manifest, load the manifest into
        the database, then encrypt the exported files into place.

        Raises CommandError if the source path is missing or unreadable, if
        there is no manifest, if the manifest fails validation, or if no
        passphrase is configured.
        """

        self.source = options["source"]
        # .get() so that programmatic callers which omit the flag still work.
        self.ignore_absent = options.get("ignore_absent", False)

        if not os.path.exists(self.source):
            raise CommandError("That path doesn't exist")

        if not os.access(self.source, os.R_OK):
            raise CommandError("That path doesn't appear to be readable")

        manifest_path = os.path.join(self.source, "manifest.json")
        self._check_manifest_exists(manifest_path)

        with open(manifest_path) as f:
            self.manifest = json.load(f)

        self._check_manifest()

        if not settings.PASSPHRASE:
            raise CommandError(
                "You need to define a passphrase before continuing.  Please "
                "consult the documentation for setting up Paperless."
            )

        # Fill up the database with whatever is in the manifest
        call_command("loaddata", manifest_path)

        self._import_files_from_manifest()

    @staticmethod
    def _check_manifest_exists(path):
        """Raise CommandError unless ``path`` exists."""
        if not os.path.exists(path):
            raise CommandError(
                "That directory doesn't appear to contain a manifest.json "
                "file."
            )

    def _document_path(self, record):
        """
        Return the location of the exported file for a manifest record, or
        None if the record does not name one.

        Only the base name of the recorded value is used, resolved inside
        the source directory, so that validation and import always look at
        the same file wherever the export directory currently lives.
        """
        doc_file = record.get("__exported_file_name__")
        if doc_file is None:
            return None
        return os.path.join(self.source, os.path.basename(doc_file))

    def _check_manifest(self):
        """
        Verify that every document record in the manifest names an exported
        file that is present in the source directory.

        Raises CommandError on the first offending record, unless
        ``--ignore-absent`` was given, in which case such records are
        tolerated here and skipped later during the import.
        """

        for record in self.manifest:

            if record["model"] != "documents.document":
                continue

            document_path = self._document_path(record)

            if document_path is None:
                if self.ignore_absent:
                    continue
                raise CommandError(
                    'The manifest file contains a record which does not '
                    'refer to an actual document file.  If you want to import '
                    'the rest anyway (skipping such references) call the '
                    'importer with --ignore-absent'
                )

            if not os.path.exists(document_path):
                if self.ignore_absent:
                    continue
                doc_file = record["__exported_file_name__"]
                raise CommandError(
                    'The manifest file refers to "{}" which does not '
                    'appear to be in the source directory.  If you want to '
                    'import the rest anyway (skipping such references) call '
                    'the importer with --ignore-absent'.format(doc_file)
                )

    def _import_files_from_manifest(self):
        """
        Encrypt each exported document file and write the result to the
        ``source_path`` of the matching Document row.
        """

        for record in self.manifest:

            if record["model"] != "documents.document":
                continue

            document_path = self._document_path(record)

            # Absent files are only skipped when the user asked for that;
            # otherwise let open() fail loudly rather than hide the problem.
            if self.ignore_absent:
                if document_path is None or not os.path.exists(document_path):
                    continue

            document = Document.objects.get(pk=record["pk"])

            with open(document_path, "rb") as unencrypted:
                with open(document.source_path, "wb") as encrypted:
                    print("Encrypting {} and saving it to {}".format(
                        document_path, document.source_path))
                    encrypted.write(GnuPG.encrypted(unencrypted))

View File

@@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.2 on 2016-03-03 19:29
from __future__ import unicode_literals
from django.db import migrations
class Migration(migrations.Migration):
    """Rename the ``Sender`` model to ``Correspondent``."""

    # Applied after migration 0010_log of the documents app.
    dependencies = [
        ('documents', '0010_log'),
    ]

    # A single rename operation: Sender -> Correspondent.
    operations = [
        migrations.RenameModel(old_name='Sender', new_name='Correspondent'),
    ]

View File

@@ -28,7 +28,7 @@ class SluggedModel(models.Model):
return self.name
class Sender(SluggedModel):
class Correspondent(SluggedModel):
# This regex is probably more restrictive than it needs to be, but it's
# better safe than sorry.
@@ -141,7 +141,7 @@ class Document(models.Model):
TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,)
sender = models.ForeignKey(
Sender, blank=True, null=True, related_name="documents")
Correspondent, blank=True, null=True, related_name="documents")
title = models.CharField(max_length=128, blank=True, db_index=True)
content = models.TextField(db_index=True)
file_type = models.CharField(
@@ -158,9 +158,9 @@ class Document(models.Model):
ordering = ("sender", "title")
def __str__(self):
created = self.created.strftime("%Y-%m-%d")
created = self.created.strftime("%Y%m%d%H%M%S")
if self.sender and self.title:
return "{}: {}, {}".format(created, self.sender, self.title)
return "{}: {} - {}".format(created, self.sender, self.title)
if self.sender or self.title:
return "{}: {}".format(created, self.sender or self.title)
return str(created)
@@ -179,13 +179,7 @@ class Document(models.Model):
@property
def file_name(self):
if self.sender and self.title:
tags = ",".join([t.slug for t in self.tags.all()])
if tags:
return "{} - {} - {}.{}".format(
self.sender, self.title, tags, self.file_type)
return "{} - {}.{}".format(self.sender, self.title, self.file_type)
return os.path.basename(self.source_path)
return slugify(str(self)) + "." + self.file_type
@property
def download_url(self):

View File

@@ -1,12 +1,12 @@
from rest_framework import serializers
from .models import Sender, Tag, Document, Log
from .models import Correspondent, Tag, Document, Log
class SenderSerializer(serializers.HyperlinkedModelSerializer):
class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
class Meta(object):
model = Sender
model = Correspondent
fields = ("id", "slug", "name")

View File

@@ -1,6 +1,5 @@
from django.contrib.auth.mixins import LoginRequiredMixin
from django.http import HttpResponse
from django.template.defaultfilters import slugify
from django.views.decorators.csrf import csrf_exempt
from django.views.generic import FormView, DetailView, TemplateView
@@ -14,9 +13,9 @@ from rest_framework.viewsets import (
from paperless.db import GnuPG
from .forms import UploadForm
from .models import Sender, Tag, Document, Log
from .models import Correspondent, Tag, Document, Log
from .serialisers import (
SenderSerializer, TagSerializer, DocumentSerializer, LogSerializer)
CorrespondentSerializer, TagSerializer, DocumentSerializer, LogSerializer)
class IndexView(TemplateView):
@@ -52,7 +51,7 @@ class FetchView(LoginRequiredMixin, DetailView):
content_type=content_types[self.object.file_type]
)
response["Content-Disposition"] = 'attachment; filename="{}"'.format(
slugify(str(self.object)) + "." + self.object.file_type)
self.object.file_name)
return response
@@ -81,10 +80,10 @@ class StandardPagination(PageNumberPagination):
max_page_size = 100000
class SenderViewSet(ModelViewSet):
model = Sender
queryset = Sender.objects.all()
serializer_class = SenderSerializer
class CorrespondentViewSet(ModelViewSet):
model = Correspondent
queryset = Correspondent.objects.all()
serializer_class = CorrespondentSerializer
pagination_class = StandardPagination
permission_classes = (IsAuthenticated,)