Accounted for .sender in a few places

This commit is contained in:
Daniel Quinn 2016-03-04 09:14:50 +00:00
parent ba7878b9aa
commit 5d4587ef8b
7 changed files with 95 additions and 41 deletions

View File

@ -45,9 +45,9 @@ class DocumentAdmin(admin.ModelAdmin):
"all": ("paperless.css",)
}
search_fields = ("sender__name", "title", "content")
list_display = ("created_", "sender", "title", "tags_", "document")
list_filter = ("tags", "sender", MonthListFilter)
search_fields = ("correspondent__name", "title", "content")
list_display = ("created_", "correspondent", "title", "tags_", "document")
list_filter = ("tags", "correspondent", MonthListFilter)
list_per_page = 25
def created_(self, obj):

View File

@ -57,11 +57,11 @@ class Consumer(object):
r"^.*/(.*)\.(pdf|jpe?g|png|gif|tiff)$",
flags=re.IGNORECASE
)
REGEX_SENDER_TITLE = re.compile(
REGEX_CORRESPONDENT_TITLE = re.compile(
r"^.*/(.+) - (.*)\.(pdf|jpe?g|png|gif|tiff)$",
flags=re.IGNORECASE
)
REGEX_SENDER_TITLE_TAGS = re.compile(
REGEX_CORRESPONDENT_TITLE_TAGS = re.compile(
r"^.*/(.*) - (.*) - ([a-z0-9\-,]*)\.(pdf|jpe?g|png|gif|tiff)$",
flags=re.IGNORECASE
)
@ -238,16 +238,18 @@ class Consumer(object):
def _guess_attributes_from_name(self, parseable):
"""
We use a crude naming convention to make handling the sender, title,
and tags easier:
"<sender> - <title> - <tags>.<suffix>"
"<sender> - <title>.<suffix>"
We use a crude naming convention to make handling the correspondent,
title, and tags easier:
"<correspondent> - <title> - <tags>.<suffix>"
"<correspondent> - <title>.<suffix>"
"<title>.<suffix>"
"""
def get_sender(sender_name):
def get_correspondent(correspondent_name):
return Correspondent.objects.get_or_create(
name=sender_name, defaults={"slug": slugify(sender_name)})[0]
name=correspondent_name,
defaults={"slug": slugify(correspondent_name)}
)[0]
def get_tags(tags):
r = []
@ -262,27 +264,27 @@ class Consumer(object):
return "jpg"
return suffix
# First attempt: "<sender> - <title> - <tags>.<suffix>"
m = re.match(self.REGEX_SENDER_TITLE_TAGS, parseable)
# First attempt: "<correspondent> - <title> - <tags>.<suffix>"
m = re.match(self.REGEX_CORRESPONDENT_TITLE_TAGS, parseable)
if m:
return (
get_sender(m.group(1)),
get_correspondent(m.group(1)),
m.group(2),
get_tags(m.group(3)),
get_suffix(m.group(4))
)
# Second attempt: "<sender> - <title>.<suffix>"
m = re.match(self.REGEX_SENDER_TITLE, parseable)
# Second attempt: "<correspondent> - <title>.<suffix>"
m = re.match(self.REGEX_CORRESPONDENT_TITLE, parseable)
if m:
return (
get_sender(m.group(1)),
get_correspondent(m.group(1)),
m.group(2),
(),
get_suffix(m.group(3))
)
# That didn't work, so we assume sender and tags are None
# That didn't work, so we assume there is no correspondent (None) and no tags (an empty tuple)
m = re.match(self.REGEX_TITLE, parseable)
return None, m.group(1), (), get_suffix(m.group(2))
@ -296,7 +298,7 @@ class Consumer(object):
self.log("debug", "Saving record to database")
document = Document.objects.create(
sender=sender,
correspondent=sender,
title=title,
content=text,
file_type=file_type,

View File

@ -23,7 +23,7 @@ class UploadForm(forms.Form):
"image/tiff": Document.TYPE_TIF,
}
sender = forms.CharField(
correspondent = forms.CharField(
max_length=Correspondent._meta.get_field("name").max_length,
required=False
)
@ -34,18 +34,19 @@ class UploadForm(forms.Form):
document = forms.FileField()
signature = forms.CharField(max_length=256)
def clean_correspondent(self):
    """
    Validate the optional "correspondent" field.

    I suppose it might look cleaner to use .get_or_create() here, but that
    would also allow someone to fill up the db with bogus correspondents
    before all validation was met.

    Returns None when no correspondent was supplied, otherwise the name
    unchanged.  Raises forms.ValidationError when the name does not match
    Correspondent.SAFE_REGEX or contains " - ".
    """
    corresp = self.cleaned_data.get("correspondent")
    if not corresp:
        return None

    # " - " is the separator this form uses when it builds the file name
    # ("<correspondent> - <title>.<suffix>"), so it must not appear inside
    # the correspondent name itself.
    if not Correspondent.SAFE_REGEX.match(corresp) or " - " in corresp:
        raise forms.ValidationError(
            "That correspondent name is suspicious.")

    return corresp
def clean_title(self):
title = self.cleaned_data.get("title")
@ -63,10 +64,10 @@ class UploadForm(forms.Form):
return document, self.TYPE_LOOKUP[file_type]
def clean(self):
    """
    Verify the upload signature once the individual fields are cleaned.

    The expected signature is the hex SHA-256 digest of
    correspondent + title + self.SECRET.

    Returns self.cleaned_data when the signature matches, and raises
    forms.ValidationError otherwise, which is what Django expects from
    Form.clean().
    """
    # Function-scope import: nothing else in view needs it.
    from hmac import compare_digest

    # cleaned_data is a dict, so use .get().  clean_correspondent() returns
    # None for an empty value, and a field that failed its own validation
    # is absent altogether; treat both as an empty string.
    corresp = self.cleaned_data.get("correspondent") or ""
    title = self.cleaned_data.get("title") or ""
    signature = self.cleaned_data.get("signature") or ""

    # sha256() only accepts bytes.
    payload = (corresp + title + self.SECRET).encode("utf-8")
    expected = sha256(payload).hexdigest()

    # Compare as bytes so that a non-ASCII signature cannot raise, and in
    # constant time so the comparison does not leak how much matched.
    if not compare_digest(expected.encode("ascii"), signature.encode("utf-8")):
        raise forms.ValidationError("The signature is invalid.")

    return self.cleaned_data
@ -77,13 +78,15 @@ class UploadForm(forms.Form):
form do that as well. Think of it as a poor-man's queue server.
"""
sender = self.clened_data("sender")
correspondent = self.clened_data("correspondent")
title = self.cleaned_data("title")
document, file_type = self.cleaned_data.get("document")
t = int(mktime(datetime.now()))
file_name = os.path.join(
Consumer.CONSUME, "{} - {}.{}".format(sender, title, file_type))
Consumer.CONSUME,
"{} - {}.{}".format(correspondent, title, file_type)
)
with open(file_name, "wb") as f:
f.write(document)

View File

@ -22,6 +22,13 @@ class Command(Renderable, BaseCommand):
def add_arguments(self, parser):
    """
    Register this command's arguments on ``parser``: the positional
    "target" and the optional "--legacy" flag.
    """
    legacy_help = (
        "Don't try to export all of the document data, just dump the "
        "original document files out in a format that makes "
        "re-consuming them easy."
    )
    parser.add_argument("target")
    parser.add_argument("--legacy", action="store_true", help=legacy_help)
def __init__(self, *args, **kwargs):
BaseCommand.__init__(self, *args, **kwargs)
@ -40,6 +47,13 @@ class Command(Renderable, BaseCommand):
if not settings.PASSPHRASE:
settings.PASSPHRASE = input("Please enter the passphrase: ")
if options["legacy"]:
self.dump_legacy()
else:
self.dump()
def dump(self):
documents = Document.objects.all()
document_map = {d.pk: d for d in documents}
manifest = json.loads(serializers.serialize("json", documents))
@ -65,3 +79,28 @@ class Command(Renderable, BaseCommand):
with open(os.path.join(self.target, "manifest.json"), "w") as f:
json.dump(manifest, f, indent=2)
def dump_legacy(self):
    """
    Write one file per document into self.target, named by
    _get_legacy_file_name(), containing whatever
    GnuPG.decrypted(document.source_file) returns.

    Each file's access and modification times are set to the document's
    ``created`` timestamp.
    """
    for document in Document.objects.all():

        target = os.path.join(
            self.target, self._get_legacy_file_name(document))

        print("Exporting: {}".format(target))

        with open(target, "wb") as f:
            f.write(GnuPG.decrypted(document.source_file))

        # Set the timestamps only after the file has been closed: the
        # buffered write is flushed on close, which would otherwise bump
        # the modification time again.
        t = int(time.mktime(document.created.timetuple()))
        os.utime(target, times=(t, t))
@staticmethod
def _get_legacy_file_name(doc):
    """
    Build the export file name for ``doc`` using the naming convention:

        "<correspondent> - <title> - <tags>.<suffix>"
        "<correspondent> - <title>.<suffix>"
        "<title>.<suffix>"

    Falls back to the base name of doc.source_path when the document has
    no title, or when a title-only name would be ambiguous.
    """
    title = doc.title
    if not title:
        return os.path.basename(doc.source_path)

    if not doc.correspondent:
        # A title containing " - " would read as "<correspondent> - <title>",
        # so only use the title-only form when it is unambiguous.  Tags
        # cannot be expressed without a correspondent and are dropped.
        if " - " in title:
            return os.path.basename(doc.source_path)
        return "{}.{}".format(title, doc.file_type)

    tags = ",".join(t.slug for t in doc.tags.all())
    if tags:
        return "{} - {} - {}.{}".format(
            doc.correspondent, title, tags, doc.file_type)
    return "{} - {}.{}".format(doc.correspondent, title, doc.file_type)

View File

@ -16,4 +16,13 @@ class Migration(migrations.Migration):
old_name='Sender',
new_name='Correspondent',
),
migrations.AlterModelOptions(
name='document',
options={'ordering': ('correspondent', 'title')},
),
migrations.RenameField(
model_name='document',
old_name='sender',
new_name='correspondent',
),
]

View File

@ -140,7 +140,7 @@ class Document(models.Model):
TYPE_TIF = "tiff"
TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,)
sender = models.ForeignKey(
correspondent = models.ForeignKey(
Correspondent, blank=True, null=True, related_name="documents")
title = models.CharField(max_length=128, blank=True, db_index=True)
content = models.TextField(db_index=True)
@ -155,14 +155,15 @@ class Document(models.Model):
modified = models.DateTimeField(auto_now=True, editable=False)
class Meta(object):
ordering = ("sender", "title")
ordering = ("correspondent", "title")
def __str__(self):
    """
    Return "<created>: <correspondent> - <title>", degrading to whichever
    of correspondent/title is set, or to the timestamp alone.

    ``created`` is rendered as YYYYmmddHHMMSS.
    """
    created = self.created.strftime("%Y%m%d%H%M%S")
    if self.correspondent and self.title:
        return "{}: {} - {}".format(
            created, self.correspondent, self.title)
    if self.correspondent or self.title:
        return "{}: {}".format(created, self.correspondent or self.title)
    return str(created)
@property

View File

@ -20,8 +20,8 @@ class TagSerializer(serializers.HyperlinkedModelSerializer):
class DocumentSerializer(serializers.ModelSerializer):
sender = serializers.HyperlinkedRelatedField(
read_only=True, view_name="drf:sender-detail", allow_null=True)
correspondent = serializers.HyperlinkedRelatedField(
read_only=True, view_name="drf:correspondent-detail", allow_null=True)
tags = serializers.HyperlinkedRelatedField(
read_only=True, view_name="drf:tag-detail", many=True)
@ -29,7 +29,7 @@ class DocumentSerializer(serializers.ModelSerializer):
model = Document
fields = (
"id",
"sender",
"correspondent",
"title",
"content",
"file_type",