Accounted for .sender in a few places

This commit is contained in:
Daniel Quinn 2016-03-04 09:14:50 +00:00
parent ba7878b9aa
commit 5d4587ef8b
7 changed files with 95 additions and 41 deletions

View File

@ -45,9 +45,9 @@ class DocumentAdmin(admin.ModelAdmin):
"all": ("paperless.css",) "all": ("paperless.css",)
} }
search_fields = ("sender__name", "title", "content") search_fields = ("correspondent__name", "title", "content")
list_display = ("created_", "sender", "title", "tags_", "document") list_display = ("created_", "correspondent", "title", "tags_", "document")
list_filter = ("tags", "sender", MonthListFilter) list_filter = ("tags", "correspondent", MonthListFilter)
list_per_page = 25 list_per_page = 25
def created_(self, obj): def created_(self, obj):

View File

@ -57,11 +57,11 @@ class Consumer(object):
r"^.*/(.*)\.(pdf|jpe?g|png|gif|tiff)$", r"^.*/(.*)\.(pdf|jpe?g|png|gif|tiff)$",
flags=re.IGNORECASE flags=re.IGNORECASE
) )
REGEX_SENDER_TITLE = re.compile( REGEX_CORRESPONDENT_TITLE = re.compile(
r"^.*/(.+) - (.*)\.(pdf|jpe?g|png|gif|tiff)$", r"^.*/(.+) - (.*)\.(pdf|jpe?g|png|gif|tiff)$",
flags=re.IGNORECASE flags=re.IGNORECASE
) )
REGEX_SENDER_TITLE_TAGS = re.compile( REGEX_CORRESPONDENT_TITLE_TAGS = re.compile(
r"^.*/(.*) - (.*) - ([a-z0-9\-,]*)\.(pdf|jpe?g|png|gif|tiff)$", r"^.*/(.*) - (.*) - ([a-z0-9\-,]*)\.(pdf|jpe?g|png|gif|tiff)$",
flags=re.IGNORECASE flags=re.IGNORECASE
) )
@ -238,16 +238,18 @@ class Consumer(object):
def _guess_attributes_from_name(self, parseable): def _guess_attributes_from_name(self, parseable):
""" """
We use a crude naming convention to make handling the sender, title, We use a crude naming convention to make handling the correspondent,
and tags easier: title, and tags easier:
"<sender> - <title> - <tags>.<suffix>" "<correspondent> - <title> - <tags>.<suffix>"
"<sender> - <title>.<suffix>" "<correspondent> - <title>.<suffix>"
"<title>.<suffix>" "<title>.<suffix>"
""" """
def get_sender(sender_name): def get_correspondent(correspondent_name):
return Correspondent.objects.get_or_create( return Correspondent.objects.get_or_create(
name=sender_name, defaults={"slug": slugify(sender_name)})[0] name=correspondent_name,
defaults={"slug": slugify(correspondent_name)}
)[0]
def get_tags(tags): def get_tags(tags):
r = [] r = []
@ -262,27 +264,27 @@ class Consumer(object):
return "jpg" return "jpg"
return suffix return suffix
# First attempt: "<sender> - <title> - <tags>.<suffix>" # First attempt: "<correspondent> - <title> - <tags>.<suffix>"
m = re.match(self.REGEX_SENDER_TITLE_TAGS, parseable) m = re.match(self.REGEX_CORRESPONDENT_TITLE_TAGS, parseable)
if m: if m:
return ( return (
get_sender(m.group(1)), get_correspondent(m.group(1)),
m.group(2), m.group(2),
get_tags(m.group(3)), get_tags(m.group(3)),
get_suffix(m.group(4)) get_suffix(m.group(4))
) )
# Second attempt: "<sender> - <title>.<suffix>" # Second attempt: "<correspondent> - <title>.<suffix>"
m = re.match(self.REGEX_SENDER_TITLE, parseable) m = re.match(self.REGEX_CORRESPONDENT_TITLE, parseable)
if m: if m:
return ( return (
get_sender(m.group(1)), get_correspondent(m.group(1)),
m.group(2), m.group(2),
(), (),
get_suffix(m.group(3)) get_suffix(m.group(3))
) )
# That didn't work, so we assume sender and tags are None # That didn't work, so we assume correspondent and tags are None
m = re.match(self.REGEX_TITLE, parseable) m = re.match(self.REGEX_TITLE, parseable)
return None, m.group(1), (), get_suffix(m.group(2)) return None, m.group(1), (), get_suffix(m.group(2))
@ -296,7 +298,7 @@ class Consumer(object):
self.log("debug", "Saving record to database") self.log("debug", "Saving record to database")
document = Document.objects.create( document = Document.objects.create(
sender=sender, correspondent=sender,
title=title, title=title,
content=text, content=text,
file_type=file_type, file_type=file_type,

View File

@ -23,7 +23,7 @@ class UploadForm(forms.Form):
"image/tiff": Document.TYPE_TIF, "image/tiff": Document.TYPE_TIF,
} }
sender = forms.CharField( correspondent = forms.CharField(
max_length=Correspondent._meta.get_field("name").max_length, max_length=Correspondent._meta.get_field("name").max_length,
required=False required=False
) )
@ -34,18 +34,19 @@ class UploadForm(forms.Form):
document = forms.FileField() document = forms.FileField()
signature = forms.CharField(max_length=256) signature = forms.CharField(max_length=256)
def clean_sender(self): def clean_correspondent(self):
""" """
I suppose it might look cleaner to use .get_or_create() here, but that I suppose it might look cleaner to use .get_or_create() here, but that
would also allow someone to fill up the db with bogus senders before would also allow someone to fill up the db with bogus correspondents
all validation was met. before all validation was met.
""" """
sender = self.cleaned_data.get("sender") corresp = self.cleaned_data.get("correspondent")
if not sender: if not corresp:
return None return None
if not Correspondent.SAFE_REGEX.match(sender) or " - " in sender: if not Correspondent.SAFE_REGEX.match(corresp) or " - " in corresp:
raise forms.ValidationError("That sender name is suspicious.") raise forms.ValidationError(
return sender "That correspondent name is suspicious.")
return corresp
def clean_title(self): def clean_title(self):
title = self.cleaned_data.get("title") title = self.cleaned_data.get("title")
@ -63,10 +64,10 @@ class UploadForm(forms.Form):
return document, self.TYPE_LOOKUP[file_type] return document, self.TYPE_LOOKUP[file_type]
def clean(self): def clean(self):
sender = self.clened_data("sender") corresp = self.clened_data("correspondent")
title = self.cleaned_data("title") title = self.cleaned_data("title")
signature = self.cleaned_data("signature") signature = self.cleaned_data("signature")
if sha256(sender + title + self.SECRET).hexdigest() == signature: if sha256(corresp + title + self.SECRET).hexdigest() == signature:
return True return True
return False return False
@ -77,13 +78,15 @@ class UploadForm(forms.Form):
form do that as well. Think of it as a poor-man's queue server. form do that as well. Think of it as a poor-man's queue server.
""" """
sender = self.clened_data("sender") correspondent = self.clened_data("correspondent")
title = self.cleaned_data("title") title = self.cleaned_data("title")
document, file_type = self.cleaned_data.get("document") document, file_type = self.cleaned_data.get("document")
t = int(mktime(datetime.now())) t = int(mktime(datetime.now()))
file_name = os.path.join( file_name = os.path.join(
Consumer.CONSUME, "{} - {}.{}".format(sender, title, file_type)) Consumer.CONSUME,
"{} - {}.{}".format(correspondent, title, file_type)
)
with open(file_name, "wb") as f: with open(file_name, "wb") as f:
f.write(document) f.write(document)

View File

@ -22,6 +22,13 @@ class Command(Renderable, BaseCommand):
def add_arguments(self, parser): def add_arguments(self, parser):
parser.add_argument("target") parser.add_argument("target")
parser.add_argument(
"--legacy",
action="store_true",
help="Don't try to export all of the document data, just dump the "
"original document files out in a format that makes "
"re-consuming them easy."
)
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
BaseCommand.__init__(self, *args, **kwargs) BaseCommand.__init__(self, *args, **kwargs)
@ -40,6 +47,13 @@ class Command(Renderable, BaseCommand):
if not settings.PASSPHRASE: if not settings.PASSPHRASE:
settings.PASSPHRASE = input("Please enter the passphrase: ") settings.PASSPHRASE = input("Please enter the passphrase: ")
if options["legacy"]:
self.dump_legacy()
else:
self.dump()
def dump(self):
documents = Document.objects.all() documents = Document.objects.all()
document_map = {d.pk: d for d in documents} document_map = {d.pk: d for d in documents}
manifest = json.loads(serializers.serialize("json", documents)) manifest = json.loads(serializers.serialize("json", documents))
@ -65,3 +79,28 @@ class Command(Renderable, BaseCommand):
with open(os.path.join(self.target, "manifest.json"), "w") as f: with open(os.path.join(self.target, "manifest.json"), "w") as f:
json.dump(manifest, f, indent=2) json.dump(manifest, f, indent=2)
def dump_legacy(self):
    """
    Export every document's decrypted original file into ``self.target``.

    Each file is named by ``_get_legacy_file_name`` and, once written, has
    its access and modification times set to the document's ``created``
    time (converted with ``time.mktime``, i.e. interpreted as local time).
    """
    for document in Document.objects.all():

        target = os.path.join(
            self.target, self._get_legacy_file_name(document))

        print("Exporting: {}".format(target))

        with open(target, "wb") as f:
            f.write(GnuPG.decrypted(document.source_file))

        # Apply the timestamp only after the file has been closed: the
        # flush performed on close would otherwise bump the modification
        # time again and undo the value set here.
        t = int(time.mktime(document.created.timetuple()))
        os.utime(target, times=(t, t))
@staticmethod
def _get_legacy_file_name(doc):
    """
    Build the export file name for ``doc``.

    When the document has both a correspondent and a title the name is
    "<correspondent> - <title> - <tags>.<file_type>", with the tag slugs
    joined by commas; the tags segment is left out when the document has
    no tags. Otherwise the base name of ``doc.source_path`` is returned.
    """
    if not (doc.correspondent and doc.title):
        return os.path.basename(doc.source_path)

    slugs = ",".join(tag.slug for tag in doc.tags.all())
    if not slugs:
        return "{} - {}.{}".format(
            doc.correspondent, doc.title, doc.file_type)

    return "{} - {} - {}.{}".format(
        doc.correspondent, doc.title, slugs, doc.file_type)

View File

@ -16,4 +16,13 @@ class Migration(migrations.Migration):
old_name='Sender', old_name='Sender',
new_name='Correspondent', new_name='Correspondent',
), ),
migrations.AlterModelOptions(
name='document',
options={'ordering': ('correspondent', 'title')},
),
migrations.RenameField(
model_name='document',
old_name='sender',
new_name='correspondent',
),
] ]

View File

@ -140,7 +140,7 @@ class Document(models.Model):
TYPE_TIF = "tiff" TYPE_TIF = "tiff"
TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,) TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,)
sender = models.ForeignKey( correspondent = models.ForeignKey(
Correspondent, blank=True, null=True, related_name="documents") Correspondent, blank=True, null=True, related_name="documents")
title = models.CharField(max_length=128, blank=True, db_index=True) title = models.CharField(max_length=128, blank=True, db_index=True)
content = models.TextField(db_index=True) content = models.TextField(db_index=True)
@ -155,14 +155,15 @@ class Document(models.Model):
modified = models.DateTimeField(auto_now=True, editable=False) modified = models.DateTimeField(auto_now=True, editable=False)
class Meta(object): class Meta(object):
ordering = ("sender", "title") ordering = ("correspondent", "title")
def __str__(self): def __str__(self):
created = self.created.strftime("%Y%m%d%H%M%S") created = self.created.strftime("%Y%m%d%H%M%S")
if self.sender and self.title: if self.correspondent and self.title:
return "{}: {} - {}".format(created, self.sender, self.title) return "{}: {} - {}".format(
if self.sender or self.title: created, self.correspondent, self.title)
return "{}: {}".format(created, self.sender or self.title) if self.correspondent or self.title:
return "{}: {}".format(created, self.correspondent or self.title)
return str(created) return str(created)
@property @property

View File

@ -20,8 +20,8 @@ class TagSerializer(serializers.HyperlinkedModelSerializer):
class DocumentSerializer(serializers.ModelSerializer): class DocumentSerializer(serializers.ModelSerializer):
sender = serializers.HyperlinkedRelatedField( correspondent = serializers.HyperlinkedRelatedField(
read_only=True, view_name="drf:sender-detail", allow_null=True) read_only=True, view_name="drf:correspondent-detail", allow_null=True)
tags = serializers.HyperlinkedRelatedField( tags = serializers.HyperlinkedRelatedField(
read_only=True, view_name="drf:tag-detail", many=True) read_only=True, view_name="drf:tag-detail", many=True)
@ -29,7 +29,7 @@ class DocumentSerializer(serializers.ModelSerializer):
model = Document model = Document
fields = ( fields = (
"id", "id",
"sender", "correspondent",
"title", "title",
"content", "content",
"file_type", "file_type",