From 5d4587ef8b599fbe91c74740ded81e35d1b711f8 Mon Sep 17 00:00:00 2001
From: Daniel Quinn
Date: Fri, 4 Mar 2016 09:14:50 +0000
Subject: [PATCH] Accounted for .sender in a few places

---
Review notes (this area is ignored by `git am`):
  * Line structure and indentation were reconstructed from a
    whitespace-collapsed copy; every hunk was checked against the
    old/new line counts in its @@ header.
  * src/documents/forms.py: the added lines in clean() and the last hunk
    still call `self.clened_data(...)` (misspelled, and called rather than
    used like the `self.cleaned_data.get(...)` in clean_title) -- carried
    over unchanged from the removed lines.
  * src/documents/management/commands/document_exporter.py: the new
    dump_legacy() uses `time.mktime`; no import hunk is part of this
    patch, so confirm `time` is already imported in that module.

 src/documents/admin.py                        |  6 +--
 src/documents/consumer.py                     | 34 ++++++++--------
 src/documents/forms.py                        | 29 +++++++-------
 .../management/commands/document_exporter.py  | 39 +++++++++++++++++++
 .../migrations/0011_auto_20160303_1929.py     |  9 +++++
 src/documents/models.py                       | 13 ++++---
 src/documents/serialisers.py                  |  6 +--
 7 files changed, 95 insertions(+), 41 deletions(-)

diff --git a/src/documents/admin.py b/src/documents/admin.py
index 3baad817b..a5b523492 100644
--- a/src/documents/admin.py
+++ b/src/documents/admin.py
@@ -45,9 +45,9 @@ class DocumentAdmin(admin.ModelAdmin):
             "all": ("paperless.css",)
         }
 
-    search_fields = ("sender__name", "title", "content")
-    list_display = ("created_", "sender", "title", "tags_", "document")
-    list_filter = ("tags", "sender", MonthListFilter)
+    search_fields = ("correspondent__name", "title", "content")
+    list_display = ("created_", "correspondent", "title", "tags_", "document")
+    list_filter = ("tags", "correspondent", MonthListFilter)
     list_per_page = 25
 
     def created_(self, obj):
diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index 4233cded8..eeb42cdf1 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -57,11 +57,11 @@ class Consumer(object):
         r"^.*/(.*)\.(pdf|jpe?g|png|gif|tiff)$",
         flags=re.IGNORECASE
     )
-    REGEX_SENDER_TITLE = re.compile(
+    REGEX_CORRESPONDENT_TITLE = re.compile(
         r"^.*/(.+) - (.*)\.(pdf|jpe?g|png|gif|tiff)$",
         flags=re.IGNORECASE
     )
-    REGEX_SENDER_TITLE_TAGS = re.compile(
+    REGEX_CORRESPONDENT_TITLE_TAGS = re.compile(
         r"^.*/(.*) - (.*) - ([a-z0-9\-,]*)\.(pdf|jpe?g|png|gif|tiff)$",
         flags=re.IGNORECASE
     )
@@ -238,16 +238,18 @@ class Consumer(object):
 
     def _guess_attributes_from_name(self, parseable):
         """
-        We use a crude naming convention to make handling the sender, title,
-        and tags easier:
-          "<sender> - <title> - <tags>.<suffix>"
-          "<sender> - <title>.<suffix>"
+        We use a crude naming convention to make handling the correspondent,
+        title, and tags easier:
+          "<correspondent> - <title> - <tags>.<suffix>"
+          "<correspondent> - <title>.<suffix>"
           "<title>.<suffix>"
         """
 
-        def get_sender(sender_name):
+        def get_correspondent(correspondent_name):
             return Correspondent.objects.get_or_create(
-                name=sender_name, defaults={"slug": slugify(sender_name)})[0]
+                name=correspondent_name,
+                defaults={"slug": slugify(correspondent_name)}
+            )[0]
 
         def get_tags(tags):
             r = []
@@ -262,27 +264,27 @@ class Consumer(object):
                 return "jpg"
             return suffix
 
-        # First attempt: "<sender> - <title> - <tags>.<suffix>"
-        m = re.match(self.REGEX_SENDER_TITLE_TAGS, parseable)
+        # First attempt: "<correspondent> - <title> - <tags>.<suffix>"
+        m = re.match(self.REGEX_CORRESPONDENT_TITLE_TAGS, parseable)
         if m:
             return (
-                get_sender(m.group(1)),
+                get_correspondent(m.group(1)),
                 m.group(2),
                 get_tags(m.group(3)),
                 get_suffix(m.group(4))
             )
 
-        # Second attempt: "<sender> - <title>.<suffix>"
-        m = re.match(self.REGEX_SENDER_TITLE, parseable)
+        # Second attempt: "<correspondent> - <title>.<suffix>"
+        m = re.match(self.REGEX_CORRESPONDENT_TITLE, parseable)
         if m:
             return (
-                get_sender(m.group(1)),
+                get_correspondent(m.group(1)),
                 m.group(2),
                 (),
                 get_suffix(m.group(3))
             )
 
-        # That didn't work, so we assume sender and tags are None
+        # That didn't work, so we assume correspondent and tags are None
         m = re.match(self.REGEX_TITLE, parseable)
         return None, m.group(1), (), get_suffix(m.group(2))
 
@@ -296,7 +298,7 @@ class Consumer(object):
         self.log("debug", "Saving record to database")
 
         document = Document.objects.create(
-            sender=sender,
+            correspondent=sender,
             title=title,
             content=text,
             file_type=file_type,
diff --git a/src/documents/forms.py b/src/documents/forms.py
index d8960f88b..d4c01745a 100644
--- a/src/documents/forms.py
+++ b/src/documents/forms.py
@@ -23,7 +23,7 @@ class UploadForm(forms.Form):
         "image/tiff": Document.TYPE_TIF,
     }
 
-    sender = forms.CharField(
+    correspondent = forms.CharField(
         max_length=Correspondent._meta.get_field("name").max_length,
         required=False
     )
@@ -34,18 +34,19 @@ class UploadForm(forms.Form):
     document = forms.FileField()
     signature = forms.CharField(max_length=256)
 
-    def clean_sender(self):
+    def clean_correspondent(self):
         """
         I suppose it might look cleaner to use .get_or_create() here, but that
-        would also allow someone to fill up the db with bogus senders before
-        all validation was met.
+        would also allow someone to fill up the db with bogus correspondents
+        before all validation was met.
         """
-        sender = self.cleaned_data.get("sender")
-        if not sender:
+        corresp = self.cleaned_data.get("correspondent")
+        if not corresp:
             return None
-        if not Correspondent.SAFE_REGEX.match(sender) or " - " in sender:
-            raise forms.ValidationError("That sender name is suspicious.")
-        return sender
+        if not Correspondent.SAFE_REGEX.match(corresp) or " - " in corresp:
+            raise forms.ValidationError(
+                "That correspondent name is suspicious.")
+        return corresp
 
     def clean_title(self):
         title = self.cleaned_data.get("title")
@@ -63,10 +64,10 @@ class UploadForm(forms.Form):
         return document, self.TYPE_LOOKUP[file_type]
 
     def clean(self):
-        sender = self.clened_data("sender")
+        corresp = self.clened_data("correspondent")
         title = self.cleaned_data("title")
         signature = self.cleaned_data("signature")
-        if sha256(sender + title + self.SECRET).hexdigest() == signature:
+        if sha256(corresp + title + self.SECRET).hexdigest() == signature:
             return True
         return False
 
@@ -77,13 +78,15 @@ class UploadForm(forms.Form):
         form do that as well. Think of it as a poor-man's queue server.
         """
 
-        sender = self.clened_data("sender")
+        correspondent = self.clened_data("correspondent")
         title = self.cleaned_data("title")
         document, file_type = self.cleaned_data.get("document")
 
         t = int(mktime(datetime.now()))
         file_name = os.path.join(
-            Consumer.CONSUME, "{} - {}.{}".format(sender, title, file_type))
+            Consumer.CONSUME,
+            "{} - {}.{}".format(correspondent, title, file_type)
+        )
 
         with open(file_name, "wb") as f:
             f.write(document)
diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py
index 87ed804a2..913f7ae79 100644
--- a/src/documents/management/commands/document_exporter.py
+++ b/src/documents/management/commands/document_exporter.py
@@ -22,6 +22,13 @@ class Command(Renderable, BaseCommand):
 
     def add_arguments(self, parser):
         parser.add_argument("target")
+        parser.add_argument(
+            "--legacy",
+            action="store_true",
+            help="Don't try to export all of the document data, just dump the "
+                 "original document files out in a format that makes "
+                 "re-consuming them easy."
+        )
 
     def __init__(self, *args, **kwargs):
         BaseCommand.__init__(self, *args, **kwargs)
@@ -40,6 +47,13 @@ class Command(Renderable, BaseCommand):
         if not settings.PASSPHRASE:
             settings.PASSPHRASE = input("Please enter the passphrase: ")
 
+        if options["legacy"]:
+            self.dump_legacy()
+        else:
+            self.dump()
+
+    def dump(self):
+
         documents = Document.objects.all()
         document_map = {d.pk: d for d in documents}
         manifest = json.loads(serializers.serialize("json", documents))
@@ -65,3 +79,28 @@ class Command(Renderable, BaseCommand):
 
         with open(os.path.join(self.target, "manifest.json"), "w") as f:
             json.dump(manifest, f, indent=2)
+
+    def dump_legacy(self):
+
+        for document in Document.objects.all():
+
+            target = os.path.join(
+                self.target, self._get_legacy_file_name(document))
+
+            print("Exporting: {}".format(target))
+
+            with open(target, "wb") as f:
+                f.write(GnuPG.decrypted(document.source_file))
+                t = int(time.mktime(document.created.timetuple()))
+                os.utime(target, times=(t, t))
+
+    @staticmethod
+    def _get_legacy_file_name(doc):
+        if doc.correspondent and doc.title:
+            tags = ",".join([t.slug for t in doc.tags.all()])
+            if tags:
+                return "{} - {} - {}.{}".format(
+                    doc.correspondent, doc.title, tags, doc.file_type)
+            return "{} - {}.{}".format(
+                doc.correspondent, doc.title, doc.file_type)
+        return os.path.basename(doc.source_path)
diff --git a/src/documents/migrations/0011_auto_20160303_1929.py b/src/documents/migrations/0011_auto_20160303_1929.py
index a9aefddaf..af4ee4c66 100644
--- a/src/documents/migrations/0011_auto_20160303_1929.py
+++ b/src/documents/migrations/0011_auto_20160303_1929.py
@@ -16,4 +16,13 @@ class Migration(migrations.Migration):
             old_name='Sender',
             new_name='Correspondent',
         ),
+        migrations.AlterModelOptions(
+            name='document',
+            options={'ordering': ('correspondent', 'title')},
+        ),
+        migrations.RenameField(
+            model_name='document',
+            old_name='sender',
+            new_name='correspondent',
+        ),
     ]
diff --git a/src/documents/models.py b/src/documents/models.py
index 0fb6489c4..a82f7643f 100644
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -140,7 +140,7 @@ class Document(models.Model):
     TYPE_TIF = "tiff"
     TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,)
 
-    sender = models.ForeignKey(
+    correspondent = models.ForeignKey(
         Correspondent, blank=True, null=True, related_name="documents")
     title = models.CharField(max_length=128, blank=True, db_index=True)
     content = models.TextField(db_index=True)
@@ -155,14 +155,15 @@ class Document(models.Model):
     modified = models.DateTimeField(auto_now=True, editable=False)
 
     class Meta(object):
-        ordering = ("sender", "title")
+        ordering = ("correspondent", "title")
 
     def __str__(self):
         created = self.created.strftime("%Y%m%d%H%M%S")
-        if self.sender and self.title:
-            return "{}: {} - {}".format(created, self.sender, self.title)
-        if self.sender or self.title:
-            return "{}: {}".format(created, self.sender or self.title)
+        if self.correspondent and self.title:
+            return "{}: {} - {}".format(
+                created, self.correspondent, self.title)
+        if self.correspondent or self.title:
+            return "{}: {}".format(created, self.correspondent or self.title)
         return str(created)
 
     @property
diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py
index 340fdaa25..c2b2ae7fd 100644
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -20,8 +20,8 @@ class TagSerializer(serializers.HyperlinkedModelSerializer):
 
 class DocumentSerializer(serializers.ModelSerializer):
 
-    sender = serializers.HyperlinkedRelatedField(
-        read_only=True, view_name="drf:sender-detail", allow_null=True)
+    correspondent = serializers.HyperlinkedRelatedField(
+        read_only=True, view_name="drf:correspondent-detail", allow_null=True)
     tags = serializers.HyperlinkedRelatedField(
         read_only=True, view_name="drf:tag-detail", many=True)
 
@@ -29,7 +29,7 @@ class DocumentSerializer(serializers.ModelSerializer):
         model = Document
         fields = (
             "id",
-            "sender",
+            "correspondent",
             "title",
             "content",
             "file_type",