From 5c3cb1e4ab2bb286659339c721323406c5f7cc04 Mon Sep 17 00:00:00 2001
From: Daniel Quinn <code@danielquinn.org>
Date: Sun, 7 Oct 2018 16:24:05 +0100
Subject: [PATCH] Rework how slugs are generated/referenced #393

---
 docs/changelog.rst                            | 20 +++++++
 src/documents/admin.py                        |  4 ++
 .../migrations/0022_auto_20181007_1420.py     | 52 +++++++++++++++++++
 src/documents/models.py                       |  9 ++--
 4 files changed, 80 insertions(+), 5 deletions(-)
 create mode 100644 src/documents/migrations/0022_auto_20181007_1420.py

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 598241938..b446e45cf 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -9,13 +9,32 @@ Changelog
   its location in ``PAPERLESS_OPTIPNG_BINARY``.  The Docker image has already
   been updated on the Docker Hub, so you just need to pull the latest one from
   there if you're a Docker user.
+
+* A problem in how we handle slug values on Tags and Correspondents required a
+  few changes to how we handle this field `#393`_:
+
+  1. Slugs are no longer editable.  They're derived from the name of the tag or
+     correspondent at save time, so if you want to change the slug, you have to
+     change the name, and even then you're restricted to the rules of the
+     ``slugify()`` function.  The slug value is still visible in the admin
+     though.
+  2. I've added a migration to go over all existing tags & correspondents and
+     rewrite the ``.slug`` values to ones conforming to the ``slugify()``
+     rules.
+  3. The consumption process now uses the same rules as ``.save()`` in
+     determining a slug and using that to check for an existing
+     tag/correspondent.
+
 * An annoying bug in the date capture code was causing some bogus dates to be
   attached to documents, which in turn busted the UI.  Thanks to `Andrew Peng`_
   for reporting this. `#414`_.
+
 * A bug in the Dockerfile meant that Tesseract language files weren't being
   installed correctly.  `euri10`_ was quick to provide a fix: `#406`_, `#413`_.
+
 * Document consumption is now wrapped in a transaction as per an old ticket
   `#262`_.
+
 * The ``get_date()`` functionality of the parsers has been consolidated onto
   the ``DocumentParser`` class since much of that code was redundant anyway.
 
@@ -627,6 +646,7 @@ bulk of the work on this big change.
 .. _#391: https://github.com/danielquinn/paperless/pull/391
 .. _#390: https://github.com/danielquinn/paperless/pull/390
 .. _#392: https://github.com/danielquinn/paperless/issues/392
+.. _#393: https://github.com/danielquinn/paperless/issues/393
 .. _#395: https://github.com/danielquinn/paperless/pull/395
 .. _#396: https://github.com/danielquinn/paperless/pull/396
 .. _#399: https://github.com/danielquinn/paperless/pull/399
diff --git a/src/documents/admin.py b/src/documents/admin.py
index 365a99c1a..6dbe7f835 100644
--- a/src/documents/admin.py
+++ b/src/documents/admin.py
@@ -125,6 +125,8 @@ class CorrespondentAdmin(CommonAdmin):
     list_filter = ("matching_algorithm",)
     list_editable = ("match", "matching_algorithm")
 
+    readonly_fields = ("slug",)
+
     def get_queryset(self, request):
         qs = super(CorrespondentAdmin, self).get_queryset(request)
         qs = qs.annotate(
@@ -149,6 +151,8 @@ class TagAdmin(CommonAdmin):
     list_filter = ("colour", "matching_algorithm")
     list_editable = ("colour", "match", "matching_algorithm")
 
+    readonly_fields = ("slug",)
+
     def get_queryset(self, request):
         qs = super(TagAdmin, self).get_queryset(request)
         qs = qs.annotate(document_count=models.Count("documents"))
diff --git a/src/documents/migrations/0022_auto_20181007_1420.py b/src/documents/migrations/0022_auto_20181007_1420.py
new file mode 100644
index 000000000..937695bc8
--- /dev/null
+++ b/src/documents/migrations/0022_auto_20181007_1420.py
@@ -0,0 +1,52 @@
+# Generated by Django 2.0.8 on 2018-10-07 14:20
+
+from django.db import migrations, models
+from django.utils.text import slugify
+
+
+def re_slug_all_the_things(apps, schema_editor):
+    """
+    Rewrite every Tag and Correspondent slug through ``slugify()`` so the
+    stored values are valid slugs now that they can no longer be edited.
+
+    ``apps`` is the historical app registry supplied by ``RunPython``.
+    """
+
+    Tag = apps.get_model("documents", "Tag")
+    Correspondent = apps.get_model("documents", "Correspondent")
+
+    for klass in (Tag, Correspondent):
+        for instance in klass.objects.all():
+            # update() writes the column directly without calling save().
+            row = klass.objects.filter(pk=instance.pk)
+            row.update(slug=slugify(instance.slug))
+
+
+class Migration(migrations.Migration):
+    # Sets editable=False on the slug fields, then runs the slug rewrite.
+    dependencies = [
+        ('documents', '0021_document_storage_type'),
+    ]
+    # The RunPython step is listed last; its reverse operation is a no-op.
+    operations = [
+        migrations.AlterModelOptions(
+            name='tag',
+            options={'ordering': ('name',)},
+        ),
+        migrations.AlterField(
+            model_name='correspondent',
+            name='slug',
+            field=models.SlugField(blank=True, editable=False),
+        ),
+        migrations.AlterField(
+            model_name='document',
+            name='file_type',
+            field=models.CharField(choices=[('pdf', 'PDF'), ('png', 'PNG'), ('jpg', 'JPG'), ('gif', 'GIF'), ('tiff', 'TIFF'), ('txt', 'TXT'), ('csv', 'CSV'), ('md', 'MD')], editable=False, max_length=4),
+        ),
+        migrations.AlterField(
+            model_name='tag',
+            name='slug',
+            field=models.SlugField(blank=True, editable=False),
+        ),
+        migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop)
+    ]
diff --git a/src/documents/models.py b/src/documents/models.py
index c66bb5b0f..37c1cfdbf 100644
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -11,6 +11,7 @@ from django.conf import settings
 from django.db import models
 from django.template.defaultfilters import slugify
 from django.utils import timezone
+from django.utils.text import slugify
 from fuzzywuzzy import fuzz
 
 from .managers import LogManager
@@ -37,7 +38,7 @@ class MatchingModel(models.Model):
     )
 
     name = models.CharField(max_length=128, unique=True)
-    slug = models.SlugField(blank=True)
+    slug = models.SlugField(blank=True, editable=False)
 
     match = models.CharField(max_length=256, blank=True)
     matching_algorithm = models.PositiveIntegerField(
@@ -147,9 +148,7 @@ class MatchingModel(models.Model):
     def save(self, *args, **kwargs):
 
         self.match = self.match.lower()
-
-        if not self.slug:
-            self.slug = slugify(self.name)
+        self.slug = slugify(self.name)
 
         models.Model.save(self, *args, **kwargs)
 
@@ -452,7 +451,7 @@ class FileInfo:
         r = []
         for t in tags.split(","):
             r.append(Tag.objects.get_or_create(
-                slug=t.lower(),
+                slug=slugify(t),
                 defaults={"name": t}
             )[0])
         return tuple(r)