# Generated by Django 1.9.4 on 2016-03-28 19:09
import hashlib
import os

import django.utils.timezone
import gnupg
from django.conf import settings
from django.db import migrations
from django.db import models
from django.template.defaultfilters import slugify
from django.utils.termcolors import colorize as colourise  # Spelling hurts me


class GnuPG:
    """
    A handy singleton to use when handling encrypted files.

    The shared ``gpg`` handle is created once, when this class body is
    executed, using ``settings.GNUPG_HOME`` as the GnuPG home directory.
    """

    gpg = gnupg.GPG(gnupghome=settings.GNUPG_HOME)

    @classmethod
    def decrypted(cls, file_handle):
        """
        Return the decrypted bytes of ``file_handle``.

        The file is decrypted with ``settings.PASSPHRASE``.

        Raises:
            RuntimeError: if GnuPG reports that the decryption did not
                succeed.
        """
        result = cls.gpg.decrypt_file(file_handle, passphrase=settings.PASSPHRASE)
        # python-gnupg signals failure through the result object instead of
        # raising, so without this check the bytes of a failed decryption
        # would be hashed and stored as if they were the document itself.
        if not result.ok:
            source = getattr(file_handle, "name", file_handle)
            raise RuntimeError(
                f"GnuPG could not decrypt {source!r}: {result.status}",
            )
        return result.data

    @classmethod
    def encrypted(cls, file_handle):
        """
        Return ``file_handle``'s content symmetrically encrypted with
        ``settings.PASSPHRASE``.
        """
        return cls.gpg.encrypt_file(
            file_handle,
            recipients=None,
            passphrase=settings.PASSPHRASE,
            symmetric=True,
        ).data


class Document:
    """
    Django's migrations restrict access to model methods, so this is a snapshot
    of the methods that existed at the time this migration was written, since
    we need to make use of a lot of these shortcuts here.
    """

    def __init__(self, doc):
        """Copy the fields this migration needs from the model instance."""
        self.pk = doc.pk
        self.correspondent = doc.correspondent
        self.title = doc.title
        self.file_type = doc.file_type
        self.tags = doc.tags
        self.created = doc.created

    def __str__(self):
        """
        Return ``<created>: <correspondent> - <title>``, dropping whichever
        of correspondent/title is empty (or both).
        """
        created = self.created.strftime("%Y%m%d%H%M%S")
        if self.correspondent and self.title:
            return f"{created}: {self.correspondent} - {self.title}"
        if self.correspondent or self.title:
            return f"{created}: {self.correspondent or self.title}"
        return str(created)

    @property
    def source_path(self):
        """Path of the encrypted original below ``settings.MEDIA_ROOT``."""
        return os.path.join(
            settings.MEDIA_ROOT,
            "documents",
            "originals",
            f"{self.pk:07}.{self.file_type}.gpg",
        )

    @property
    def source_file(self):
        """
        A freshly opened binary handle on ``source_path``.

        Every access opens a new handle; the caller is responsible for
        closing it (e.g. by using it in a ``with`` statement).
        """
        return open(self.source_path, "rb")

    @property
    def file_name(self):
        """Slugified ``str(self)`` with the file type appended as extension."""
        return slugify(str(self)) + "." + self.file_type


def set_checksums(apps, schema_editor):
    """
    Compute and store the MD5 checksum of every existing document.

    Each document's encrypted original is decrypted and hashed, and the
    hex digest is written to the document's ``checksum`` column.

    Raises:
        RuntimeError: if two documents have the same checksum (the message
            names both and shows how to delete the second one), or if a
            document cannot be decrypted.
    """
    document_model = apps.get_model("documents", "Document")

    # Nothing to do (and nothing to announce) on an empty installation.
    if not document_model.objects.all().exists():
        return

    print(
        colourise(
            "\n\n"
            " This is a one-time only migration to generate checksums for all\n"
            " of your existing documents. If you have a lot of documents\n"
            " though, this may take a while, so a coffee break may be in\n"
            " order."
            "\n",
            opts=("bold",),
        ),
    )

    # checksum -> (pk, file_name) of the first document seen with it.
    sums = {}

    # NOTE: deliberately not ``.iterator()``: rows of this table are updated
    # inside the loop.
    for d in document_model.objects.all():
        document = Document(d)

        print(
            " {} {} {}".format(
                colourise("*", fg="green"),
                colourise("Generating a checksum for", fg="white"),
                colourise(document.file_name, fg="cyan"),
            ),
        )

        with document.source_file as encrypted:
            checksum = hashlib.md5(GnuPG.decrypted(encrypted)).hexdigest()

        if checksum in sums:
            error = "\n{line}{p1}\n\n{doc1}\n{doc2}\n\n{p2}\n\n{code}\n\n{p3}{line}".format(
                p1=colourise(
                    "It appears that you have two identical documents in your collection and \nPaperless no longer supports this (see issue #97). The documents in question\nare:",
                    fg="yellow",
                ),
                p2=colourise(
                    "To fix this problem, you'll have to remove one of them from the database, a task\nmost easily done by running the following command in the same\ndirectory as manage.py:",
                    fg="yellow",
                ),
                p3=colourise(
                    "When that's finished, re-run the migrate, and provided that there aren't any\nother duplicates, you should be good to go.",
                    fg="yellow",
                ),
                doc1=colourise(
                    f" * {sums[checksum][1]} (id: {sums[checksum][0]})",
                    fg="red",
                ),
                doc2=colourise(
                    f" * {document.file_name} (id: {document.pk})",
                    fg="red",
                ),
                code=colourise(
                    f" $ echo 'DELETE FROM documents_document WHERE id = {document.pk};' | ./manage.py dbshell",
                    fg="green",
                ),
                line=colourise("\n{}\n".format("=" * 80), fg="white", opts=("bold",)),
            )
            raise RuntimeError(error)

        sums[checksum] = (document.pk, document.file_name)

        # Update through the queryset so only the checksum column is written.
        document_model.objects.filter(pk=document.pk).update(checksum=checksum)


def do_nothing(apps, schema_editor):
    """Reverse operation for ``set_checksums``: intentionally a no-op."""
    pass


class Migration(migrations.Migration):
    """
    Add the ``checksum`` field to ``Document``, populate it for existing
    rows, and index the ``created`` and ``modified`` timestamps.
    """

    dependencies = [
        ("documents", "0013_auto_20160325_2111"),
    ]

    operations = [
        # Added with a placeholder default; real values are filled in by the
        # RunPython step that follows.
        migrations.AddField(
            model_name="document",
            name="checksum",
            field=models.CharField(
                default="-",
                db_index=True,
                editable=False,
                max_length=32,
                help_text="The checksum of the original document (before it "
                "was encrypted). We use this to prevent duplicate "
                "document imports.",
            ),
            preserve_default=False,
        ),
        migrations.RunPython(set_checksums, do_nothing),
        migrations.AlterField(
            model_name="document",
            name="created",
            field=models.DateTimeField(
                db_index=True,
                default=django.utils.timezone.now,
            ),
        ),
        migrations.AlterField(
            model_name="document",
            name="modified",
            field=models.DateTimeField(auto_now=True, db_index=True),
        ),
    ]