From 26bb6cba1a6dc08bb592a19eacc7ff0087d01db8 Mon Sep 17 00:00:00 2001 From: Jonas Winkler Date: Sat, 7 Nov 2020 23:22:44 +0100 Subject: [PATCH] combined migrations --- .../migrations/1000_update_paperless.py | 73 --------- .../migrations/1000_update_paperless_all.py | 148 ++++++++++++++++++ .../migrations/1001_workflow_improvements.py | 23 --- .../migrations/1002_auto_20180823_1155.py | 33 ---- .../migrations/1003_auto_20201028_1751.py | 32 ---- .../migrations/1004_auto_20201029_1331.py | 18 --- .../migrations/1005_auto_20201102_0007.py | 26 --- 7 files changed, 148 insertions(+), 205 deletions(-) delete mode 100644 src/documents/migrations/1000_update_paperless.py create mode 100644 src/documents/migrations/1000_update_paperless_all.py delete mode 100755 src/documents/migrations/1001_workflow_improvements.py delete mode 100644 src/documents/migrations/1002_auto_20180823_1155.py delete mode 100644 src/documents/migrations/1003_auto_20201028_1751.py delete mode 100644 src/documents/migrations/1004_auto_20201029_1331.py delete mode 100644 src/documents/migrations/1005_auto_20201102_0007.py diff --git a/src/documents/migrations/1000_update_paperless.py b/src/documents/migrations/1000_update_paperless.py deleted file mode 100644 index 900510c72..000000000 --- a/src/documents/migrations/1000_update_paperless.py +++ /dev/null @@ -1,73 +0,0 @@ -# Generated by Django 3.1.2 on 2020-10-29 14:29 -import os - -from django.db import migrations - -from django.conf import settings - - -def make_index(apps, schema_editor): - Document = apps.get_model("documents", "Document") - documents = Document.objects.all() - print() - try: - print(" --> Creating document index...") - from whoosh.writing import AsyncWriter - from documents import index - ix = index.open_index(recreate=True) - with AsyncWriter(ix) as writer: - for document in documents: - index.update_document(writer, document) - except ImportError: - # index may not be relevant anymore - print(" --> Cannot create document index.") - - -def restore_filenames(apps, schema_editor): - Document = apps.get_model("documents", "Document") - for doc in Document.objects.all(): - file_name = "{:07}.{}".format(doc.pk, doc.file_type) - if doc.storage_type == "gpg": - file_name += ".gpg" - - if not doc.filename == file_name: - try: - print("file was renamed, restoring {} to {}".format(doc.filename, file_name)) - os.rename(os.path.join(settings.ORIGINALS_DIR, doc.filename), - os.path.join(settings.ORIGINALS_DIR, file_name)) - except PermissionError: - pass - except FileNotFoundError: - pass - - -def initialize_document_classifier(apps, schema_editor): - try: - print("Initalizing document classifier...") - from documents.classifier import DocumentClassifier - classifier = DocumentClassifier() - try: - classifier.train() - classifier.save_classifier() - except Exception as e: - print("Classifier error: {}".format(e)) - except ImportError: - print("Document classifier not found, skipping") - - - -class Migration(migrations.Migration): - - dependencies = [ - ('documents', '0023_document_current_filename'), - ] - - operations = [ - migrations.RunPython(make_index, migrations.RunPython.noop), - migrations.RunPython(restore_filenames), - migrations.RunPython(initialize_document_classifier, migrations.RunPython.noop), - migrations.RemoveField( - model_name='document', - name='filename', - ), - ] diff --git a/src/documents/migrations/1000_update_paperless_all.py b/src/documents/migrations/1000_update_paperless_all.py new file mode 100644 index 000000000..79e2b3668 --- /dev/null +++ b/src/documents/migrations/1000_update_paperless_all.py @@ -0,0 +1,148 @@ +# Generated by Django 3.1.3 on 2020-11-07 12:35 +import os + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +def make_index(apps, schema_editor): + Document = apps.get_model("documents", "Document") + documents = Document.objects.all() + print() + try: + print(" --> Creating document index...") + from whoosh.writing import AsyncWriter + from documents import index + ix = index.open_index(recreate=True) + with AsyncWriter(ix) as writer: + for document in documents: + index.update_document(writer, document) + except ImportError: + # index may not be relevant anymore + print(" --> Cannot create document index.") + + +def restore_filenames(apps, schema_editor): + Document = apps.get_model("documents", "Document") + + rename_operations = [] + + for doc in Document.objects.all(): + file_name = "{:07}.{}".format(doc.pk, doc.file_type) + if doc.storage_type == "gpg": + file_name += ".gpg" + + if not doc.filename == file_name: + try: + src = os.path.join(settings.ORIGINALS_DIR, doc.filename) + dst = os.path.join(settings.ORIGINALS_DIR, file_name) + if os.path.exists(dst): + raise Exception("Cannot move {}, {} already exists!".format(src, dst)) + if not os.path.exists(src): + raise Exception("Cannot move {}, file does not exist! (this is bad, one of your documents is missing".format(src)) + + rename_operations.append( (src,dst) ) + except (PermissionError, FileNotFoundError) as e: + raise Exception(e) + + for (src, dst) in rename_operations: + print("file was renamed, restoring {} to {}".format(src, dst)) + os.rename(src, dst) + + +def initialize_document_classifier(apps, schema_editor): + try: + print("Initalizing document classifier...") + from documents.classifier import DocumentClassifier + classifier = DocumentClassifier() + try: + classifier.train() + classifier.save_classifier() + except Exception as e: + print("Classifier error: {}".format(e)) + except ImportError: + print("Document classifier not found, skipping") + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0023_document_current_filename'), + ] + + operations = [ + migrations.RunPython( + code=restore_filenames, + ), + migrations.RemoveField( + model_name='document', + name='filename', + ), + migrations.AddField( + model_name='document', + name='archive_serial_number', + field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True), + ), + migrations.AddField( + model_name='tag', + name='is_inbox_tag', + field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'), + ), + migrations.CreateModel( + name='DocumentType', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=128, unique=True)), + ('slug', models.SlugField(blank=True, editable=False)), + ('match', models.CharField(blank=True, max_length=256)), + ('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')), + ('is_insensitive', models.BooleanField(default=True)), + ], + options={ + 'abstract': False, + 'ordering': ('name',), + }, + ), + migrations.AddField( + model_name='document', + name='document_type', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.documenttype'), + ), + migrations.AlterField( + model_name='correspondent', + name='matching_algorithm', + field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'), + ), + migrations.AlterField( + model_name='tag', + name='matching_algorithm', + field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'), + ), + migrations.AlterField( + model_name='document', + name='content', + field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.'), + ), + migrations.AlterModelOptions( + name='log', + options={'ordering': ('-created',)}, + ), + migrations.RemoveField( + model_name='log', + name='modified', + ), + migrations.AlterField( + model_name='log', + name='group', + field=models.UUIDField(blank=True, null=True), + ), + migrations.RunPython( + code=make_index, + reverse_code=django.db.migrations.operations.special.RunPython.noop, + ), + migrations.RunPython( + code=initialize_document_classifier, + reverse_code=django.db.migrations.operations.special.RunPython.noop, + ), + ] diff --git a/src/documents/migrations/1001_workflow_improvements.py b/src/documents/migrations/1001_workflow_improvements.py deleted file mode 100755 index 94ad8135d..000000000 --- a/src/documents/migrations/1001_workflow_improvements.py +++ /dev/null @@ -1,23 +0,0 @@ -# Generated by Django 2.0.7 on 2018-07-12 09:52 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('documents', '1000_update_paperless'), - ] - - operations = [ - migrations.AddField( - model_name='document', - name='archive_serial_number', - field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True), - ), - migrations.AddField( - model_name='tag', - name='is_inbox_tag', - field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'), - ), - ] diff --git a/src/documents/migrations/1002_auto_20180823_1155.py b/src/documents/migrations/1002_auto_20180823_1155.py deleted file mode 100644 index d44a131ab..000000000 --- a/src/documents/migrations/1002_auto_20180823_1155.py +++ /dev/null @@ -1,33 +0,0 @@ -# Generated by Django 2.0.7 on 2018-08-23 11:55 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ('documents', '1001_workflow_improvements'), - ] - - operations = [ - migrations.CreateModel( - name='DocumentType', - fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('name', models.CharField(max_length=128, unique=True)), - ('slug', models.SlugField(blank=True, editable=False)), - ('match', models.CharField(blank=True, max_length=256)), - ('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')), - ('is_insensitive', models.BooleanField(default=True)), - ], - options={ - 'abstract': False, - }, - ), - migrations.AddField( - model_name='document', - name='document_type', - field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.DocumentType'), - ), - ] diff --git a/src/documents/migrations/1003_auto_20201028_1751.py b/src/documents/migrations/1003_auto_20201028_1751.py deleted file mode 100644 index 66dd329e1..000000000 --- a/src/documents/migrations/1003_auto_20201028_1751.py +++ /dev/null @@ -1,32 +0,0 @@ -# Generated by Django 3.1.2 on 2020-10-28 17:51 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('documents', '1002_auto_20180823_1155'), - ] - - operations = [ - migrations.AlterModelOptions( - name='documenttype', - options={'ordering': ('name',)}, - ), - migrations.AlterField( - model_name='correspondent', - name='matching_algorithm', - field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'), - ), - migrations.AlterField( - model_name='documenttype', - name='matching_algorithm', - field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'), - ), - migrations.AlterField( - model_name='tag', - name='matching_algorithm', - field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'), - ), - ] diff --git a/src/documents/migrations/1004_auto_20201029_1331.py b/src/documents/migrations/1004_auto_20201029_1331.py deleted file mode 100644 index b845bde33..000000000 --- a/src/documents/migrations/1004_auto_20201029_1331.py +++ /dev/null @@ -1,18 +0,0 @@ -# Generated by Django 3.1.2 on 2020-10-29 13:31 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('documents', '1003_auto_20201028_1751'), - ] - - operations = [ - migrations.AlterField( - model_name='document', - name='content', - field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.'), - ), - ] diff --git a/src/documents/migrations/1005_auto_20201102_0007.py b/src/documents/migrations/1005_auto_20201102_0007.py deleted file mode 100644 index 146cc0b5a..000000000 --- a/src/documents/migrations/1005_auto_20201102_0007.py +++ /dev/null @@ -1,26 +0,0 @@ -# Generated by Django 3.1.2 on 2020-11-02 00:07 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('documents', '1004_auto_20201029_1331'), - ] - - operations = [ - migrations.AlterModelOptions( - name='log', - options={'ordering': ('-created',)}, - ), - migrations.RemoveField( - model_name='log', - name='modified', - ), - migrations.AlterField( - model_name='log', - name='group', - field=models.UUIDField(blank=True, null=True), - ), - ]