combined migrations

This commit is contained in:
Jonas Winkler 2020-11-07 23:22:44 +01:00
parent 4ba717b301
commit 26bb6cba1a
7 changed files with 148 additions and 205 deletions

View File

@ -1,73 +0,0 @@
# Generated by Django 3.1.2 on 2020-10-29 14:29
import os
from django.db import migrations
from django.conf import settings
def make_index(apps, schema_editor):
Document = apps.get_model("documents", "Document")
documents = Document.objects.all()
print()
try:
print(" --> Creating document index...")
from whoosh.writing import AsyncWriter
from documents import index
ix = index.open_index(recreate=True)
with AsyncWriter(ix) as writer:
for document in documents:
index.update_document(writer, document)
except ImportError:
# index may not be relevant anymore
print(" --> Cannot create document index.")
def restore_filenames(apps, schema_editor):
Document = apps.get_model("documents", "Document")
for doc in Document.objects.all():
file_name = "{:07}.{}".format(doc.pk, doc.file_type)
if doc.storage_type == "gpg":
file_name += ".gpg"
if not doc.filename == file_name:
try:
print("file was renamed, restoring {} to {}".format(doc.filename, file_name))
os.rename(os.path.join(settings.ORIGINALS_DIR, doc.filename),
os.path.join(settings.ORIGINALS_DIR, file_name))
except PermissionError:
pass
except FileNotFoundError:
pass
def initialize_document_classifier(apps, schema_editor):
try:
print("Initalizing document classifier...")
from documents.classifier import DocumentClassifier
classifier = DocumentClassifier()
try:
classifier.train()
classifier.save_classifier()
except Exception as e:
print("Classifier error: {}".format(e))
except ImportError:
print("Document classifier not found, skipping")
class Migration(migrations.Migration):
dependencies = [
('documents', '0023_document_current_filename'),
]
operations = [
migrations.RunPython(make_index, migrations.RunPython.noop),
migrations.RunPython(restore_filenames),
migrations.RunPython(initialize_document_classifier, migrations.RunPython.noop),
migrations.RemoveField(
model_name='document',
name='filename',
),
]

View File

@ -0,0 +1,148 @@
# Generated by Django 3.1.3 on 2020-11-07 12:35
import os
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
def make_index(apps, schema_editor):
Document = apps.get_model("documents", "Document")
documents = Document.objects.all()
print()
try:
print(" --> Creating document index...")
from whoosh.writing import AsyncWriter
from documents import index
ix = index.open_index(recreate=True)
with AsyncWriter(ix) as writer:
for document in documents:
index.update_document(writer, document)
except ImportError:
# index may not be relevant anymore
print(" --> Cannot create document index.")
def restore_filenames(apps, schema_editor):
Document = apps.get_model("documents", "Document")
rename_operations = []
for doc in Document.objects.all():
file_name = "{:07}.{}".format(doc.pk, doc.file_type)
if doc.storage_type == "gpg":
file_name += ".gpg"
if not doc.filename == file_name:
try:
src = os.path.join(settings.ORIGINALS_DIR, doc.filename)
dst = os.path.join(settings.ORIGINALS_DIR, file_name)
if os.path.exists(dst):
raise Exception("Cannot move {}, {} already exists!".format(src, dst))
if not os.path.exists(src):
raise Exception("Cannot move {}, file does not exist! (this is bad, one of your documents is missing".format(src))
rename_operations.append( (src,dst) )
except (PermissionError, FileNotFoundError) as e:
raise Exception(e)
for (src, dst) in rename_operations:
print("file was renamed, restoring {} to {}".format(src, dst))
os.rename(src, dst)
def initialize_document_classifier(apps, schema_editor):
try:
print("Initalizing document classifier...")
from documents.classifier import DocumentClassifier
classifier = DocumentClassifier()
try:
classifier.train()
classifier.save_classifier()
except Exception as e:
print("Classifier error: {}".format(e))
except ImportError:
print("Document classifier not found, skipping")
class Migration(migrations.Migration):
dependencies = [
('documents', '0023_document_current_filename'),
]
operations = [
migrations.RunPython(
code=restore_filenames,
),
migrations.RemoveField(
model_name='document',
name='filename',
),
migrations.AddField(
model_name='document',
name='archive_serial_number',
field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True),
),
migrations.AddField(
model_name='tag',
name='is_inbox_tag',
field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'),
),
migrations.CreateModel(
name='DocumentType',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128, unique=True)),
('slug', models.SlugField(blank=True, editable=False)),
('match', models.CharField(blank=True, max_length=256)),
('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')),
('is_insensitive', models.BooleanField(default=True)),
],
options={
'abstract': False,
'ordering': ('name',),
},
),
migrations.AddField(
model_name='document',
name='document_type',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.documenttype'),
),
migrations.AlterField(
model_name='correspondent',
name='matching_algorithm',
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
),
migrations.AlterField(
model_name='tag',
name='matching_algorithm',
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
),
migrations.AlterField(
model_name='document',
name='content',
field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.'),
),
migrations.AlterModelOptions(
name='log',
options={'ordering': ('-created',)},
),
migrations.RemoveField(
model_name='log',
name='modified',
),
migrations.AlterField(
model_name='log',
name='group',
field=models.UUIDField(blank=True, null=True),
),
migrations.RunPython(
code=make_index,
reverse_code=django.db.migrations.operations.special.RunPython.noop,
),
migrations.RunPython(
code=initialize_document_classifier,
reverse_code=django.db.migrations.operations.special.RunPython.noop,
),
]

View File

@ -1,23 +0,0 @@
# Generated by Django 2.0.7 on 2018-07-12 09:52
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('documents', '1000_update_paperless'),
]
operations = [
migrations.AddField(
model_name='document',
name='archive_serial_number',
field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True),
),
migrations.AddField(
model_name='tag',
name='is_inbox_tag',
field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'),
),
]

View File

@ -1,33 +0,0 @@
# Generated by Django 2.0.7 on 2018-08-23 11:55
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
('documents', '1001_workflow_improvements'),
]
operations = [
migrations.CreateModel(
name='DocumentType',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128, unique=True)),
('slug', models.SlugField(blank=True, editable=False)),
('match', models.CharField(blank=True, max_length=256)),
('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')),
('is_insensitive', models.BooleanField(default=True)),
],
options={
'abstract': False,
},
),
migrations.AddField(
model_name='document',
name='document_type',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.DocumentType'),
),
]

View File

@ -1,32 +0,0 @@
# Generated by Django 3.1.2 on 2020-10-28 17:51
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('documents', '1002_auto_20180823_1155'),
]
operations = [
migrations.AlterModelOptions(
name='documenttype',
options={'ordering': ('name',)},
),
migrations.AlterField(
model_name='correspondent',
name='matching_algorithm',
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
),
migrations.AlterField(
model_name='documenttype',
name='matching_algorithm',
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
),
migrations.AlterField(
model_name='tag',
name='matching_algorithm',
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
),
]

View File

@ -1,18 +0,0 @@
# Generated by Django 3.1.2 on 2020-10-29 13:31
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('documents', '1003_auto_20201028_1751'),
]
operations = [
migrations.AlterField(
model_name='document',
name='content',
field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.'),
),
]

View File

@ -1,26 +0,0 @@
# Generated by Django 3.1.2 on 2020-11-02 00:07
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('documents', '1004_auto_20201029_1331'),
]
operations = [
migrations.AlterModelOptions(
name='log',
options={'ordering': ('-created',)},
),
migrations.RemoveField(
model_name='log',
name='modified',
),
migrations.AlterField(
model_name='log',
name='group',
field=models.UUIDField(blank=True, null=True),
),
]