mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
combined migrations
This commit is contained in:
parent
4ba717b301
commit
26bb6cba1a
@ -1,73 +0,0 @@
|
||||
# Generated by Django 3.1.2 on 2020-10-29 14:29
|
||||
import os
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
def make_index(apps, schema_editor):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
documents = Document.objects.all()
|
||||
print()
|
||||
try:
|
||||
print(" --> Creating document index...")
|
||||
from whoosh.writing import AsyncWriter
|
||||
from documents import index
|
||||
ix = index.open_index(recreate=True)
|
||||
with AsyncWriter(ix) as writer:
|
||||
for document in documents:
|
||||
index.update_document(writer, document)
|
||||
except ImportError:
|
||||
# index may not be relevant anymore
|
||||
print(" --> Cannot create document index.")
|
||||
|
||||
|
||||
def restore_filenames(apps, schema_editor):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
for doc in Document.objects.all():
|
||||
file_name = "{:07}.{}".format(doc.pk, doc.file_type)
|
||||
if doc.storage_type == "gpg":
|
||||
file_name += ".gpg"
|
||||
|
||||
if not doc.filename == file_name:
|
||||
try:
|
||||
print("file was renamed, restoring {} to {}".format(doc.filename, file_name))
|
||||
os.rename(os.path.join(settings.ORIGINALS_DIR, doc.filename),
|
||||
os.path.join(settings.ORIGINALS_DIR, file_name))
|
||||
except PermissionError:
|
||||
pass
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
|
||||
def initialize_document_classifier(apps, schema_editor):
|
||||
try:
|
||||
print("Initalizing document classifier...")
|
||||
from documents.classifier import DocumentClassifier
|
||||
classifier = DocumentClassifier()
|
||||
try:
|
||||
classifier.train()
|
||||
classifier.save_classifier()
|
||||
except Exception as e:
|
||||
print("Classifier error: {}".format(e))
|
||||
except ImportError:
|
||||
print("Document classifier not found, skipping")
|
||||
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0023_document_current_filename'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RunPython(make_index, migrations.RunPython.noop),
|
||||
migrations.RunPython(restore_filenames),
|
||||
migrations.RunPython(initialize_document_classifier, migrations.RunPython.noop),
|
||||
migrations.RemoveField(
|
||||
model_name='document',
|
||||
name='filename',
|
||||
),
|
||||
]
|
148
src/documents/migrations/1000_update_paperless_all.py
Normal file
148
src/documents/migrations/1000_update_paperless_all.py
Normal file
@ -0,0 +1,148 @@
|
||||
# Generated by Django 3.1.3 on 2020-11-07 12:35
|
||||
import os
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
def make_index(apps, schema_editor):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
documents = Document.objects.all()
|
||||
print()
|
||||
try:
|
||||
print(" --> Creating document index...")
|
||||
from whoosh.writing import AsyncWriter
|
||||
from documents import index
|
||||
ix = index.open_index(recreate=True)
|
||||
with AsyncWriter(ix) as writer:
|
||||
for document in documents:
|
||||
index.update_document(writer, document)
|
||||
except ImportError:
|
||||
# index may not be relevant anymore
|
||||
print(" --> Cannot create document index.")
|
||||
|
||||
|
||||
def restore_filenames(apps, schema_editor):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
|
||||
rename_operations = []
|
||||
|
||||
for doc in Document.objects.all():
|
||||
file_name = "{:07}.{}".format(doc.pk, doc.file_type)
|
||||
if doc.storage_type == "gpg":
|
||||
file_name += ".gpg"
|
||||
|
||||
if not doc.filename == file_name:
|
||||
try:
|
||||
src = os.path.join(settings.ORIGINALS_DIR, doc.filename)
|
||||
dst = os.path.join(settings.ORIGINALS_DIR, file_name)
|
||||
if os.path.exists(dst):
|
||||
raise Exception("Cannot move {}, {} already exists!".format(src, dst))
|
||||
if not os.path.exists(src):
|
||||
raise Exception("Cannot move {}, file does not exist! (this is bad, one of your documents is missing".format(src))
|
||||
|
||||
rename_operations.append( (src,dst) )
|
||||
except (PermissionError, FileNotFoundError) as e:
|
||||
raise Exception(e)
|
||||
|
||||
for (src, dst) in rename_operations:
|
||||
print("file was renamed, restoring {} to {}".format(src, dst))
|
||||
os.rename(src, dst)
|
||||
|
||||
|
||||
def initialize_document_classifier(apps, schema_editor):
|
||||
try:
|
||||
print("Initalizing document classifier...")
|
||||
from documents.classifier import DocumentClassifier
|
||||
classifier = DocumentClassifier()
|
||||
try:
|
||||
classifier.train()
|
||||
classifier.save_classifier()
|
||||
except Exception as e:
|
||||
print("Classifier error: {}".format(e))
|
||||
except ImportError:
|
||||
print("Document classifier not found, skipping")
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0023_document_current_filename'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RunPython(
|
||||
code=restore_filenames,
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name='document',
|
||||
name='filename',
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='archive_serial_number',
|
||||
field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='is_inbox_tag',
|
||||
field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'),
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='DocumentType',
|
||||
fields=[
|
||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('name', models.CharField(max_length=128, unique=True)),
|
||||
('slug', models.SlugField(blank=True, editable=False)),
|
||||
('match', models.CharField(blank=True, max_length=256)),
|
||||
('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')),
|
||||
('is_insensitive', models.BooleanField(default=True)),
|
||||
],
|
||||
options={
|
||||
'abstract': False,
|
||||
'ordering': ('name',),
|
||||
},
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='document_type',
|
||||
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.documenttype'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='correspondent',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='content',
|
||||
field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.'),
|
||||
),
|
||||
migrations.AlterModelOptions(
|
||||
name='log',
|
||||
options={'ordering': ('-created',)},
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name='log',
|
||||
name='modified',
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='log',
|
||||
name='group',
|
||||
field=models.UUIDField(blank=True, null=True),
|
||||
),
|
||||
migrations.RunPython(
|
||||
code=make_index,
|
||||
reverse_code=django.db.migrations.operations.special.RunPython.noop,
|
||||
),
|
||||
migrations.RunPython(
|
||||
code=initialize_document_classifier,
|
||||
reverse_code=django.db.migrations.operations.special.RunPython.noop,
|
||||
),
|
||||
]
|
@ -1,23 +0,0 @@
|
||||
# Generated by Django 2.0.7 on 2018-07-12 09:52
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1000_update_paperless'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='archive_serial_number',
|
||||
field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='tag',
|
||||
name='is_inbox_tag',
|
||||
field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'),
|
||||
),
|
||||
]
|
@ -1,33 +0,0 @@
|
||||
# Generated by Django 2.0.7 on 2018-08-23 11:55
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1001_workflow_improvements'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='DocumentType',
|
||||
fields=[
|
||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||
('name', models.CharField(max_length=128, unique=True)),
|
||||
('slug', models.SlugField(blank=True, editable=False)),
|
||||
('match', models.CharField(blank=True, max_length=256)),
|
||||
('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')),
|
||||
('is_insensitive', models.BooleanField(default=True)),
|
||||
],
|
||||
options={
|
||||
'abstract': False,
|
||||
},
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='document',
|
||||
name='document_type',
|
||||
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.DocumentType'),
|
||||
),
|
||||
]
|
@ -1,32 +0,0 @@
|
||||
# Generated by Django 3.1.2 on 2020-10-28 17:51
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1002_auto_20180823_1155'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name='documenttype',
|
||||
options={'ordering': ('name',)},
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='correspondent',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='documenttype',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='matching_algorithm',
|
||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
|
||||
),
|
||||
]
|
@ -1,18 +0,0 @@
|
||||
# Generated by Django 3.1.2 on 2020-10-29 13:31
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1003_auto_20201028_1751'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='content',
|
||||
field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.'),
|
||||
),
|
||||
]
|
@ -1,26 +0,0 @@
|
||||
# Generated by Django 3.1.2 on 2020-11-02 00:07
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1004_auto_20201029_1331'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name='log',
|
||||
options={'ordering': ('-created',)},
|
||||
),
|
||||
migrations.RemoveField(
|
||||
model_name='log',
|
||||
name='modified',
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='log',
|
||||
name='group',
|
||||
field=models.UUIDField(blank=True, null=True),
|
||||
),
|
||||
]
|
Loading…
x
Reference in New Issue
Block a user