Unified document matching: legacy matching and automatic matching now work alongside each other

This commit is contained in:
Jonas Winkler
2020-10-28 11:45:11 +01:00
parent 368b6d0512
commit dd16b7262e
16 changed files with 629 additions and 225 deletions

View File

@@ -12,9 +12,8 @@ def re_slug_all_the_things(apps, schema_editor):
Tag = apps.get_model("documents", "Tag")
Correspondent = apps.get_model("documents", "Correspondent")
DocumentType = apps.get_model("documents", "DocumentType")
for klass in (Tag, Correspondent, DocumentType):
for klass in (Tag, Correspondent):
for instance in klass.objects.all():
klass.objects.filter(
pk=instance.pk
@@ -26,7 +25,7 @@ def re_slug_all_the_things(apps, schema_editor):
class Migration(migrations.Migration):
dependencies = [
('documents', '1003_auto_20180904_1425'),
('documents', '0021_document_storage_type'),
]
operations = [
@@ -49,10 +48,5 @@ class Migration(migrations.Migration):
name='slug',
field=models.SlugField(blank=True, editable=False),
),
migrations.AlterField(
model_name='documenttype',
name='slug',
field=models.SlugField(blank=True, editable=False),
),
migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop)
]

View File

@@ -6,7 +6,7 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('documents', '0021_document_storage_type'),
('documents', '0022_auto_20181007_1420'),
]
operations = [

View File

@@ -16,7 +16,7 @@ class Migration(migrations.Migration):
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128, unique=True)),
('slug', models.SlugField(blank=True)),
('slug', models.SlugField(blank=True, editable=False)),
('match', models.CharField(blank=True, max_length=256)),
('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')),
('is_insensitive', models.BooleanField(default=True)),

View File

@@ -1,77 +0,0 @@
# Generated by Django 2.0.8 on 2018-09-04 14:25
from django.db import migrations, models
def transfer_automatic_classification(apps, schema_editor):
    """Seed ``automatic_classification`` from the legacy ``match`` field.

    For every Tag, Correspondent and DocumentType row, the flag is set to
    True when ``match`` holds a non-empty value and to False otherwise,
    and the row is then saved.

    Args:
        apps: Registry handed over by ``migrations.RunPython``; models are
            looked up through ``apps.get_model``.
        schema_editor: Supplied by ``RunPython``; not used here.
    """
    for model_name in ("Tag", "Correspondent", "DocumentType"):
        model_class = apps.get_model("documents", model_name)
        for instance in model_class.objects.all():
            # Both None and "" mean that no matching text was configured.
            instance.automatic_classification = bool(instance.match)
            instance.save()
def reverse_automatic_classification(apps, schema_editor):
    """Do nothing when the migration is reversed.

    Accepts the two arguments that ``migrations.RunPython`` passes to its
    callables and ignores both of them.
    """
    return None
class Migration(migrations.Migration):
    """Swap the per-object matching fields for a single boolean flag.

    The operations run in three stages, in this order:

    1. add ``automatic_classification`` to the correspondent, documenttype
       and tag models;
    2. populate it by running ``transfer_automatic_classification`` (with
       ``reverse_automatic_classification`` as the reverse callable);
    3. remove ``is_insensitive``, ``match`` and ``matching_algorithm`` from
       each of those models.
    """

    dependencies = [("documents", "1002_auto_20180823_1155")]

    operations = (
        # Stage 1: the new flag has to exist before the data step fills it.
        [
            migrations.AddField(
                model_name=model_name,
                name="automatic_classification",
                field=models.BooleanField(
                    default=False,
                    help_text="Automatically assign to newly added documents based on current usage in your document collection.",
                ),
            )
            for model_name in ("correspondent", "documenttype", "tag")
        ]
        # Stage 2: the data step reads ``match``, so it runs before removal.
        + [
            migrations.RunPython(
                transfer_automatic_classification,
                reverse_automatic_classification,
            )
        ]
        # Stage 3: drop the old fields, model by model, field by field.
        + [
            migrations.RemoveField(model_name=model_name, name=field_name)
            for model_name in ("correspondent", "documenttype", "tag")
            for field_name in ("is_insensitive", "match", "matching_algorithm")
        ]
    )