From 8b2b7bbe6dbead7ebe5b8846a494fb698a6949ea Mon Sep 17 00:00:00 2001 From: Brandon Rothweiler Date: Tue, 21 Feb 2023 20:01:30 -0500 Subject: [PATCH] Add an option to disable matching --- docs/advanced_usage.md | 5 +- .../edit-dialog/edit-dialog.component.ts | 11 ++- .../management-list.component.ts | 3 + src-ui/src/app/data/matching-model.ts | 6 ++ src/documents/matching.py | 5 +- ...rrespondent_matching_algorithm_and_more.py | 81 +++++++++++++++++++ src/documents/models.py | 2 + 7 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 src/documents/migrations/1032_alter_correspondent_matching_algorithm_and_more.py diff --git a/docs/advanced_usage.md b/docs/advanced_usage.md index cd82ab78e..55ca8ee74 100644 --- a/docs/advanced_usage.md +++ b/docs/advanced_usage.md @@ -9,7 +9,7 @@ Paperless will compare the matching algorithms defined by every tag, correspondent, document type, and storage path in your database to see if they apply to the text in a document. In other words, if you define a tag called `Home Utility` that had a `match` property of `bc hydro` and -a `matching_algorithm` of `literal`, Paperless will automatically tag +a `matching_algorithm` of `Exact`, Paperless will automatically tag your newly-consumed document with your `Home Utility` tag so long as the text `bc hydro` appears in the body of the document somewhere. @@ -25,12 +25,13 @@ documents. The following algorithms are available: +- **None:** No matching will be performed. - **Any:** Looks for any occurrence of any word provided in match in the PDF. If you define the match as `Bank1 Bank2`, it will match documents containing either of these terms. - **All:** Requires that every word provided appears in the PDF, albeit not in the order provided. -- **Literal:** Matches only if the match appears exactly as provided +- **Exact:** Matches only if the match appears exactly as provided (i.e. preserve ordering) in the PDF. - **Regular expression:** Parses the match as a regular expression and tries to find a match within the document. diff --git a/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts b/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts index 056ad65a0..94fe79d2a 100644 --- a/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts +++ b/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts @@ -2,7 +2,11 @@ import { Directive, EventEmitter, Input, OnInit, Output } from '@angular/core' import { FormGroup } from '@angular/forms' import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap' import { Observable } from 'rxjs' -import { MATCHING_ALGORITHMS, MATCH_AUTO } from 'src/app/data/matching-model' +import { + MATCHING_ALGORITHMS, + MATCH_AUTO, + MATCH_NONE, +} from 'src/app/data/matching-model' import { ObjectWithId } from 'src/app/data/object-with-id' import { ObjectWithPermissions } from 'src/app/data/object-with-permissions' import { PaperlessUser } from 'src/app/data/paperless-user' @@ -91,7 +95,10 @@ export abstract class EditDialogComponent< } get patternRequired(): boolean { - return this.objectForm?.value.matching_algorithm !== MATCH_AUTO + return ( + this.objectForm?.value.matching_algorithm !== MATCH_AUTO && + this.objectForm?.value.matching_algorithm !== MATCH_NONE + ) } save() { diff --git a/src-ui/src/app/components/manage/management-list/management-list.component.ts b/src-ui/src/app/components/manage/management-list/management-list.component.ts index 0fa24e252..43a31f1ca 100644 --- a/src-ui/src/app/components/manage/management-list/management-list.component.ts +++ b/src-ui/src/app/components/manage/management-list/management-list.component.ts @@ -12,6 +12,7 @@ import { MatchingModel, MATCHING_ALGORITHMS, MATCH_AUTO, + MATCH_NONE, } from 'src/app/data/matching-model' import { ObjectWithId } from 'src/app/data/object-with-id' import { ObjectWithPermissions } from 'src/app/data/object-with-permissions' @@ -96,6 +97,8 @@ export abstract class ManagementListComponent getMatching(o: MatchingModel) { if (o.matching_algorithm == MATCH_AUTO) { return $localize`Automatic` + } else if (o.matching_algorithm == MATCH_NONE) { + return $localize`None` } else if (o.match && o.match.length > 0) { return `${ MATCHING_ALGORITHMS.find((a) => a.id == o.matching_algorithm).shortName diff --git a/src-ui/src/app/data/matching-model.ts b/src-ui/src/app/data/matching-model.ts index 387625b54..dc2f8298d 100644 --- a/src-ui/src/app/data/matching-model.ts +++ b/src-ui/src/app/data/matching-model.ts @@ -1,5 +1,6 @@ import { ObjectWithPermissions } from './object-with-permissions' +export const MATCH_NONE = 0 export const MATCH_ANY = 1 export const MATCH_ALL = 2 export const MATCH_LITERAL = 3 @@ -9,6 +10,11 @@ export const MATCH_AUTO = 6 export const DEFAULT_MATCHING_ALGORITHM = MATCH_AUTO export const MATCHING_ALGORITHMS = [ + { + id: MATCH_NONE, + shortName: $localize`None`, + name: $localize`None: Disable matching`, + }, { id: MATCH_ANY, shortName: $localize`Any word`, diff --git a/src/documents/matching.py b/src/documents/matching.py index 235263aa6..c38761afa 100644 --- a/src/documents/matching.py +++ b/src/documents/matching.py @@ -86,7 +86,10 @@ def matches(matching_model, document): if matching_model.is_insensitive: search_kwargs = {"flags": re.IGNORECASE} - if matching_model.matching_algorithm == MatchingModel.MATCH_ALL: + if matching_model.matching_algorithm == MatchingModel.MATCH_NONE: + return False + + elif matching_model.matching_algorithm == MatchingModel.MATCH_ALL: for word in _split_match(matching_model): search_result = re.search(rf"\b{word}\b", document_content, **search_kwargs) if not search_result: diff --git a/src/documents/migrations/1032_alter_correspondent_matching_algorithm_and_more.py b/src/documents/migrations/1032_alter_correspondent_matching_algorithm_and_more.py new file mode 100644 index 000000000..88aa7f2bc --- /dev/null +++ b/src/documents/migrations/1032_alter_correspondent_matching_algorithm_and_more.py @@ -0,0 +1,81 @@ +# Generated by Django 4.1.7 on 2023-02-22 00:45 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("documents", "1031_remove_savedview_user_correspondent_owner_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="correspondent", + name="matching_algorithm", + field=models.PositiveIntegerField( + choices=[ + (0, "None"), + (1, "Any word"), + (2, "All words"), + (3, "Exact match"), + (4, "Regular expression"), + (5, "Fuzzy word"), + (6, "Automatic"), + ], + default=1, + verbose_name="matching algorithm", + ), + ), + migrations.AlterField( + model_name="documenttype", + name="matching_algorithm", + field=models.PositiveIntegerField( + choices=[ + (0, "None"), + (1, "Any word"), + (2, "All words"), + (3, "Exact match"), + (4, "Regular expression"), + (5, "Fuzzy word"), + (6, "Automatic"), + ], + default=1, + verbose_name="matching algorithm", + ), + ), + migrations.AlterField( + model_name="storagepath", + name="matching_algorithm", + field=models.PositiveIntegerField( + choices=[ + (0, "None"), + (1, "Any word"), + (2, "All words"), + (3, "Exact match"), + (4, "Regular expression"), + (5, "Fuzzy word"), + (6, "Automatic"), + ], + default=1, + verbose_name="matching algorithm", + ), + ), + migrations.AlterField( + model_name="tag", + name="matching_algorithm", + field=models.PositiveIntegerField( + choices=[ + (0, "None"), + (1, "Any word"), + (2, "All words"), + (3, "Exact match"), + (4, "Regular expression"), + (5, "Fuzzy word"), + (6, "Automatic"), + ], + default=1, + verbose_name="matching algorithm", + ), + ), + ] diff --git a/src/documents/models.py b/src/documents/models.py index 77dc80944..177885de0 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -24,6 +24,7 @@ TASK_STATE_CHOICES = sorted(zip(ALL_STATES, ALL_STATES)) class MatchingModel(models.Model): + MATCH_NONE = 0 MATCH_ANY = 1 MATCH_ALL = 2 MATCH_LITERAL = 3 @@ -32,6 +33,7 @@ class MatchingModel(models.Model): MATCH_AUTO = 6 MATCHING_ALGORITHMS = ( + (MATCH_NONE, _("None")), (MATCH_ANY, _("Any word")), (MATCH_ALL, _("All words")), (MATCH_LITERAL, _("Exact match")),