From 8b2b7bbe6dbead7ebe5b8846a494fb698a6949ea Mon Sep 17 00:00:00 2001 From: Brandon Rothweiler Date: Tue, 21 Feb 2023 20:01:30 -0500 Subject: [PATCH 1/3] Add an option to disable matching --- docs/advanced_usage.md | 5 +- .../edit-dialog/edit-dialog.component.ts | 11 ++- .../management-list.component.ts | 3 + src-ui/src/app/data/matching-model.ts | 6 ++ src/documents/matching.py | 5 +- ...rrespondent_matching_algorithm_and_more.py | 81 +++++++++++++++++++ src/documents/models.py | 2 + 7 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 src/documents/migrations/1032_alter_correspondent_matching_algorithm_and_more.py diff --git a/docs/advanced_usage.md b/docs/advanced_usage.md index cd82ab78e..55ca8ee74 100644 --- a/docs/advanced_usage.md +++ b/docs/advanced_usage.md @@ -9,7 +9,7 @@ Paperless will compare the matching algorithms defined by every tag, correspondent, document type, and storage path in your database to see if they apply to the text in a document. In other words, if you define a tag called `Home Utility` that had a `match` property of `bc hydro` and -a `matching_algorithm` of `literal`, Paperless will automatically tag +a `matching_algorithm` of `Exact`, Paperless will automatically tag your newly-consumed document with your `Home Utility` tag so long as the text `bc hydro` appears in the body of the document somewhere. @@ -25,12 +25,13 @@ documents. The following algorithms are available: +- **None:** No matching will be performed. - **Any:** Looks for any occurrence of any word provided in match in the PDF. If you define the match as `Bank1 Bank2`, it will match documents containing either of these terms. - **All:** Requires that every word provided appears in the PDF, albeit not in the order provided. -- **Literal:** Matches only if the match appears exactly as provided +- **Exact:** Matches only if the match appears exactly as provided (i.e. preserve ordering) in the PDF. - **Regular expression:** Parses the match as a regular expression and tries to find a match within the document. diff --git a/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts b/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts index 056ad65a0..94fe79d2a 100644 --- a/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts +++ b/src-ui/src/app/components/common/edit-dialog/edit-dialog.component.ts @@ -2,7 +2,11 @@ import { Directive, EventEmitter, Input, OnInit, Output } from '@angular/core' import { FormGroup } from '@angular/forms' import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap' import { Observable } from 'rxjs' -import { MATCHING_ALGORITHMS, MATCH_AUTO } from 'src/app/data/matching-model' +import { + MATCHING_ALGORITHMS, + MATCH_AUTO, + MATCH_NONE, +} from 'src/app/data/matching-model' import { ObjectWithId } from 'src/app/data/object-with-id' import { ObjectWithPermissions } from 'src/app/data/object-with-permissions' import { PaperlessUser } from 'src/app/data/paperless-user' @@ -91,7 +95,10 @@ export abstract class EditDialogComponent< } get patternRequired(): boolean { - return this.objectForm?.value.matching_algorithm !== MATCH_AUTO + return ( + this.objectForm?.value.matching_algorithm !== MATCH_AUTO && + this.objectForm?.value.matching_algorithm !== MATCH_NONE + ) } save() { diff --git a/src-ui/src/app/components/manage/management-list/management-list.component.ts b/src-ui/src/app/components/manage/management-list/management-list.component.ts index 0fa24e252..43a31f1ca 100644 --- a/src-ui/src/app/components/manage/management-list/management-list.component.ts +++ b/src-ui/src/app/components/manage/management-list/management-list.component.ts @@ -12,6 +12,7 @@ import { MatchingModel, MATCHING_ALGORITHMS, MATCH_AUTO, + MATCH_NONE, } from 'src/app/data/matching-model' import { ObjectWithId } from 'src/app/data/object-with-id' import { ObjectWithPermissions } from 'src/app/data/object-with-permissions' @@ -96,6 +97,8 @@ export abstract class ManagementListComponent getMatching(o: MatchingModel) { if (o.matching_algorithm == MATCH_AUTO) { return $localize`Automatic` + } else if (o.matching_algorithm == MATCH_NONE) { + return $localize`None` } else if (o.match && o.match.length > 0) { return `${ MATCHING_ALGORITHMS.find((a) => a.id == o.matching_algorithm).shortName diff --git a/src-ui/src/app/data/matching-model.ts b/src-ui/src/app/data/matching-model.ts index 387625b54..dc2f8298d 100644 --- a/src-ui/src/app/data/matching-model.ts +++ b/src-ui/src/app/data/matching-model.ts @@ -1,5 +1,6 @@ import { ObjectWithPermissions } from './object-with-permissions' +export const MATCH_NONE = 0 export const MATCH_ANY = 1 export const MATCH_ALL = 2 export const MATCH_LITERAL = 3 @@ -9,6 +10,11 @@ export const MATCH_AUTO = 6 export const DEFAULT_MATCHING_ALGORITHM = MATCH_AUTO export const MATCHING_ALGORITHMS = [ + { + id: MATCH_NONE, + shortName: $localize`None`, + name: $localize`None: Disable matching`, + }, { id: MATCH_ANY, shortName: $localize`Any word`, diff --git a/src/documents/matching.py b/src/documents/matching.py index 235263aa6..c38761afa 100644 --- a/src/documents/matching.py +++ b/src/documents/matching.py @@ -86,7 +86,10 @@ def matches(matching_model, document): if matching_model.is_insensitive: search_kwargs = {"flags": re.IGNORECASE} - if matching_model.matching_algorithm == MatchingModel.MATCH_ALL: + if matching_model.matching_algorithm == MatchingModel.MATCH_NONE: + return False + + elif matching_model.matching_algorithm == MatchingModel.MATCH_ALL: for word in _split_match(matching_model): search_result = re.search(rf"\b{word}\b", document_content, **search_kwargs) if not search_result: diff --git a/src/documents/migrations/1032_alter_correspondent_matching_algorithm_and_more.py b/src/documents/migrations/1032_alter_correspondent_matching_algorithm_and_more.py new file mode 100644 index 000000000..88aa7f2bc --- /dev/null +++ b/src/documents/migrations/1032_alter_correspondent_matching_algorithm_and_more.py @@ -0,0 +1,81 @@ +# Generated by Django 4.1.7 on 2023-02-22 00:45 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("documents", "1031_remove_savedview_user_correspondent_owner_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="correspondent", + name="matching_algorithm", + field=models.PositiveIntegerField( + choices=[ + (0, "None"), + (1, "Any word"), + (2, "All words"), + (3, "Exact match"), + (4, "Regular expression"), + (5, "Fuzzy word"), + (6, "Automatic"), + ], + default=1, + verbose_name="matching algorithm", + ), + ), + migrations.AlterField( + model_name="documenttype", + name="matching_algorithm", + field=models.PositiveIntegerField( + choices=[ + (0, "None"), + (1, "Any word"), + (2, "All words"), + (3, "Exact match"), + (4, "Regular expression"), + (5, "Fuzzy word"), + (6, "Automatic"), + ], + default=1, + verbose_name="matching algorithm", + ), + ), + migrations.AlterField( + model_name="storagepath", + name="matching_algorithm", + field=models.PositiveIntegerField( + choices=[ + (0, "None"), + (1, "Any word"), + (2, "All words"), + (3, "Exact match"), + (4, "Regular expression"), + (5, "Fuzzy word"), + (6, "Automatic"), + ], + default=1, + verbose_name="matching algorithm", + ), + ), + migrations.AlterField( + model_name="tag", + name="matching_algorithm", + field=models.PositiveIntegerField( + choices=[ + (0, "None"), + (1, "Any word"), + (2, "All words"), + (3, "Exact match"), + (4, "Regular expression"), + (5, "Fuzzy word"), + (6, "Automatic"), + ], + default=1, + verbose_name="matching algorithm", + ), + ), + ] diff --git a/src/documents/models.py b/src/documents/models.py index 77dc80944..177885de0 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -24,6 +24,7 @@ TASK_STATE_CHOICES = sorted(zip(ALL_STATES, ALL_STATES)) class MatchingModel(models.Model): + MATCH_NONE = 0 MATCH_ANY = 1 MATCH_ALL = 2 MATCH_LITERAL = 3 @@ -32,6 +33,7 @@ class MatchingModel(models.Model): MATCH_AUTO = 6 MATCHING_ALGORITHMS = ( + (MATCH_NONE, _("None")), (MATCH_ANY, _("Any word")), (MATCH_ALL, _("All words")), (MATCH_LITERAL, _("Exact match")), From 927616decb1fa5adf170db9de2ac3a79a3bfd1dd Mon Sep 17 00:00:00 2001 From: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue, 21 Feb 2023 20:58:52 -0800 Subject: [PATCH 2/3] Reorder frontend matching model options & update strings --- src-ui/messages.xlf | 72 +++++++++++++++++---------- src-ui/src/app/data/matching-model.ts | 16 +++--- 2 files changed, 53 insertions(+), 35 deletions(-) diff --git a/src-ui/messages.xlf b/src-ui/messages.xlf index 4a71b06ed..50dc362ee 100644 --- a/src-ui/messages.xlf +++ b/src-ui/messages.xlf @@ -1109,21 +1109,21 @@ Create new item src/app/components/common/edit-dialog/edit-dialog.component.ts - 67 + 71 Edit item src/app/components/common/edit-dialog/edit-dialog.component.ts - 71 + 75 Could not save element: src/app/components/common/edit-dialog/edit-dialog.component.ts - 75 + 79 @@ -1498,7 +1498,7 @@ src/app/components/manage/management-list/management-list.component.ts - 192 + 195 src/app/components/manage/settings/settings.component.html @@ -2671,7 +2671,7 @@ src/app/components/manage/management-list/management-list.component.ts - 188 + 191 @@ -3677,53 +3677,64 @@ Automatic src/app/components/manage/management-list/management-list.component.ts - 98 + 99 src/app/data/matching-model.ts - 39 + 15 + + + + None + + src/app/components/manage/management-list/management-list.component.ts + 101 + + + src/app/data/matching-model.ts + 45 Successfully created . src/app/components/manage/management-list/management-list.component.ts - 138 + 141 Error occurred while creating : . src/app/components/manage/management-list/management-list.component.ts - 143,145 + 146,148 Successfully updated . src/app/components/manage/management-list/management-list.component.ts - 161 + 164 Error occurred while saving : . src/app/components/manage/management-list/management-list.component.ts - 166,168 + 169,171 Do you really want to delete the ? src/app/components/manage/management-list/management-list.component.ts - 175 + 178 Associated documents will not be deleted. src/app/components/manage/management-list/management-list.component.ts - 190 + 193 @@ -3732,7 +3743,7 @@ )"/> src/app/components/manage/management-list/management-list.component.ts - 203,205 + 206,208 @@ -4501,81 +4512,88 @@ 7 + + Auto: Learn matching automatically + + src/app/data/matching-model.ts + 16 + + Any word src/app/data/matching-model.ts - 14 + 20 Any: Document contains any of these words (space separated) src/app/data/matching-model.ts - 15 + 21 All words src/app/data/matching-model.ts - 19 + 25 All: Document contains all of these words (space separated) src/app/data/matching-model.ts - 20 + 26 Exact match src/app/data/matching-model.ts - 24 + 30 Exact: Document contains this string src/app/data/matching-model.ts - 25 + 31 Regular expression src/app/data/matching-model.ts - 29 + 35 Regular expression: Document matches this regular expression src/app/data/matching-model.ts - 30 + 36 Fuzzy word src/app/data/matching-model.ts - 34 + 40 Fuzzy: Document contains a word similar to this word src/app/data/matching-model.ts - 35 + 41 - - Auto: Learn matching automatically + + None: Disable matching src/app/data/matching-model.ts - 40 + 46 diff --git a/src-ui/src/app/data/matching-model.ts b/src-ui/src/app/data/matching-model.ts index dc2f8298d..a65cb9956 100644 --- a/src-ui/src/app/data/matching-model.ts +++ b/src-ui/src/app/data/matching-model.ts @@ -1,4 +1,4 @@ -import { ObjectWithPermissions } from './object-with-permissions' +import { ObjectWithId } from './object-with-id' export const MATCH_NONE = 0 export const MATCH_ANY = 1 @@ -11,9 +11,9 @@ export const DEFAULT_MATCHING_ALGORITHM = MATCH_AUTO export const MATCHING_ALGORITHMS = [ { - id: MATCH_NONE, - shortName: $localize`None`, - name: $localize`None: Disable matching`, + id: MATCH_AUTO, + shortName: $localize`Automatic`, + name: $localize`Auto: Learn matching automatically`, }, { id: MATCH_ANY, @@ -41,13 +41,13 @@ export const MATCHING_ALGORITHMS = [ name: $localize`Fuzzy: Document contains a word similar to this word`, }, { - id: MATCH_AUTO, - shortName: $localize`Automatic`, - name: $localize`Auto: Learn matching automatically`, + id: MATCH_NONE, + shortName: $localize`None`, + name: $localize`None: Disable matching`, }, ] -export interface MatchingModel extends ObjectWithPermissions { +export interface MatchingModel extends ObjectWithId { name?: string slug?: string From 7610a0459e37cd7fc53ebe91fdf1ea3e0130d7df Mon Sep 17 00:00:00 2001 From: Brandon Rothweiler Date: Wed, 22 Feb 2023 09:39:29 -0500 Subject: [PATCH 3/3] Add test --- src/documents/tests/test_matchables.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/documents/tests/test_matchables.py b/src/documents/tests/test_matchables.py index 8dc629b0b..8d5cd6695 100644 --- a/src/documents/tests/test_matchables.py +++ b/src/documents/tests/test_matchables.py @@ -47,6 +47,18 @@ class _TestMatchingBase(TestCase): class TestMatching(_TestMatchingBase): + def test_match_none(self): + + self._test_matching( + "", + "MATCH_NONE", + (), + ( + "no", + "match", + ), + ) + def test_match_all(self): self._test_matching(