Add an option to disable matching

This commit is contained in:
Brandon Rothweiler 2023-02-21 20:01:30 -05:00
parent ca30dbc832
commit 8b2b7bbe6d
7 changed files with 108 additions and 5 deletions

View File

@ -9,7 +9,7 @@ Paperless will compare the matching algorithms defined by every tag,
correspondent, document type, and storage path in your database to see correspondent, document type, and storage path in your database to see
if they apply to the text in a document. In other words, if you define a if they apply to the text in a document. In other words, if you define a
tag called `Home Utility` that has a `match` property of `bc hydro` and tag called `Home Utility` that has a `match` property of `bc hydro` and
a `matching_algorithm` of `literal`, Paperless will automatically tag a `matching_algorithm` of `Exact`, Paperless will automatically tag
your newly-consumed document with your `Home Utility` tag so long as the your newly-consumed document with your `Home Utility` tag so long as the
text `bc hydro` appears in the body of the document somewhere. text `bc hydro` appears in the body of the document somewhere.
@ -25,12 +25,13 @@ documents.
The following algorithms are available: The following algorithms are available:
- **None:** No matching will be performed.
- **Any:** Looks for any occurrence of any word provided in match in - **Any:** Looks for any occurrence of any word provided in match in
the PDF. If you define the match as `Bank1 Bank2`, it will match the PDF. If you define the match as `Bank1 Bank2`, it will match
documents containing either of these terms. documents containing either of these terms.
- **All:** Requires that every word provided appears in the PDF, - **All:** Requires that every word provided appears in the PDF,
though not necessarily in the order provided. though not necessarily in the order provided.
- **Literal:** Matches only if the match appears exactly as provided - **Exact:** Matches only if the match appears exactly as provided
(i.e. preserve ordering) in the PDF. (i.e. preserve ordering) in the PDF.
- **Regular expression:** Parses the match as a regular expression and - **Regular expression:** Parses the match as a regular expression and
tries to find a match within the document. tries to find a match within the document.

View File

@ -2,7 +2,11 @@ import { Directive, EventEmitter, Input, OnInit, Output } from '@angular/core'
import { FormGroup } from '@angular/forms' import { FormGroup } from '@angular/forms'
import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap' import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { Observable } from 'rxjs' import { Observable } from 'rxjs'
import { MATCHING_ALGORITHMS, MATCH_AUTO } from 'src/app/data/matching-model' import {
MATCHING_ALGORITHMS,
MATCH_AUTO,
MATCH_NONE,
} from 'src/app/data/matching-model'
import { ObjectWithId } from 'src/app/data/object-with-id' import { ObjectWithId } from 'src/app/data/object-with-id'
import { ObjectWithPermissions } from 'src/app/data/object-with-permissions' import { ObjectWithPermissions } from 'src/app/data/object-with-permissions'
import { PaperlessUser } from 'src/app/data/paperless-user' import { PaperlessUser } from 'src/app/data/paperless-user'
@ -91,7 +95,10 @@ export abstract class EditDialogComponent<
} }
get patternRequired(): boolean { get patternRequired(): boolean {
return this.objectForm?.value.matching_algorithm !== MATCH_AUTO return (
this.objectForm?.value.matching_algorithm !== MATCH_AUTO &&
this.objectForm?.value.matching_algorithm !== MATCH_NONE
)
} }
save() { save() {

View File

@ -12,6 +12,7 @@ import {
MatchingModel, MatchingModel,
MATCHING_ALGORITHMS, MATCHING_ALGORITHMS,
MATCH_AUTO, MATCH_AUTO,
MATCH_NONE,
} from 'src/app/data/matching-model' } from 'src/app/data/matching-model'
import { ObjectWithId } from 'src/app/data/object-with-id' import { ObjectWithId } from 'src/app/data/object-with-id'
import { ObjectWithPermissions } from 'src/app/data/object-with-permissions' import { ObjectWithPermissions } from 'src/app/data/object-with-permissions'
@ -96,6 +97,8 @@ export abstract class ManagementListComponent<T extends ObjectWithId>
getMatching(o: MatchingModel) { getMatching(o: MatchingModel) {
if (o.matching_algorithm == MATCH_AUTO) { if (o.matching_algorithm == MATCH_AUTO) {
return $localize`Automatic` return $localize`Automatic`
} else if (o.matching_algorithm == MATCH_NONE) {
return $localize`None`
} else if (o.match && o.match.length > 0) { } else if (o.match && o.match.length > 0) {
return `${ return `${
MATCHING_ALGORITHMS.find((a) => a.id == o.matching_algorithm).shortName MATCHING_ALGORITHMS.find((a) => a.id == o.matching_algorithm).shortName

View File

@ -1,5 +1,6 @@
import { ObjectWithPermissions } from './object-with-permissions' import { ObjectWithPermissions } from './object-with-permissions'
export const MATCH_NONE = 0
export const MATCH_ANY = 1 export const MATCH_ANY = 1
export const MATCH_ALL = 2 export const MATCH_ALL = 2
export const MATCH_LITERAL = 3 export const MATCH_LITERAL = 3
@ -9,6 +10,11 @@ export const MATCH_AUTO = 6
export const DEFAULT_MATCHING_ALGORITHM = MATCH_AUTO export const DEFAULT_MATCHING_ALGORITHM = MATCH_AUTO
export const MATCHING_ALGORITHMS = [ export const MATCHING_ALGORITHMS = [
{
id: MATCH_NONE,
shortName: $localize`None`,
name: $localize`None: Disable matching`,
},
{ {
id: MATCH_ANY, id: MATCH_ANY,
shortName: $localize`Any word`, shortName: $localize`Any word`,

View File

@ -86,7 +86,10 @@ def matches(matching_model, document):
if matching_model.is_insensitive: if matching_model.is_insensitive:
search_kwargs = {"flags": re.IGNORECASE} search_kwargs = {"flags": re.IGNORECASE}
if matching_model.matching_algorithm == MatchingModel.MATCH_ALL: if matching_model.matching_algorithm == MatchingModel.MATCH_NONE:
return False
elif matching_model.matching_algorithm == MatchingModel.MATCH_ALL:
for word in _split_match(matching_model): for word in _split_match(matching_model):
search_result = re.search(rf"\b{word}\b", document_content, **search_kwargs) search_result = re.search(rf"\b{word}\b", document_content, **search_kwargs)
if not search_result: if not search_result:

View File

@ -0,0 +1,81 @@
# Generated by Django 4.1.7 on 2023-02-22 00:45
from django.db import migrations, models
class Migration(migrations.Migration):
    """Alter ``matching_algorithm`` on the correspondent, documenttype,
    storagepath and tag models.

    Each field becomes a ``PositiveIntegerField`` whose choices run from
    0 ("None") through 6 ("Automatic"), with a default of 1.
    """

    dependencies = [
        ("documents", "1031_remove_savedview_user_correspondent_owner_and_more"),
    ]

    # The four operations differ only in the target model, so they are
    # produced from one template; every iteration builds its own field
    # instance and its own choices list.
    operations = [
        migrations.AlterField(
            model_name=target_model,
            name="matching_algorithm",
            field=models.PositiveIntegerField(
                choices=[
                    (0, "None"),
                    (1, "Any word"),
                    (2, "All words"),
                    (3, "Exact match"),
                    (4, "Regular expression"),
                    (5, "Fuzzy word"),
                    (6, "Automatic"),
                ],
                default=1,
                verbose_name="matching algorithm",
            ),
        )
        for target_model in ("correspondent", "documenttype", "storagepath", "tag")
    ]

View File

@ -24,6 +24,7 @@ TASK_STATE_CHOICES = sorted(zip(ALL_STATES, ALL_STATES))
class MatchingModel(models.Model): class MatchingModel(models.Model):
MATCH_NONE = 0
MATCH_ANY = 1 MATCH_ANY = 1
MATCH_ALL = 2 MATCH_ALL = 2
MATCH_LITERAL = 3 MATCH_LITERAL = 3
@ -32,6 +33,7 @@ class MatchingModel(models.Model):
MATCH_AUTO = 6 MATCH_AUTO = 6
MATCHING_ALGORITHMS = ( MATCHING_ALGORITHMS = (
(MATCH_NONE, _("None")),
(MATCH_ANY, _("Any word")), (MATCH_ANY, _("Any word")),
(MATCH_ALL, _("All words")), (MATCH_ALL, _("All words")),
(MATCH_LITERAL, _("Exact match")), (MATCH_LITERAL, _("Exact match")),