Add an option to disable matching

This commit is contained in:
Brandon Rothweiler 2023-02-21 20:01:30 -05:00
parent ca30dbc832
commit 8b2b7bbe6d
7 changed files with 108 additions and 5 deletions

View File

@ -9,7 +9,7 @@ Paperless will compare the matching algorithms defined by every tag,
correspondent, document type, and storage path in your database to see
if they apply to the text in a document. In other words, if you define a
tag called `Home Utility` that had a `match` property of `bc hydro` and
a `matching_algorithm` of `literal`, Paperless will automatically tag
a `matching_algorithm` of `Exact`, Paperless will automatically tag
your newly-consumed document with your `Home Utility` tag so long as the
text `bc hydro` appears in the body of the document somewhere.
@ -25,12 +25,13 @@ documents.
The following algorithms are available:
- **None:** No matching will be performed.
- **Any:** Looks for any occurrence of any word provided in match in
the PDF. If you define the match as `Bank1 Bank2`, it will match
documents containing either of these terms.
- **All:** Requires that every word provided appears in the PDF,
albeit not in the order provided.
- **Literal:** Matches only if the match appears exactly as provided
- **Exact:** Matches only if the match appears exactly as provided
(i.e. preserve ordering) in the PDF.
- **Regular expression:** Parses the match as a regular expression and
tries to find a match within the document.

View File

@ -2,7 +2,11 @@ import { Directive, EventEmitter, Input, OnInit, Output } from '@angular/core'
import { FormGroup } from '@angular/forms'
import { NgbActiveModal } from '@ng-bootstrap/ng-bootstrap'
import { Observable } from 'rxjs'
import { MATCHING_ALGORITHMS, MATCH_AUTO } from 'src/app/data/matching-model'
import {
MATCHING_ALGORITHMS,
MATCH_AUTO,
MATCH_NONE,
} from 'src/app/data/matching-model'
import { ObjectWithId } from 'src/app/data/object-with-id'
import { ObjectWithPermissions } from 'src/app/data/object-with-permissions'
import { PaperlessUser } from 'src/app/data/paperless-user'
@ -91,7 +95,10 @@ export abstract class EditDialogComponent<
}
get patternRequired(): boolean {
return this.objectForm?.value.matching_algorithm !== MATCH_AUTO
return (
this.objectForm?.value.matching_algorithm !== MATCH_AUTO &&
this.objectForm?.value.matching_algorithm !== MATCH_NONE
)
}
save() {

View File

@ -12,6 +12,7 @@ import {
MatchingModel,
MATCHING_ALGORITHMS,
MATCH_AUTO,
MATCH_NONE,
} from 'src/app/data/matching-model'
import { ObjectWithId } from 'src/app/data/object-with-id'
import { ObjectWithPermissions } from 'src/app/data/object-with-permissions'
@ -96,6 +97,8 @@ export abstract class ManagementListComponent<T extends ObjectWithId>
getMatching(o: MatchingModel) {
if (o.matching_algorithm == MATCH_AUTO) {
return $localize`Automatic`
} else if (o.matching_algorithm == MATCH_NONE) {
return $localize`None`
} else if (o.match && o.match.length > 0) {
return `${
MATCHING_ALGORITHMS.find((a) => a.id == o.matching_algorithm).shortName

View File

@ -1,5 +1,6 @@
import { ObjectWithPermissions } from './object-with-permissions'
export const MATCH_NONE = 0
export const MATCH_ANY = 1
export const MATCH_ALL = 2
export const MATCH_LITERAL = 3
@ -9,6 +10,11 @@ export const MATCH_AUTO = 6
export const DEFAULT_MATCHING_ALGORITHM = MATCH_AUTO
export const MATCHING_ALGORITHMS = [
{
id: MATCH_NONE,
shortName: $localize`None`,
name: $localize`None: Disable matching`,
},
{
id: MATCH_ANY,
shortName: $localize`Any word`,

View File

@ -86,7 +86,10 @@ def matches(matching_model, document):
if matching_model.is_insensitive:
search_kwargs = {"flags": re.IGNORECASE}
if matching_model.matching_algorithm == MatchingModel.MATCH_ALL:
if matching_model.matching_algorithm == MatchingModel.MATCH_NONE:
return False
elif matching_model.matching_algorithm == MatchingModel.MATCH_ALL:
for word in _split_match(matching_model):
search_result = re.search(rf"\b{word}\b", document_content, **search_kwargs)
if not search_result:

View File

@ -0,0 +1,81 @@
# Generated by Django 4.1.7 on 2023-02-22 00:45
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("documents", "1031_remove_savedview_user_correspondent_owner_and_more"),
]
operations = [
migrations.AlterField(
model_name="correspondent",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(0, "None"),
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterField(
model_name="documenttype",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(0, "None"),
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterField(
model_name="storagepath",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(0, "None"),
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
migrations.AlterField(
model_name="tag",
name="matching_algorithm",
field=models.PositiveIntegerField(
choices=[
(0, "None"),
(1, "Any word"),
(2, "All words"),
(3, "Exact match"),
(4, "Regular expression"),
(5, "Fuzzy word"),
(6, "Automatic"),
],
default=1,
verbose_name="matching algorithm",
),
),
]

View File

@ -24,6 +24,7 @@ TASK_STATE_CHOICES = sorted(zip(ALL_STATES, ALL_STATES))
class MatchingModel(models.Model):
MATCH_NONE = 0
MATCH_ANY = 1
MATCH_ALL = 2
MATCH_LITERAL = 3
@ -32,6 +33,7 @@ class MatchingModel(models.Model):
MATCH_AUTO = 6
MATCHING_ALGORITHMS = (
(MATCH_NONE, _("None")),
(MATCH_ANY, _("Any word")),
(MATCH_ALL, _("All words")),
(MATCH_LITERAL, _("Exact match")),