mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Add to handler, matching, retagger
This commit is contained in:
parent
a632b6b711
commit
6dc6c6c7bb
@ -372,17 +372,19 @@ currently-imported docs. This problem is common enough that there are
|
||||
tools for it.
|
||||
|
||||
```
|
||||
document_retagger [-h] [-c] [-T] [-t] [-i] [--id-range] [--use-first] [-f]
|
||||
document_retagger [-h] [-c] [-T] [-t] [-s] [-cf] [-i] [--id-range] [--use-first] [-f] [--suggest]
|
||||
|
||||
optional arguments:
|
||||
-c, --correspondent
|
||||
-T, --tags
|
||||
-t, --document_type
|
||||
-s, --storage_path
|
||||
-cf, --custom_fields
|
||||
-i, --inbox-only
|
||||
--id-range
|
||||
--use-first
|
||||
-f, --overwrite
|
||||
--suggest
|
||||
```
|
||||
|
||||
Run this after changing or adding matching rules. It'll loop over all
|
||||
@ -408,6 +410,8 @@ to override this behavior and just use the first correspondent or type
|
||||
it finds. This option does not apply to tags, since any number of tags
|
||||
can be applied to a document.
|
||||
|
||||
If you want to suggest changes but not apply them, specify `--suggest`.
|
||||
|
||||
Finally, `-f` specifies that you wish to overwrite already assigned
|
||||
correspondents, types and/or tags. The default behavior is to not assign
|
||||
correspondents and types to documents that have this data already
|
||||
|
@ -15,6 +15,7 @@ class DocumentsConfig(AppConfig):
|
||||
from documents.signals.handlers import run_workflows_added
|
||||
from documents.signals.handlers import run_workflows_updated
|
||||
from documents.signals.handlers import set_correspondent
|
||||
from documents.signals.handlers import set_custom_fields
|
||||
from documents.signals.handlers import set_document_type
|
||||
from documents.signals.handlers import set_storage_path
|
||||
from documents.signals.handlers import set_tags
|
||||
@ -24,6 +25,7 @@ class DocumentsConfig(AppConfig):
|
||||
document_consumption_finished.connect(set_document_type)
|
||||
document_consumption_finished.connect(set_tags)
|
||||
document_consumption_finished.connect(set_storage_path)
|
||||
document_consumption_finished.connect(set_custom_fields)
|
||||
document_consumption_finished.connect(add_to_index)
|
||||
document_consumption_finished.connect(run_workflows_added)
|
||||
document_updated.connect(run_workflows_updated)
|
||||
|
@ -7,6 +7,7 @@ from documents.classifier import load_classifier
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.models import Document
|
||||
from documents.signals.handlers import set_correspondent
|
||||
from documents.signals.handlers import set_custom_fields
|
||||
from documents.signals.handlers import set_document_type
|
||||
from documents.signals.handlers import set_storage_path
|
||||
from documents.signals.handlers import set_tags
|
||||
@ -17,9 +18,9 @@ logger = logging.getLogger("paperless.management.retagger")
|
||||
class Command(ProgressBarMixin, BaseCommand):
|
||||
help = (
|
||||
"Using the current classification model, assigns correspondents, tags "
|
||||
"and document types to all documents, effectively allowing you to "
|
||||
"back-tag all previously indexed documents with metadata created (or "
|
||||
"modified) after their initial import."
|
||||
"document types, storage paths and custom fields to all documents, effectively"
|
||||
"allowing you to back-tag all previously indexed documents with metadata created "
|
||||
"(or modified) after their initial import."
|
||||
)
|
||||
|
||||
def add_arguments(self, parser):
|
||||
@ -27,6 +28,12 @@ class Command(ProgressBarMixin, BaseCommand):
|
||||
parser.add_argument("-T", "--tags", default=False, action="store_true")
|
||||
parser.add_argument("-t", "--document_type", default=False, action="store_true")
|
||||
parser.add_argument("-s", "--storage_path", default=False, action="store_true")
|
||||
parser.add_argument(
|
||||
"-cf",
|
||||
"--custom_fields",
|
||||
default=False,
|
||||
action="store_true",
|
||||
)
|
||||
parser.add_argument("-i", "--inbox-only", default=False, action="store_true")
|
||||
parser.add_argument(
|
||||
"--use-first",
|
||||
@ -134,3 +141,16 @@ class Command(ProgressBarMixin, BaseCommand):
|
||||
stdout=self.stdout,
|
||||
style_func=self.style,
|
||||
)
|
||||
|
||||
if options["custom_fields"]:
|
||||
set_custom_fields(
|
||||
sender=None,
|
||||
document=document,
|
||||
classifier=classifier,
|
||||
replace=options["overwrite"],
|
||||
use_first=options["use_first"],
|
||||
suggest=options["suggest"],
|
||||
base_url=options["base_url"],
|
||||
stdout=self.stdout,
|
||||
style_func=self.style,
|
||||
)
|
||||
|
@ -132,6 +132,25 @@ def match_storage_paths(document: Document, classifier: DocumentClassifier, user
|
||||
)
|
||||
|
||||
|
||||
def match_custom_fields(document: Document, classifier: DocumentClassifier, user=None):
    """
    Return the custom fields attached to ``document`` that match it.

    A field is kept when ``matches(field, document)`` is truthy, or when the
    field uses ``MatchingModel.MATCH_AUTO`` and its primary key is among the
    ids returned by ``classifier.predict_custom_fields``. Without a
    classifier no ids are predicted. ``user`` is accepted but not used.

    NOTE(review): the candidates are only the fields that already have an
    instance on the document, so the result can never contain a field the
    document does not have yet. A caller that subtracts the document's
    current fields from this result will always get an empty set - confirm
    whether all (permitted) custom fields should be considered instead.
    """
    if classifier:
        auto_ids = classifier.predict_custom_fields(document.content)
    else:
        auto_ids = []

    attached = [instance.field for instance in document.custom_fields.all()]

    def _applies(field):
        # An explicit matching rule wins; otherwise fall back to the
        # classifier prediction for auto-matched fields.
        if matches(field, document):
            return True
        return (
            field.matching_algorithm == MatchingModel.MATCH_AUTO
            and field.pk in auto_ids
        )

    return [field for field in attached if _applies(field)]
|
||||
|
||||
|
||||
def matches(matching_model: MatchingModel, document: Document):
|
||||
search_kwargs = {}
|
||||
|
||||
|
@ -318,6 +318,67 @@ def set_storage_path(
|
||||
document.save(update_fields=("storage_path",))
|
||||
|
||||
|
||||
def set_custom_fields(
    document: Document,
    logging_group=None,
    classifier: DocumentClassifier | None = None,
    replace=False,
    suggest=False,
    base_url=None,
    stdout=None,
    style_func=None,
    **kwargs,
):
    """
    Assign matching custom fields to ``document``, or only report them.

    :param document: the document to process
    :param logging_group: passed as ``extra={"group": ...}`` to the log call
    :param classifier: forwarded to ``matching.match_custom_fields``
    :param replace: when true, first delete existing instances on the
        document, except those whose field has an empty ``match`` and does
        not use ``CustomField.MATCH_AUTO``
    :param suggest: when true, write suggestions to ``stdout`` instead of
        creating instances
    :param base_url: if set, the suggestion output includes a document URL
        built from it
    :param stdout: writer used for suggestion output
    :param style_func: style object providing ``SUCCESS`` for suggestion output
    :param kwargs: additional keyword arguments are accepted and ignored
    """
    if replace:
        # NOTE(review): this deletion also runs when ``suggest`` is set;
        # confirm that removing instances in suggest mode is intended.
        CustomFieldInstance.objects.filter(document=document).exclude(
            Q(field__match="") & ~Q(field__matching_algorithm=CustomField.MATCH_AUTO),
        ).delete()

    current_fields = {instance.field for instance in document.custom_fields.all()}

    matched_fields = set(matching.match_custom_fields(document, classifier))

    # Fields that matched but are not on the document yet.
    relevant_fields = matched_fields - current_fields

    if suggest:
        # Auto-matched fields that are on the document but no longer match.
        extra_fields = [
            f
            for f in current_fields - matched_fields
            if f.matching_algorithm == MatchingModel.MATCH_AUTO
        ]
        if not relevant_fields and not extra_fields:
            return
        doc_str = style_func.SUCCESS(str(document))
        if base_url:
            stdout.write(doc_str)
            stdout.write(f"{base_url}/documents/{document.pk}")
        else:
            stdout.write(doc_str + style_func.SUCCESS(f" [{document.pk}]"))
        if relevant_fields:
            stdout.write(
                "Suggest custom fields: "
                + ", ".join([f.name for f in relevant_fields]),
            )
        if extra_fields:
            stdout.write(
                "Extra custom fields: " + ", ".join([f.name for f in extra_fields]),
            )
    else:
        if not relevant_fields:
            return

        # The first placeholder is the field names and the second is the
        # document; the arguments were previously passed the other way round.
        message = 'Assigning custom fields "{}" to "{}"'
        logger.info(
            message.format(", ".join([f.name for f in relevant_fields]), document),
            extra={"group": logging_group},
        )

        for field in relevant_fields:
            CustomFieldInstance.objects.create(
                field=field,
                document=document,
            )
|
||||
|
||||
|
||||
# see empty_trash in documents/tasks.py for signal handling
|
||||
def cleanup_document_deletion(sender, instance, **kwargs):
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
|
Loading…
x
Reference in New Issue
Block a user