mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Add to handler, matching, retagger
This commit is contained in:
parent
a632b6b711
commit
6dc6c6c7bb
@ -372,17 +372,19 @@ currently-imported docs. This problem is common enough that there are
|
|||||||
tools for it.
|
tools for it.
|
||||||
|
|
||||||
```
|
```
|
||||||
document_retagger [-h] [-c] [-T] [-t] [-i] [--id-range] [--use-first] [-f]
|
document_retagger [-h] [-c] [-T] [-t] [-s] [-cf] [-i] [--id-range] [--use-first] [-f] [--suggest]
|
||||||
|
|
||||||
optional arguments:
|
optional arguments:
|
||||||
-c, --correspondent
|
-c, --correspondent
|
||||||
-T, --tags
|
-T, --tags
|
||||||
-t, --document_type
|
-t, --document_type
|
||||||
-s, --storage_path
|
-s, --storage_path
|
||||||
|
-cf, --custom_fields
|
||||||
-i, --inbox-only
|
-i, --inbox-only
|
||||||
--id-range
|
--id-range
|
||||||
--use-first
|
--use-first
|
||||||
-f, --overwrite
|
-f, --overwrite
|
||||||
|
--suggest
|
||||||
```
|
```
|
||||||
|
|
||||||
Run this after changing or adding matching rules. It'll loop over all
|
Run this after changing or adding matching rules. It'll loop over all
|
||||||
@ -408,6 +410,8 @@ to override this behavior and just use the first correspondent or type
|
|||||||
it finds. This option does not apply to tags, since any amount of tags
|
it finds. This option does not apply to tags, since any amount of tags
|
||||||
can be applied to a document.
|
can be applied to a document.
|
||||||
|
|
||||||
|
If you want to suggest changes but not apply them, specify `--suggest`.
|
||||||
|
|
||||||
Finally, `-f` specifies that you wish to overwrite already assigned
|
Finally, `-f` specifies that you wish to overwrite already assigned
|
||||||
correspondents, types and/or tags. The default behavior is to not assign
|
correspondents, types and/or tags. The default behavior is to not assign
|
||||||
correspondents and types to documents that have this data already
|
correspondents and types to documents that have this data already
|
||||||
|
@ -15,6 +15,7 @@ class DocumentsConfig(AppConfig):
|
|||||||
from documents.signals.handlers import run_workflows_added
|
from documents.signals.handlers import run_workflows_added
|
||||||
from documents.signals.handlers import run_workflows_updated
|
from documents.signals.handlers import run_workflows_updated
|
||||||
from documents.signals.handlers import set_correspondent
|
from documents.signals.handlers import set_correspondent
|
||||||
|
from documents.signals.handlers import set_custom_fields
|
||||||
from documents.signals.handlers import set_document_type
|
from documents.signals.handlers import set_document_type
|
||||||
from documents.signals.handlers import set_storage_path
|
from documents.signals.handlers import set_storage_path
|
||||||
from documents.signals.handlers import set_tags
|
from documents.signals.handlers import set_tags
|
||||||
@ -24,6 +25,7 @@ class DocumentsConfig(AppConfig):
|
|||||||
document_consumption_finished.connect(set_document_type)
|
document_consumption_finished.connect(set_document_type)
|
||||||
document_consumption_finished.connect(set_tags)
|
document_consumption_finished.connect(set_tags)
|
||||||
document_consumption_finished.connect(set_storage_path)
|
document_consumption_finished.connect(set_storage_path)
|
||||||
|
document_consumption_finished.connect(set_custom_fields)
|
||||||
document_consumption_finished.connect(add_to_index)
|
document_consumption_finished.connect(add_to_index)
|
||||||
document_consumption_finished.connect(run_workflows_added)
|
document_consumption_finished.connect(run_workflows_added)
|
||||||
document_updated.connect(run_workflows_updated)
|
document_updated.connect(run_workflows_updated)
|
||||||
|
@ -7,6 +7,7 @@ from documents.classifier import load_classifier
|
|||||||
from documents.management.commands.mixins import ProgressBarMixin
|
from documents.management.commands.mixins import ProgressBarMixin
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.signals.handlers import set_correspondent
|
from documents.signals.handlers import set_correspondent
|
||||||
|
from documents.signals.handlers import set_custom_fields
|
||||||
from documents.signals.handlers import set_document_type
|
from documents.signals.handlers import set_document_type
|
||||||
from documents.signals.handlers import set_storage_path
|
from documents.signals.handlers import set_storage_path
|
||||||
from documents.signals.handlers import set_tags
|
from documents.signals.handlers import set_tags
|
||||||
@ -17,9 +18,9 @@ logger = logging.getLogger("paperless.management.retagger")
|
|||||||
class Command(ProgressBarMixin, BaseCommand):
|
class Command(ProgressBarMixin, BaseCommand):
|
||||||
# Description shown for this management command. Adjacent string literals
# are concatenated by Python, so every fragment except the last must end
# with a space to keep the words apart.
help = (
    "Using the current classification model, assigns correspondents, tags, "
    "document types, storage paths and custom fields to all documents, "
    "effectively allowing you to back-tag all previously indexed documents "
    "with metadata created (or modified) after their initial import."
)
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
@ -27,6 +28,12 @@ class Command(ProgressBarMixin, BaseCommand):
|
|||||||
parser.add_argument("-T", "--tags", default=False, action="store_true")
|
parser.add_argument("-T", "--tags", default=False, action="store_true")
|
||||||
parser.add_argument("-t", "--document_type", default=False, action="store_true")
|
parser.add_argument("-t", "--document_type", default=False, action="store_true")
|
||||||
parser.add_argument("-s", "--storage_path", default=False, action="store_true")
|
parser.add_argument("-s", "--storage_path", default=False, action="store_true")
|
||||||
|
parser.add_argument(
|
||||||
|
"-cf",
|
||||||
|
"--custom_fields",
|
||||||
|
default=False,
|
||||||
|
action="store_true",
|
||||||
|
)
|
||||||
parser.add_argument("-i", "--inbox-only", default=False, action="store_true")
|
parser.add_argument("-i", "--inbox-only", default=False, action="store_true")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--use-first",
|
"--use-first",
|
||||||
@ -134,3 +141,16 @@ class Command(ProgressBarMixin, BaseCommand):
|
|||||||
stdout=self.stdout,
|
stdout=self.stdout,
|
||||||
style_func=self.style,
|
style_func=self.style,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if options["custom_fields"]:
|
||||||
|
set_custom_fields(
|
||||||
|
sender=None,
|
||||||
|
document=document,
|
||||||
|
classifier=classifier,
|
||||||
|
replace=options["overwrite"],
|
||||||
|
use_first=options["use_first"],
|
||||||
|
suggest=options["suggest"],
|
||||||
|
base_url=options["base_url"],
|
||||||
|
stdout=self.stdout,
|
||||||
|
style_func=self.style,
|
||||||
|
)
|
||||||
|
@ -132,6 +132,25 @@ def match_storage_paths(document: Document, classifier: DocumentClassifier, user
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def match_custom_fields(document: Document, classifier: DocumentClassifier, user=None):
    """
    Return the custom fields that match the given document.

    A field is selected when ``matches(field, document)`` is truthy, or when
    the field uses ``MatchingModel.MATCH_AUTO`` and its primary key is among
    the ids predicted by ``classifier`` for ``document.content``. Without a
    classifier no ids are predicted, so only ``matches`` can select a field.

    Candidates are all custom fields, not just those already attached to the
    document. Restricting candidates to the attached fields means the result
    can never contain a field the document lacks, so a caller that subtracts
    the current fields from the result has nothing left to assign.

    ``user`` is accepted for signature parity with the other ``match_*``
    helpers and is currently unused.
    """
    # Function-scope import so this block does not depend on the module's
    # import list. NOTE(review): assumes CustomField is defined in
    # documents.models alongside Document - confirm.
    from documents.models import CustomField

    predicted_custom_field_ids = (
        classifier.predict_custom_fields(document.content) if classifier else []
    )

    return [
        field
        for field in CustomField.objects.all()
        if matches(field, document)
        or (
            field.matching_algorithm == MatchingModel.MATCH_AUTO
            and field.pk in predicted_custom_field_ids
        )
    ]
|
||||||
|
|
||||||
|
|
||||||
def matches(matching_model: MatchingModel, document: Document):
|
def matches(matching_model: MatchingModel, document: Document):
|
||||||
search_kwargs = {}
|
search_kwargs = {}
|
||||||
|
|
||||||
|
@ -318,6 +318,67 @@ def set_storage_path(
|
|||||||
document.save(update_fields=("storage_path",))
|
document.save(update_fields=("storage_path",))
|
||||||
|
|
||||||
|
|
||||||
|
def set_custom_fields(
    document: Document,
    logging_group=None,
    classifier: DocumentClassifier | None = None,
    replace=False,
    suggest=False,
    base_url=None,
    stdout=None,
    style_func=None,
    **kwargs,
):
    """
    Attach matching custom fields to a document, or only report them.

    :param document: the document to process.
    :param logging_group: passed as ``extra={"group": ...}`` to the logger.
    :param classifier: forwarded to ``matching.match_custom_fields``.
    :param replace: when true, first delete this document's custom field
        instances, except those whose field has an empty ``match`` and a
        matching algorithm other than ``CustomField.MATCH_AUTO``.
    :param suggest: when true, write suggestions to ``stdout`` instead of
        creating instances; ``stdout`` and ``style_func`` must then be given.
    :param base_url: when set, the suggestion output includes
        ``{base_url}/documents/{pk}`` instead of the bracketed pk.
    :param kwargs: absorbs extra keyword arguments (e.g. ``sender``,
        ``use_first``) supplied by callers; they are ignored here.
    """
    if replace:
        # NOTE(review): this deletion also runs when ``suggest`` is set, so
        # combining both options removes instances even though suggestions
        # are only printed - confirm this is intended.
        CustomFieldInstance.objects.filter(document=document).exclude(
            Q(field__match="") & ~Q(field__matching_algorithm=CustomField.MATCH_AUTO),
        ).delete()

    current_fields = {instance.field for instance in document.custom_fields.all()}
    matched_fields = set(matching.match_custom_fields(document, classifier))

    # Fields that match but are not yet attached to the document.
    relevant_fields = matched_fields - current_fields

    if suggest:
        # Attached auto-matching fields that no longer match.
        extra_fields = [
            f
            for f in current_fields - matched_fields
            if f.matching_algorithm == MatchingModel.MATCH_AUTO
        ]
        if not relevant_fields and not extra_fields:
            return

        doc_str = style_func.SUCCESS(str(document))
        if base_url:
            stdout.write(doc_str)
            stdout.write(f"{base_url}/documents/{document.pk}")
        else:
            stdout.write(doc_str + style_func.SUCCESS(f" [{document.pk}]"))

        if relevant_fields:
            stdout.write(
                "Suggest custom fields: "
                + ", ".join([f.name for f in relevant_fields]),
            )
        if extra_fields:
            stdout.write(
                "Extra custom fields: " + ", ".join([f.name for f in extra_fields]),
            )
        return

    if not relevant_fields:
        return

    # The template reads 'Assigning custom fields "<fields>" to "<document>"',
    # so the field names must be the first format argument.
    message = 'Assigning custom fields "{}" to "{}"'
    logger.info(
        message.format(", ".join([f.name for f in relevant_fields]), document),
        extra={"group": logging_group},
    )

    for field in relevant_fields:
        CustomFieldInstance.objects.create(
            field=field,
            document=document,
        )
|
||||||
|
|
||||||
|
|
||||||
# see empty_trash in documents/tasks.py for signal handling
|
# see empty_trash in documents/tasks.py for signal handling
|
||||||
def cleanup_document_deletion(sender, instance, **kwargs):
|
def cleanup_document_deletion(sender, instance, **kwargs):
|
||||||
with FileLock(settings.MEDIA_LOCK):
|
with FileLock(settings.MEDIA_LOCK):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user