Merge pull request #933 from sbrunner/suggest

Add suggest
This commit is contained in:
Jonas Winkler 2021-06-13 12:20:34 +02:00 committed by GitHub
commit 67d0773231
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 178 additions and 30 deletions

View File

@ -63,8 +63,20 @@ class Command(BaseCommand):
action="store_true",
help="If set, the progress bar will not be shown"
)
parser.add_argument(
"--suggest",
default=False,
action="store_true",
help="Return the suggestion, don't change anything."
)
parser.add_argument(
"--base-url",
help="The base URL to use to build the link to the documents."
)
def handle(self, *args, **options):
# Detect if we support color
color = self.style.ERROR("test") != "test"
if options["inbox_only"]:
queryset = Document.objects.filter(tags__is_inbox_tag=True)
@ -85,18 +97,27 @@ class Command(BaseCommand):
document=document,
classifier=classifier,
replace=options['overwrite'],
use_first=options['use_first'])
use_first=options['use_first'],
suggest=options['suggest'],
base_url=options['base_url'],
color=color)
if options['document_type']:
set_document_type(sender=None,
document=document,
classifier=classifier,
replace=options['overwrite'],
use_first=options['use_first'])
use_first=options['use_first'],
suggest=options['suggest'],
base_url=options['base_url'],
color=color)
if options['tags']:
set_tags(
sender=None,
document=document,
classifier=classifier,
replace=options['overwrite'])
replace=options['overwrite'],
suggest=options['suggest'],
base_url=options['base_url'],
color=color)

View File

@ -1,6 +1,7 @@
import logging
import os
from django.utils import termcolors
from django.conf import settings
from django.contrib.admin.models import ADDITION, LogEntry
from django.contrib.auth.models import User
@ -8,14 +9,14 @@ from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError
from django.db.models import Q
from django.dispatch import receiver
from django.utils import timezone
from django.utils import termcolors, timezone
from filelock import FileLock
from .. import matching
from ..file_handling import delete_empty_directories, \
create_source_path_directory, \
generate_unique_filename
from ..models import Document, Tag
from ..models import Document, Tag, MatchingModel
logger = logging.getLogger("paperless.handlers")
@ -32,6 +33,9 @@ def set_correspondent(sender,
classifier=None,
replace=False,
use_first=True,
suggest=False,
base_url=None,
color=False,
**kwargs):
if document.correspondent and not replace:
return
@ -60,13 +64,31 @@ def set_correspondent(sender,
return
if selected or replace:
logger.info(
f"Assigning correspondent {selected} to {document}",
extra={'group': logging_group}
)
if suggest:
if base_url:
print(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
)
print(f"{base_url}/documents/{document.pk}")
else:
print(
(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
) + f" [{document.pk}]"
)
print(f"Suggest correspondent {selected}")
else:
logger.info(
f"Assigning correspondent {selected} to {document}",
extra={'group': logging_group}
)
document.correspondent = selected
document.save(update_fields=("correspondent",))
document.correspondent = selected
document.save(update_fields=("correspondent",))
def set_document_type(sender,
@ -75,6 +97,9 @@ def set_document_type(sender,
classifier=None,
replace=False,
use_first=True,
suggest=False,
base_url=None,
color=False,
**kwargs):
if document.document_type and not replace:
return
@ -104,13 +129,31 @@ def set_document_type(sender,
return
if selected or replace:
logger.info(
f"Assigning document type {selected} to {document}",
extra={'group': logging_group}
)
if suggest:
if base_url:
print(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
)
print(f"{base_url}/documents/{document.pk}")
else:
print(
(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
) + f" [{document.pk}]"
)
print(f"Sugest document type {selected}")
else:
logger.info(
f"Assigning document type {selected} to {document}",
extra={'group': logging_group}
)
document.document_type = selected
document.save(update_fields=("document_type",))
document.document_type = selected
document.save(update_fields=("document_type",))
def set_tags(sender,
@ -118,6 +161,9 @@ def set_tags(sender,
logging_group=None,
classifier=None,
replace=False,
suggest=False,
base_url=None,
color=False,
**kwargs):
if replace:
@ -132,16 +178,48 @@ def set_tags(sender,
relevant_tags = set(matched_tags) - current_tags
if not relevant_tags:
return
if suggest:
extra_tags = current_tags - set(matched_tags)
extra_tags = [
t for t in extra_tags
if t.matching_algorithm == MatchingModel.MATCH_AUTO
]
if not relevant_tags and not extra_tags:
return
if base_url:
print(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
)
print(f"{base_url}/documents/{document.pk}")
else:
print(
(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
) + f" [{document.pk}]"
)
if relevant_tags:
print(
"Suggest tags: " + ", ".join([t.name for t in relevant_tags])
)
if extra_tags:
print("Extra tags: " + ", ".join([t.name for t in extra_tags]))
else:
if not relevant_tags:
return
message = 'Tagging "{}" with "{}"'
logger.info(
message.format(document, ", ".join([t.name for t in relevant_tags])),
extra={'group': logging_group}
)
message = 'Tagging "{}" with "{}"'
logger.info(
message.format(
document, ", ".join([t.name for t in relevant_tags])
),
extra={'group': logging_group}
)
document.tags.add(*relevant_tags)
document.tags.add(*relevant_tags)
@receiver(models.signals.post_delete, sender=Document)

View File

@ -11,14 +11,17 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.d1 = Document.objects.create(checksum="A", title="A", content="first document")
self.d2 = Document.objects.create(checksum="B", title="B", content="second document")
self.d3 = Document.objects.create(checksum="C", title="C", content="unrelated document")
self.d4 = Document.objects.create(checksum="D", title="D", content="auto document")
self.tag_first = Tag.objects.create(name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY)
self.tag_second = Tag.objects.create(name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY)
self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
self.tag_no_match = Tag.objects.create(name="test2")
self.tag_auto = Tag.objects.create(name="tagauto", matching_algorithm=Tag.MATCH_AUTO)
self.d3.tags.add(self.tag_inbox)
self.d3.tags.add(self.tag_no_match)
self.d4.tags.add(self.tag_auto)
self.correspondent_first = Correspondent.objects.create(
@ -32,7 +35,8 @@ class TestRetagger(DirectoriesMixin, TestCase):
name="dt2", match="second", matching_algorithm=DocumentType.MATCH_ANY)
def get_updated_docs(self):
return Document.objects.get(title="A"), Document.objects.get(title="B"), Document.objects.get(title="C")
return Document.objects.get(title="A"), Document.objects.get(title="B"), \
Document.objects.get(title="C"), Document.objects.get(title="D")
def setUp(self) -> None:
super(TestRetagger, self).setUp()
@ -40,25 +44,26 @@ class TestRetagger(DirectoriesMixin, TestCase):
def test_add_tags(self):
call_command('document_retagger', '--tags')
d_first, d_second, d_unrelated = self.get_updated_docs()
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.tags.count(), 1)
self.assertEqual(d_second.tags.count(), 1)
self.assertEqual(d_unrelated.tags.count(), 2)
self.assertEqual(d_auto.tags.count(), 1)
self.assertEqual(d_first.tags.first(), self.tag_first)
self.assertEqual(d_second.tags.first(), self.tag_second)
def test_add_type(self):
call_command('document_retagger', '--document_type')
d_first, d_second, d_unrelated = self.get_updated_docs()
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.document_type, self.doctype_first)
self.assertEqual(d_second.document_type, self.doctype_second)
def test_add_correspondent(self):
call_command('document_retagger', '--correspondent')
d_first, d_second, d_unrelated = self.get_updated_docs()
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.correspondent, self.correspondent_first)
self.assertEqual(d_second.correspondent, self.correspondent_second)
@ -68,11 +73,55 @@ class TestRetagger(DirectoriesMixin, TestCase):
call_command('document_retagger', '--tags', '--overwrite')
d_first, d_second, d_unrelated = self.get_updated_docs()
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))
self.assertCountEqual([tag.id for tag in d_first.tags.all()], [self.tag_first.id])
self.assertCountEqual([tag.id for tag in d_second.tags.all()], [self.tag_second.id])
self.assertCountEqual([tag.id for tag in d_unrelated.tags.all()], [self.tag_inbox.id, self.tag_no_match.id])
self.assertEqual(d_auto.tags.count(), 0)
def test_add_tags_suggest(self):
call_command('document_retagger', '--tags', '--suggest')
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.tags.count(), 0)
self.assertEqual(d_second.tags.count(), 0)
self.assertEqual(d_auto.tags.count(), 1)
def test_add_type_suggest(self):
call_command('document_retagger', '--document_type', '--suggest')
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.document_type, None)
self.assertEqual(d_second.document_type, None)
def test_add_correspondent_suggest(self):
call_command('document_retagger', '--correspondent', '--suggest')
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.correspondent, None)
self.assertEqual(d_second.correspondent, None)
def test_add_tags_suggest_url(self):
call_command('document_retagger', '--tags', '--suggest', '--base-url=http://localhost')
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.tags.count(), 0)
self.assertEqual(d_second.tags.count(), 0)
self.assertEqual(d_auto.tags.count(), 1)
def test_add_type_suggest_url(self):
call_command('document_retagger', '--document_type', '--suggest', '--base-url=http://localhost')
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.document_type, None)
self.assertEqual(d_second.document_type, None)
def test_add_correspondent_suggest_url(self):
call_command('document_retagger', '--correspondent', '--suggest', '--base-url=http://localhost')
d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.correspondent, None)
self.assertEqual(d_second.correspondent, None)