Merge pull request #933 from sbrunner/suggest

Add suggest
This commit is contained in:
Jonas Winkler 2021-06-13 12:20:34 +02:00 committed by GitHub
commit 67d0773231
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 178 additions and 30 deletions

View File

@ -63,8 +63,20 @@ class Command(BaseCommand):
action="store_true", action="store_true",
help="If set, the progress bar will not be shown" help="If set, the progress bar will not be shown"
) )
parser.add_argument(
"--suggest",
default=False,
action="store_true",
help="Return the suggestion, don't change anything."
)
parser.add_argument(
"--base-url",
help="The base URL to use to build the link to the documents."
)
def handle(self, *args, **options): def handle(self, *args, **options):
# Detect if we support color
color = self.style.ERROR("test") != "test"
if options["inbox_only"]: if options["inbox_only"]:
queryset = Document.objects.filter(tags__is_inbox_tag=True) queryset = Document.objects.filter(tags__is_inbox_tag=True)
@ -85,18 +97,27 @@ class Command(BaseCommand):
document=document, document=document,
classifier=classifier, classifier=classifier,
replace=options['overwrite'], replace=options['overwrite'],
use_first=options['use_first']) use_first=options['use_first'],
suggest=options['suggest'],
base_url=options['base_url'],
color=color)
if options['document_type']: if options['document_type']:
set_document_type(sender=None, set_document_type(sender=None,
document=document, document=document,
classifier=classifier, classifier=classifier,
replace=options['overwrite'], replace=options['overwrite'],
use_first=options['use_first']) use_first=options['use_first'],
suggest=options['suggest'],
base_url=options['base_url'],
color=color)
if options['tags']: if options['tags']:
set_tags( set_tags(
sender=None, sender=None,
document=document, document=document,
classifier=classifier, classifier=classifier,
replace=options['overwrite']) replace=options['overwrite'],
suggest=options['suggest'],
base_url=options['base_url'],
color=color)

View File

@ -1,6 +1,7 @@
import logging import logging
import os import os
from django.utils import termcolors
from django.conf import settings from django.conf import settings
from django.contrib.admin.models import ADDITION, LogEntry from django.contrib.admin.models import ADDITION, LogEntry
from django.contrib.auth.models import User from django.contrib.auth.models import User
@ -8,14 +9,14 @@ from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError from django.db import models, DatabaseError
from django.db.models import Q from django.db.models import Q
from django.dispatch import receiver from django.dispatch import receiver
from django.utils import timezone from django.utils import termcolors, timezone
from filelock import FileLock from filelock import FileLock
from .. import matching from .. import matching
from ..file_handling import delete_empty_directories, \ from ..file_handling import delete_empty_directories, \
create_source_path_directory, \ create_source_path_directory, \
generate_unique_filename generate_unique_filename
from ..models import Document, Tag from ..models import Document, Tag, MatchingModel
logger = logging.getLogger("paperless.handlers") logger = logging.getLogger("paperless.handlers")
@ -32,6 +33,9 @@ def set_correspondent(sender,
classifier=None, classifier=None,
replace=False, replace=False,
use_first=True, use_first=True,
suggest=False,
base_url=None,
color=False,
**kwargs): **kwargs):
if document.correspondent and not replace: if document.correspondent and not replace:
return return
@ -60,13 +64,31 @@ def set_correspondent(sender,
return return
if selected or replace: if selected or replace:
logger.info( if suggest:
f"Assigning correspondent {selected} to {document}", if base_url:
extra={'group': logging_group} print(
) termcolors.colorize(str(document), fg='green')
if color
else str(document)
)
print(f"{base_url}/documents/{document.pk}")
else:
print(
(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
) + f" [{document.pk}]"
)
print(f"Suggest correspondent {selected}")
else:
logger.info(
f"Assigning correspondent {selected} to {document}",
extra={'group': logging_group}
)
document.correspondent = selected document.correspondent = selected
document.save(update_fields=("correspondent",)) document.save(update_fields=("correspondent",))
def set_document_type(sender, def set_document_type(sender,
@ -75,6 +97,9 @@ def set_document_type(sender,
classifier=None, classifier=None,
replace=False, replace=False,
use_first=True, use_first=True,
suggest=False,
base_url=None,
color=False,
**kwargs): **kwargs):
if document.document_type and not replace: if document.document_type and not replace:
return return
@ -104,13 +129,31 @@ def set_document_type(sender,
return return
if selected or replace: if selected or replace:
logger.info( if suggest:
f"Assigning document type {selected} to {document}", if base_url:
extra={'group': logging_group} print(
) termcolors.colorize(str(document), fg='green')
if color
else str(document)
)
print(f"{base_url}/documents/{document.pk}")
else:
print(
(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
) + f" [{document.pk}]"
)
print(f"Sugest document type {selected}")
else:
logger.info(
f"Assigning document type {selected} to {document}",
extra={'group': logging_group}
)
document.document_type = selected document.document_type = selected
document.save(update_fields=("document_type",)) document.save(update_fields=("document_type",))
def set_tags(sender, def set_tags(sender,
@ -118,6 +161,9 @@ def set_tags(sender,
logging_group=None, logging_group=None,
classifier=None, classifier=None,
replace=False, replace=False,
suggest=False,
base_url=None,
color=False,
**kwargs): **kwargs):
if replace: if replace:
@ -132,16 +178,48 @@ def set_tags(sender,
relevant_tags = set(matched_tags) - current_tags relevant_tags = set(matched_tags) - current_tags
if not relevant_tags: if suggest:
return extra_tags = current_tags - set(matched_tags)
extra_tags = [
t for t in extra_tags
if t.matching_algorithm == MatchingModel.MATCH_AUTO
]
if not relevant_tags and not extra_tags:
return
if base_url:
print(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
)
print(f"{base_url}/documents/{document.pk}")
else:
print(
(
termcolors.colorize(str(document), fg='green')
if color
else str(document)
) + f" [{document.pk}]"
)
if relevant_tags:
print(
"Suggest tags: " + ", ".join([t.name for t in relevant_tags])
)
if extra_tags:
print("Extra tags: " + ", ".join([t.name for t in extra_tags]))
else:
if not relevant_tags:
return
message = 'Tagging "{}" with "{}"' message = 'Tagging "{}" with "{}"'
logger.info( logger.info(
message.format(document, ", ".join([t.name for t in relevant_tags])), message.format(
extra={'group': logging_group} document, ", ".join([t.name for t in relevant_tags])
) ),
extra={'group': logging_group}
)
document.tags.add(*relevant_tags) document.tags.add(*relevant_tags)
@receiver(models.signals.post_delete, sender=Document) @receiver(models.signals.post_delete, sender=Document)

View File

@ -11,14 +11,17 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.d1 = Document.objects.create(checksum="A", title="A", content="first document") self.d1 = Document.objects.create(checksum="A", title="A", content="first document")
self.d2 = Document.objects.create(checksum="B", title="B", content="second document") self.d2 = Document.objects.create(checksum="B", title="B", content="second document")
self.d3 = Document.objects.create(checksum="C", title="C", content="unrelated document") self.d3 = Document.objects.create(checksum="C", title="C", content="unrelated document")
self.d4 = Document.objects.create(checksum="D", title="D", content="auto document")
self.tag_first = Tag.objects.create(name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY) self.tag_first = Tag.objects.create(name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY)
self.tag_second = Tag.objects.create(name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY) self.tag_second = Tag.objects.create(name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY)
self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True) self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
self.tag_no_match = Tag.objects.create(name="test2") self.tag_no_match = Tag.objects.create(name="test2")
self.tag_auto = Tag.objects.create(name="tagauto", matching_algorithm=Tag.MATCH_AUTO)
self.d3.tags.add(self.tag_inbox) self.d3.tags.add(self.tag_inbox)
self.d3.tags.add(self.tag_no_match) self.d3.tags.add(self.tag_no_match)
self.d4.tags.add(self.tag_auto)
self.correspondent_first = Correspondent.objects.create( self.correspondent_first = Correspondent.objects.create(
@ -32,7 +35,8 @@ class TestRetagger(DirectoriesMixin, TestCase):
name="dt2", match="second", matching_algorithm=DocumentType.MATCH_ANY) name="dt2", match="second", matching_algorithm=DocumentType.MATCH_ANY)
def get_updated_docs(self): def get_updated_docs(self):
return Document.objects.get(title="A"), Document.objects.get(title="B"), Document.objects.get(title="C") return Document.objects.get(title="A"), Document.objects.get(title="B"), \
Document.objects.get(title="C"), Document.objects.get(title="D")
def setUp(self) -> None: def setUp(self) -> None:
super(TestRetagger, self).setUp() super(TestRetagger, self).setUp()
@ -40,25 +44,26 @@ class TestRetagger(DirectoriesMixin, TestCase):
def test_add_tags(self): def test_add_tags(self):
call_command('document_retagger', '--tags') call_command('document_retagger', '--tags')
d_first, d_second, d_unrelated = self.get_updated_docs() d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.tags.count(), 1) self.assertEqual(d_first.tags.count(), 1)
self.assertEqual(d_second.tags.count(), 1) self.assertEqual(d_second.tags.count(), 1)
self.assertEqual(d_unrelated.tags.count(), 2) self.assertEqual(d_unrelated.tags.count(), 2)
self.assertEqual(d_auto.tags.count(), 1)
self.assertEqual(d_first.tags.first(), self.tag_first) self.assertEqual(d_first.tags.first(), self.tag_first)
self.assertEqual(d_second.tags.first(), self.tag_second) self.assertEqual(d_second.tags.first(), self.tag_second)
def test_add_type(self): def test_add_type(self):
call_command('document_retagger', '--document_type') call_command('document_retagger', '--document_type')
d_first, d_second, d_unrelated = self.get_updated_docs() d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.document_type, self.doctype_first) self.assertEqual(d_first.document_type, self.doctype_first)
self.assertEqual(d_second.document_type, self.doctype_second) self.assertEqual(d_second.document_type, self.doctype_second)
def test_add_correspondent(self): def test_add_correspondent(self):
call_command('document_retagger', '--correspondent') call_command('document_retagger', '--correspondent')
d_first, d_second, d_unrelated = self.get_updated_docs() d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertEqual(d_first.correspondent, self.correspondent_first) self.assertEqual(d_first.correspondent, self.correspondent_first)
self.assertEqual(d_second.correspondent, self.correspondent_second) self.assertEqual(d_second.correspondent, self.correspondent_second)
@ -68,11 +73,55 @@ class TestRetagger(DirectoriesMixin, TestCase):
call_command('document_retagger', '--tags', '--overwrite') call_command('document_retagger', '--tags', '--overwrite')
d_first, d_second, d_unrelated = self.get_updated_docs() d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()
self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id)) self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))
self.assertCountEqual([tag.id for tag in d_first.tags.all()], [self.tag_first.id]) self.assertCountEqual([tag.id for tag in d_first.tags.all()], [self.tag_first.id])
self.assertCountEqual([tag.id for tag in d_second.tags.all()], [self.tag_second.id]) self.assertCountEqual([tag.id for tag in d_second.tags.all()], [self.tag_second.id])
self.assertCountEqual([tag.id for tag in d_unrelated.tags.all()], [self.tag_inbox.id, self.tag_no_match.id]) self.assertCountEqual([tag.id for tag in d_unrelated.tags.all()], [self.tag_inbox.id, self.tag_no_match.id])
self.assertEqual(d_auto.tags.count(), 0)
def test_add_tags_suggest(self):
    """Suggest mode with ``--tags`` must not modify any document's tags.

    Documents A and B start without tags and must still have none after
    the run; document D must still carry exactly one tag.
    """
    call_command('document_retagger', '--tags', '--suggest')
    doc_a, doc_b, _doc_c, doc_d = self.get_updated_docs()

    # Both content-matched documents are expected to remain untagged.
    for untouched in (doc_a, doc_b):
        self.assertEqual(untouched.tags.count(), 0)
    self.assertEqual(doc_d.tags.count(), 1)
def test_add_type_suggest(self):
    """Suggest mode with ``--document_type`` must not assign a type.

    Documents A and B are re-read from the database after the command
    runs and must still have no document type.
    """
    call_command('document_retagger', '--document_type', '--suggest')
    # Only the first two documents are checked; the rest are discarded.
    d_first, d_second, *_ = self.get_updated_docs()

    # assertIsNone checks identity, the idiomatic way to test for None.
    self.assertIsNone(d_first.document_type)
    self.assertIsNone(d_second.document_type)
def test_add_correspondent_suggest(self):
    """Suggest mode with ``--correspondent`` must not assign a correspondent.

    Documents A and B are re-read from the database after the command
    runs and must still have no correspondent.
    """
    call_command('document_retagger', '--correspondent', '--suggest')
    # Only the first two documents are checked; the rest are discarded.
    d_first, d_second, *_ = self.get_updated_docs()

    # assertIsNone checks identity, the idiomatic way to test for None.
    self.assertIsNone(d_first.correspondent)
    self.assertIsNone(d_second.correspondent)
def test_add_tags_suggest_url(self):
    """Suggest mode with ``--tags`` and ``--base-url`` must not modify tags.

    Same expectations as the plain suggest run: documents A and B stay
    untagged and document D still carries exactly one tag.
    """
    call_command('document_retagger', '--tags', '--suggest', '--base-url=http://localhost')
    doc_a, doc_b, _doc_c, doc_d = self.get_updated_docs()

    # Both content-matched documents are expected to remain untagged.
    for untouched in (doc_a, doc_b):
        self.assertEqual(untouched.tags.count(), 0)
    self.assertEqual(doc_d.tags.count(), 1)
def test_add_type_suggest_url(self):
    """Suggest mode with ``--document_type`` and ``--base-url`` must not assign a type.

    Documents A and B are re-read from the database after the command
    runs and must still have no document type.
    """
    call_command('document_retagger', '--document_type', '--suggest', '--base-url=http://localhost')
    # Only the first two documents are checked; the rest are discarded.
    d_first, d_second, *_ = self.get_updated_docs()

    # assertIsNone checks identity, the idiomatic way to test for None.
    self.assertIsNone(d_first.document_type)
    self.assertIsNone(d_second.document_type)
def test_add_correspondent_suggest_url(self):
    """Suggest mode with ``--correspondent`` and ``--base-url`` must not assign a correspondent.

    Documents A and B are re-read from the database after the command
    runs and must still have no correspondent.
    """
    call_command('document_retagger', '--correspondent', '--suggest', '--base-url=http://localhost')
    # Only the first two documents are checked; the rest are discarded.
    d_first, d_second, *_ = self.get_updated_docs()

    # assertIsNone checks identity, the idiomatic way to test for None.
    self.assertIsNone(d_first.correspondent)
    self.assertIsNone(d_second.correspondent)