mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Enhancement: add --id-range for document_retagger (#4080)
--------- Co-authored-by: Trenton H <797416+stumpylog@users.noreply.github.com>
This commit is contained in:
parent
a8e13df249
commit
b238ba054d
@ -351,7 +351,7 @@ currently-imported docs. This problem is common enough that there are
|
|||||||
tools for it.
|
tools for it.
|
||||||
|
|
||||||
```
|
```
|
||||||
document_retagger [-h] [-c] [-T] [-t] [-i] [--use-first] [-f]
|
document_retagger [-h] [-c] [-T] [-t] [-i] [--id-range] [--use-first] [-f]
|
||||||
|
|
||||||
optional arguments:
|
optional arguments:
|
||||||
-c, --correspondent
|
-c, --correspondent
|
||||||
@ -359,6 +359,7 @@ optional arguments:
|
|||||||
-t, --document_type
|
-t, --document_type
|
||||||
-s, --storage_path
|
-s, --storage_path
|
||||||
-i, --inbox-only
|
-i, --inbox-only
|
||||||
|
--id-range
|
||||||
--use-first
|
--use-first
|
||||||
-f, --overwrite
|
-f, --overwrite
|
||||||
```
|
```
|
||||||
@ -375,6 +376,11 @@ Specify `-i` to have the document retagger work on documents tagged with
|
|||||||
inbox tags only. This is useful when you don't want to mess with your
|
inbox tags only. This is useful when you don't want to mess with your
|
||||||
already processed documents.
|
already processed documents.
|
||||||
|
|
||||||
|
Specify `--id-range 1 100` to have the document retagger work only on a
|
||||||
|
specific range of document IDs. This can be useful if you have a lot of
|
||||||
|
documents and want to test the matching rules only on a subset of
|
||||||
|
documents. Both ends of the range are inclusive.
|
||||||
|
|
||||||
When multiple document types or correspondents match a single document,
|
When multiple document types or correspondents match a single document,
|
||||||
the retagger won't assign these to the document. Specify `--use-first`
|
the retagger won't assign these to the document. Specify `--use-first`
|
||||||
to override this behavior and just use the first correspondent or type
|
to override this behavior and just use the first correspondent or type
|
||||||
|
@ -63,6 +63,12 @@ class Command(BaseCommand):
|
|||||||
"--base-url",
|
"--base-url",
|
||||||
help="The base URL to use to build the link to the documents.",
|
help="The base URL to use to build the link to the documents.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--id-range",
|
||||||
|
help="A range of document ids on which the retagging should be applied.",
|
||||||
|
nargs=2,
|
||||||
|
type=int,
|
||||||
|
)
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
# Detect if we support color
|
# Detect if we support color
|
||||||
@ -72,6 +78,12 @@ class Command(BaseCommand):
|
|||||||
queryset = Document.objects.filter(tags__is_inbox_tag=True)
|
queryset = Document.objects.filter(tags__is_inbox_tag=True)
|
||||||
else:
|
else:
|
||||||
queryset = Document.objects.all()
|
queryset = Document.objects.all()
|
||||||
|
|
||||||
|
if options["id_range"]:
|
||||||
|
queryset = queryset.filter(
|
||||||
|
id__range=(options["id_range"][0], options["id_range"][1]),
|
||||||
|
)
|
||||||
|
|
||||||
documents = queryset.distinct()
|
documents = queryset.distinct()
|
||||||
|
|
||||||
classifier = load_classifier()
|
classifier = load_classifier()
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from django.core.management import call_command
|
from django.core.management import call_command
|
||||||
|
from django.core.management.base import CommandError
|
||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
|
|
||||||
from documents.models import Correspondent
|
from documents.models import Correspondent
|
||||||
@ -258,3 +259,38 @@ class TestRetagger(DirectoriesMixin, TestCase):
|
|||||||
self.assertEqual(d_auto.storage_path, self.sp1)
|
self.assertEqual(d_auto.storage_path, self.sp1)
|
||||||
self.assertIsNone(d_second.storage_path)
|
self.assertIsNone(d_second.storage_path)
|
||||||
self.assertEqual(d_unrelated.storage_path, self.sp2)
|
self.assertEqual(d_unrelated.storage_path, self.sp2)
|
||||||
|
|
||||||
|
def test_id_range_parameter(self):
    """
    Exercise the ``--id-range`` option of ``document_retagger``.

    GIVEN:
        - An extra document created on top of the fixture documents
    WHEN:
        - The retagger runs with ``--tags --id-range 1 2``
        - The retagger is called with ``--id-range`` but no values
        - The retagger is called with non-integer range values
        - The retagger runs with ``--tags --id-range 1 9999``
    THEN:
        - Only one document carries ``self.tag_first`` after the narrow run
        - Both malformed invocations raise ``CommandError``
        - Two documents carry ``self.tag_first`` after the wide run
    """
    Document.objects.create(
        checksum="E",
        title="E",
        content="NOT the first document",
    )

    call_command("document_retagger", "--tags", "--id-range", "1", "2")
    # The retagger shouldn't apply the 'first' tag to our new document
    self.assertEqual(
        Document.objects.filter(tags__id=self.tag_first.id).count(),
        1,
    )

    # Argument parsing errors surface as CommandError when the command is
    # invoked through call_command, so assert on the exception itself
    # instead of on a return value that is never assigned.
    with self.assertRaisesMessage(CommandError, "expected 2 arguments"):
        call_command("document_retagger", "--tags", "--id-range")

    with self.assertRaisesMessage(CommandError, "invalid int value"):
        call_command(
            "document_retagger",
            "--tags",
            "--id-range",
            "a",
            "b",
        )

    call_command("document_retagger", "--tags", "--id-range", "1", "9999")
    # Now we should have 2 documents
    self.assertEqual(
        Document.objects.filter(tags__id=self.tag_first.id).count(),
        2,
    )
|
||||||
|
Loading…
x
Reference in New Issue
Block a user