mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Chore: Cleanup command arguments and standardize process count handling (#4541)
Cleans up some command help text and adds more control over process count for commands with a Pool
This commit is contained in:
parent
577b49df9d
commit
e8527ba723
@ -414,6 +414,9 @@ This command takes no arguments.
|
|||||||
|
|
||||||
Use this command to re-create document thumbnails. Optionally include the `--document {id}` option to generate thumbnails for a specific document only.
|
Use this command to re-create document thumbnails. Optionally include the `--document {id}` option to generate thumbnails for a specific document only.
|
||||||
|
|
||||||
|
You may also specify `--processes` to control the number of processes used to generate new thumbnails. The default is to utilize
|
||||||
|
a quarter of the available processors.
|
||||||
|
|
||||||
```
|
```
|
||||||
document_thumbnails
|
document_thumbnails
|
||||||
```
|
```
|
||||||
@ -591,7 +594,7 @@ take into account by the detection.
|
|||||||
document_fuzzy_match [--ratio] [--processes N]
|
document_fuzzy_match [--ratio] [--processes N]
|
||||||
```
|
```
|
||||||
|
|
||||||
| Option | Required | Default | Description |
|
| Option | Required | Default | Description |
|
||||||
| ----------- | -------- | ------- | ------------------------------------------------------------------------------------------------------------------------------ |
|
| ----------- | -------- | ------------------- | ------------------------------------------------------------------------------------------------------------------------------ |
|
||||||
| --ratio | No | 85.0 | a number between 0 and 100, setting how similar a document must be for it to be reported. Higher numbers mean more similarity. |
|
| --ratio | No | 85.0 | a number between 0 and 100, setting how similar a document must be for it to be reported. Higher numbers mean more similarity. |
|
||||||
| --processes | No | 4 | Number of processes to use for matching. Setting 1 disables multiple processes |
|
| --processes | No | 1/4 of system cores | Number of processes to use for matching. Setting 1 disables multiple processes |
|
||||||
|
@ -17,19 +17,27 @@ class Command(BaseCommand):
|
|||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--passphrase",
|
"--passphrase",
|
||||||
help="If PAPERLESS_PASSPHRASE isn't set already, you need to "
|
help=(
|
||||||
"specify it here",
|
"If PAPERLESS_PASSPHRASE isn't set already, you need to "
|
||||||
|
"specify it here"
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
try:
|
try:
|
||||||
print(
|
self.stdout.write(
|
||||||
"\n\nWARNING: This script is going to work directly on your "
|
self.style.WARNING(
|
||||||
"document originals, so\nWARNING: you probably shouldn't run "
|
"\n\n"
|
||||||
"this unless you've got a recent backup\nWARNING: handy. It "
|
"WARNING: This script is going to work directly on your "
|
||||||
"*should* work without a hitch, but be safe and backup your\n"
|
"document originals, so\n"
|
||||||
"WARNING: stuff first.\n\nHit Ctrl+C to exit now, or Enter to "
|
"WARNING: you probably shouldn't run "
|
||||||
"continue.\n\n",
|
"this unless you've got a recent backup\n"
|
||||||
|
"WARNING: handy. It "
|
||||||
|
"*should* work without a hitch, but be safe and backup your\n"
|
||||||
|
"WARNING: stuff first.\n\n"
|
||||||
|
"Hit Ctrl+C to exit now, or Enter to "
|
||||||
|
"continue.\n\n",
|
||||||
|
),
|
||||||
)
|
)
|
||||||
_ = input()
|
_ = input()
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
@ -44,14 +52,13 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
self.__gpg_to_unencrypted(passphrase)
|
self.__gpg_to_unencrypted(passphrase)
|
||||||
|
|
||||||
@staticmethod
|
def __gpg_to_unencrypted(self, passphrase: str):
|
||||||
def __gpg_to_unencrypted(passphrase):
|
|
||||||
encrypted_files = Document.objects.filter(
|
encrypted_files = Document.objects.filter(
|
||||||
storage_type=Document.STORAGE_TYPE_GPG,
|
storage_type=Document.STORAGE_TYPE_GPG,
|
||||||
)
|
)
|
||||||
|
|
||||||
for document in encrypted_files:
|
for document in encrypted_files:
|
||||||
print(f"Decrypting {document}".encode())
|
self.stdout.write(f"Decrypting {document}")
|
||||||
|
|
||||||
old_paths = [document.source_path, document.thumbnail_path]
|
old_paths = [document.source_path, document.thumbnail_path]
|
||||||
|
|
||||||
|
@ -7,21 +7,20 @@ from django import db
|
|||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
|
from documents.management.commands.mixins import MultiProcessMixin
|
||||||
|
from documents.management.commands.mixins import ProgressBarMixin
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.tasks import update_document_archive_file
|
from documents.tasks import update_document_archive_file
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.management.archiver")
|
logger = logging.getLogger("paperless.management.archiver")
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||||
help = """
|
help = (
|
||||||
Using the current classification model, assigns correspondents, tags
|
"Using the current classification model, assigns correspondents, tags "
|
||||||
and document types to all documents, effectively allowing you to
|
"and document types to all documents, effectively allowing you to "
|
||||||
back-tag all previously indexed documents with metadata created (or
|
"back-tag all previously indexed documents with metadata created (or "
|
||||||
modified) after their initial import.
|
"modified) after their initial import."
|
||||||
""".replace(
|
|
||||||
" ",
|
|
||||||
"",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
@ -30,8 +29,10 @@ class Command(BaseCommand):
|
|||||||
"--overwrite",
|
"--overwrite",
|
||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Recreates the archived document for documents that already "
|
help=(
|
||||||
"have an archived version.",
|
"Recreates the archived document for documents that already "
|
||||||
|
"have an archived version."
|
||||||
|
),
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-d",
|
"-d",
|
||||||
@ -39,17 +40,18 @@ class Command(BaseCommand):
|
|||||||
default=None,
|
default=None,
|
||||||
type=int,
|
type=int,
|
||||||
required=False,
|
required=False,
|
||||||
help="Specify the ID of a document, and this command will only "
|
help=(
|
||||||
"run on this specific document.",
|
"Specify the ID of a document, and this command will only "
|
||||||
)
|
"run on this specific document."
|
||||||
parser.add_argument(
|
),
|
||||||
"--no-progress-bar",
|
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
)
|
||||||
|
self.add_argument_progress_bar_mixin(parser)
|
||||||
|
self.add_argument_processes_mixin(parser)
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
self.handle_processes_mixin(**options)
|
||||||
|
self.handle_progress_bar_mixin(**options)
|
||||||
|
|
||||||
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
||||||
|
|
||||||
overwrite = options["overwrite"]
|
overwrite = options["overwrite"]
|
||||||
@ -67,19 +69,27 @@ class Command(BaseCommand):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Note to future self: this prevents django from reusing database
|
# Note to future self: this prevents django from reusing database
|
||||||
# conncetions between processes, which is bad and does not work
|
# connections between processes, which is bad and does not work
|
||||||
# with postgres.
|
# with postgres.
|
||||||
db.connections.close_all()
|
db.connections.close_all()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
logging.getLogger().handlers[0].level = logging.ERROR
|
logging.getLogger().handlers[0].level = logging.ERROR
|
||||||
with multiprocessing.Pool(processes=settings.TASK_WORKERS) as pool:
|
|
||||||
list(
|
if self.process_count == 1:
|
||||||
tqdm.tqdm(
|
for doc_id in document_ids:
|
||||||
pool.imap_unordered(update_document_archive_file, document_ids),
|
update_document_archive_file(doc_id)
|
||||||
total=len(document_ids),
|
else: # pragma: no cover
|
||||||
disable=options["no_progress_bar"],
|
with multiprocessing.Pool(self.process_count) as pool:
|
||||||
),
|
list(
|
||||||
)
|
tqdm.tqdm(
|
||||||
|
pool.imap_unordered(
|
||||||
|
update_document_archive_file,
|
||||||
|
document_ids,
|
||||||
|
),
|
||||||
|
total=len(document_ids),
|
||||||
|
disable=self.no_progress_bar,
|
||||||
|
),
|
||||||
|
)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
self.stdout.write(self.style.NOTICE("Aborting..."))
|
self.stdout.write(self.style.NOTICE("Aborting..."))
|
||||||
|
@ -4,16 +4,10 @@ from documents.tasks import train_classifier
|
|||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
help = """
|
help = (
|
||||||
Trains the classifier on your data and saves the resulting models to a
|
"Trains the classifier on your data and saves the resulting models to a "
|
||||||
file. The document consumer will then automatically use this new model.
|
"file. The document consumer will then automatically use this new model."
|
||||||
""".replace(
|
|
||||||
" ",
|
|
||||||
"",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
BaseCommand.__init__(self, *args, **kwargs)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
train_classifier()
|
train_classifier()
|
||||||
|
@ -43,13 +43,10 @@ from paperless_mail.models import MailRule
|
|||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
help = """
|
help = (
|
||||||
Decrypt and rename all files in our collection into a given target
|
"Decrypt and rename all files in our collection into a given target "
|
||||||
directory. And include a manifest file containing document data for
|
"directory. And include a manifest file containing document data for "
|
||||||
easy import.
|
"easy import."
|
||||||
""".replace(
|
|
||||||
" ",
|
|
||||||
"",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
@ -60,9 +57,11 @@ class Command(BaseCommand):
|
|||||||
"--compare-checksums",
|
"--compare-checksums",
|
||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Compare file checksums when determining whether to export "
|
help=(
|
||||||
"a file or not. If not specified, file size and time "
|
"Compare file checksums when determining whether to export "
|
||||||
"modified is used instead.",
|
"a file or not. If not specified, file size and time "
|
||||||
|
"modified is used instead."
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@ -70,9 +69,11 @@ class Command(BaseCommand):
|
|||||||
"--delete",
|
"--delete",
|
||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="After exporting, delete files in the export directory that "
|
help=(
|
||||||
"do not belong to the current export, such as files from "
|
"After exporting, delete files in the export directory that "
|
||||||
"deleted documents.",
|
"do not belong to the current export, such as files from "
|
||||||
|
"deleted documents."
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@ -80,8 +81,10 @@ class Command(BaseCommand):
|
|||||||
"--use-filename-format",
|
"--use-filename-format",
|
||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Use PAPERLESS_FILENAME_FORMAT for storing files in the "
|
help=(
|
||||||
"export directory, if configured.",
|
"Use PAPERLESS_FILENAME_FORMAT for storing files in the "
|
||||||
|
"export directory, if configured."
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@ -105,8 +108,10 @@ class Command(BaseCommand):
|
|||||||
"--use-folder-prefix",
|
"--use-folder-prefix",
|
||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Export files in dedicated folders according to their nature: "
|
help=(
|
||||||
"archive, originals or thumbnails",
|
"Export files in dedicated folders according to their nature: "
|
||||||
|
"archive, originals or thumbnails"
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
@ -7,6 +7,8 @@ import tqdm
|
|||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
from django.core.management import CommandError
|
from django.core.management import CommandError
|
||||||
|
|
||||||
|
from documents.management.commands.mixins import MultiProcessMixin
|
||||||
|
from documents.management.commands.mixins import ProgressBarMixin
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
|
|
||||||
|
|
||||||
@ -41,7 +43,7 @@ def _process_and_match(work: _WorkPackage) -> _WorkResult:
|
|||||||
return _WorkResult(work.first_doc.pk, work.second_doc.pk, match)
|
return _WorkResult(work.first_doc.pk, work.second_doc.pk, match)
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||||
help = "Searches for documents where the content almost matches"
|
help = "Searches for documents where the content almost matches"
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
@ -51,23 +53,16 @@ class Command(BaseCommand):
|
|||||||
type=float,
|
type=float,
|
||||||
help="Ratio to consider documents a match",
|
help="Ratio to consider documents a match",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
self.add_argument_progress_bar_mixin(parser)
|
||||||
"--processes",
|
self.add_argument_processes_mixin(parser)
|
||||||
default=4,
|
|
||||||
type=int,
|
|
||||||
help="Number of processes to distribute work amongst",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--no-progress-bar",
|
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
RATIO_MIN: Final[float] = 0.0
|
RATIO_MIN: Final[float] = 0.0
|
||||||
RATIO_MAX: Final[float] = 100.0
|
RATIO_MAX: Final[float] = 100.0
|
||||||
|
|
||||||
|
self.handle_processes_mixin(**options)
|
||||||
|
self.handle_progress_bar_mixin(**options)
|
||||||
|
|
||||||
opt_ratio = options["ratio"]
|
opt_ratio = options["ratio"]
|
||||||
checked_pairs: set[tuple[int, int]] = set()
|
checked_pairs: set[tuple[int, int]] = set()
|
||||||
work_pkgs: list[_WorkPackage] = []
|
work_pkgs: list[_WorkPackage] = []
|
||||||
@ -76,9 +71,6 @@ class Command(BaseCommand):
|
|||||||
if opt_ratio < RATIO_MIN or opt_ratio > RATIO_MAX:
|
if opt_ratio < RATIO_MIN or opt_ratio > RATIO_MAX:
|
||||||
raise CommandError("The ratio must be between 0 and 100")
|
raise CommandError("The ratio must be between 0 and 100")
|
||||||
|
|
||||||
if options["processes"] < 1:
|
|
||||||
raise CommandError("There must be at least 1 process")
|
|
||||||
|
|
||||||
all_docs = Document.objects.all().order_by("id")
|
all_docs = Document.objects.all().order_by("id")
|
||||||
|
|
||||||
# Build work packages for processing
|
# Build work packages for processing
|
||||||
@ -101,17 +93,17 @@ class Command(BaseCommand):
|
|||||||
work_pkgs.append(_WorkPackage(first_doc, second_doc))
|
work_pkgs.append(_WorkPackage(first_doc, second_doc))
|
||||||
|
|
||||||
# Don't spin up a pool of 1 process
|
# Don't spin up a pool of 1 process
|
||||||
if options["processes"] == 1:
|
if self.process_count == 1:
|
||||||
results = []
|
results = []
|
||||||
for work in tqdm.tqdm(work_pkgs, disable=options["no_progress_bar"]):
|
for work in tqdm.tqdm(work_pkgs, disable=self.no_progress_bar):
|
||||||
results.append(_process_and_match(work))
|
results.append(_process_and_match(work))
|
||||||
else:
|
else: # pragma: no cover
|
||||||
with multiprocessing.Pool(processes=options["processes"]) as pool:
|
with multiprocessing.Pool(processes=self.process_count) as pool:
|
||||||
results = list(
|
results = list(
|
||||||
tqdm.tqdm(
|
tqdm.tqdm(
|
||||||
pool.imap_unordered(_process_and_match, work_pkgs),
|
pool.imap_unordered(_process_and_match, work_pkgs),
|
||||||
total=len(work_pkgs),
|
total=len(work_pkgs),
|
||||||
disable=options["no_progress_bar"],
|
disable=self.no_progress_bar,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -40,12 +40,9 @@ def disable_signal(sig, receiver, sender):
|
|||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
help = """
|
help = (
|
||||||
Using a manifest.json file, load the data from there, and import the
|
"Using a manifest.json file, load the data from there, and import the "
|
||||||
documents it refers to.
|
"documents it refers to."
|
||||||
""".replace(
|
|
||||||
" ",
|
|
||||||
"",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
|
@ -1,25 +1,22 @@
|
|||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
|
|
||||||
|
from documents.management.commands.mixins import ProgressBarMixin
|
||||||
from documents.tasks import index_optimize
|
from documents.tasks import index_optimize
|
||||||
from documents.tasks import index_reindex
|
from documents.tasks import index_reindex
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(ProgressBarMixin, BaseCommand):
|
||||||
help = "Manages the document index."
|
help = "Manages the document index."
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
parser.add_argument("command", choices=["reindex", "optimize"])
|
parser.add_argument("command", choices=["reindex", "optimize"])
|
||||||
parser.add_argument(
|
self.add_argument_progress_bar_mixin(parser)
|
||||||
"--no-progress-bar",
|
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
self.handle_progress_bar_mixin(**options)
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
if options["command"] == "reindex":
|
if options["command"] == "reindex":
|
||||||
index_reindex(progress_bar_disable=options["no_progress_bar"])
|
index_reindex(progress_bar_disable=self.no_progress_bar)
|
||||||
elif options["command"] == "optimize":
|
elif options["command"] == "optimize":
|
||||||
index_optimize()
|
index_optimize()
|
||||||
|
@ -4,30 +4,22 @@ import tqdm
|
|||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
from django.db.models.signals import post_save
|
from django.db.models.signals import post_save
|
||||||
|
|
||||||
|
from documents.management.commands.mixins import ProgressBarMixin
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(ProgressBarMixin, BaseCommand):
|
||||||
help = """
|
help = "This will rename all documents to match the latest filename format."
|
||||||
This will rename all documents to match the latest filename format.
|
|
||||||
""".replace(
|
|
||||||
" ",
|
|
||||||
"",
|
|
||||||
)
|
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
parser.add_argument(
|
self.add_argument_progress_bar_mixin(parser)
|
||||||
"--no-progress-bar",
|
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
self.handle_progress_bar_mixin(**options)
|
||||||
logging.getLogger().handlers[0].level = logging.ERROR
|
logging.getLogger().handlers[0].level = logging.ERROR
|
||||||
|
|
||||||
for document in tqdm.tqdm(
|
for document in tqdm.tqdm(
|
||||||
Document.objects.all(),
|
Document.objects.all(),
|
||||||
disable=options["no_progress_bar"],
|
disable=self.no_progress_bar,
|
||||||
):
|
):
|
||||||
post_save.send(Document, instance=document)
|
post_save.send(Document, instance=document)
|
||||||
|
@ -4,6 +4,7 @@ import tqdm
|
|||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
from documents.classifier import load_classifier
|
from documents.classifier import load_classifier
|
||||||
|
from documents.management.commands.mixins import ProgressBarMixin
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.signals.handlers import set_correspondent
|
from documents.signals.handlers import set_correspondent
|
||||||
from documents.signals.handlers import set_document_type
|
from documents.signals.handlers import set_document_type
|
||||||
@ -13,15 +14,12 @@ from documents.signals.handlers import set_tags
|
|||||||
logger = logging.getLogger("paperless.management.retagger")
|
logger = logging.getLogger("paperless.management.retagger")
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(ProgressBarMixin, BaseCommand):
|
||||||
help = """
|
help = (
|
||||||
Using the current classification model, assigns correspondents, tags
|
"Using the current classification model, assigns correspondents, tags "
|
||||||
and document types to all documents, effectively allowing you to
|
"and document types to all documents, effectively allowing you to "
|
||||||
back-tag all previously indexed documents with metadata created (or
|
"back-tag all previously indexed documents with metadata created (or "
|
||||||
modified) after their initial import.
|
"modified) after their initial import."
|
||||||
""".replace(
|
|
||||||
" ",
|
|
||||||
"",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
@ -34,25 +32,24 @@ class Command(BaseCommand):
|
|||||||
"--use-first",
|
"--use-first",
|
||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="By default this command won't try to assign a correspondent "
|
help=(
|
||||||
"if more than one matches the document. Use this flag if "
|
"By default this command won't try to assign a correspondent "
|
||||||
"you'd rather it just pick the first one it finds.",
|
"if more than one matches the document. Use this flag if "
|
||||||
|
"you'd rather it just pick the first one it finds."
|
||||||
|
),
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-f",
|
"-f",
|
||||||
"--overwrite",
|
"--overwrite",
|
||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="If set, the document retagger will overwrite any previously"
|
help=(
|
||||||
"set correspondent, document and remove correspondents, types"
|
"If set, the document retagger will overwrite any previously"
|
||||||
"and tags that do not match anymore due to changed rules.",
|
"set correspondent, document and remove correspondents, types"
|
||||||
)
|
"and tags that do not match anymore due to changed rules."
|
||||||
parser.add_argument(
|
),
|
||||||
"--no-progress-bar",
|
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
)
|
||||||
|
self.add_argument_progress_bar_mixin(parser)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--suggest",
|
"--suggest",
|
||||||
default=False,
|
default=False,
|
||||||
@ -71,6 +68,7 @@ class Command(BaseCommand):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
self.handle_progress_bar_mixin(**options)
|
||||||
# Detect if we support color
|
# Detect if we support color
|
||||||
color = self.style.ERROR("test") != "test"
|
color = self.style.ERROR("test") != "test"
|
||||||
|
|
||||||
@ -88,7 +86,7 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
classifier = load_classifier()
|
classifier = load_classifier()
|
||||||
|
|
||||||
for document in tqdm.tqdm(documents, disable=options["no_progress_bar"]):
|
for document in tqdm.tqdm(documents, disable=self.no_progress_bar):
|
||||||
if options["correspondent"]:
|
if options["correspondent"]:
|
||||||
set_correspondent(
|
set_correspondent(
|
||||||
sender=None,
|
sender=None,
|
||||||
|
@ -1,25 +1,17 @@
|
|||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
|
from documents.management.commands.mixins import ProgressBarMixin
|
||||||
from documents.sanity_checker import check_sanity
|
from documents.sanity_checker import check_sanity
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(ProgressBarMixin, BaseCommand):
|
||||||
help = """
|
help = "This command checks your document archive for issues."
|
||||||
This command checks your document archive for issues.
|
|
||||||
""".replace(
|
|
||||||
" ",
|
|
||||||
"",
|
|
||||||
)
|
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
parser.add_argument(
|
self.add_argument_progress_bar_mixin(parser)
|
||||||
"--no-progress-bar",
|
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
messages = check_sanity(progress=not options["no_progress_bar"])
|
self.handle_progress_bar_mixin(**options)
|
||||||
|
messages = check_sanity(progress=self.use_progress_bar)
|
||||||
|
|
||||||
messages.log_messages()
|
messages.log_messages()
|
||||||
|
@ -6,6 +6,8 @@ import tqdm
|
|||||||
from django import db
|
from django import db
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
|
from documents.management.commands.mixins import MultiProcessMixin
|
||||||
|
from documents.management.commands.mixins import ProgressBarMixin
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.parsers import get_parser_class_for_mime_type
|
from documents.parsers import get_parser_class_for_mime_type
|
||||||
|
|
||||||
@ -32,13 +34,8 @@ def _process_document(doc_id):
|
|||||||
parser.cleanup()
|
parser.cleanup()
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||||
help = """
|
help = "This will regenerate the thumbnails for all documents."
|
||||||
This will regenerate the thumbnails for all documents.
|
|
||||||
""".replace(
|
|
||||||
" ",
|
|
||||||
"",
|
|
||||||
)
|
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@ -47,19 +44,20 @@ class Command(BaseCommand):
|
|||||||
default=None,
|
default=None,
|
||||||
type=int,
|
type=int,
|
||||||
required=False,
|
required=False,
|
||||||
help="Specify the ID of a document, and this command will only "
|
help=(
|
||||||
"run on this specific document.",
|
"Specify the ID of a document, and this command will only "
|
||||||
)
|
"run on this specific document."
|
||||||
parser.add_argument(
|
),
|
||||||
"--no-progress-bar",
|
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
)
|
||||||
|
self.add_argument_progress_bar_mixin(parser)
|
||||||
|
self.add_argument_processes_mixin(parser)
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
logging.getLogger().handlers[0].level = logging.ERROR
|
logging.getLogger().handlers[0].level = logging.ERROR
|
||||||
|
|
||||||
|
self.handle_processes_mixin(**options)
|
||||||
|
self.handle_progress_bar_mixin(**options)
|
||||||
|
|
||||||
if options["document"]:
|
if options["document"]:
|
||||||
documents = Document.objects.filter(pk=options["document"])
|
documents = Document.objects.filter(pk=options["document"])
|
||||||
else:
|
else:
|
||||||
@ -72,11 +70,15 @@ class Command(BaseCommand):
|
|||||||
# with postgres.
|
# with postgres.
|
||||||
db.connections.close_all()
|
db.connections.close_all()
|
||||||
|
|
||||||
with multiprocessing.Pool() as pool:
|
if self.process_count == 1:
|
||||||
list(
|
for doc_id in ids:
|
||||||
tqdm.tqdm(
|
_process_document(doc_id)
|
||||||
pool.imap_unordered(_process_document, ids),
|
else: # pragma: no cover
|
||||||
total=len(ids),
|
with multiprocessing.Pool(processes=self.process_count) as pool:
|
||||||
disable=options["no_progress_bar"],
|
list(
|
||||||
),
|
tqdm.tqdm(
|
||||||
)
|
pool.imap_unordered(_process_document, ids),
|
||||||
|
total=len(ids),
|
||||||
|
disable=self.no_progress_bar,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
from argparse import RawTextHelpFormatter
|
||||||
|
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
@ -8,20 +9,22 @@ logger = logging.getLogger("paperless.management.superuser")
|
|||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
help = """
|
help = (
|
||||||
Creates a Django superuser:
|
"Creates a Django superuser:\n"
|
||||||
User named: admin
|
" User named: admin\n"
|
||||||
Email: root@localhost
|
" Email: root@localhost\n"
|
||||||
with password based on env variable.
|
" Password: based on env variable PAPERLESS_ADMIN_PASSWORD\n"
|
||||||
No superuser will be created, when:
|
"No superuser will be created, when:\n"
|
||||||
- The username is taken already exists
|
" - The username is taken already exists\n"
|
||||||
- A superuser already exists
|
" - A superuser already exists\n"
|
||||||
- PAPERLESS_ADMIN_PASSWORD is not set
|
" - PAPERLESS_ADMIN_PASSWORD is not set"
|
||||||
""".replace(
|
|
||||||
" ",
|
|
||||||
"",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def create_parser(self, *args, **kwargs):
|
||||||
|
parser = super().create_parser(*args, **kwargs)
|
||||||
|
parser.formatter_class = RawTextHelpFormatter
|
||||||
|
return parser
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
username = os.getenv("PAPERLESS_ADMIN_USER", "admin")
|
username = os.getenv("PAPERLESS_ADMIN_USER", "admin")
|
||||||
mail = os.getenv("PAPERLESS_ADMIN_MAIL", "root@localhost")
|
mail = os.getenv("PAPERLESS_ADMIN_MAIL", "root@localhost")
|
||||||
|
43
src/documents/management/commands/mixins.py
Normal file
43
src/documents/management/commands/mixins.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
import os
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
|
from django.core.management import CommandError
|
||||||
|
|
||||||
|
|
||||||
|
class MultiProcessMixin:
    """
    Small class to handle adding an argument and validating it
    for the use of multiple processes.

    Call ``add_argument_processes_mixin`` while building the argument
    parser, then ``handle_processes_mixin`` with the parsed options.
    """

    def add_argument_processes_mixin(self, parser: ArgumentParser) -> None:
        """
        Register the integer ``--processes`` option on ``parser``.

        The default is a quarter of the detected processors, but never
        less than 1.
        """
        # os.cpu_count() returns None when the processor count cannot be
        # determined; fall back to 1 so the floor division below cannot
        # raise TypeError while the parser is being built.
        detected_cpus = os.cpu_count() or 1
        parser.add_argument(
            "--processes",
            default=max(1, detected_cpus // 4),
            type=int,
            help="Number of processes to distribute work amongst",
        )

    def handle_processes_mixin(self, *args, **options) -> None:
        """
        Store the requested process count on ``self.process_count``.

        Reads ``options["processes"]``; positional arguments are accepted
        but unused.

        Raises:
            CommandError: if fewer than 1 process was requested.
        """
        self.process_count = options["processes"]
        if self.process_count < 1:
            raise CommandError("There must be at least 1 process")
|
||||||
|
|
||||||
|
|
||||||
|
class ProgressBarMixin:
    """
    Shared handling for commands which draw a progress bar,
    giving the user a command line switch to turn it off
    """

    def add_argument_progress_bar_mixin(self, parser: ArgumentParser):
        """Register the ``--no-progress-bar`` switch on ``parser``."""
        flag_settings = {
            "default": False,
            "action": "store_true",
            "help": "If set, the progress bar will not be shown",
        }
        parser.add_argument("--no-progress-bar", **flag_settings)

    def handle_progress_bar_mixin(self, *args, **options):
        """
        Record the parsed switch as ``self.no_progress_bar`` and its
        negation as ``self.use_progress_bar``.
        """
        hide_bar = options["no_progress_bar"]
        self.no_progress_bar = hide_bar
        self.use_progress_bar = not hide_bar
|
@ -36,7 +36,7 @@ class TestArchiver(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"),
|
os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"),
|
||||||
)
|
)
|
||||||
|
|
||||||
call_command("document_archiver")
|
call_command("document_archiver", "--processes", "1")
|
||||||
|
|
||||||
def test_handle_document(self):
|
def test_handle_document(self):
|
||||||
doc = self.make_models()
|
doc = self.make_models()
|
||||||
|
@ -83,13 +83,13 @@ class TestMakeThumbnails(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
def test_command(self):
|
def test_command(self):
|
||||||
self.assertIsNotFile(self.d1.thumbnail_path)
|
self.assertIsNotFile(self.d1.thumbnail_path)
|
||||||
self.assertIsNotFile(self.d2.thumbnail_path)
|
self.assertIsNotFile(self.d2.thumbnail_path)
|
||||||
call_command("document_thumbnails")
|
call_command("document_thumbnails", "--processes", "1")
|
||||||
self.assertIsFile(self.d1.thumbnail_path)
|
self.assertIsFile(self.d1.thumbnail_path)
|
||||||
self.assertIsFile(self.d2.thumbnail_path)
|
self.assertIsFile(self.d2.thumbnail_path)
|
||||||
|
|
||||||
def test_command_documentid(self):
|
def test_command_documentid(self):
|
||||||
self.assertIsNotFile(self.d1.thumbnail_path)
|
self.assertIsNotFile(self.d1.thumbnail_path)
|
||||||
self.assertIsNotFile(self.d2.thumbnail_path)
|
self.assertIsNotFile(self.d2.thumbnail_path)
|
||||||
call_command("document_thumbnails", "-d", f"{self.d1.id}")
|
call_command("document_thumbnails", "--processes", "1", "-d", f"{self.d1.id}")
|
||||||
self.assertIsFile(self.d1.thumbnail_path)
|
self.assertIsFile(self.d1.thumbnail_path)
|
||||||
self.assertIsNotFile(self.d2.thumbnail_path)
|
self.assertIsNotFile(self.d2.thumbnail_path)
|
||||||
|
@ -4,11 +4,7 @@ from paperless_mail import tasks
|
|||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
help = """
|
help = "Manually triggers a fetching and processing of all mail accounts"
|
||||||
""".replace(
|
|
||||||
" ",
|
|
||||||
"",
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
tasks.process_mail_accounts()
|
tasks.process_mail_accounts()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user