mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00

Cleans up some command help text and adds more control over process count for command with a Pool
96 lines
3.2 KiB
Python
96 lines
3.2 KiB
Python
import logging
|
|
import multiprocessing
|
|
import os
|
|
|
|
import tqdm
|
|
from django import db
|
|
from django.conf import settings
|
|
from django.core.management.base import BaseCommand
|
|
|
|
from documents.management.commands.mixins import MultiProcessMixin
|
|
from documents.management.commands.mixins import ProgressBarMixin
|
|
from documents.models import Document
|
|
from documents.tasks import update_document_archive_file
|
|
|
|
logger = logging.getLogger("paperless.management.archiver")
|
|
|
|
|
|
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
    """Management command that creates the archived version of documents.

    Every selected document ID is passed to ``update_document_archive_file``,
    either sequentially in this process or fanned out over a
    ``multiprocessing.Pool``, depending on ``self.process_count``.
    """

    help = (
        "Creates the archived version of documents. By default, only "
        "documents without an archived version are processed; use "
        "--overwrite to recreate existing archived versions, or --document "
        "to only process a single document."
    )

    def add_arguments(self, parser):
        """Register the command line options of this command.

        Adds ``-f/--overwrite`` and ``-d/--document`` and then lets the two
        mixins register their own options on the same parser.
        """
        parser.add_argument(
            "-f",
            "--overwrite",
            default=False,
            action="store_true",
            help=(
                "Recreates the archived document for documents that already "
                "have an archived version."
            ),
        )
        parser.add_argument(
            "-d",
            "--document",
            default=None,
            type=int,
            required=False,
            help=(
                "Specify the ID of a document, and this command will only "
                "run on this specific document."
            ),
        )
        self.add_argument_progress_bar_mixin(parser)
        self.add_argument_processes_mixin(parser)

    def handle(self, *args, **options):
        """Run the archiver over the selected documents.

        Reads the ``overwrite`` and ``document`` options. A document is
        processed when ``overwrite`` is set or when it has no archived
        version yet. A ``KeyboardInterrupt`` during processing is caught and
        reported as "Aborting..." instead of propagating.
        """
        # NOTE(review): these mixin hooks presumably populate
        # self.process_count and self.no_progress_bar, which are read
        # below -- confirm against the mixin implementations.
        self.handle_processes_mixin(**options)
        self.handle_progress_bar_mixin(**options)

        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

        overwrite = options["overwrite"]

        # Compare against None rather than relying on truthiness, so that an
        # explicitly given ID of 0 is not mistaken for "no ID given" (which
        # would silently process every document).
        if options["document"] is not None:
            documents = Document.objects.filter(pk=options["document"])
        else:
            documents = Document.objects.all()

        document_ids = [
            doc.id
            for doc in documents
            if overwrite or not doc.has_archive_version
        ]

        # Note to future self: this prevents django from reusing database
        # connections between processes, which is bad and does not work
        # with postgres.
        db.connections.close_all()

        try:
            # Raise the level of the first root handler to ERROR while
            # processing. The root logger may have no handlers configured,
            # in which case there is nothing to adjust.
            root_handlers = logging.getLogger().handlers
            if root_handlers:
                root_handlers[0].setLevel(logging.ERROR)

            if self.process_count == 1:
                for doc_id in document_ids:
                    update_document_archive_file(doc_id)
            else:  # pragma: no cover
                with multiprocessing.Pool(self.process_count) as pool:
                    # list() drains the lazy iterator so that every task has
                    # completed before the pool is torn down; the results
                    # themselves are discarded.
                    list(
                        tqdm.tqdm(
                            pool.imap_unordered(
                                update_document_archive_file,
                                document_ids,
                            ),
                            total=len(document_ids),
                            disable=self.no_progress_bar,
                        ),
                    )
        except KeyboardInterrupt:
            self.stdout.write(self.style.NOTICE("Aborting..."))
|