mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
added a task scheduler for recurring tasks
This commit is contained in:
@@ -1,10 +1,6 @@
|
||||
import logging
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from documents.classifier import DocumentClassifier, \
|
||||
IncompatibleClassifierVersionError
|
||||
from paperless import settings
|
||||
from ...mixins import Renderable
|
||||
from ...tasks import train_classifier
|
||||
|
||||
|
||||
class Command(Renderable, BaseCommand):
|
||||
@@ -18,27 +14,4 @@ class Command(Renderable, BaseCommand):
|
||||
BaseCommand.__init__(self, *args, **kwargs)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
classifier = DocumentClassifier()
|
||||
|
||||
try:
|
||||
# load the classifier, since we might not have to train it again.
|
||||
classifier.reload()
|
||||
except (FileNotFoundError, IncompatibleClassifierVersionError):
|
||||
# This is what we're going to fix here.
|
||||
pass
|
||||
|
||||
try:
|
||||
if classifier.train():
|
||||
logging.getLogger(__name__).info(
|
||||
"Saving updated classifier model to {}...".format(settings.MODEL_FILE)
|
||||
)
|
||||
classifier.save_classifier()
|
||||
else:
|
||||
logging.getLogger(__name__).debug(
|
||||
"Training data unchanged."
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).error(
|
||||
"Classifier error: " + str(e)
|
||||
)
|
||||
train_classifier()
|
||||
|
@@ -1,9 +1,7 @@
|
||||
from django.core.management import BaseCommand
|
||||
from whoosh.writing import AsyncWriter
|
||||
|
||||
import documents.index as index
|
||||
from documents.mixins import Renderable
|
||||
from documents.models import Document
|
||||
from documents.tasks import index_reindex, index_optimize
|
||||
|
||||
|
||||
class Command(Renderable, BaseCommand):
|
||||
@@ -22,13 +20,6 @@ class Command(Renderable, BaseCommand):
|
||||
self.verbosity = options["verbosity"]
|
||||
|
||||
if options['command'] == 'reindex':
|
||||
documents = Document.objects.all()
|
||||
|
||||
ix = index.open_index(recreate=True)
|
||||
|
||||
with AsyncWriter(ix) as writer:
|
||||
for document in documents:
|
||||
index.update_document(writer, document)
|
||||
|
||||
index_reindex()
|
||||
elif options['command'] == 'optimize':
|
||||
index.open_index().optimize()
|
||||
index_optimize()
|
||||
|
@@ -1,60 +0,0 @@
|
||||
import argparse
|
||||
import threading
|
||||
from multiprocessing import Pool
|
||||
from multiprocessing.pool import ThreadPool
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from documents.consumer import Consumer
|
||||
from documents.models import Log, Document
|
||||
from documents.parsers import get_parser_class
|
||||
|
||||
|
||||
def process_document(doc):
|
||||
parser_class = get_parser_class(doc.file_name)
|
||||
if not parser_class:
|
||||
print("no parser available")
|
||||
else:
|
||||
print("Parser: {}".format(parser_class.__name__))
|
||||
parser = parser_class(doc.source_path, None)
|
||||
try:
|
||||
text = parser.get_text()
|
||||
doc.content = text
|
||||
doc.save()
|
||||
finally:
|
||||
parser.cleanup()
|
||||
|
||||
|
||||
def document_index(value):
|
||||
ivalue = int(value)
|
||||
if not (1 <= ivalue <= Document.objects.count()):
|
||||
raise argparse.ArgumentTypeError(
|
||||
"{} is not a valid document index (out of range)".format(value))
|
||||
|
||||
return ivalue
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
|
||||
help = "Performs OCR on all documents again!"
|
||||
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument(
|
||||
"-s", "--start_index",
|
||||
default=None,
|
||||
type=document_index
|
||||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
|
||||
docs = Document.objects.all().order_by("added")
|
||||
|
||||
indices = range(options['start_index']-1, len(docs)) if options['start_index'] else range(len(docs))
|
||||
|
||||
for i in indices:
|
||||
doc = docs[i]
|
||||
print("==================================")
|
||||
print("{} out of {}: {}".format(i+1, len(docs), doc.file_name))
|
||||
print("==================================")
|
||||
process_document(doc)
|
Reference in New Issue
Block a user