added option for polling

This commit is contained in:
Jonas Winkler 2020-11-16 18:52:13 +01:00
parent f6a926c9b1
commit 31c4167535
3 changed files with 29 additions and 1 deletions

View File

@ -165,6 +165,12 @@ PAPERLESS_EMAIL_SECRET=""
# If you only specify PAPERLESS_TASK_WORKERS, paperless will adjust
# PAPERLESS_THREADS_PER_WORKER automatically.
# If paperless does not pick up documents added to your consumption directory,
# it may be unable to detect filesystem changes automatically. In that case,
# specify a polling interval in seconds below; paperless will then check the
# consumption directory for changes periodically instead of relying on
# filesystem notifications. Leave this unset (or 0) to disable polling.
#PAPERLESS_CONSUMER_POLLING=10
# When the consumer detects a duplicate document, it will not touch the
# original document. This default behavior can be changed here.

View File

@ -6,6 +6,7 @@ from django.core.management.base import BaseCommand
from django_q.tasks import async_task
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
from watchdog.observers.polling import PollingObserver
try:
from inotify_simple import INotify, flags
@ -75,7 +76,12 @@ class Command(BaseCommand):
async_task("documents.tasks.consume_file", entry.path, task_name=os.path.basename(entry.path))
# Start the watchdog. Woof!
observer = Observer()
if settings.CONSUMER_POLLING > 0:
logging.getLogger(__name__).info('Using polling instead of file'
'system notifications.')
observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
else:
observer = Observer()
event_handler = Handler()
observer.schedule(event_handler, directory, recursive=True)
observer.start()

View File

@ -286,13 +286,29 @@ TASK_WORKERS = int(os.getenv("PAPERLESS_TASK_WORKERS", default_task_workers()))
# Configuration handed to the django-q task cluster.
Q_CLUSTER = {
    'name': 'paperless',
    # Passed through to django-q unchanged; see its configuration reference
    # for how missed schedules are handled when this is False.
    'catch_up': False,
    # Number of worker processes, taken from TASK_WORKERS.
    'workers': TASK_WORKERS,
    # Redis connection URL; falls back to a local instance when
    # PAPERLESS_REDIS is not set.
    'redis': os.getenv("PAPERLESS_REDIS", "redis://localhost:6379"),
}
def default_threads_per_worker():
    """Return the default number of threads each task worker may use.

    The CPU count reported by ``multiprocessing.cpu_count()`` is divided by
    ``TASK_WORKERS`` and rounded down, with a lower bound of one thread.
    If the platform cannot report a CPU count, a single thread is assumed.
    """
    try:
        core_count = multiprocessing.cpu_count()
    except NotImplementedError:
        # The CPU count is not available on this platform.
        return 1

    threads = math.floor(core_count / TASK_WORKERS)
    return max(threads, 1)
# os.getenv() returns a string whenever the variable is set, while the
# computed default is an int. Coerce to int so the setting always has the
# same type, matching how TASK_WORKERS is parsed.
THREADS_PER_WORKER = int(
    os.getenv("PAPERLESS_THREADS_PER_WORKER", default_threads_per_worker())
)
###############################################################################
# Paperless Specific Settings #
###############################################################################
# Polling interval in seconds for the consumption directory. A value greater
# than zero makes the consumer use a polling observer instead of filesystem
# notifications; the default of 0 leaves polling disabled.
CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0))
# NOTE(review): presumably controls whether detected duplicate documents are
# deleted by the consumer; confirm against __get_boolean and the consumer.
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
# The default language that tesseract will attempt to use when parsing