From 7a99dcf69309a464648db39e59498a97715238c4 Mon Sep 17 00:00:00 2001 From: Trenton Holmes Date: Thu, 21 Jul 2022 08:02:11 -0700 Subject: [PATCH 1/2] Adds configuration for database timeout, documentation and troubleshooting suggestion --- docs/configuration.rst | 9 ++++++++- docs/troubleshooting.rst | 16 ++++++++++++++++ src/documents/signals/handlers.py | 28 ++++++++++++++++++++-------- src/paperless/settings.py | 7 +++++++ 4 files changed, 51 insertions(+), 9 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index c4203472c..fd68f61cb 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -31,7 +31,7 @@ PAPERLESS_REDIS= PAPERLESS_DBHOST= By default, sqlite is used as the database backend. This can be changed here. - Set PAPERLESS_DBHOST and PostgreSQL will be used instead of mysql. + Set PAPERLESS_DBHOST and PostgreSQL will be used instead of sqlite. PAPERLESS_DBPORT= Adjust port if necessary. @@ -60,6 +60,13 @@ PAPERLESS_DBSSLMODE= Default is ``prefer``. +PAPERLESS_DB_TIMEOUT= + Amount of time for a database connection to wait for the database to unlock. + Mostly applicable for an sqlite based installation, consider changing to postgresql + if you need to increase this. + + Defaults to unset, keeping the Django defaults. + Paths and folders ################# diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index 3db9a069e..1605fed11 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -301,3 +301,19 @@ try adjusting the :ref:`polling configuration `. The user will need to manually move the file out of the consume folder and back in, for the initial failing file to be consumed. + +Log reports "Creating PaperlessTask failed". +######################################################### + +You might find messages like these in your log files: + +..
code:: + + [WARNING] [paperless.management.consumer] Not consuming file /usr/src/paperless/src/../consume/SCN_0001.pdf: OS reports file as busy still + +You are likely using an sqlite based installation, with an increased number of workers and are running into sqlite's concurrency limitations. +Uploading or consuming multiple files at once results in many workers attempting to access the database simultaneously. + +Consider changing to the PostgreSQL database if you will be processing many documents at once often. Otherwise, +try tweaking the ``PAPERLESS_DB_TIMEOUT`` setting to allow more time for the database to unlock. This may have +minor performance implications. diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index f7a04ad51..e2f4fb4f7 100644 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -11,6 +11,7 @@ from django.contrib.contenttypes.models import ContentType from django.db import DatabaseError from django.db import models from django.db.models import Q +from django.db.utils import OperationalError from django.dispatch import receiver from django.utils import termcolors from django.utils import timezone @@ -506,21 +507,28 @@ def add_to_index(sender, document, **kwargs): @receiver(django_q.signals.pre_enqueue) def init_paperless_task(sender, task, **kwargs): if task["func"] == "documents.tasks.consume_file": - paperless_task, created = PaperlessTask.objects.get_or_create( - task_id=task["id"], - ) - paperless_task.name = task["name"] - paperless_task.created = task["started"] - paperless_task.save() + try: + paperless_task, created = PaperlessTask.objects.get_or_create( + task_id=task["id"], + ) + paperless_task.name = task["name"] + paperless_task.created = task["started"] + paperless_task.save() + except OperationalError as e: + logger.error(f"Creating PaperlessTask failed: {e}") @receiver(django_q.signals.pre_execute) def paperless_task_started(sender, task, **kwargs): try: if 
task["func"] == "documents.tasks.consume_file": - paperless_task = PaperlessTask.objects.get(task_id=task["id"]) + paperless_task, created = PaperlessTask.objects.get_or_create( + task_id=task["id"], + ) paperless_task.started = timezone.now() paperless_task.save() + except OperationalError as e: + logger.error(f"Creating PaperlessTask failed: {e}") except PaperlessTask.DoesNotExist: pass @@ -529,8 +537,12 @@ def paperless_task_started(sender, task, **kwargs): def update_paperless_task(sender, instance, **kwargs): try: if instance.func == "documents.tasks.consume_file": - paperless_task = PaperlessTask.objects.get(task_id=instance.id) + paperless_task, created = PaperlessTask.objects.get_or_create( + task_id=instance.id, + ) paperless_task.attempted_task = instance paperless_task.save() + except OperationalError as e: + logger.error(f"Creating PaperlessTask failed: {e}") except PaperlessTask.DoesNotExist: pass diff --git a/src/paperless/settings.py b/src/paperless/settings.py index bfb9507ba..e7fd0f3f0 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -327,6 +327,13 @@ if os.getenv("PAPERLESS_DBHOST"): if os.getenv("PAPERLESS_DBPORT"): DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT") +if os.getenv("PAPERLESS_DB_TIMEOUT") is not None: + _new_opts = {"timeout": float(os.getenv("PAPERLESS_DB_TIMEOUT"))} + if "OPTIONS" in DATABASES["default"]: + DATABASES["default"]["OPTIONS"].update(_new_opts) + else: + DATABASES["default"]["OPTIONS"] = _new_opts + DEFAULT_AUTO_FIELD = "django.db.models.AutoField" ############################################################################### From ef790ca6f4336095610a3fca2a4ad6507c26455e Mon Sep 17 00:00:00 2001 From: Trenton Holmes Date: Fri, 22 Jul 2022 11:08:52 -0700 Subject: [PATCH 2/2] Fixes the copy and paste of the log line --- docs/troubleshooting.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index 
1605fed11..6d94d7100 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -309,7 +309,7 @@ You might find messages like these in your log files: .. code:: - [WARNING] [paperless.management.consumer] Not consuming file /usr/src/paperless/src/../consume/SCN_0001.pdf: OS reports file as busy still + [ERROR] [paperless.management.consumer] Creating PaperlessTask failed: db locked You are likely using an sqlite based installation, with an increased number of workers and are running into sqlite's concurrency limitations. Uploading or consuming multiple files at once results in many workers attempting to access the database simultaneously.