diff --git a/docs/configuration.rst b/docs/configuration.rst index c4203472c..fd68f61cb 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -31,7 +31,7 @@ PAPERLESS_REDIS= PAPERLESS_DBHOST= By default, sqlite is used as the database backend. This can be changed here. - Set PAPERLESS_DBHOST and PostgreSQL will be used instead of mysql. + Set PAPERLESS_DBHOST and PostgreSQL will be used instead of sqlite. PAPERLESS_DBPORT= Adjust port if necessary. @@ -60,6 +60,13 @@ PAPERLESS_DBSSLMODE= Default is ``prefer``. +PAPERLESS_DB_TIMEOUT= + Amount of time, in seconds, for a database connection to wait for the database to unlock. + Mostly applicable for an sqlite based installation; consider changing to PostgreSQL + if you need to increase this. + + Defaults to unset, keeping the Django defaults. + Paths and folders ################# diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index 3db9a069e..1605fed11 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -301,3 +301,19 @@ try adjusting the :ref:`polling configuration `. The user will need to manually move the file out of the consume folder and back in, for the initial failing file to be consumed. + +Log reports "Creating PaperlessTask failed". +######################################################### + +You might find messages like these in your log files: + +.. code:: + + [ERROR] [paperless.handlers] Creating PaperlessTask failed: database is locked + +You are likely using an sqlite based installation with an increased number of workers, and are running into sqlite's concurrency limitations. +Uploading or consuming multiple files at once results in many workers attempting to access the database simultaneously. + +Consider changing to the PostgreSQL database if you will often be processing many documents at once. Otherwise, +try tweaking the ``PAPERLESS_DB_TIMEOUT`` setting to allow more time for the database to unlock. 
This may have +minor performance implications. diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index f7a04ad51..e2f4fb4f7 100644 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -11,6 +11,7 @@ from django.contrib.contenttypes.models import ContentType from django.db import DatabaseError from django.db import models from django.db.models import Q +from django.db.utils import OperationalError from django.dispatch import receiver from django.utils import termcolors from django.utils import timezone @@ -506,21 +507,28 @@ def add_to_index(sender, document, **kwargs): @receiver(django_q.signals.pre_enqueue) def init_paperless_task(sender, task, **kwargs): if task["func"] == "documents.tasks.consume_file": - paperless_task, created = PaperlessTask.objects.get_or_create( - task_id=task["id"], - ) - paperless_task.name = task["name"] - paperless_task.created = task["started"] - paperless_task.save() + try: + paperless_task, created = PaperlessTask.objects.get_or_create( + task_id=task["id"], + ) + paperless_task.name = task["name"] + paperless_task.created = task["started"] + paperless_task.save() + except OperationalError as e: + logger.error(f"Creating PaperlessTask failed: {e}") @receiver(django_q.signals.pre_execute) def paperless_task_started(sender, task, **kwargs): try: if task["func"] == "documents.tasks.consume_file": - paperless_task = PaperlessTask.objects.get(task_id=task["id"]) + paperless_task, created = PaperlessTask.objects.get_or_create( + task_id=task["id"], + ) paperless_task.started = timezone.now() paperless_task.save() + except OperationalError as e: + logger.error(f"Creating PaperlessTask failed: {e}") except PaperlessTask.DoesNotExist: pass @@ -529,8 +537,12 @@ def paperless_task_started(sender, task, **kwargs): def update_paperless_task(sender, instance, **kwargs): try: if instance.func == "documents.tasks.consume_file": - paperless_task = 
PaperlessTask.objects.get(task_id=instance.id) + paperless_task, created = PaperlessTask.objects.get_or_create( + task_id=instance.id, + ) paperless_task.attempted_task = instance paperless_task.save() + except OperationalError as e: + logger.error(f"Creating PaperlessTask failed: {e}") except PaperlessTask.DoesNotExist: pass diff --git a/src/paperless/settings.py b/src/paperless/settings.py index bfb9507ba..e7fd0f3f0 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -327,6 +327,13 @@ if os.getenv("PAPERLESS_DBHOST"): if os.getenv("PAPERLESS_DBPORT"): DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT") +if os.getenv("PAPERLESS_DB_TIMEOUT") is not None: + _new_opts = {"timeout": float(os.getenv("PAPERLESS_DB_TIMEOUT"))} + if "OPTIONS" in DATABASES["default"]: + DATABASES["default"]["OPTIONS"].update(_new_opts) + else: + DATABASES["default"]["OPTIONS"] = _new_opts + DEFAULT_AUTO_FIELD = "django.db.models.AutoField" ###############################################################################