Adds configuration for database timeout, documentation and troubleshooting suggestion

This commit is contained in:
Trenton Holmes 2022-07-21 08:02:11 -07:00
parent 83de38e56f
commit 7a99dcf693
4 changed files with 51 additions and 9 deletions

View File

@ -31,7 +31,7 @@ PAPERLESS_REDIS=<url>
PAPERLESS_DBHOST=<hostname> PAPERLESS_DBHOST=<hostname>
By default, sqlite is used as the database backend. This can be changed here. By default, sqlite is used as the database backend. This can be changed here.
Set PAPERLESS_DBHOST and PostgreSQL will be used instead of mysql. Set PAPERLESS_DBHOST and PostgreSQL will be used instead of sqlite.
PAPERLESS_DBPORT=<port> PAPERLESS_DBPORT=<port>
Adjust port if necessary. Adjust port if necessary.
@ -60,6 +60,13 @@ PAPERLESS_DBSSLMODE=<mode>
Default is ``prefer``. Default is ``prefer``.
PAPERLESS_DB_TIMEOUT=<float>
Amount of time, in seconds, for a database connection to wait for the database to unlock.
Mostly applicable to an SQLite-based installation; consider changing to PostgreSQL
if you need to increase this.
Defaults to unset, keeping the Django defaults.
Paths and folders Paths and folders
################# #################

View File

@ -301,3 +301,19 @@ try adjusting the :ref:`polling configuration <configuration-polling>`.
The user will need to manually move the file out of the consume folder and The user will need to manually move the file out of the consume folder and
back in, for the initial failing file to be consumed. back in, for the initial failing file to be consumed.
Log reports "Creating PaperlessTask failed".
#########################################################
You might find messages like these in your log files:
.. code::
[ERROR] [paperless.management.consumer] Creating PaperlessTask failed: database is locked
You are likely using an SQLite-based installation with an increased number of workers, and are running into SQLite's concurrency limitations.
Uploading or consuming multiple files at once results in many workers attempting to access the database simultaneously.
Consider changing to the PostgreSQL database if you will often be processing many documents at once. Otherwise,
try tweaking the ``PAPERLESS_DB_TIMEOUT`` setting to allow more time for the database to unlock. This may have
minor performance implications.

View File

@ -11,6 +11,7 @@ from django.contrib.contenttypes.models import ContentType
from django.db import DatabaseError from django.db import DatabaseError
from django.db import models from django.db import models
from django.db.models import Q from django.db.models import Q
from django.db.utils import OperationalError
from django.dispatch import receiver from django.dispatch import receiver
from django.utils import termcolors from django.utils import termcolors
from django.utils import timezone from django.utils import timezone
@ -506,21 +507,28 @@ def add_to_index(sender, document, **kwargs):
@receiver(django_q.signals.pre_enqueue)
def init_paperless_task(sender, task, **kwargs):
    """
    Handle django_q's ``pre_enqueue`` signal for document consumption tasks.

    For a task whose ``func`` is ``documents.tasks.consume_file``, fetch or
    create the PaperlessTask row keyed by the task id, then store the task's
    name and its ``started`` value (as ``created``) on it.  Tasks for any
    other function are ignored.

    A database OperationalError is logged and not re-raised.
    """
    if task["func"] != "documents.tasks.consume_file":
        return

    try:
        tracked, _ = PaperlessTask.objects.get_or_create(
            task_id=task["id"],
        )
        tracked.name = task["name"]
        tracked.created = task["started"]
        tracked.save()
    except OperationalError as e:
        logger.error(f"Creating PaperlessTask failed: {e}")
@receiver(django_q.signals.pre_execute)
def paperless_task_started(sender, task, **kwargs):
    """
    Handle django_q's ``pre_execute`` signal for document consumption tasks.

    For a task whose ``func`` is ``documents.tasks.consume_file``, fetch or
    create the PaperlessTask row keyed by the task id and stamp its
    ``started`` field with the current time.  Tasks for any other function
    are ignored.

    A database OperationalError is logged and not re-raised.
    """
    if task["func"] != "documents.tasks.consume_file":
        return

    try:
        # get_or_create never raises DoesNotExist (a missing row is created
        # instead), so no handler for that exception is needed here.
        paperless_task, _ = PaperlessTask.objects.get_or_create(
            task_id=task["id"],
        )
        paperless_task.started = timezone.now()
        paperless_task.save()
    except OperationalError as e:
        logger.error(f"Creating PaperlessTask failed: {e}")
@ -529,8 +537,12 @@ def paperless_task_started(sender, task, **kwargs):
def update_paperless_task(sender, instance, **kwargs):
    """
    Link a django_q task record to its PaperlessTask row.

    When ``instance.func`` is ``documents.tasks.consume_file``, fetch or
    create the PaperlessTask row keyed by ``instance.id`` and point its
    ``attempted_task`` field at ``instance``.  Instances for any other
    function are ignored.

    A database OperationalError is logged and not re-raised.
    """
    if instance.func != "documents.tasks.consume_file":
        return

    try:
        # get_or_create never raises DoesNotExist (a missing row is created
        # instead), so no handler for that exception is needed here.
        paperless_task, _ = PaperlessTask.objects.get_or_create(
            task_id=instance.id,
        )
        paperless_task.attempted_task = instance
        paperless_task.save()
    except OperationalError as e:
        logger.error(f"Creating PaperlessTask failed: {e}")

View File

@ -327,6 +327,13 @@ if os.getenv("PAPERLESS_DBHOST"):
if os.getenv("PAPERLESS_DBPORT"): if os.getenv("PAPERLESS_DBPORT"):
DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT") DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")
# Optional database timeout taken from PAPERLESS_DB_TIMEOUT.  When the
# variable is unset nothing is touched, so the Django defaults stay in place.
# NOTE(review): the "timeout" option is applied whatever backend is
# configured; confirm every supported backend accepts that option name.
if "PAPERLESS_DB_TIMEOUT" in os.environ:
    _new_opts = {"timeout": float(os.environ["PAPERLESS_DB_TIMEOUT"])}
    # Merge into any OPTIONS already configured rather than replacing them.
    DATABASES["default"].setdefault("OPTIONS", {}).update(_new_opts)
DEFAULT_AUTO_FIELD = "django.db.models.AutoField" DEFAULT_AUTO_FIELD = "django.db.models.AutoField"
############################################################################### ###############################################################################