mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge pull request #953 from paperless-ngx/bugfix-intoify-debounce
Bugfix: Adds configurable inotify debounce time
This commit is contained in:
commit
9ae20a6bec
@ -590,6 +590,28 @@ PAPERLESS_CONSUMER_POLLING=<num>
|
|||||||
|
|
||||||
Defaults to 0, which disables polling and uses filesystem notifications.
|
Defaults to 0, which disables polling and uses filesystem notifications.
|
||||||
|
|
||||||
|
PAPERLESS_CONSUMER_POLLING_RETRY_COUNT=<num>
|
||||||
|
If consumer polling is enabled, sets the number of times paperless will check for a
|
||||||
|
file to remain unmodified.
|
||||||
|
|
||||||
|
Defaults to 5.
|
||||||
|
|
||||||
|
PAPERLESS_CONSUMER_POLLING_DELAY=<num>
|
||||||
|
If consumer polling is enabled, sets the delay in seconds between each check (above) paperless
|
||||||
|
will do while waiting for a file to remain unmodified.
|
||||||
|
|
||||||
|
Defaults to 5.
|
||||||
|
|
||||||
|
.. _configuration-inotify:
|
||||||
|
|
||||||
|
PAPERLESS_CONSUMER_INOTIFY_DELAY=<num>
|
||||||
|
Sets the time in seconds the consumer will wait for additional events
|
||||||
|
from inotify before the consumer will consider a file ready and begin consumption.
|
||||||
|
Certain scanners or network setups may generate multiple events for a single file,
|
||||||
|
leading to multiple consumers working on the same file. Configure this to
|
||||||
|
prevent that.
|
||||||
|
|
||||||
|
Defaults to 0.5 seconds.
|
||||||
|
|
||||||
PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>
|
PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>
|
||||||
When the consumer detects a duplicate document, it will not touch the
|
When the consumer detects a duplicate document, it will not touch the
|
||||||
|
@ -235,3 +235,66 @@ You might find messages like these in your log files:
|
|||||||
This indicates that paperless failed to read PDF metadata from one of your documents. This happens when you
|
This indicates that paperless failed to read PDF metadata from one of your documents. This happens when you
|
||||||
open the affected documents in paperless for editing. Paperless will continue to work, and will simply not
|
open the affected documents in paperless for editing. Paperless will continue to work, and will simply not
|
||||||
show the invalid metadata.
|
show the invalid metadata.
|
||||||
|
|
||||||
|
Consumer fails with a FileNotFoundError
|
||||||
|
#######################################
|
||||||
|
|
||||||
|
You might find messages like these in your log files:
|
||||||
|
|
||||||
|
.. code::
|
||||||
|
[ERROR] [paperless.consumer] Error while consuming document SCN_0001.pdf: FileNotFoundError: [Errno 2] No such file or directory: '/tmp/ocrmypdf.io.yhk3zbv0/origin.pdf'
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "/app/paperless/src/paperless_tesseract/parsers.py", line 261, in parse
|
||||||
|
ocrmypdf.ocr(**args)
|
||||||
|
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/api.py", line 337, in ocr
|
||||||
|
return run_pipeline(options=options, plugin_manager=plugin_manager, api=True)
|
||||||
|
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_sync.py", line 385, in run_pipeline
|
||||||
|
exec_concurrent(context, executor)
|
||||||
|
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_sync.py", line 302, in exec_concurrent
|
||||||
|
pdf = post_process(pdf, context, executor)
|
||||||
|
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_sync.py", line 235, in post_process
|
||||||
|
pdf_out = metadata_fixup(pdf_out, context)
|
||||||
|
File "/usr/local/lib/python3.8/dist-packages/ocrmypdf/_pipeline.py", line 798, in metadata_fixup
|
||||||
|
with pikepdf.open(context.origin) as original, pikepdf.open(working_file) as pdf:
|
||||||
|
File "/usr/local/lib/python3.8/dist-packages/pikepdf/_methods.py", line 923, in open
|
||||||
|
pdf = Pdf._open(
|
||||||
|
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/ocrmypdf.io.yhk3zbv0/origin.pdf'
|
||||||
|
|
||||||
|
This probably indicates paperless tried to consume the same file twice. This can happen for a number of reasons,
|
||||||
|
depending on how documents are placed into the consume folder. If paperless is using inotify (the default) to
|
||||||
|
check for documents, try adjusting the :ref:`inotify configuration <configuration-inotify>`. If polling is enabled,
|
||||||
|
try adjusting the :ref:`polling configuration <configuration-polling>`.
|
||||||
|
|
||||||
|
Consumer fails waiting for file to remain unmodified.
|
||||||
|
#####################################################
|
||||||
|
|
||||||
|
You might find messages like these in your log files:
|
||||||
|
|
||||||
|
.. code::
|
||||||
|
[ERROR] [paperless.management.consumer] Timeout while waiting on file /usr/src/paperless/src/../consume/SCN_0001.pdf to remain unmodified.
|
||||||
|
|
||||||
|
This indicates paperless timed out while waiting for the file to be completely written to the consume folder.
|
||||||
|
Adjusting :ref:`polling configuration <configuration-polling>` values should resolve the issue.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
The user will need to manually move the file out of the consume folder and
|
||||||
|
back in, for the initial failing file to be consumed.
|
||||||
|
|
||||||
|
Consumer fails reporting "OS reports file as busy still".
|
||||||
|
#########################################################
|
||||||
|
|
||||||
|
You might find messages like these in your log files:
|
||||||
|
|
||||||
|
.. code::
|
||||||
|
[WARNING] [paperless.management.consumer] Not consuming file /usr/src/paperless/src/../consume/SCN_0001.pdf: OS reports file as busy still
|
||||||
|
|
||||||
|
This indicates paperless was unable to open the file, as the OS reported the file as still being in use. To prevent a
|
||||||
|
crash, paperless did not try to consume the file. If paperless is using inotify (the default) to
|
||||||
|
check for documents, try adjusting the :ref:`inotify configuration <configuration-inotify>`. If polling is enabled,
|
||||||
|
try adjusting the :ref:`polling configuration <configuration-polling>`.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
The user will need to manually move the file out of the consume folder and
|
||||||
|
back in, for the initial failing file to be consumed.
|
||||||
|
@ -216,7 +216,7 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
inotify_debounce: Final[float] = 0.5
|
inotify_debounce: Final[float] = settings.CONSUMER_INOTIFY_DELAY
|
||||||
notified_files = {}
|
notified_files = {}
|
||||||
|
|
||||||
while not self.stop_flag:
|
while not self.stop_flag:
|
||||||
@ -234,10 +234,23 @@ class Command(BaseCommand):
|
|||||||
for filepath in notified_files:
|
for filepath in notified_files:
|
||||||
# Time of the last inotify event for this file
|
# Time of the last inotify event for this file
|
||||||
last_event_time = notified_files[filepath]
|
last_event_time = notified_files[filepath]
|
||||||
if (monotonic() - last_event_time) > inotify_debounce:
|
|
||||||
|
# Current time - last time over the configured timeout
|
||||||
|
waited_long_enough = (
|
||||||
|
monotonic() - last_event_time
|
||||||
|
) > inotify_debounce
|
||||||
|
|
||||||
|
# Also make sure the file exists still, some scanners might write a
|
||||||
|
# temporary file first
|
||||||
|
file_still_exists = os.path.exists(filepath) and os.path.isfile(
|
||||||
|
filepath,
|
||||||
|
)
|
||||||
|
|
||||||
|
if waited_long_enough and file_still_exists:
|
||||||
_consume(filepath)
|
_consume(filepath)
|
||||||
else:
|
elif file_still_exists:
|
||||||
still_waiting[filepath] = last_event_time
|
still_waiting[filepath] = last_event_time
|
||||||
|
|
||||||
# These files are still waiting to hit the timeout
|
# These files are still waiting to hit the timeout
|
||||||
notified_files = still_waiting
|
notified_files = still_waiting
|
||||||
|
|
||||||
|
@ -98,6 +98,9 @@ class ConsumerMixin:
|
|||||||
print("file completed.")
|
print("file completed.")
|
||||||
|
|
||||||
|
|
||||||
|
@override_settings(
|
||||||
|
CONSUMER_INOTIFY_DELAY=0.01,
|
||||||
|
)
|
||||||
class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
|
||||||
def test_consume_file(self):
|
def test_consume_file(self):
|
||||||
self.t_start()
|
self.t_start()
|
||||||
@ -286,7 +289,7 @@ class TestConsumerPolling(TestConsumer):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@override_settings(CONSUMER_RECURSIVE=True)
|
@override_settings(CONSUMER_INOTIFY_DELAY=0.01, CONSUMER_RECURSIVE=True)
|
||||||
class TestConsumerRecursive(TestConsumer):
|
class TestConsumerRecursive(TestConsumer):
|
||||||
# just do all the tests with recursive
|
# just do all the tests with recursive
|
||||||
pass
|
pass
|
||||||
|
@ -48,6 +48,13 @@ def __get_int(key: str, default: int) -> int:
|
|||||||
return int(os.getenv(key, default))
|
return int(os.getenv(key, default))
|
||||||
|
|
||||||
|
|
||||||
|
def __get_float(key: str, default: float) -> float:
|
||||||
|
"""
|
||||||
|
Return a float value based on the environment variable or a default
|
||||||
|
"""
|
||||||
|
return float(os.getenv(key, default))
|
||||||
|
|
||||||
|
|
||||||
# NEVER RUN WITH DEBUG IN PRODUCTION.
|
# NEVER RUN WITH DEBUG IN PRODUCTION.
|
||||||
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
|
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
|
||||||
|
|
||||||
@ -485,6 +492,11 @@ CONSUMER_POLLING_RETRY_COUNT = int(
|
|||||||
os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5),
|
os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
CONSUMER_INOTIFY_DELAY: Final[float] = __get_float(
|
||||||
|
"PAPERLESS_CONSUMER_INOTIFY_DELAY",
|
||||||
|
0.5,
|
||||||
|
)
|
||||||
|
|
||||||
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
|
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
|
||||||
|
|
||||||
CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")
|
CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user