mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Consider mtime of ignored files, garbage-collect ignore list
1. Store the mtime of ignored files so that we can reconsider them if they have changed.
2. Regularly reset the ignore list to files that still exist in the consumption dir. Previously, the list could grow indefinitely.
This commit is contained in:
parent
12488c9634
commit
e65e27d11f
@ -80,14 +80,23 @@ class Consumer:
|
||||
Find non-ignored files in consumption dir and consume them if they have
|
||||
been unmodified for FILES_MIN_UNMODIFIED_DURATION.
|
||||
"""
|
||||
ignored_files = []
|
||||
files = []
|
||||
for entry in os.scandir(self.consume):
|
||||
if entry.is_file() and entry.path not in self._ignore:
|
||||
files.append((entry.path, entry.stat().st_mtime))
|
||||
if entry.is_file():
|
||||
file = (entry.path, entry.stat().st_mtime)
|
||||
if file in self._ignore:
|
||||
ignored_files.append(file)
|
||||
else:
|
||||
files.append(file)
|
||||
|
||||
if not files:
|
||||
return
|
||||
|
||||
# Set _ignore to only include files that still exist.
|
||||
# This keeps it from growing indefinitely.
|
||||
self._ignore[:] = ignored_files
|
||||
|
||||
files_old_to_new = sorted(files, key=itemgetter(1))
|
||||
|
||||
time.sleep(self.FILES_MIN_UNMODIFIED_DURATION)
|
||||
@ -96,7 +105,7 @@ class Consumer:
|
||||
if mtime == os.path.getmtime(file):
|
||||
# File has not been modified and can be consumed
|
||||
if not self.try_consume_file(file):
|
||||
self._ignore.append(file)
|
||||
self._ignore.append((file, mtime))
|
||||
|
||||
def try_consume_file(self, file):
|
||||
"Return True if file was consumed"
|
||||
|
Loading…
x
Reference in New Issue
Block a user