From e65e27d11f8bac0631eacd7d85cfdbdd58079c74 Mon Sep 17 00:00:00 2001
From: Erik Arvstedt <erik.arvstedt@gmail.com>
Date: Fri, 11 May 2018 14:01:18 +0200
Subject: [PATCH] Consider mtime of ignored files, garbage-collect ignore list

1. Store the mtime of ignored files so that we can reconsider them if
they have changed.
2. Regularly reset the ignore list to files that still exist in the
consumption dir. Previously, the list could grow indefinitely.
---
 src/documents/consumer.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index e895593a5..cca7c1c13 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -80,14 +80,23 @@ class Consumer:
         Find non-ignored files in consumption dir and consume them if they have
         been unmodified for FILES_MIN_UNMODIFIED_DURATION.
         """
+        ignored_files = []
         files = []
         for entry in os.scandir(self.consume):
-            if entry.is_file() and entry.path not in self._ignore:
-                files.append((entry.path, entry.stat().st_mtime))
+            if entry.is_file():
+                file = (entry.path, entry.stat().st_mtime)
+                if file in self._ignore:
+                    ignored_files.append(file)
+                else:
+                    files.append(file)
 
         if not files:
             return
 
+        # Set _ignore to only include files that still exist.
+        # This keeps it from growing indefinitely.
+        self._ignore[:] = ignored_files
+
         files_old_to_new = sorted(files, key=itemgetter(1))
 
         time.sleep(self.FILES_MIN_UNMODIFIED_DURATION)
@@ -96,7 +105,7 @@ class Consumer:
             if mtime == os.path.getmtime(file):
                 # File has not been modified and can be consumed
                 if not self.try_consume_file(file):
-                    self._ignore.append(file)
+                    self._ignore.append((file, mtime))
 
     def try_consume_file(self, file):
         "Return True if file was consumed"