From e65e27d11f8bac0631eacd7d85cfdbdd58079c74 Mon Sep 17 00:00:00 2001
From: Erik Arvstedt <erik.arvstedt@gmail.com>
Date: Fri, 11 May 2018 14:01:18 +0200
Subject: [PATCH] Consider mtime of ignored files, garbage-collect ignore list

1. Store the mtime of ignored files so that we can reconsider them if
they have changed.

2. Regularly reset the ignore list to the entries whose files still
exist, unmodified, in the consumption dir. Previously, the list could
grow indefinitely.
---
 src/documents/consumer.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index e895593a5..cca7c1c13 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -80,14 +80,23 @@ class Consumer:
         Find non-ignored files in consumption dir and consume them if they have
         been unmodified for FILES_MIN_UNMODIFIED_DURATION.
         """
+        ignored_files = []
         files = []
         for entry in os.scandir(self.consume):
-            if entry.is_file() and entry.path not in self._ignore:
-                files.append((entry.path, entry.stat().st_mtime))
+            if entry.is_file():
+                file = (entry.path, entry.stat().st_mtime)
+                if file in self._ignore:
+                    ignored_files.append(file)
+                else:
+                    files.append(file)
 
         if not files:
             return
 
+        # Set _ignore to only include files that still exist.
+        # This keeps it from growing indefinitely.
+        self._ignore[:] = ignored_files
+
         files_old_to_new = sorted(files, key=itemgetter(1))
 
         time.sleep(self.FILES_MIN_UNMODIFIED_DURATION)
@@ -96,7 +105,7 @@ class Consumer:
             if mtime == os.path.getmtime(file):
                 # File has not been modified and can be consumed
                 if not self.try_consume_file(file):
-                    self._ignore.append(file)
+                    self._ignore.append((file, mtime))
 
     def try_consume_file(self, file):
         "Return True if file was consumed"