diff --git a/src/documents/consumer.py b/src/documents/consumer.py index e8f527974..7bae5c2a9 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -41,21 +41,6 @@ class Consumer(LoggingMixin): raise ConsumerError("Cannot consume {}: It is not a file".format( self.path)) - def pre_check_file_extension(self): - extensions = get_supported_file_extensions() - _, ext = os.path.splitext(self.filename) - - if not ext: - raise ConsumerError( - f"Not consuming {self.filename}: File type unknown." - ) - - if ext not in extensions: - raise ConsumerError( - f"Not consuming {self.filename}: File extension {ext} does " - f"not map to any known file type ({str(extensions)})" - ) - def pre_check_duplicate(self): with open(self.path, "rb") as f: checksum = hashlib.md5(f.read()).hexdigest() @@ -98,7 +83,6 @@ class Consumer(LoggingMixin): # Make sure that preconditions for consuming the file are met. self.pre_check_file_exists() - self.pre_check_file_extension() self.pre_check_directories() self.pre_check_duplicate() diff --git a/src/documents/management/commands/document_consumer.py b/src/documents/management/commands/document_consumer.py index ea63cd19d..5cecd6bf9 100644 --- a/src/documents/management/commands/document_consumer.py +++ b/src/documents/management/commands/document_consumer.py @@ -11,6 +11,7 @@ from watchdog.events import FileSystemEventHandler from watchdog.observers.polling import PollingObserver from documents.models import Tag +from documents.parsers import is_file_ext_supported try: from inotifyrecursive import INotify, flags @@ -37,11 +38,19 @@ def _tags_from_path(filepath): def _consume(filepath): + if os.path.isdir(filepath): + return + if not os.path.isfile(filepath): logger.debug( f"Not consuming file {filepath}: File has moved.") return + if not is_file_ext_supported(os.path.splitext(filepath)[1]): + logger.debug( + f"Not consuming file {filepath}: Unknown file extension.") + return + tag_ids = None try: if settings.CONSUMER_SUBDIRS_AS_TAGS: @@ -181,7 +190,7 @@ class Command(BaseCommand): try: while not self.stop_flag: - for event in inotify.read(timeout=1000, read_delay=1000): + for event in inotify.read(timeout=1000): if recursive: path = inotify.get_path(event.wd) else: diff --git a/src/documents/parsers.py b/src/documents/parsers.py index 7fc927c50..eeaaed886 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -54,6 +54,13 @@ def get_default_file_extension(mime_type): return None +def is_file_ext_supported(ext): + if ext: + return ext.lower() in get_supported_file_extensions() + else: + return False + + def get_supported_file_extensions(): extensions = set() for response in document_consumer_declaration.send(None): diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index 99801923f..992d450db 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -552,7 +552,7 @@ class TestConsumer(DirectoriesMixin, TestCase): try: self.consumer.try_consume_file(self.get_test_file()) except ConsumerError as e: - self.assertTrue("File extension .pdf does not map to any" in str(e)) + self.assertTrue("No parsers abvailable for" in str(e)) return self.fail("Should throw exception") diff --git a/src/documents/tests/test_management_consumer.py b/src/documents/tests/test_management_consumer.py index c56b49a41..6973fdacf 100644 --- a/src/documents/tests/test_management_consumer.py +++ b/src/documents/tests/test_management_consumer.py @@ -34,12 +34,12 @@ def chunked(size, source): yield source[i:i+size] -class TestConsumer(DirectoriesMixin, TransactionTestCase): +class ConsumerMixin: sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") def setUp(self) -> None: - super(TestConsumer, self).setUp() + super(ConsumerMixin, self).setUp() self.t = None patcher = mock.patch("documents.management.commands.document_consumer.async_task") self.task_mock = patcher.start() @@ -58,7 +58,7 @@ class TestConsumer(DirectoriesMixin, TransactionTestCase): # wait for the consumer to exit. self.t.join() - super(TestConsumer, self).tearDown() + super(ConsumerMixin, self).tearDown() def wait_for_task_mock_call(self): n = 0 @@ -69,7 +69,6 @@ class TestConsumer(DirectoriesMixin, TransactionTestCase): return n += 1 sleep(0.1) - self.fail("async_task was never called") # A bogus async_task that will simply check the file for # completeness and raise an exception otherwise. @@ -96,6 +95,9 @@ class TestConsumer(DirectoriesMixin, TransactionTestCase): sleep(0.1) print("file completed.") + +class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase): + def test_consume_file(self): self.t_start() @@ -109,9 +111,15 @@ class TestConsumer(DirectoriesMixin, TransactionTestCase): args, kwargs = self.task_mock.call_args self.assertEqual(args[1], f) - @override_settings(CONSUMER_POLLING=1) - def test_consume_file_polling(self): - self.test_consume_file() + def test_consume_file_invalid_ext(self): + self.t_start() + + f = os.path.join(self.dirs.consumption_dir, "my_file.wow") + shutil.copy(self.sample_file, f) + + self.wait_for_task_mock_call() + + self.task_mock.assert_not_called() def test_consume_existing_file(self): f = os.path.join(self.dirs.consumption_dir, "my_file.pdf") @@ -123,12 +131,101 @@ class TestConsumer(DirectoriesMixin, TransactionTestCase): args, kwargs = self.task_mock.call_args self.assertEqual(args[1], f) - @override_settings(CONSUMER_POLLING=1) - def test_consume_existing_file_polling(self): - self.test_consume_existing_file() + @mock.patch("documents.management.commands.document_consumer.logger.error") + def test_slow_write_pdf(self, error_logger): - @override_settings(CONSUMER_RECURSIVE=1) - @override_settings(CONSUMER_SUBDIRS_AS_TAGS=1) + self.task_mock.side_effect = self.bogus_task + + self.t_start() + + fname = os.path.join(self.dirs.consumption_dir, "my_file.pdf") + + self.slow_write_file(fname) + + self.wait_for_task_mock_call() + + error_logger.assert_not_called() + + self.task_mock.assert_called_once() + + args, kwargs = self.task_mock.call_args + self.assertEqual(args[1], fname) + + @mock.patch("documents.management.commands.document_consumer.logger.error") + def test_slow_write_and_move(self, error_logger): + + self.task_mock.side_effect = self.bogus_task + + self.t_start() + + fname = os.path.join(self.dirs.consumption_dir, "my_file.~df") + fname2 = os.path.join(self.dirs.consumption_dir, "my_file.pdf") + + self.slow_write_file(fname) + shutil.move(fname, fname2) + + self.wait_for_task_mock_call() + + self.task_mock.assert_called_once() + + args, kwargs = self.task_mock.call_args + self.assertEqual(args[1], fname2) + + error_logger.assert_not_called() + + @mock.patch("documents.management.commands.document_consumer.logger.error") + def test_slow_write_incomplete(self, error_logger): + + self.task_mock.side_effect = self.bogus_task + + self.t_start() + + fname = os.path.join(self.dirs.consumption_dir, "my_file.pdf") + self.slow_write_file(fname, incomplete=True) + + self.wait_for_task_mock_call() + + self.task_mock.assert_called_once() + args, kwargs = self.task_mock.call_args + self.assertEqual(args[1], fname) + + # assert that we have an error logged with this invalid file. + error_logger.assert_called_once() + + @override_settings(CONSUMPTION_DIR="does_not_exist") + def test_consumption_directory_invalid(self): + + self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot') + + @override_settings(CONSUMPTION_DIR="") + def test_consumption_directory_unset(self): + + self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot') + + +@override_settings(CONSUMER_POLLING=1) +class TestConsumerPolling(TestConsumer): + # just do all the tests with polling + pass + + +@override_settings(CONSUMER_RECURSIVE=True) +class TestConsumerRecursive(TestConsumer): + # just do all the tests with recursive + pass + + +@override_settings(CONSUMER_RECURSIVE=True) +@override_settings(CONSUMER_POLLING=1) +class TestConsumerRecursivePolling(TestConsumer): + # just do all the tests with polling and recursive + pass + + +class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase): + + @override_settings(CONSUMER_RECURSIVE=True) + @override_settings(CONSUMER_SUBDIRS_AS_TAGS=True) def test_consume_file_with_path_tags(self): tag_names = ("existingTag", "Space Tag") @@ -163,86 +260,3 @@ class TestConsumer(DirectoriesMixin, TransactionTestCase): @override_settings(CONSUMER_POLLING=1) def test_consume_file_with_path_tags_polling(self): self.test_consume_file_with_path_tags() - - @mock.patch("documents.management.commands.document_consumer.logger.error") - def test_slow_write_pdf(self, error_logger): - - self.task_mock.side_effect = self.bogus_task - - self.t_start() - - fname = os.path.join(self.dirs.consumption_dir, "my_file.pdf") - - self.slow_write_file(fname) - - self.wait_for_task_mock_call() - - error_logger.assert_not_called() - - self.task_mock.assert_called_once() - - args, kwargs = self.task_mock.call_args - self.assertEqual(args[1], fname) - - @override_settings(CONSUMER_POLLING=1) - def test_slow_write_pdf_polling(self): - self.test_slow_write_pdf() - - @mock.patch("documents.management.commands.document_consumer.logger.error") - def test_slow_write_and_move(self, error_logger): - - self.task_mock.side_effect = self.bogus_task - - self.t_start() - - fname = os.path.join(self.dirs.consumption_dir, "my_file.~df") - fname2 = os.path.join(self.dirs.consumption_dir, "my_file.pdf") - - self.slow_write_file(fname) - shutil.move(fname, fname2) - - self.wait_for_task_mock_call() - - self.task_mock.assert_called_once() - - args, kwargs = self.task_mock.call_args - self.assertEqual(args[1], fname2) - - error_logger.assert_not_called() - - @override_settings(CONSUMER_POLLING=1) - def test_slow_write_and_move_polling(self): - self.test_slow_write_and_move() - - @mock.patch("documents.management.commands.document_consumer.logger.error") - def test_slow_write_incomplete(self, error_logger): - - self.task_mock.side_effect = self.bogus_task - - self.t_start() - - fname = os.path.join(self.dirs.consumption_dir, "my_file.pdf") - self.slow_write_file(fname, incomplete=True) - - self.wait_for_task_mock_call() - - self.task_mock.assert_called_once() - args, kwargs = self.task_mock.call_args - self.assertEqual(args[1], fname) - - # assert that we have an error logged with this invalid file. - error_logger.assert_called_once() - - @override_settings(CONSUMER_POLLING=1) - def test_slow_write_incomplete_polling(self): - self.test_slow_write_incomplete() - - @override_settings(CONSUMPTION_DIR="does_not_exist") - def test_consumption_directory_invalid(self): - - self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot') - - @override_settings(CONSUMPTION_DIR="") - def test_consumption_directory_unset(self): - - self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot')