diff --git a/docs/setup.rst b/docs/setup.rst index 88785364b..4e2826dd6 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -265,15 +265,17 @@ Migration to paperless-ng is then performed in a few simple steps: ``docker-compose.env`` to your needs. See `docker route`_ for details on which edits are advised. -6. Start paperless-ng. +6. In order to find your existing documents with the new search feature, you need + to invoke a one-time operation that will create the search index: - .. code:: bash + .. code:: shell-session - $ docker-compose up + $ docker-compose run --rm webserver document_index reindex + + This will migrate your database and create the search index. After that, + paperless will take care of maintaining the index by itself. - If you see everything working (you should see some migrations getting - applied, for instance), you can gracefully stop paperless-ng with Ctrl-C - and then start paperless-ng as usual with +7. Start paperless-ng. .. code:: bash @@ -281,11 +283,11 @@ Migration to paperless-ng is then performed in a few simple steps: This will run paperless in the background and automatically start it on system boot. -7. Paperless installed a permanent redirect to ``admin/`` in your browser. This +8. Paperless installed a permanent redirect to ``admin/`` in your browser. This redirect is still in place and prevents access to the new UI. Clear browsing cache in order to fix this. -8. Optionally, follow the instructions below to migrate your existing data to PostgreSQL. +9. Optionally, follow the instructions below to migrate your existing data to PostgreSQL. .. _setup-sqlite_to_psql: diff --git a/src/documents/consumer.py b/src/documents/consumer.py index 6aedb25b1..f38c726ac 100755 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -8,7 +8,6 @@ from django.conf import settings from django.db import transaction from django.utils import timezone -from paperless.db import GnuPG from .classifier import DocumentClassifier, IncompatibleClassifierVersionError from .file_handling import generate_filename, create_source_path_directory from .loggers import LoggingMixin @@ -40,17 +39,6 @@ class Consumer(LoggingMixin): raise ConsumerError("Cannot consume {}: It is not a file".format( self.path)) - def pre_check_consumption_dir(self): - if not settings.CONSUMPTION_DIR: - raise ConsumerError( - "The CONSUMPTION_DIR settings variable does not appear to be " - "set.") - - if not os.path.isdir(settings.CONSUMPTION_DIR): - raise ConsumerError( - "Consumption directory {} does not exist".format( - settings.CONSUMPTION_DIR)) - def pre_check_duplicate(self): with open(self.path, "rb") as f: checksum = hashlib.md5(f.read()).hexdigest() @@ -93,7 +81,6 @@ class Consumer(LoggingMixin): # Make sure that preconditions for consuming the file are met. self.pre_check_file_exists() - self.pre_check_consumption_dir() self.pre_check_directories() self.pre_check_duplicate() diff --git a/src/documents/index.py b/src/documents/index.py index a6c3abba8..ffa3e688f 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -64,9 +64,6 @@ def get_schema(): def open_index(recreate=False): - # TODO: this is not thread safe. If 2 instances try to create the index - # at the same time, this fails. This currently prevents parallel - # tests. try: if exists_in(settings.INDEX_DIR) and not recreate: return open_dir(settings.INDEX_DIR) diff --git a/src/documents/loggers.py b/src/documents/loggers.py index fd20e1288..76dbe0163 100644 --- a/src/documents/loggers.py +++ b/src/documents/loggers.py @@ -1,9 +1,14 @@ import logging import uuid +from django.conf import settings + class PaperlessHandler(logging.Handler): def emit(self, record): + if settings.DISABLE_DBHANDLER: + return + # We have to do the import here or Django will barf when it tries to # load this because the apps aren't loaded at that point from .models import Log diff --git a/src/documents/management/commands/document_consumer.py b/src/documents/management/commands/document_consumer.py index c25d0cfa9..7baeccce0 100644 --- a/src/documents/management/commands/document_consumer.py +++ b/src/documents/management/commands/document_consumer.py @@ -3,7 +3,7 @@ import os from time import sleep from django.conf import settings -from django.core.management.base import BaseCommand +from django.core.management.base import BaseCommand, CommandError from django_q.tasks import async_task from watchdog.events import FileSystemEventHandler from watchdog.observers.polling import PollingObserver @@ -95,6 +95,15 @@ class Command(BaseCommand): def handle(self, *args, **options): directory = options["directory"] + if not directory: + raise CommandError( + "CONSUMPTION_DIR does not appear to be set." + ) + + if not os.path.isdir(directory): + raise CommandError( + f"Consumption directory {directory} does not exist") + for entry in os.scandir(directory): _consume(entry.path) @@ -128,12 +137,15 @@ class Command(BaseCommand): f"Using inotify to watch directory for changes: {directory}") inotify = INotify() - inotify.add_watch(directory, flags.CLOSE_WRITE | flags.MOVED_TO) + descriptor = inotify.add_watch( + directory, flags.CLOSE_WRITE | flags.MOVED_TO) try: while not self.stop_flag: for event in inotify.read(timeout=1000, read_delay=1000): file = os.path.join(directory, event.name) - if os.path.isfile(file): - _consume(file) + _consume(file) except KeyboardInterrupt: pass + + inotify.rm_watch(descriptor) + inotify.close() diff --git a/src/documents/migrations/1000_update_paperless_all.py b/src/documents/migrations/1000_update_paperless_all.py index dc6313dd8..f3fbbb6c1 100644 --- a/src/documents/migrations/1000_update_paperless_all.py +++ b/src/documents/migrations/1000_update_paperless_all.py @@ -5,23 +5,6 @@ from django.db import migrations, models import django.db.models.deletion -def make_index(apps, schema_editor): - Document = apps.get_model("documents", "Document") - documents = Document.objects.all() - print() - try: - print(" --> Creating document index...") - from whoosh.writing import AsyncWriter - from documents import index - ix = index.open_index(recreate=True) - with AsyncWriter(ix) as writer: - for document in documents: - index.update_document(writer, document) - except ImportError: - # index may not be relevant anymore - print(" --> Cannot create document index.") - - def logs_set_default_group(apps, schema_editor): Log = apps.get_model('documents', 'Log') for log in Log.objects.all(): @@ -99,8 +82,4 @@ class Migration(migrations.Migration): code=django.db.migrations.operations.special.RunPython.noop, reverse_code=logs_set_default_group ), - migrations.RunPython( - code=make_index, - reverse_code=django.db.migrations.operations.special.RunPython.noop, - ), ] diff --git a/src/documents/models.py b/src/documents/models.py index 5a4c9a187..358749fae 100755 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -249,6 +249,7 @@ class Document(models.Model): @property def file_type(self): + # TODO: this is not stable across python versions return mimetypes.guess_extension(str(self.mime_type)) @property diff --git a/src/documents/tests/test_api.py b/src/documents/tests/test_api.py index d9a2aac26..37f774891 100644 --- a/src/documents/tests/test_api.py +++ b/src/documents/tests/test_api.py @@ -7,14 +7,13 @@ from pathvalidate import ValidationError from rest_framework.test import APITestCase from documents.models import Document, Correspondent, DocumentType, Tag -from documents.tests.utils import setup_directories, remove_dirs +from documents.tests.utils import DirectoriesMixin -class DocumentApiTest(APITestCase): +class DocumentApiTest(DirectoriesMixin, APITestCase): def setUp(self): - self.dirs = setup_directories() - self.addCleanup(remove_dirs, self.dirs) + super(DocumentApiTest, self).setUp() user = User.objects.create_superuser(username="temp_admin") self.client.force_login(user=user) diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py index b8644a41d..61a98612d 100644 --- a/src/documents/tests/test_consumer.py +++ b/src/documents/tests/test_consumer.py @@ -6,7 +6,7 @@ from unittest.mock import MagicMock from django.test import TestCase, override_settings -from .utils import setup_directories, remove_dirs +from .utils import DirectoriesMixin from ..consumer import Consumer, ConsumerError from ..models import FileInfo, Tag, Correspondent, DocumentType, Document from ..parsers import DocumentParser, ParseError @@ -408,7 +408,7 @@ def fake_magic_from_file(file, mime=False): @mock.patch("documents.consumer.magic.from_file", fake_magic_from_file) -class TestConsumer(TestCase): +class TestConsumer(DirectoriesMixin, TestCase): def make_dummy_parser(self, logging_group): return DummyParser(logging_group, self.dirs.scratch_dir) @@ -417,8 +417,7 @@ class TestConsumer(TestCase): return FaultyParser(logging_group, self.dirs.scratch_dir) def setUp(self): - self.dirs = setup_directories() - self.addCleanup(remove_dirs, self.dirs) + super(TestConsumer, self).setUp() patcher = mock.patch("documents.parsers.document_consumer_declaration.send") m = patcher.start() @@ -502,26 +501,6 @@ class TestConsumer(TestCase): self.fail("Should throw exception") - @override_settings(CONSUMPTION_DIR=None) - def testConsumptionDirUnset(self): - try: - self.consumer.try_consume_file(self.get_test_file()) - except ConsumerError as e: - self.assertEqual(str(e), "The CONSUMPTION_DIR settings variable does not appear to be set.") - return - - self.fail("Should throw exception") - - @override_settings(CONSUMPTION_DIR="asd") - def testNoConsumptionDir(self): - try: - self.consumer.try_consume_file(self.get_test_file()) - except ConsumerError as e: - self.assertEqual(str(e), "Consumption directory asd does not exist") - return - - self.fail("Should throw exception") - def testDuplicates(self): self.consumer.try_consume_file(self.get_test_file()) diff --git a/src/documents/tests/test_logger.py b/src/documents/tests/test_logger.py index 6e240ffc9..bbc9c2b5d 100644 --- a/src/documents/tests/test_logger.py +++ b/src/documents/tests/test_logger.py @@ -2,7 +2,7 @@ import logging import uuid from unittest import mock -from django.test import TestCase +from django.test import TestCase, override_settings from ..models import Log @@ -14,6 +14,7 @@ class TestPaperlessLog(TestCase): self.logger = logging.getLogger( "documents.management.commands.document_consumer") + @override_settings(DISABLE_DBHANDLER=False) def test_that_it_saves_at_all(self): kw = {"group": uuid.uuid4()} @@ -38,6 +39,7 @@ class TestPaperlessLog(TestCase): self.logger.critical("This is a critical message", extra=kw) self.assertEqual(Log.objects.all().count(), 5) + @override_settings(DISABLE_DBHANDLER=False) def test_groups(self): kw1 = {"group": uuid.uuid4()} diff --git a/src/documents/tests/test_management_consumer.py b/src/documents/tests/test_management_consumer.py index 33938d450..aed824926 100644 --- a/src/documents/tests/test_management_consumer.py +++ b/src/documents/tests/test_management_consumer.py @@ -6,11 +6,12 @@ from time import sleep from unittest import mock from django.conf import settings -from django.test import TestCase, override_settings +from django.core.management import call_command, CommandError +from django.test import override_settings, TestCase from documents.consumer import ConsumerError from documents.management.commands import document_consumer -from documents.tests.utils import setup_directories, remove_dirs +from documents.tests.utils import DirectoriesMixin class ConsumerThread(Thread): @@ -32,18 +33,17 @@ def chunked(size, source): yield source[i:i+size] -class TestConsumer(TestCase): +class TestConsumer(DirectoriesMixin, TestCase): sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") def setUp(self) -> None: + super(TestConsumer, self).setUp() + self.t = None patcher = mock.patch("documents.management.commands.document_consumer.async_task") self.task_mock = patcher.start() self.addCleanup(patcher.stop) - self.dirs = setup_directories() - self.addCleanup(remove_dirs, self.dirs) - def t_start(self): self.t = ConsumerThread() self.t.start() @@ -52,7 +52,12 @@ class TestConsumer(TestCase): def tearDown(self) -> None: if self.t: + # set the stop flag self.t.stop() + # wait for the consumer to exit. + self.t.join() + + super(TestConsumer, self).tearDown() def wait_for_task_mock_call(self): n = 0 @@ -193,3 +198,13 @@ class TestConsumer(TestCase): @override_settings(CONSUMER_POLLING=1) def test_slow_write_incomplete_polling(self): self.test_slow_write_incomplete() + + @override_settings(CONSUMPTION_DIR="does_not_exist") + def test_consumption_directory_invalid(self): + + self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot') + + @override_settings(CONSUMPTION_DIR="") + def test_consumption_directory_unset(self): + + self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot') diff --git a/src/documents/tests/utils.py b/src/documents/tests/utils.py index 7b0938ee3..83148e9c7 100644 --- a/src/documents/tests/utils.py +++ b/src/documents/tests/utils.py @@ -39,3 +39,18 @@ def remove_dirs(dirs): shutil.rmtree(dirs.data_dir, ignore_errors=True) shutil.rmtree(dirs.scratch_dir, ignore_errors=True) shutil.rmtree(dirs.consumption_dir, ignore_errors=True) + + +class DirectoriesMixin: + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.dirs = None + + def setUp(self) -> None: + self.dirs = setup_directories() + super(DirectoriesMixin, self).setUp() + + def tearDown(self) -> None: + super(DirectoriesMixin, self).tearDown() + remove_dirs(self.dirs) diff --git a/src/documents/views.py b/src/documents/views.py index 9484b48cc..2c8585dd0 100755 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -221,7 +221,9 @@ class SearchView(APIView): permission_classes = (IsAuthenticated,) - ix = index.open_index() + def __init__(self, *args, **kwargs): + super(SearchView, self).__init__(*args, **kwargs) + self.ix = index.open_index() def add_infos_to_hit(self, r): doc = Document.objects.get(id=r['id']) @@ -260,7 +262,9 @@ class SearchAutoCompleteView(APIView): permission_classes = (IsAuthenticated,) - ix = index.open_index() + def __init__(self, *args, **kwargs): + super(SearchAutoCompleteView, self).__init__(*args, **kwargs) + self.ix = index.open_index() def get(self, request, format=None): if 'term' in request.query_params: diff --git a/src/paperless/settings.py b/src/paperless/settings.py index 79b454649..18f7cfac4 100644 --- a/src/paperless/settings.py +++ b/src/paperless/settings.py @@ -251,6 +251,8 @@ USE_TZ = True # Logging # ############################################################################### +DISABLE_DBHANDLER = __get_boolean("PAPERLESS_DISABLE_DBHANDLER") + LOGGING = { "version": 1, "disable_existing_loggers": False, diff --git a/src/setup.cfg b/src/setup.cfg index b540f9efe..2a1a348bd 100644 --- a/src/setup.cfg +++ b/src/setup.cfg @@ -3,10 +3,9 @@ exclude = migrations, paperless/settings.py, .tox, */tests/* [tool:pytest] DJANGO_SETTINGS_MODULE=paperless.settings -addopts = --pythonwarnings=all --cov --cov-report=html +addopts = --pythonwarnings=all --cov --cov-report=html -n auto env = - PAPERLESS_SECRET=paperless - PAPERLESS_EMAIL_SECRET=paperless + PAPERLESS_DISABLE_DBHANDLER=true [coverage:run]