mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Merge branch 'dev' into feature-ocrmypdf
This commit is contained in:
		| @@ -265,15 +265,17 @@ Migration to paperless-ng is then performed in a few simple steps: | ||||
|     ``docker-compose.env`` to your needs. | ||||
|     See `docker route`_ for details on which edits are advised. | ||||
|  | ||||
| 6.  Start paperless-ng. | ||||
| 6.  In order to find your existing documents with the new search feature, you need | ||||
|     to invoke a one-time operation that will create the search index: | ||||
|  | ||||
|     .. code:: bash | ||||
|     .. code:: shell-session | ||||
|  | ||||
|         $ docker-compose up | ||||
|         $ docker-compose run --rm webserver document_index reindex | ||||
|      | ||||
|     This will migrate your database and create the search index. After that, | ||||
|     paperless will take care of maintaining the index by itself. | ||||
|  | ||||
|     If you see everything working (you should see some migrations getting | ||||
|     applied, for instance), you can gracefully stop paperless-ng with Ctrl-C | ||||
|     and then start paperless-ng as usual with | ||||
| 7.  Start paperless-ng. | ||||
|  | ||||
|     .. code:: bash | ||||
|  | ||||
| @@ -281,11 +283,11 @@ Migration to paperless-ng is then performed in a few simple steps: | ||||
|  | ||||
|     This will run paperless in the background and automatically start it on system boot. | ||||
|  | ||||
| 7.  Paperless installed a permanent redirect to ``admin/`` in your browser. This | ||||
| 8.  Paperless installed a permanent redirect to ``admin/`` in your browser. This | ||||
|     redirect is still in place and prevents access to the new UI. Clear | ||||
|     your browser cache in order to fix this. | ||||
|  | ||||
| 8.  Optionally, follow the instructions below to migrate your existing data to PostgreSQL. | ||||
| 9.  Optionally, follow the instructions below to migrate your existing data to PostgreSQL. | ||||
|  | ||||
|  | ||||
| .. _setup-sqlite_to_psql: | ||||
|   | ||||
| @@ -8,7 +8,6 @@ from django.conf import settings | ||||
| from django.db import transaction | ||||
| from django.utils import timezone | ||||
|  | ||||
| from paperless.db import GnuPG | ||||
| from .classifier import DocumentClassifier, IncompatibleClassifierVersionError | ||||
| from .file_handling import generate_filename, create_source_path_directory | ||||
| from .loggers import LoggingMixin | ||||
| @@ -40,17 +39,6 @@ class Consumer(LoggingMixin): | ||||
|             raise ConsumerError("Cannot consume {}: It is not a file".format( | ||||
|                 self.path)) | ||||
|  | ||||
|     def pre_check_consumption_dir(self): | ||||
|         if not settings.CONSUMPTION_DIR: | ||||
|             raise ConsumerError( | ||||
|                 "The CONSUMPTION_DIR settings variable does not appear to be " | ||||
|                 "set.") | ||||
|  | ||||
|         if not os.path.isdir(settings.CONSUMPTION_DIR): | ||||
|             raise ConsumerError( | ||||
|                 "Consumption directory {} does not exist".format( | ||||
|                     settings.CONSUMPTION_DIR)) | ||||
|  | ||||
|     def pre_check_duplicate(self): | ||||
|         with open(self.path, "rb") as f: | ||||
|             checksum = hashlib.md5(f.read()).hexdigest() | ||||
| @@ -93,7 +81,6 @@ class Consumer(LoggingMixin): | ||||
|         # Make sure that preconditions for consuming the file are met. | ||||
|  | ||||
|         self.pre_check_file_exists() | ||||
|         self.pre_check_consumption_dir() | ||||
|         self.pre_check_directories() | ||||
|         self.pre_check_duplicate() | ||||
|  | ||||
|   | ||||
| @@ -64,9 +64,6 @@ def get_schema(): | ||||
|  | ||||
|  | ||||
| def open_index(recreate=False): | ||||
|     # TODO: this is not thread safe. If 2 instances try to create the index | ||||
|     #  at the same time, this fails. This currently prevents parallel | ||||
|     #  tests. | ||||
|     try: | ||||
|         if exists_in(settings.INDEX_DIR) and not recreate: | ||||
|             return open_dir(settings.INDEX_DIR) | ||||
|   | ||||
| @@ -1,9 +1,14 @@ | ||||
| import logging | ||||
| import uuid | ||||
|  | ||||
| from django.conf import settings | ||||
|  | ||||
|  | ||||
| class PaperlessHandler(logging.Handler): | ||||
|     def emit(self, record): | ||||
|         if settings.DISABLE_DBHANDLER: | ||||
|             return | ||||
|  | ||||
|         # The import has to happen here: Django fails when it tries to | ||||
|         # load this module because the apps aren't loaded at that point. | ||||
|         from .models import Log | ||||
|   | ||||
| @@ -3,7 +3,7 @@ import os | ||||
| from time import sleep | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.core.management.base import BaseCommand | ||||
| from django.core.management.base import BaseCommand, CommandError | ||||
| from django_q.tasks import async_task | ||||
| from watchdog.events import FileSystemEventHandler | ||||
| from watchdog.observers.polling import PollingObserver | ||||
| @@ -95,6 +95,15 @@ class Command(BaseCommand): | ||||
|     def handle(self, *args, **options): | ||||
|         directory = options["directory"] | ||||
|  | ||||
|         if not directory: | ||||
|             raise CommandError( | ||||
|                 "CONSUMPTION_DIR does not appear to be set." | ||||
|             ) | ||||
|  | ||||
|         if not os.path.isdir(directory): | ||||
|             raise CommandError( | ||||
|                 f"Consumption directory {directory} does not exist") | ||||
|  | ||||
|         for entry in os.scandir(directory): | ||||
|             _consume(entry.path) | ||||
|  | ||||
| @@ -128,12 +137,15 @@ class Command(BaseCommand): | ||||
|             f"Using inotify to watch directory for changes: {directory}") | ||||
|  | ||||
|         inotify = INotify() | ||||
|         inotify.add_watch(directory, flags.CLOSE_WRITE | flags.MOVED_TO) | ||||
|         descriptor = inotify.add_watch( | ||||
|             directory, flags.CLOSE_WRITE | flags.MOVED_TO) | ||||
|         try: | ||||
|             while not self.stop_flag: | ||||
|                 for event in inotify.read(timeout=1000, read_delay=1000): | ||||
|                     file = os.path.join(directory, event.name) | ||||
|                     if os.path.isfile(file): | ||||
|                         _consume(file) | ||||
|                     _consume(file) | ||||
|         except KeyboardInterrupt: | ||||
|             pass | ||||
|  | ||||
|         inotify.rm_watch(descriptor) | ||||
|         inotify.close() | ||||
|   | ||||
| @@ -5,23 +5,6 @@ from django.db import migrations, models | ||||
| import django.db.models.deletion | ||||
|  | ||||
|  | ||||
| def make_index(apps, schema_editor): | ||||
|     Document = apps.get_model("documents", "Document") | ||||
|     documents = Document.objects.all() | ||||
|     print() | ||||
|     try: | ||||
|         print("  --> Creating document index...") | ||||
|         from whoosh.writing import AsyncWriter | ||||
|         from documents import index | ||||
|         ix = index.open_index(recreate=True) | ||||
|         with AsyncWriter(ix) as writer: | ||||
|             for document in documents: | ||||
|                 index.update_document(writer, document) | ||||
|     except ImportError: | ||||
|         # index may not be relevant anymore | ||||
|         print("  --> Cannot create document index.") | ||||
|  | ||||
|  | ||||
| def logs_set_default_group(apps, schema_editor): | ||||
|     Log = apps.get_model('documents', 'Log') | ||||
|     for log in Log.objects.all(): | ||||
| @@ -99,8 +82,4 @@ class Migration(migrations.Migration): | ||||
|             code=django.db.migrations.operations.special.RunPython.noop, | ||||
|             reverse_code=logs_set_default_group | ||||
|         ), | ||||
|         migrations.RunPython( | ||||
|             code=make_index, | ||||
|             reverse_code=django.db.migrations.operations.special.RunPython.noop, | ||||
|         ), | ||||
|     ] | ||||
|   | ||||
| @@ -249,6 +249,7 @@ class Document(models.Model): | ||||
|  | ||||
|     @property | ||||
|     def file_type(self): | ||||
|         # TODO: the guessed extension is not stable across Python versions | ||||
|         return mimetypes.guess_extension(str(self.mime_type)) | ||||
|  | ||||
|     @property | ||||
|   | ||||
| @@ -7,14 +7,13 @@ from pathvalidate import ValidationError | ||||
| from rest_framework.test import APITestCase | ||||
|  | ||||
| from documents.models import Document, Correspondent, DocumentType, Tag | ||||
| from documents.tests.utils import setup_directories, remove_dirs | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
| class DocumentApiTest(APITestCase): | ||||
| class DocumentApiTest(DirectoriesMixin, APITestCase): | ||||
|  | ||||
|     def setUp(self): | ||||
|         self.dirs = setup_directories() | ||||
|         self.addCleanup(remove_dirs, self.dirs) | ||||
|         super(DocumentApiTest, self).setUp() | ||||
|  | ||||
|         user = User.objects.create_superuser(username="temp_admin") | ||||
|         self.client.force_login(user=user) | ||||
|   | ||||
| @@ -6,7 +6,7 @@ from unittest.mock import MagicMock | ||||
|  | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from .utils import setup_directories, remove_dirs | ||||
| from .utils import DirectoriesMixin | ||||
| from ..consumer import Consumer, ConsumerError | ||||
| from ..models import FileInfo, Tag, Correspondent, DocumentType, Document | ||||
| from ..parsers import DocumentParser, ParseError | ||||
| @@ -408,7 +408,7 @@ def fake_magic_from_file(file, mime=False): | ||||
|  | ||||
|  | ||||
| @mock.patch("documents.consumer.magic.from_file", fake_magic_from_file) | ||||
| class TestConsumer(TestCase): | ||||
| class TestConsumer(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     def make_dummy_parser(self, logging_group): | ||||
|         return DummyParser(logging_group, self.dirs.scratch_dir) | ||||
| @@ -417,8 +417,7 @@ class TestConsumer(TestCase): | ||||
|         return FaultyParser(logging_group, self.dirs.scratch_dir) | ||||
|  | ||||
|     def setUp(self): | ||||
|         self.dirs = setup_directories() | ||||
|         self.addCleanup(remove_dirs, self.dirs) | ||||
|         super(TestConsumer, self).setUp() | ||||
|  | ||||
|         patcher = mock.patch("documents.parsers.document_consumer_declaration.send") | ||||
|         m = patcher.start() | ||||
| @@ -502,26 +501,6 @@ class TestConsumer(TestCase): | ||||
|  | ||||
|         self.fail("Should throw exception") | ||||
|  | ||||
|     @override_settings(CONSUMPTION_DIR=None) | ||||
|     def testConsumptionDirUnset(self): | ||||
|         try: | ||||
|             self.consumer.try_consume_file(self.get_test_file()) | ||||
|         except ConsumerError as e: | ||||
|             self.assertEqual(str(e), "The CONSUMPTION_DIR settings variable does not appear to be set.") | ||||
|             return | ||||
|  | ||||
|         self.fail("Should throw exception") | ||||
|  | ||||
|     @override_settings(CONSUMPTION_DIR="asd") | ||||
|     def testNoConsumptionDir(self): | ||||
|         try: | ||||
|             self.consumer.try_consume_file(self.get_test_file()) | ||||
|         except ConsumerError as e: | ||||
|             self.assertEqual(str(e), "Consumption directory asd does not exist") | ||||
|             return | ||||
|  | ||||
|         self.fail("Should throw exception") | ||||
|  | ||||
|     def testDuplicates(self): | ||||
|         self.consumer.try_consume_file(self.get_test_file()) | ||||
|  | ||||
|   | ||||
| @@ -2,7 +2,7 @@ import logging | ||||
| import uuid | ||||
| from unittest import mock | ||||
|  | ||||
| from django.test import TestCase | ||||
| from django.test import TestCase, override_settings | ||||
|  | ||||
| from ..models import Log | ||||
|  | ||||
| @@ -14,6 +14,7 @@ class TestPaperlessLog(TestCase): | ||||
|         self.logger = logging.getLogger( | ||||
|             "documents.management.commands.document_consumer") | ||||
|  | ||||
|     @override_settings(DISABLE_DBHANDLER=False) | ||||
|     def test_that_it_saves_at_all(self): | ||||
|  | ||||
|         kw = {"group": uuid.uuid4()} | ||||
| @@ -38,6 +39,7 @@ class TestPaperlessLog(TestCase): | ||||
|             self.logger.critical("This is a critical message", extra=kw) | ||||
|             self.assertEqual(Log.objects.all().count(), 5) | ||||
|  | ||||
|     @override_settings(DISABLE_DBHANDLER=False) | ||||
|     def test_groups(self): | ||||
|  | ||||
|         kw1 = {"group": uuid.uuid4()} | ||||
|   | ||||
| @@ -6,11 +6,12 @@ from time import sleep | ||||
| from unittest import mock | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.test import TestCase, override_settings | ||||
| from django.core.management import call_command, CommandError | ||||
| from django.test import override_settings, TestCase | ||||
|  | ||||
| from documents.consumer import ConsumerError | ||||
| from documents.management.commands import document_consumer | ||||
| from documents.tests.utils import setup_directories, remove_dirs | ||||
| from documents.tests.utils import DirectoriesMixin | ||||
|  | ||||
|  | ||||
| class ConsumerThread(Thread): | ||||
| @@ -32,18 +33,17 @@ def chunked(size, source): | ||||
|         yield source[i:i+size] | ||||
|  | ||||
|  | ||||
| class TestConsumer(TestCase): | ||||
| class TestConsumer(DirectoriesMixin, TestCase): | ||||
|  | ||||
|     sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf") | ||||
|  | ||||
|     def setUp(self) -> None: | ||||
|         super(TestConsumer, self).setUp() | ||||
|         self.t = None | ||||
|         patcher = mock.patch("documents.management.commands.document_consumer.async_task") | ||||
|         self.task_mock = patcher.start() | ||||
|         self.addCleanup(patcher.stop) | ||||
|  | ||||
|         self.dirs = setup_directories() | ||||
|         self.addCleanup(remove_dirs, self.dirs) | ||||
|  | ||||
|     def t_start(self): | ||||
|         self.t = ConsumerThread() | ||||
|         self.t.start() | ||||
| @@ -52,7 +52,12 @@ class TestConsumer(TestCase): | ||||
|  | ||||
|     def tearDown(self) -> None: | ||||
|         if self.t: | ||||
|             # set the stop flag | ||||
|             self.t.stop() | ||||
|             # wait for the consumer to exit. | ||||
|             self.t.join() | ||||
|  | ||||
|         super(TestConsumer, self).tearDown() | ||||
|  | ||||
|     def wait_for_task_mock_call(self): | ||||
|         n = 0 | ||||
| @@ -193,3 +198,13 @@ class TestConsumer(TestCase): | ||||
|     @override_settings(CONSUMER_POLLING=1) | ||||
|     def test_slow_write_incomplete_polling(self): | ||||
|         self.test_slow_write_incomplete() | ||||
|  | ||||
|     @override_settings(CONSUMPTION_DIR="does_not_exist") | ||||
|     def test_consumption_directory_invalid(self): | ||||
|  | ||||
|         self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot') | ||||
|  | ||||
|     @override_settings(CONSUMPTION_DIR="") | ||||
|     def test_consumption_directory_unset(self): | ||||
|  | ||||
|         self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot') | ||||
|   | ||||
| @@ -39,3 +39,18 @@ def remove_dirs(dirs): | ||||
|     shutil.rmtree(dirs.data_dir, ignore_errors=True) | ||||
|     shutil.rmtree(dirs.scratch_dir, ignore_errors=True) | ||||
|     shutil.rmtree(dirs.consumption_dir, ignore_errors=True) | ||||
|  | ||||
|  | ||||
| class DirectoriesMixin: | ||||
|  | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         super().__init__(*args, **kwargs) | ||||
|         self.dirs = None | ||||
|  | ||||
|     def setUp(self) -> None: | ||||
|         self.dirs = setup_directories() | ||||
|         super(DirectoriesMixin, self).setUp() | ||||
|  | ||||
|     def tearDown(self) -> None: | ||||
|         super(DirectoriesMixin, self).tearDown() | ||||
|         remove_dirs(self.dirs) | ||||
|   | ||||
| @@ -221,7 +221,9 @@ class SearchView(APIView): | ||||
|  | ||||
|     permission_classes = (IsAuthenticated,) | ||||
|  | ||||
|     ix = index.open_index() | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         super(SearchView, self).__init__(*args, **kwargs) | ||||
|         self.ix = index.open_index() | ||||
|  | ||||
|     def add_infos_to_hit(self, r): | ||||
|         doc = Document.objects.get(id=r['id']) | ||||
| @@ -260,7 +262,9 @@ class SearchAutoCompleteView(APIView): | ||||
|  | ||||
|     permission_classes = (IsAuthenticated,) | ||||
|  | ||||
|     ix = index.open_index() | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         super(SearchAutoCompleteView, self).__init__(*args, **kwargs) | ||||
|         self.ix = index.open_index() | ||||
|  | ||||
|     def get(self, request, format=None): | ||||
|         if 'term' in request.query_params: | ||||
|   | ||||
| @@ -251,6 +251,8 @@ USE_TZ = True | ||||
| # Logging                                                                     # | ||||
| ############################################################################### | ||||
|  | ||||
| DISABLE_DBHANDLER = __get_boolean("PAPERLESS_DISABLE_DBHANDLER") | ||||
|  | ||||
| LOGGING = { | ||||
|     "version": 1, | ||||
|     "disable_existing_loggers": False, | ||||
|   | ||||
| @@ -3,10 +3,9 @@ exclude = migrations, paperless/settings.py, .tox, */tests/* | ||||
|  | ||||
| [tool:pytest] | ||||
| DJANGO_SETTINGS_MODULE=paperless.settings | ||||
| addopts = --pythonwarnings=all --cov --cov-report=html | ||||
| addopts = --pythonwarnings=all --cov --cov-report=html -n auto | ||||
| env = | ||||
|   PAPERLESS_SECRET=paperless | ||||
|   PAPERLESS_EMAIL_SECRET=paperless | ||||
|   PAPERLESS_DISABLE_DBHANDLER=true | ||||
|  | ||||
|  | ||||
| [coverage:run] | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 jonaswinkler
					jonaswinkler