Merge branch 'dev' into feature-ocrmypdf

commit ea9de1bcf1
jonaswinkler, 2020-11-27 14:03:19 +01:00
15 changed files with 87 additions and 89 deletions

View File

@@ -265,15 +265,17 @@ Migration to paperless-ng is then performed in a few simple steps:
    ``docker-compose.env`` to your needs.
    See `docker route`_ for details on which edits are advised.
 
-6. Start paperless-ng.
+6. In order to find your existing documents with the new search feature, you need
+   to invoke a one-time operation that will create the search index:
 
-   .. code:: bash
+   .. code:: shell-session
 
-      $ docker-compose up
+      $ docker-compose run --rm webserver document_index reindex
 
-   If you see everything working (you should see some migrations getting
-   applied, for instance), you can gracefully stop paperless-ng with Ctrl-C
-   and then start paperless-ng as usual with
+   This will migrate your database and create the search index. After that,
+   paperless will take care of maintaining the index by itself.
+
+7. Start paperless-ng.
 
    .. code:: bash
@@ -281,11 +283,11 @@ Migration to paperless-ng is then performed in a few simple steps:
    This will run paperless in the background and automatically start it on system boot.
 
-7. Paperless installed a permanent redirect to ``admin/`` in your browser. This
+8. Paperless installed a permanent redirect to ``admin/`` in your browser. This
    redirect is still in place and prevents access to the new UI. Clear
    browsing cache in order to fix this.
 
-8. Optionally, follow the instructions below to migrate your existing data to PostgreSQL.
+9. Optionally, follow the instructions below to migrate your existing data to PostgreSQL.
 
 .. _setup-sqlite_to_psql:

View File

@@ -8,7 +8,6 @@ from django.conf import settings
 from django.db import transaction
 from django.utils import timezone
 
-from paperless.db import GnuPG
 from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
 from .file_handling import generate_filename, create_source_path_directory
 from .loggers import LoggingMixin
@@ -40,17 +39,6 @@ class Consumer(LoggingMixin):
             raise ConsumerError("Cannot consume {}: It is not a file".format(
                 self.path))
 
-    def pre_check_consumption_dir(self):
-        if not settings.CONSUMPTION_DIR:
-            raise ConsumerError(
-                "The CONSUMPTION_DIR settings variable does not appear to be "
-                "set.")
-
-        if not os.path.isdir(settings.CONSUMPTION_DIR):
-            raise ConsumerError(
-                "Consumption directory {} does not exist".format(
-                    settings.CONSUMPTION_DIR))
-
     def pre_check_duplicate(self):
         with open(self.path, "rb") as f:
             checksum = hashlib.md5(f.read()).hexdigest()
@@ -93,7 +81,6 @@ class Consumer(LoggingMixin):
 
         # Make sure that preconditions for consuming the file are met.
         self.pre_check_file_exists()
-        self.pre_check_consumption_dir()
         self.pre_check_directories()
         self.pre_check_duplicate()

View File

@@ -64,9 +64,6 @@ def get_schema():
 
 
 def open_index(recreate=False):
-    # TODO: this is not thread safe. If 2 instances try to create the index
-    # at the same time, this fails. This currently prevents parallel
-    # tests.
     try:
         if exists_in(settings.INDEX_DIR) and not recreate:
             return open_dir(settings.INDEX_DIR)

View File

@@ -1,9 +1,14 @@
 import logging
 import uuid
 
+from django.conf import settings
+
 
 class PaperlessHandler(logging.Handler):
     def emit(self, record):
+        if settings.DISABLE_DBHANDLER:
+            return
+
         # We have to do the import here or Django will barf when it tries to
         # load this because the apps aren't loaded at that point
         from .models import Log

View File

@@ -3,7 +3,7 @@ import os
 from time import sleep
 
 from django.conf import settings
-from django.core.management.base import BaseCommand
+from django.core.management.base import BaseCommand, CommandError
 from django_q.tasks import async_task
 from watchdog.events import FileSystemEventHandler
 from watchdog.observers.polling import PollingObserver
@@ -95,6 +95,15 @@ class Command(BaseCommand):
     def handle(self, *args, **options):
         directory = options["directory"]
 
+        if not directory:
+            raise CommandError(
+                "CONSUMPTION_DIR does not appear to be set."
+            )
+
+        if not os.path.isdir(directory):
+            raise CommandError(
+                f"Consumption directory {directory} does not exist")
+
         for entry in os.scandir(directory):
             _consume(entry.path)
@@ -128,12 +137,15 @@
             f"Using inotify to watch directory for changes: {directory}")
 
         inotify = INotify()
-        inotify.add_watch(directory, flags.CLOSE_WRITE | flags.MOVED_TO)
+        descriptor = inotify.add_watch(
+            directory, flags.CLOSE_WRITE | flags.MOVED_TO)
         try:
             while not self.stop_flag:
                 for event in inotify.read(timeout=1000, read_delay=1000):
                     file = os.path.join(directory, event.name)
-                    if os.path.isfile(file):
-                        _consume(file)
+                    _consume(file)
         except KeyboardInterrupt:
             pass
+
+        inotify.rm_watch(descriptor)
+        inotify.close()

View File

@@ -5,23 +5,6 @@ from django.db import migrations, models
 import django.db.models.deletion
 
 
-def make_index(apps, schema_editor):
-    Document = apps.get_model("documents", "Document")
-    documents = Document.objects.all()
-    print()
-    try:
-        print(" --> Creating document index...")
-        from whoosh.writing import AsyncWriter
-        from documents import index
-        ix = index.open_index(recreate=True)
-        with AsyncWriter(ix) as writer:
-            for document in documents:
-                index.update_document(writer, document)
-    except ImportError:
-        # index may not be relevant anymore
-        print(" --> Cannot create document index.")
-
-
 def logs_set_default_group(apps, schema_editor):
     Log = apps.get_model('documents', 'Log')
     for log in Log.objects.all():
@@ -99,8 +82,4 @@ class Migration(migrations.Migration):
             code=django.db.migrations.operations.special.RunPython.noop,
             reverse_code=logs_set_default_group
         ),
-        migrations.RunPython(
-            code=make_index,
-            reverse_code=django.db.migrations.operations.special.RunPython.noop,
-        ),
     ]

View File

@@ -249,6 +249,7 @@ class Document(models.Model):
 
     @property
     def file_type(self):
+        # TODO: this is not stable across python versions
         return mimetypes.guess_extension(str(self.mime_type))
 
     @property

View File

@@ -7,14 +7,13 @@ from pathvalidate import ValidationError
 from rest_framework.test import APITestCase
 
 from documents.models import Document, Correspondent, DocumentType, Tag
-from documents.tests.utils import setup_directories, remove_dirs
+from documents.tests.utils import DirectoriesMixin
 
 
-class DocumentApiTest(APITestCase):
+class DocumentApiTest(DirectoriesMixin, APITestCase):
 
     def setUp(self):
-        self.dirs = setup_directories()
-        self.addCleanup(remove_dirs, self.dirs)
+        super(DocumentApiTest, self).setUp()
 
         user = User.objects.create_superuser(username="temp_admin")
         self.client.force_login(user=user)

View File

@@ -6,7 +6,7 @@ from unittest.mock import MagicMock
 
 from django.test import TestCase, override_settings
 
-from .utils import setup_directories, remove_dirs
+from .utils import DirectoriesMixin
 from ..consumer import Consumer, ConsumerError
 from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
 from ..parsers import DocumentParser, ParseError
@@ -408,7 +408,7 @@ def fake_magic_from_file(file, mime=False):
 
 
 @mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
-class TestConsumer(TestCase):
+class TestConsumer(DirectoriesMixin, TestCase):
 
     def make_dummy_parser(self, logging_group):
         return DummyParser(logging_group, self.dirs.scratch_dir)
@@ -417,8 +417,7 @@
         return FaultyParser(logging_group, self.dirs.scratch_dir)
 
     def setUp(self):
-        self.dirs = setup_directories()
-        self.addCleanup(remove_dirs, self.dirs)
+        super(TestConsumer, self).setUp()
 
         patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
         m = patcher.start()
@@ -502,26 +501,6 @@ class TestConsumer(TestCase):
             self.fail("Should throw exception")
 
-    @override_settings(CONSUMPTION_DIR=None)
-    def testConsumptionDirUnset(self):
-        try:
-            self.consumer.try_consume_file(self.get_test_file())
-        except ConsumerError as e:
-            self.assertEqual(str(e), "The CONSUMPTION_DIR settings variable does not appear to be set.")
-            return
-
-        self.fail("Should throw exception")
-
-    @override_settings(CONSUMPTION_DIR="asd")
-    def testNoConsumptionDir(self):
-        try:
-            self.consumer.try_consume_file(self.get_test_file())
-        except ConsumerError as e:
-            self.assertEqual(str(e), "Consumption directory asd does not exist")
-            return
-
-        self.fail("Should throw exception")
-
     def testDuplicates(self):
         self.consumer.try_consume_file(self.get_test_file())

View File

@@ -2,7 +2,7 @@ import logging
 import uuid
 from unittest import mock
 
-from django.test import TestCase
+from django.test import TestCase, override_settings
 
 from ..models import Log
@@ -14,6 +14,7 @@ class TestPaperlessLog(TestCase):
         self.logger = logging.getLogger(
             "documents.management.commands.document_consumer")
 
+    @override_settings(DISABLE_DBHANDLER=False)
     def test_that_it_saves_at_all(self):
         kw = {"group": uuid.uuid4()}
@@ -38,6 +39,7 @@
         self.logger.critical("This is a critical message", extra=kw)
 
         self.assertEqual(Log.objects.all().count(), 5)
 
+    @override_settings(DISABLE_DBHANDLER=False)
     def test_groups(self):
         kw1 = {"group": uuid.uuid4()}

View File

@@ -6,11 +6,12 @@ from time import sleep
 from unittest import mock
 
 from django.conf import settings
-from django.test import TestCase, override_settings
+from django.core.management import call_command, CommandError
+from django.test import override_settings, TestCase
 
 from documents.consumer import ConsumerError
 from documents.management.commands import document_consumer
-from documents.tests.utils import setup_directories, remove_dirs
+from documents.tests.utils import DirectoriesMixin
 
 
 class ConsumerThread(Thread):
@@ -32,18 +33,17 @@ def chunked(size, source):
         yield source[i:i+size]
 
 
-class TestConsumer(TestCase):
+class TestConsumer(DirectoriesMixin, TestCase):
 
     sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
 
     def setUp(self) -> None:
+        super(TestConsumer, self).setUp()
+        self.t = None
         patcher = mock.patch("documents.management.commands.document_consumer.async_task")
         self.task_mock = patcher.start()
         self.addCleanup(patcher.stop)
 
-        self.dirs = setup_directories()
-        self.addCleanup(remove_dirs, self.dirs)
-
     def t_start(self):
         self.t = ConsumerThread()
         self.t.start()
@@ -52,7 +52,12 @@ class TestConsumer(TestCase):
 
     def tearDown(self) -> None:
         if self.t:
+            # set the stop flag
             self.t.stop()
+            # wait for the consumer to exit.
+            self.t.join()
+
+        super(TestConsumer, self).tearDown()
 
     def wait_for_task_mock_call(self):
         n = 0
@@ -193,3 +198,13 @@ class TestConsumer(TestCase):
     @override_settings(CONSUMER_POLLING=1)
     def test_slow_write_incomplete_polling(self):
         self.test_slow_write_incomplete()
+
+    @override_settings(CONSUMPTION_DIR="does_not_exist")
+    def test_consumption_directory_invalid(self):
+        self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot')
+
+    @override_settings(CONSUMPTION_DIR="")
+    def test_consumption_directory_unset(self):
+        self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot')

View File

@@ -39,3 +39,18 @@ def remove_dirs(dirs):
     shutil.rmtree(dirs.data_dir, ignore_errors=True)
     shutil.rmtree(dirs.scratch_dir, ignore_errors=True)
     shutil.rmtree(dirs.consumption_dir, ignore_errors=True)
+
+
+class DirectoriesMixin:
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.dirs = None
+
+    def setUp(self) -> None:
+        self.dirs = setup_directories()
+        super(DirectoriesMixin, self).setUp()
+
+    def tearDown(self) -> None:
+        super(DirectoriesMixin, self).tearDown()
+        remove_dirs(self.dirs)
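
The DirectoriesMixin added above replaces the per-test setup_directories() / remove_dirs() boilerplate that this commit removes from the individual test classes. For context, a minimal sketch of how a test opts in, assuming only what the hunks show (the example class and assertion are illustrative, not part of the commit; the pattern mirrors DocumentApiTest and TestConsumer above):

import os

from django.test import TestCase

from documents.tests.utils import DirectoriesMixin


class ExampleDirectoriesTest(DirectoriesMixin, TestCase):
    # Listing the mixin before the Django base class means its setUp() runs
    # first: it creates the temporary data/scratch/consumption directories,
    # stores them on self.dirs, and tearDown() removes them again.

    def test_scratch_dir_is_created(self):
        self.assertTrue(os.path.isdir(self.dirs.scratch_dir))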

View File

@@ -221,7 +221,9 @@ class SearchView(APIView):
 
     permission_classes = (IsAuthenticated,)
 
-    ix = index.open_index()
+    def __init__(self, *args, **kwargs):
+        super(SearchView, self).__init__(*args, **kwargs)
+        self.ix = index.open_index()
 
     def add_infos_to_hit(self, r):
         doc = Document.objects.get(id=r['id'])
@@ -260,7 +262,9 @@ class SearchAutoCompleteView(APIView):
 
     permission_classes = (IsAuthenticated,)
 
-    ix = index.open_index()
+    def __init__(self, *args, **kwargs):
+        super(SearchAutoCompleteView, self).__init__(*args, **kwargs)
+        self.ix = index.open_index()
 
     def get(self, request, format=None):
         if 'term' in request.query_params:

View File

@@ -251,6 +251,8 @@ USE_TZ = True
 # Logging                                                                     #
 ###############################################################################
 
+DISABLE_DBHANDLER = __get_boolean("PAPERLESS_DISABLE_DBHANDLER")
+
 LOGGING = {
     "version": 1,
     "disable_existing_loggers": False,

View File

@@ -3,10 +3,9 @@ exclude = migrations, paperless/settings.py, .tox, */tests/*
 
 [tool:pytest]
 DJANGO_SETTINGS_MODULE=paperless.settings
-addopts = --pythonwarnings=all --cov --cov-report=html
+addopts = --pythonwarnings=all --cov --cov-report=html -n auto
 env =
-  PAPERLESS_SECRET=paperless
-  PAPERLESS_EMAIL_SECRET=paperless
+  PAPERLESS_DISABLE_DBHANDLER=true
 
 [coverage:run]
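
Taken together, the loggers.py, test_logger.py, settings.py and setup.cfg hunks introduce a kill switch for the database log handler; setup.cfg disables it for the pytest run, which now also passes -n auto. A self-contained sketch of the mechanism, with the settings machinery reduced to a plain environment lookup (the real code uses the __get_boolean helper in the Django settings module and persists documents.models.Log rows; the print is a stand-in):

import logging
import os

# settings.py equivalent: setup.cfg sets PAPERLESS_DISABLE_DBHANDLER=true for tests.
DISABLE_DBHANDLER = os.getenv("PAPERLESS_DISABLE_DBHANDLER", "false").lower() == "true"


class PaperlessHandler(logging.Handler):
    def emit(self, record):
        if DISABLE_DBHANDLER:
            # Bail out before touching the database so test workers skip
            # writing Log rows entirely.
            return
        # Stand-in for creating a documents.models.Log entry.
        print("would store:", record.getMessage())


logger = logging.getLogger("documents")
logger.addHandler(PaperlessHandler())
logger.error("stored only when the handler is enabled")
# Tests that exercise database logging turn the handler back on with
# @override_settings(DISABLE_DBHANDLER=False), as in the test_logger.py hunks above.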