Merge branch 'dev' into feature-ocrmypdf

This commit is contained in:
jonaswinkler 2020-11-27 14:03:19 +01:00
commit ea9de1bcf1
15 changed files with 87 additions and 89 deletions

View File

@ -265,15 +265,17 @@ Migration to paperless-ng is then performed in a few simple steps:
``docker-compose.env`` to your needs.
See `docker route`_ for details on which edits are advised.
6. Start paperless-ng.
6. In order to find your existing documents with the new search feature, you need
to invoke a one-time operation that will create the search index:
.. code:: bash
.. code:: shell-session
$ docker-compose up
$ docker-compose run --rm webserver document_index reindex
This will migrate your database and create the search index. After that,
paperless will take care of maintaining the index by itself.
If you see everything working (you should see some migrations getting
applied, for instance), you can gracefully stop paperless-ng with Ctrl-C
and then start paperless-ng as usual with
7. Start paperless-ng.
.. code:: bash
@ -281,11 +283,11 @@ Migration to paperless-ng is then performed in a few simple steps:
This will run paperless in the background and automatically start it on system boot.
7. Paperless installed a permanent redirect to ``admin/`` in your browser. This
8. Paperless installed a permanent redirect to ``admin/`` in your browser. This
redirect is still in place and prevents access to the new UI. Clear
browsing cache in order to fix this.
8. Optionally, follow the instructions below to migrate your existing data to PostgreSQL.
9. Optionally, follow the instructions below to migrate your existing data to PostgreSQL.
.. _setup-sqlite_to_psql:

View File

@ -8,7 +8,6 @@ from django.conf import settings
from django.db import transaction
from django.utils import timezone
from paperless.db import GnuPG
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
from .file_handling import generate_filename, create_source_path_directory
from .loggers import LoggingMixin
@ -40,17 +39,6 @@ class Consumer(LoggingMixin):
raise ConsumerError("Cannot consume {}: It is not a file".format(
self.path))
def pre_check_consumption_dir(self):
if not settings.CONSUMPTION_DIR:
raise ConsumerError(
"The CONSUMPTION_DIR settings variable does not appear to be "
"set.")
if not os.path.isdir(settings.CONSUMPTION_DIR):
raise ConsumerError(
"Consumption directory {} does not exist".format(
settings.CONSUMPTION_DIR))
def pre_check_duplicate(self):
with open(self.path, "rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
@ -93,7 +81,6 @@ class Consumer(LoggingMixin):
# Make sure that preconditions for consuming the file are met.
self.pre_check_file_exists()
self.pre_check_consumption_dir()
self.pre_check_directories()
self.pre_check_duplicate()

View File

@ -64,9 +64,6 @@ def get_schema():
def open_index(recreate=False):
# TODO: this is not thread safe. If 2 instances try to create the index
# at the same time, this fails. This currently prevents parallel
# tests.
try:
if exists_in(settings.INDEX_DIR) and not recreate:
return open_dir(settings.INDEX_DIR)

View File

@ -1,9 +1,14 @@
import logging
import uuid
from django.conf import settings
class PaperlessHandler(logging.Handler):
def emit(self, record):
if settings.DISABLE_DBHANDLER:
return
# We have to do the import here or Django will barf when it tries to
# load this because the apps aren't loaded at that point
from .models import Log

View File

@ -3,7 +3,7 @@ import os
from time import sleep
from django.conf import settings
from django.core.management.base import BaseCommand
from django.core.management.base import BaseCommand, CommandError
from django_q.tasks import async_task
from watchdog.events import FileSystemEventHandler
from watchdog.observers.polling import PollingObserver
@ -95,6 +95,15 @@ class Command(BaseCommand):
def handle(self, *args, **options):
directory = options["directory"]
if not directory:
raise CommandError(
"CONSUMPTION_DIR does not appear to be set."
)
if not os.path.isdir(directory):
raise CommandError(
f"Consumption directory {directory} does not exist")
for entry in os.scandir(directory):
_consume(entry.path)
@ -128,12 +137,15 @@ class Command(BaseCommand):
f"Using inotify to watch directory for changes: {directory}")
inotify = INotify()
inotify.add_watch(directory, flags.CLOSE_WRITE | flags.MOVED_TO)
descriptor = inotify.add_watch(
directory, flags.CLOSE_WRITE | flags.MOVED_TO)
try:
while not self.stop_flag:
for event in inotify.read(timeout=1000, read_delay=1000):
file = os.path.join(directory, event.name)
if os.path.isfile(file):
_consume(file)
_consume(file)
except KeyboardInterrupt:
pass
inotify.rm_watch(descriptor)
inotify.close()

View File

@ -5,23 +5,6 @@ from django.db import migrations, models
import django.db.models.deletion
def make_index(apps, schema_editor):
Document = apps.get_model("documents", "Document")
documents = Document.objects.all()
print()
try:
print(" --> Creating document index...")
from whoosh.writing import AsyncWriter
from documents import index
ix = index.open_index(recreate=True)
with AsyncWriter(ix) as writer:
for document in documents:
index.update_document(writer, document)
except ImportError:
# index may not be relevant anymore
print(" --> Cannot create document index.")
def logs_set_default_group(apps, schema_editor):
Log = apps.get_model('documents', 'Log')
for log in Log.objects.all():
@ -99,8 +82,4 @@ class Migration(migrations.Migration):
code=django.db.migrations.operations.special.RunPython.noop,
reverse_code=logs_set_default_group
),
migrations.RunPython(
code=make_index,
reverse_code=django.db.migrations.operations.special.RunPython.noop,
),
]

View File

@ -249,6 +249,7 @@ class Document(models.Model):
@property
def file_type(self):
# TODO: this is not stable across python versions
return mimetypes.guess_extension(str(self.mime_type))
@property

View File

@ -7,14 +7,13 @@ from pathvalidate import ValidationError
from rest_framework.test import APITestCase
from documents.models import Document, Correspondent, DocumentType, Tag
from documents.tests.utils import setup_directories, remove_dirs
from documents.tests.utils import DirectoriesMixin
class DocumentApiTest(APITestCase):
class DocumentApiTest(DirectoriesMixin, APITestCase):
def setUp(self):
self.dirs = setup_directories()
self.addCleanup(remove_dirs, self.dirs)
super(DocumentApiTest, self).setUp()
user = User.objects.create_superuser(username="temp_admin")
self.client.force_login(user=user)

View File

@ -6,7 +6,7 @@ from unittest.mock import MagicMock
from django.test import TestCase, override_settings
from .utils import setup_directories, remove_dirs
from .utils import DirectoriesMixin
from ..consumer import Consumer, ConsumerError
from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
from ..parsers import DocumentParser, ParseError
@ -408,7 +408,7 @@ def fake_magic_from_file(file, mime=False):
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
class TestConsumer(TestCase):
class TestConsumer(DirectoriesMixin, TestCase):
def make_dummy_parser(self, logging_group):
return DummyParser(logging_group, self.dirs.scratch_dir)
@ -417,8 +417,7 @@ class TestConsumer(TestCase):
return FaultyParser(logging_group, self.dirs.scratch_dir)
def setUp(self):
self.dirs = setup_directories()
self.addCleanup(remove_dirs, self.dirs)
super(TestConsumer, self).setUp()
patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
m = patcher.start()
@ -502,26 +501,6 @@ class TestConsumer(TestCase):
self.fail("Should throw exception")
@override_settings(CONSUMPTION_DIR=None)
def testConsumptionDirUnset(self):
try:
self.consumer.try_consume_file(self.get_test_file())
except ConsumerError as e:
self.assertEqual(str(e), "The CONSUMPTION_DIR settings variable does not appear to be set.")
return
self.fail("Should throw exception")
@override_settings(CONSUMPTION_DIR="asd")
def testNoConsumptionDir(self):
try:
self.consumer.try_consume_file(self.get_test_file())
except ConsumerError as e:
self.assertEqual(str(e), "Consumption directory asd does not exist")
return
self.fail("Should throw exception")
def testDuplicates(self):
self.consumer.try_consume_file(self.get_test_file())

View File

@ -2,7 +2,7 @@ import logging
import uuid
from unittest import mock
from django.test import TestCase
from django.test import TestCase, override_settings
from ..models import Log
@ -14,6 +14,7 @@ class TestPaperlessLog(TestCase):
self.logger = logging.getLogger(
"documents.management.commands.document_consumer")
@override_settings(DISABLE_DBHANDLER=False)
def test_that_it_saves_at_all(self):
kw = {"group": uuid.uuid4()}
@ -38,6 +39,7 @@ class TestPaperlessLog(TestCase):
self.logger.critical("This is a critical message", extra=kw)
self.assertEqual(Log.objects.all().count(), 5)
@override_settings(DISABLE_DBHANDLER=False)
def test_groups(self):
kw1 = {"group": uuid.uuid4()}

View File

@ -6,11 +6,12 @@ from time import sleep
from unittest import mock
from django.conf import settings
from django.test import TestCase, override_settings
from django.core.management import call_command, CommandError
from django.test import override_settings, TestCase
from documents.consumer import ConsumerError
from documents.management.commands import document_consumer
from documents.tests.utils import setup_directories, remove_dirs
from documents.tests.utils import DirectoriesMixin
class ConsumerThread(Thread):
@ -32,18 +33,17 @@ def chunked(size, source):
yield source[i:i+size]
class TestConsumer(TestCase):
class TestConsumer(DirectoriesMixin, TestCase):
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
def setUp(self) -> None:
super(TestConsumer, self).setUp()
self.t = None
patcher = mock.patch("documents.management.commands.document_consumer.async_task")
self.task_mock = patcher.start()
self.addCleanup(patcher.stop)
self.dirs = setup_directories()
self.addCleanup(remove_dirs, self.dirs)
def t_start(self):
self.t = ConsumerThread()
self.t.start()
@ -52,7 +52,12 @@ class TestConsumer(TestCase):
def tearDown(self) -> None:
if self.t:
# set the stop flag
self.t.stop()
# wait for the consumer to exit.
self.t.join()
super(TestConsumer, self).tearDown()
def wait_for_task_mock_call(self):
n = 0
@ -193,3 +198,13 @@ class TestConsumer(TestCase):
@override_settings(CONSUMER_POLLING=1)
def test_slow_write_incomplete_polling(self):
self.test_slow_write_incomplete()
@override_settings(CONSUMPTION_DIR="does_not_exist")
def test_consumption_directory_invalid(self):
self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot')
@override_settings(CONSUMPTION_DIR="")
def test_consumption_directory_unset(self):
self.assertRaises(CommandError, call_command, 'document_consumer', '--oneshot')

View File

@ -39,3 +39,18 @@ def remove_dirs(dirs):
shutil.rmtree(dirs.data_dir, ignore_errors=True)
shutil.rmtree(dirs.scratch_dir, ignore_errors=True)
shutil.rmtree(dirs.consumption_dir, ignore_errors=True)
class DirectoriesMixin:
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.dirs = None
def setUp(self) -> None:
self.dirs = setup_directories()
super(DirectoriesMixin, self).setUp()
def tearDown(self) -> None:
super(DirectoriesMixin, self).tearDown()
remove_dirs(self.dirs)

View File

@ -221,7 +221,9 @@ class SearchView(APIView):
permission_classes = (IsAuthenticated,)
ix = index.open_index()
def __init__(self, *args, **kwargs):
super(SearchView, self).__init__(*args, **kwargs)
self.ix = index.open_index()
def add_infos_to_hit(self, r):
doc = Document.objects.get(id=r['id'])
@ -260,7 +262,9 @@ class SearchAutoCompleteView(APIView):
permission_classes = (IsAuthenticated,)
ix = index.open_index()
def __init__(self, *args, **kwargs):
super(SearchAutoCompleteView, self).__init__(*args, **kwargs)
self.ix = index.open_index()
def get(self, request, format=None):
if 'term' in request.query_params:

View File

@ -251,6 +251,8 @@ USE_TZ = True
# Logging #
###############################################################################
DISABLE_DBHANDLER = __get_boolean("PAPERLESS_DISABLE_DBHANDLER")
LOGGING = {
"version": 1,
"disable_existing_loggers": False,

View File

@ -3,10 +3,9 @@ exclude = migrations, paperless/settings.py, .tox, */tests/*
[tool:pytest]
DJANGO_SETTINGS_MODULE=paperless.settings
addopts = --pythonwarnings=all --cov --cov-report=html
addopts = --pythonwarnings=all --cov --cov-report=html -n auto
env =
PAPERLESS_SECRET=paperless
PAPERLESS_EMAIL_SECRET=paperless
PAPERLESS_DISABLE_DBHANDLER=true
[coverage:run]