Merge branch 'dev' into feature-ocrmypdf

This commit is contained in:
jonaswinkler
2020-11-27 14:03:19 +01:00
15 changed files with 87 additions and 89 deletions

View File

@@ -8,7 +8,6 @@ from django.conf import settings
from django.db import transaction
from django.utils import timezone
from paperless.db import GnuPG
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
from .file_handling import generate_filename, create_source_path_directory
from .loggers import LoggingMixin
@@ -40,17 +39,6 @@ class Consumer(LoggingMixin):
raise ConsumerError("Cannot consume {}: It is not a file".format(
self.path))
def pre_check_consumption_dir(self):
    """Check that the configured consumption directory can be used.

    Raises:
        ConsumerError: if ``settings.CONSUMPTION_DIR`` is unset/empty, or
            if it does not name an existing directory.
    """
    consumption_dir = settings.CONSUMPTION_DIR

    # An unset (falsy) setting is reported separately from a missing path.
    if not consumption_dir:
        raise ConsumerError(
            "The CONSUMPTION_DIR settings variable does not appear to be set."
        )

    if os.path.isdir(consumption_dir):
        return

    raise ConsumerError(
        f"Consumption directory {consumption_dir} does not exist")
def pre_check_duplicate(self):
with open(self.path, "rb") as f:
checksum = hashlib.md5(f.read()).hexdigest()
@@ -93,7 +81,6 @@ class Consumer(LoggingMixin):
# Make sure that preconditions for consuming the file are met.
self.pre_check_file_exists()
self.pre_check_consumption_dir()
self.pre_check_directories()
self.pre_check_duplicate()

View File

@@ -64,9 +64,6 @@ def get_schema():
def open_index(recreate=False):
# TODO: this is not thread safe. If 2 instances try to create the index
# at the same time, this fails. This currently prevents parallel
# tests.
try:
if exists_in(settings.INDEX_DIR) and not recreate:
return open_dir(settings.INDEX_DIR)

View File

@@ -1,9 +1,14 @@
import logging
import uuid
from django.conf import settings
class PaperlessHandler(logging.Handler):
def emit(self, record):
if settings.DISABLE_DBHANDLER:
return
# We have to do the import here or Django will barf when it tries to
# load this because the apps aren't loaded at that point
from .models import Log

View File

@@ -3,7 +3,7 @@ import os
from time import sleep
from django.conf import settings
from django.core.management.base import BaseCommand
from django.core.management.base import BaseCommand, CommandError
from django_q.tasks import async_task
from watchdog.events import FileSystemEventHandler
from watchdog.observers.polling import PollingObserver
@@ -95,6 +95,15 @@ class Command(BaseCommand):
def handle(self, *args, **options):
directory = options["directory"]
if not directory:
raise CommandError(
"CONSUMPTION_DIR does not appear to be set."
)
if not os.path.isdir(directory):
raise CommandError(
f"Consumption directory {directory} does not exist")
for entry in os.scandir(directory):
_consume(entry.path)
@@ -128,12 +137,15 @@ class Command(BaseCommand):
f"Using inotify to watch directory for changes: {directory}")
inotify = INotify()
inotify.add_watch(directory, flags.CLOSE_WRITE | flags.MOVED_TO)
descriptor = inotify.add_watch(
directory, flags.CLOSE_WRITE | flags.MOVED_TO)
try:
while not self.stop_flag:
for event in inotify.read(timeout=1000, read_delay=1000):
file = os.path.join(directory, event.name)
if os.path.isfile(file):
_consume(file)
_consume(file)
except KeyboardInterrupt:
pass
inotify.rm_watch(descriptor)
inotify.close()

View File

@@ -5,23 +5,6 @@ from django.db import migrations, models
import django.db.models.deletion
def make_index(apps, schema_editor):
    """Data-migration step that recreates the document search index.

    Looks up the ``Document`` model through the ``apps`` registry passed in
    by the migration and feeds every document to ``index.update_document``.
    An ``ImportError`` raised anywhere in that process is reported on stdout
    and otherwise ignored.

    Args:
        apps: registry used to resolve the ``documents.Document`` model.
        schema_editor: unused; part of the ``RunPython`` callable signature.
    """
    document_model = apps.get_model("documents", "Document")
    all_documents = document_model.objects.all()

    print()
    try:
        print(" --> Creating document index...")
        # Imported lazily so that a missing module is caught below instead
        # of breaking the import of this migration.
        from whoosh.writing import AsyncWriter
        from documents import index

        fresh_index = index.open_index(recreate=True)
        with AsyncWriter(fresh_index) as index_writer:
            for doc in all_documents:
                index.update_document(index_writer, doc)
    except ImportError:
        # index may not be relevant anymore
        print(" --> Cannot create document index.")
def logs_set_default_group(apps, schema_editor):
Log = apps.get_model('documents', 'Log')
for log in Log.objects.all():
@@ -99,8 +82,4 @@ class Migration(migrations.Migration):
code=django.db.migrations.operations.special.RunPython.noop,
reverse_code=logs_set_default_group
),
migrations.RunPython(
code=make_index,
reverse_code=django.db.migrations.operations.special.RunPython.noop,
),
]

View File

@@ -249,6 +249,7 @@ class Document(models.Model):
@property
def file_type(self):
    """Return the file extension that ``mimetypes`` guesses for ``mime_type``.

    The result is whatever ``mimetypes.guess_extension`` returns for the
    stringified ``self.mime_type`` (``None`` when no extension is known).
    """
    # TODO: this is not stable across python versions
    mime_type_name = str(self.mime_type)
    return mimetypes.guess_extension(mime_type_name)
@property

View File

@@ -7,14 +7,13 @@ from pathvalidate import ValidationError
from rest_framework.test import APITestCase
from documents.models import Document, Correspondent, DocumentType, Tag
from documents.tests.utils import setup_directories, remove_dirs
from documents.tests.utils import DirectoriesMixin
class DocumentApiTest(APITestCase):
class DocumentApiTest(DirectoriesMixin, APITestCase):
def setUp(self):
self.dirs = setup_directories()
self.addCleanup(remove_dirs, self.dirs)
super(DocumentApiTest, self).setUp()
user = User.objects.create_superuser(username="temp_admin")
self.client.force_login(user=user)

View File

@@ -6,7 +6,7 @@ from unittest.mock import MagicMock
from django.test import TestCase, override_settings
from .utils import setup_directories, remove_dirs
from .utils import DirectoriesMixin
from ..consumer import Consumer, ConsumerError
from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
from ..parsers import DocumentParser, ParseError
@@ -408,7 +408,7 @@ def fake_magic_from_file(file, mime=False):
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
class TestConsumer(TestCase):
class TestConsumer(DirectoriesMixin, TestCase):
def make_dummy_parser(self, logging_group):
return DummyParser(logging_group, self.dirs.scratch_dir)
@@ -417,8 +417,7 @@ class TestConsumer(TestCase):
return FaultyParser(logging_group, self.dirs.scratch_dir)
def setUp(self):
self.dirs = setup_directories()
self.addCleanup(remove_dirs, self.dirs)
super(TestConsumer, self).setUp()
patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
m = patcher.start()
@@ -502,26 +501,6 @@ class TestConsumer(TestCase):
self.fail("Should throw exception")
@override_settings(CONSUMPTION_DIR=None)
def testConsumptionDirUnset(self):
    """Consuming with CONSUMPTION_DIR unset must raise a ConsumerError."""
    expected_message = (
        "The CONSUMPTION_DIR settings variable does not appear to be set."
    )
    try:
        self.consumer.try_consume_file(self.get_test_file())
    except ConsumerError as error:
        self.assertEqual(str(error), expected_message)
    else:
        # Reaching this branch means no exception was raised at all.
        self.fail("Should throw exception")
@override_settings(CONSUMPTION_DIR="asd")
def testNoConsumptionDir(self):
    """Consuming with a non-existent CONSUMPTION_DIR must raise a ConsumerError."""
    expected_message = "Consumption directory asd does not exist"
    try:
        self.consumer.try_consume_file(self.get_test_file())
    except ConsumerError as error:
        self.assertEqual(str(error), expected_message)
    else:
        # Reaching this branch means no exception was raised at all.
        self.fail("Should throw exception")
def testDuplicates(self):
self.consumer.try_consume_file(self.get_test_file())

View File

@@ -2,7 +2,7 @@ import logging
import uuid
from unittest import mock
from django.test import TestCase
from django.test import TestCase, override_settings
from ..models import Log
@@ -14,6 +14,7 @@ class TestPaperlessLog(TestCase):
self.logger = logging.getLogger(
"documents.management.commands.document_consumer")
@override_settings(DISABLE_DBHANDLER=False)
def test_that_it_saves_at_all(self):
kw = {"group": uuid.uuid4()}
@@ -38,6 +39,7 @@ class TestPaperlessLog(TestCase):
self.logger.critical("This is a critical message", extra=kw)
self.assertEqual(Log.objects.all().count(), 5)
@override_settings(DISABLE_DBHANDLER=False)
def test_groups(self):
kw1 = {"group": uuid.uuid4()}

View File

@@ -6,11 +6,12 @@ from time import sleep
from unittest import mock
from django.conf import settings
from django.test import TestCase, override_settings
from django.core.management import call_command, CommandError
from django.test import override_settings, TestCase
from documents.consumer import ConsumerError
from documents.management.commands import document_consumer
from documents.tests.utils import setup_directories, remove_dirs
from documents.tests.utils import DirectoriesMixin
class ConsumerThread(Thread):
@@ -32,18 +33,17 @@ def chunked(size, source):
yield source[i:i+size]
class TestConsumer(TestCase):
class TestConsumer(DirectoriesMixin, TestCase):
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
def setUp(self) -> None:
super(TestConsumer, self).setUp()
self.t = None
patcher = mock.patch("documents.management.commands.document_consumer.async_task")
self.task_mock = patcher.start()
self.addCleanup(patcher.stop)
self.dirs = setup_directories()
self.addCleanup(remove_dirs, self.dirs)
def t_start(self):
self.t = ConsumerThread()
self.t.start()
@@ -52,7 +52,12 @@ class TestConsumer(TestCase):
def tearDown(self) -> None:
    """Stop the consumer thread, if one was started, then run base tear-down."""
    consumer_thread = self.t
    if consumer_thread:
        consumer_thread.stop()  # set the stop flag
        consumer_thread.join()  # wait for the consumer to exit.

    super(TestConsumer, self).tearDown()
def wait_for_task_mock_call(self):
n = 0
@@ -193,3 +198,13 @@ class TestConsumer(TestCase):
@override_settings(CONSUMER_POLLING=1)
def test_slow_write_incomplete_polling(self):
self.test_slow_write_incomplete()
@override_settings(CONSUMPTION_DIR="does_not_exist")
def test_consumption_directory_invalid(self):
    """The command must raise CommandError for this CONSUMPTION_DIR value."""
    with self.assertRaises(CommandError):
        call_command('document_consumer', '--oneshot')
@override_settings(CONSUMPTION_DIR="")
def test_consumption_directory_unset(self):
    """The command must raise CommandError when CONSUMPTION_DIR is empty."""
    with self.assertRaises(CommandError):
        call_command('document_consumer', '--oneshot')

View File

@@ -39,3 +39,18 @@ def remove_dirs(dirs):
shutil.rmtree(dirs.data_dir, ignore_errors=True)
shutil.rmtree(dirs.scratch_dir, ignore_errors=True)
shutil.rmtree(dirs.consumption_dir, ignore_errors=True)
class DirectoriesMixin:
    """Test-case mixin that gives every test its own set of directories.

    ``setUp`` stores the result of ``setup_directories()`` on ``self.dirs``
    before delegating to the next ``setUp`` in the MRO. ``tearDown`` runs
    the rest of the tear-down chain first and then passes ``self.dirs`` to
    ``remove_dirs()``.

    List the mixin *before* the test-case base class so that its hooks wrap
    the base class's own ``setUp``/``tearDown``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Set by setUp(); None means no directories have been created.
        self.dirs = None

    def setUp(self) -> None:
        """Create the directories, then continue the ``setUp`` chain."""
        self.dirs = setup_directories()
        super().setUp()

    def tearDown(self) -> None:
        """Continue the ``tearDown`` chain, then remove the directories."""
        try:
            super().tearDown()
        finally:
            # Clean up even when an upstream tearDown() raised, so a failing
            # test does not leave its directories behind. Skip the call when
            # setUp() never ran and there is nothing to remove.
            if self.dirs is not None:
                remove_dirs(self.dirs)

View File

@@ -221,7 +221,9 @@ class SearchView(APIView):
permission_classes = (IsAuthenticated,)
ix = index.open_index()
def __init__(self, *args, **kwargs):
    """Initialise the view and open the search index for this instance."""
    super().__init__(*args, **kwargs)
    # Opened per instance rather than once at class-definition time.
    self.ix = index.open_index()
def add_infos_to_hit(self, r):
doc = Document.objects.get(id=r['id'])
@@ -260,7 +262,9 @@ class SearchAutoCompleteView(APIView):
permission_classes = (IsAuthenticated,)
ix = index.open_index()
def __init__(self, *args, **kwargs):
    """Initialise the view and open the search index for this instance."""
    super().__init__(*args, **kwargs)
    # Opened per instance rather than once at class-definition time.
    self.ix = index.open_index()
def get(self, request, format=None):
if 'term' in request.query_params:

View File

@@ -251,6 +251,8 @@ USE_TZ = True
# Logging #
###############################################################################
DISABLE_DBHANDLER = __get_boolean("PAPERLESS_DISABLE_DBHANDLER")
LOGGING = {
"version": 1,
"disable_existing_loggers": False,

View File

@@ -3,10 +3,9 @@ exclude = migrations, paperless/settings.py, .tox, */tests/*
[tool:pytest]
DJANGO_SETTINGS_MODULE=paperless.settings
addopts = --pythonwarnings=all --cov --cov-report=html
addopts = --pythonwarnings=all --cov --cov-report=html -n auto
env =
PAPERLESS_SECRET=paperless
PAPERLESS_EMAIL_SECRET=paperless
PAPERLESS_DISABLE_DBHANDLER=true
[coverage:run]