mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Merge branch 'dev' into feature-localization
This commit is contained in:
@@ -6,29 +6,21 @@ class DocumentsConfig(AppConfig):
|
||||
name = "documents"
|
||||
|
||||
def ready(self):
|
||||
|
||||
from .signals import document_consumption_started
|
||||
from .signals import document_consumption_finished
|
||||
from .signals.handlers import (
|
||||
add_inbox_tags,
|
||||
run_pre_consume_script,
|
||||
run_post_consume_script,
|
||||
set_log_entry,
|
||||
set_correspondent,
|
||||
set_document_type,
|
||||
set_tags,
|
||||
add_to_index
|
||||
|
||||
)
|
||||
|
||||
document_consumption_started.connect(run_pre_consume_script)
|
||||
|
||||
document_consumption_finished.connect(add_inbox_tags)
|
||||
document_consumption_finished.connect(set_correspondent)
|
||||
document_consumption_finished.connect(set_document_type)
|
||||
document_consumption_finished.connect(set_tags)
|
||||
document_consumption_finished.connect(set_log_entry)
|
||||
document_consumption_finished.connect(add_to_index)
|
||||
document_consumption_finished.connect(run_post_consume_script)
|
||||
|
||||
AppConfig.ready(self)
|
||||
|
@@ -1,7 +1,7 @@
|
||||
import datetime
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
from subprocess import Popen
|
||||
|
||||
import magic
|
||||
from django.conf import settings
|
||||
@@ -9,6 +9,7 @@ from django.db import transaction
|
||||
from django.db.models import Q
|
||||
from django.utils import timezone
|
||||
from filelock import FileLock
|
||||
from rest_framework.reverse import reverse
|
||||
|
||||
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
|
||||
from .file_handling import create_source_path_directory, \
|
||||
@@ -66,6 +67,39 @@ class Consumer(LoggingMixin):
|
||||
os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
|
||||
os.makedirs(settings.ARCHIVE_DIR, exist_ok=True)
|
||||
|
||||
def run_pre_consume_script(self):
|
||||
if not settings.PRE_CONSUME_SCRIPT:
|
||||
return
|
||||
|
||||
try:
|
||||
Popen((settings.PRE_CONSUME_SCRIPT, self.path)).wait()
|
||||
except Exception as e:
|
||||
raise ConsumerError(
|
||||
f"Error while executing pre-consume script: {e}"
|
||||
)
|
||||
|
||||
def run_post_consume_script(self, document):
|
||||
if not settings.POST_CONSUME_SCRIPT:
|
||||
return
|
||||
|
||||
try:
|
||||
Popen((
|
||||
settings.POST_CONSUME_SCRIPT,
|
||||
str(document.pk),
|
||||
document.get_public_filename(),
|
||||
os.path.normpath(document.source_path),
|
||||
os.path.normpath(document.thumbnail_path),
|
||||
reverse("document-download", kwargs={"pk": document.pk}),
|
||||
reverse("document-thumb", kwargs={"pk": document.pk}),
|
||||
str(document.correspondent),
|
||||
str(",".join(document.tags.all().values_list(
|
||||
"name", flat=True)))
|
||||
)).wait()
|
||||
except Exception as e:
|
||||
raise ConsumerError(
|
||||
f"Error while executing pre-consume script: {e}"
|
||||
)
|
||||
|
||||
def try_consume_file(self,
|
||||
path,
|
||||
override_filename=None,
|
||||
@@ -119,6 +153,8 @@ class Consumer(LoggingMixin):
|
||||
logging_group=self.logging_group
|
||||
)
|
||||
|
||||
self.run_pre_consume_script()
|
||||
|
||||
# This doesn't parse the document yet, but gives us a parser.
|
||||
|
||||
document_parser = parser_class(self.logging_group)
|
||||
@@ -130,7 +166,7 @@ class Consumer(LoggingMixin):
|
||||
|
||||
try:
|
||||
self.log("debug", "Parsing {}...".format(self.filename))
|
||||
document_parser.parse(self.path, mime_type)
|
||||
document_parser.parse(self.path, mime_type, self.filename)
|
||||
|
||||
self.log("debug", f"Generating thumbnail for {self.filename}...")
|
||||
thumbnail = document_parser.get_optimised_thumbnail(
|
||||
@@ -215,6 +251,9 @@ class Consumer(LoggingMixin):
|
||||
# Delete the file only if it was successfully consumed
|
||||
self.log("debug", "Deleting file {}".format(self.path))
|
||||
os.unlink(self.path)
|
||||
|
||||
self.run_post_consume_script(document)
|
||||
|
||||
except Exception as e:
|
||||
self.log(
|
||||
"error",
|
||||
|
18
src/documents/migrations/1010_auto_20210101_2159.py
Normal file
18
src/documents/migrations/1010_auto_20210101_2159.py
Normal file
@@ -0,0 +1,18 @@
|
||||
# Generated by Django 3.1.4 on 2021-01-01 21:59
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '1009_auto_20201216_2005'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='savedviewfilterrule',
|
||||
name='value',
|
||||
field=models.CharField(blank=True, max_length=128, null=True),
|
||||
),
|
||||
]
|
@@ -404,7 +404,9 @@ class SavedViewFilterRule(models.Model):
|
||||
|
||||
value = models.CharField(
|
||||
_("value"),
|
||||
max_length=128)
|
||||
max_length=128,
|
||||
blank=True,
|
||||
null=True)
|
||||
|
||||
class Meta:
|
||||
verbose_name = _("filter rule")
|
||||
|
@@ -144,6 +144,52 @@ def run_convert(input_file,
|
||||
raise ParseError("Convert failed at {}".format(args))
|
||||
|
||||
|
||||
def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None):
|
||||
"""
|
||||
The thumbnail of a PDF is just a 500px wide image of the first page.
|
||||
"""
|
||||
out_path = os.path.join(temp_dir, "convert.png")
|
||||
|
||||
# Run convert to get a decent thumbnail
|
||||
try:
|
||||
run_convert(density=300,
|
||||
scale="500x5000>",
|
||||
alpha="remove",
|
||||
strip=True,
|
||||
trim=False,
|
||||
auto_orient=True,
|
||||
input_file="{}[0]".format(in_path),
|
||||
output_file=out_path,
|
||||
logging_group=logging_group)
|
||||
except ParseError:
|
||||
# if convert fails, fall back to extracting
|
||||
# the first PDF page as a PNG using Ghostscript
|
||||
logger.warning(
|
||||
"Thumbnail generation with ImageMagick failed, falling back "
|
||||
"to ghostscript. Check your /etc/ImageMagick-x/policy.xml!",
|
||||
extra={'group': logging_group}
|
||||
)
|
||||
gs_out_path = os.path.join(temp_dir, "gs_out.png")
|
||||
cmd = [settings.GS_BINARY,
|
||||
"-q",
|
||||
"-sDEVICE=pngalpha",
|
||||
"-o", gs_out_path,
|
||||
in_path]
|
||||
if not subprocess.Popen(cmd).wait() == 0:
|
||||
raise ParseError("Thumbnail (gs) failed at {}".format(cmd))
|
||||
# then run convert on the output from gs
|
||||
run_convert(density=300,
|
||||
scale="500x5000>",
|
||||
alpha="remove",
|
||||
strip=True,
|
||||
trim=False,
|
||||
auto_orient=True,
|
||||
input_file=gs_out_path,
|
||||
output_file=out_path,
|
||||
logging_group=logging_group)
|
||||
|
||||
return out_path
|
||||
|
||||
def parse_date(filename, text):
|
||||
"""
|
||||
Returns the date of the document.
|
||||
@@ -221,7 +267,7 @@ class DocumentParser(LoggingMixin):
|
||||
def extract_metadata(self, document_path, mime_type):
|
||||
return []
|
||||
|
||||
def parse(self, document_path, mime_type):
|
||||
def parse(self, document_path, mime_type, file_name=None):
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_archive_path(self):
|
||||
|
@@ -11,7 +11,6 @@ from django.db.models import Q
|
||||
from django.dispatch import receiver
|
||||
from django.utils import timezone
|
||||
from filelock import FileLock
|
||||
from rest_framework.reverse import reverse
|
||||
|
||||
from .. import index, matching
|
||||
from ..file_handling import delete_empty_directories, \
|
||||
@@ -147,32 +146,6 @@ def set_tags(sender,
|
||||
document.tags.add(*relevant_tags)
|
||||
|
||||
|
||||
def run_pre_consume_script(sender, filename, **kwargs):
|
||||
|
||||
if not settings.PRE_CONSUME_SCRIPT:
|
||||
return
|
||||
|
||||
Popen((settings.PRE_CONSUME_SCRIPT, filename)).wait()
|
||||
|
||||
|
||||
def run_post_consume_script(sender, document, **kwargs):
|
||||
|
||||
if not settings.POST_CONSUME_SCRIPT:
|
||||
return
|
||||
|
||||
Popen((
|
||||
settings.POST_CONSUME_SCRIPT,
|
||||
str(document.pk),
|
||||
document.get_public_filename(),
|
||||
os.path.normpath(document.source_path),
|
||||
os.path.normpath(document.thumbnail_path),
|
||||
reverse("document-download", kwargs={"pk": document.pk}),
|
||||
reverse("document-thumb", kwargs={"pk": document.pk}),
|
||||
str(document.correspondent),
|
||||
str(",".join(document.tags.all().values_list("name", flat=True)))
|
||||
)).wait()
|
||||
|
||||
|
||||
@receiver(models.signals.post_delete, sender=Document)
|
||||
def cleanup_document_deletion(sender, instance, using, **kwargs):
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
|
@@ -177,7 +177,7 @@ class DummyParser(DocumentParser):
|
||||
def get_optimised_thumbnail(self, document_path, mime_type):
|
||||
return self.fake_thumb
|
||||
|
||||
def parse(self, document_path, mime_type):
|
||||
def parse(self, document_path, mime_type, file_name=None):
|
||||
self.text = "The Text"
|
||||
|
||||
|
||||
@@ -194,7 +194,7 @@ class FaultyParser(DocumentParser):
|
||||
def get_optimised_thumbnail(self, document_path, mime_type):
|
||||
return self.fake_thumb
|
||||
|
||||
def parse(self, document_path, mime_type):
|
||||
def parse(self, document_path, mime_type, file_name=None):
|
||||
raise ParseError("Does not compute.")
|
||||
|
||||
|
||||
@@ -466,3 +466,53 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
self.assertTrue(os.path.isfile(dst))
|
||||
self.assertRaises(ConsumerError, self.consumer.try_consume_file, dst)
|
||||
self.assertTrue(os.path.isfile(dst))
|
||||
|
||||
|
||||
class PostConsumeTestCase(TestCase):
|
||||
|
||||
@mock.patch("documents.signals.handlers.Popen")
|
||||
@override_settings(POST_CONSUME_SCRIPT=None)
|
||||
def test_no_post_consume_script(self, m):
|
||||
doc = Document.objects.create(title="Test", mime_type="application/pdf")
|
||||
tag1 = Tag.objects.create(name="a")
|
||||
tag2 = Tag.objects.create(name="b")
|
||||
doc.tags.add(tag1)
|
||||
doc.tags.add(tag2)
|
||||
|
||||
Consumer().run_post_consume_script(doc)
|
||||
|
||||
m.assert_not_called()
|
||||
|
||||
@mock.patch("documents.signals.handlers.Popen")
|
||||
@override_settings(POST_CONSUME_SCRIPT="script")
|
||||
def test_post_consume_script_simple(self, m):
|
||||
doc = Document.objects.create(title="Test", mime_type="application/pdf")
|
||||
|
||||
Consumer().run_post_consume_script(doc)
|
||||
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.signals.handlers.Popen")
|
||||
@override_settings(POST_CONSUME_SCRIPT="script")
|
||||
def test_post_consume_script_with_correspondent(self, m):
|
||||
c = Correspondent.objects.create(name="my_bank")
|
||||
doc = Document.objects.create(title="Test", mime_type="application/pdf", correspondent=c)
|
||||
tag1 = Tag.objects.create(name="a")
|
||||
tag2 = Tag.objects.create(name="b")
|
||||
doc.tags.add(tag1)
|
||||
doc.tags.add(tag2)
|
||||
|
||||
Consumer().run_post_consume_script(doc)
|
||||
|
||||
m.assert_called_once()
|
||||
|
||||
args, kwargs = m.call_args
|
||||
|
||||
command = args[0]
|
||||
|
||||
self.assertEqual(command[0], "script")
|
||||
self.assertEqual(command[1], str(doc.pk))
|
||||
self.assertEqual(command[5], f"/api/documents/{doc.pk}/download/")
|
||||
self.assertEqual(command[6], f"/api/documents/{doc.pk}/thumb/")
|
||||
self.assertEqual(command[7], "my_bank")
|
||||
self.assertCountEqual(command[8].split(","), ["a", "b"])
|
||||
|
@@ -1,56 +0,0 @@
|
||||
from unittest import mock
|
||||
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from documents.models import Document, Tag, Correspondent
|
||||
from documents.signals.handlers import run_post_consume_script
|
||||
|
||||
|
||||
class PostConsumeTestCase(TestCase):
|
||||
|
||||
@mock.patch("documents.signals.handlers.Popen")
|
||||
@override_settings(POST_CONSUME_SCRIPT=None)
|
||||
def test_no_post_consume_script(self, m):
|
||||
doc = Document.objects.create(title="Test", mime_type="application/pdf")
|
||||
tag1 = Tag.objects.create(name="a")
|
||||
tag2 = Tag.objects.create(name="b")
|
||||
doc.tags.add(tag1)
|
||||
doc.tags.add(tag2)
|
||||
|
||||
run_post_consume_script(None, doc)
|
||||
|
||||
m.assert_not_called()
|
||||
|
||||
@mock.patch("documents.signals.handlers.Popen")
|
||||
@override_settings(POST_CONSUME_SCRIPT="script")
|
||||
def test_post_consume_script_simple(self, m):
|
||||
doc = Document.objects.create(title="Test", mime_type="application/pdf")
|
||||
|
||||
run_post_consume_script(None, doc)
|
||||
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.signals.handlers.Popen")
|
||||
@override_settings(POST_CONSUME_SCRIPT="script")
|
||||
def test_post_consume_script_with_correspondent(self, m):
|
||||
c = Correspondent.objects.create(name="my_bank")
|
||||
doc = Document.objects.create(title="Test", mime_type="application/pdf", correspondent=c)
|
||||
tag1 = Tag.objects.create(name="a")
|
||||
tag2 = Tag.objects.create(name="b")
|
||||
doc.tags.add(tag1)
|
||||
doc.tags.add(tag2)
|
||||
|
||||
run_post_consume_script(None, doc)
|
||||
|
||||
m.assert_called_once()
|
||||
|
||||
args, kwargs = m.call_args
|
||||
|
||||
command = args[0]
|
||||
|
||||
self.assertEqual(command[0], "script")
|
||||
self.assertEqual(command[1], str(doc.pk))
|
||||
self.assertEqual(command[5], f"/api/documents/{doc.pk}/download/")
|
||||
self.assertEqual(command[6], f"/api/documents/{doc.pk}/thumb/")
|
||||
self.assertEqual(command[7], "my_bank")
|
||||
self.assertCountEqual(command[8].split(","), ["a", "b"])
|
Reference in New Issue
Block a user