mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-28 18:24:38 -05:00
Merge branch 'dev' into feature-bulk-edit
This commit is contained in:
@@ -247,7 +247,6 @@ class Consumer(LoggingMixin):
|
||||
|
||||
with open(self.path, "rb") as f:
|
||||
document = Document.objects.create(
|
||||
correspondent=file_info.correspondent,
|
||||
title=(self.override_title or file_info.title)[:127],
|
||||
content=text,
|
||||
mime_type=mime_type,
|
||||
@@ -257,12 +256,6 @@ class Consumer(LoggingMixin):
|
||||
storage_type=storage_type
|
||||
)
|
||||
|
||||
relevant_tags = set(file_info.tags)
|
||||
if relevant_tags:
|
||||
tag_names = ", ".join([t.name for t in relevant_tags])
|
||||
self.log("debug", "Tagging with {}".format(tag_names))
|
||||
document.tags.add(*relevant_tags)
|
||||
|
||||
self.apply_overrides(document)
|
||||
|
||||
document.save()
|
||||
|
@@ -3,7 +3,7 @@ import os
|
||||
from contextlib import contextmanager
|
||||
|
||||
from django.conf import settings
|
||||
from whoosh import highlight
|
||||
from whoosh import highlight, classify, query
|
||||
from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME
|
||||
from whoosh.highlight import Formatter, get_text
|
||||
from whoosh.index import create_in, exists_in, open_dir
|
||||
@@ -20,32 +20,37 @@ class JsonFormatter(Formatter):
|
||||
self.seen = {}
|
||||
|
||||
def format_token(self, text, token, replace=False):
|
||||
seen = self.seen
|
||||
ttext = self._text(get_text(text, token, replace))
|
||||
if ttext in seen:
|
||||
termnum = seen[ttext]
|
||||
else:
|
||||
termnum = len(seen)
|
||||
seen[ttext] = termnum
|
||||
|
||||
return {'text': ttext, 'term': termnum}
|
||||
return {'text': ttext, 'highlight': 'true'}
|
||||
|
||||
def format_fragment(self, fragment, replace=False):
|
||||
output = []
|
||||
index = fragment.startchar
|
||||
text = fragment.text
|
||||
|
||||
amend_token = None
|
||||
for t in fragment.matches:
|
||||
if t.startchar is None:
|
||||
continue
|
||||
if t.startchar < index:
|
||||
continue
|
||||
if t.startchar > index:
|
||||
output.append({'text': text[index:t.startchar]})
|
||||
output.append(self.format_token(text, t, replace))
|
||||
text_inbetween = text[index:t.startchar]
|
||||
if amend_token and t.startchar - index < 10:
|
||||
amend_token['text'] += text_inbetween
|
||||
else:
|
||||
output.append({'text': text_inbetween,
|
||||
'highlight': False})
|
||||
amend_token = None
|
||||
token = self.format_token(text, t, replace)
|
||||
if amend_token:
|
||||
amend_token['text'] += token['text']
|
||||
else:
|
||||
output.append(token)
|
||||
amend_token = token
|
||||
index = t.endchar
|
||||
if index < fragment.endchar:
|
||||
output.append({'text': text[index:fragment.endchar]})
|
||||
output.append({'text': text[index:fragment.endchar],
|
||||
'highlight': False})
|
||||
return output
|
||||
|
||||
def format(self, fragments, replace=False):
|
||||
@@ -120,22 +125,42 @@ def remove_document_from_index(document):
|
||||
|
||||
|
||||
@contextmanager
|
||||
def query_page(ix, querystring, page):
|
||||
def query_page(ix, page, querystring, more_like_doc_id, more_like_doc_content):
|
||||
searcher = ix.searcher()
|
||||
try:
|
||||
qp = MultifieldParser(
|
||||
["content", "title", "correspondent", "tag", "type"],
|
||||
ix.schema)
|
||||
qp.add_plugin(DateParserPlugin())
|
||||
if querystring:
|
||||
qp = MultifieldParser(
|
||||
["content", "title", "correspondent", "tag", "type"],
|
||||
ix.schema)
|
||||
qp.add_plugin(DateParserPlugin())
|
||||
str_q = qp.parse(querystring)
|
||||
corrected = searcher.correct_query(str_q, querystring)
|
||||
else:
|
||||
str_q = None
|
||||
corrected = None
|
||||
|
||||
if more_like_doc_id:
|
||||
docnum = searcher.document_number(id=more_like_doc_id)
|
||||
kts = searcher.key_terms_from_text(
|
||||
'content', more_like_doc_content, numterms=20,
|
||||
model=classify.Bo1Model, normalize=False)
|
||||
more_like_q = query.Or(
|
||||
[query.Term('content', word, boost=weight)
|
||||
for word, weight in kts])
|
||||
result_page = searcher.search_page(
|
||||
more_like_q, page, filter=str_q, mask={docnum})
|
||||
elif str_q:
|
||||
result_page = searcher.search_page(str_q, page)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Either querystring or more_like_doc_id is required."
|
||||
)
|
||||
|
||||
q = qp.parse(querystring)
|
||||
result_page = searcher.search_page(q, page)
|
||||
result_page.results.fragmenter = highlight.ContextFragmenter(
|
||||
surround=50)
|
||||
result_page.results.formatter = JsonFormatter()
|
||||
|
||||
corrected = searcher.correct_query(q, querystring)
|
||||
if corrected.query != q:
|
||||
if corrected and corrected.query != str_q:
|
||||
corrected_query = corrected.string
|
||||
else:
|
||||
corrected_query = None
|
||||
|
@@ -1,18 +1,29 @@
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
from contextlib import contextmanager
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management import call_command
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from django.db.models.signals import post_save, m2m_changed
|
||||
from filelock import FileLock
|
||||
|
||||
from documents.models import Document
|
||||
from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
|
||||
EXPORTER_ARCHIVE_NAME
|
||||
from ...file_handling import create_source_path_directory, \
|
||||
generate_unique_filename
|
||||
from ...file_handling import create_source_path_directory
|
||||
from ...mixins import Renderable
|
||||
from ...signals.handlers import update_filename_and_move_files
|
||||
|
||||
|
||||
@contextmanager
|
||||
def disable_signal(sig, receiver, sender):
|
||||
try:
|
||||
sig.disconnect(receiver=receiver, sender=sender)
|
||||
yield
|
||||
finally:
|
||||
sig.connect(receiver=receiver, sender=sender)
|
||||
|
||||
|
||||
class Command(Renderable, BaseCommand):
|
||||
@@ -47,11 +58,16 @@ class Command(Renderable, BaseCommand):
|
||||
self.manifest = json.load(f)
|
||||
|
||||
self._check_manifest()
|
||||
with disable_signal(post_save,
|
||||
receiver=update_filename_and_move_files,
|
||||
sender=Document):
|
||||
with disable_signal(m2m_changed,
|
||||
receiver=update_filename_and_move_files,
|
||||
sender=Document.tags.through):
|
||||
# Fill up the database with whatever is in the manifest
|
||||
call_command("loaddata", manifest_path)
|
||||
|
||||
# Fill up the database with whatever is in the manifest
|
||||
call_command("loaddata", manifest_path)
|
||||
|
||||
self._import_files_from_manifest()
|
||||
self._import_files_from_manifest()
|
||||
|
||||
@staticmethod
|
||||
def _check_manifest_exists(path):
|
||||
@@ -117,9 +133,6 @@ class Command(Renderable, BaseCommand):
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
document.filename = generate_unique_filename(
|
||||
document, settings.ORIGINALS_DIR)
|
||||
|
||||
if os.path.isfile(document.source_path):
|
||||
raise FileExistsError(document.source_path)
|
||||
|
||||
|
@@ -11,6 +11,7 @@ from paperless.db import GnuPG
|
||||
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
|
||||
STORAGE_TYPE_GPG = "gpg"
|
||||
|
||||
|
||||
def source_path(self):
|
||||
if self.filename:
|
||||
fname = str(self.filename)
|
||||
|
@@ -357,54 +357,12 @@ class SavedViewFilterRule(models.Model):
|
||||
# TODO: why is this in the models file?
|
||||
class FileInfo:
|
||||
|
||||
# This epic regex *almost* worked for our needs, so I'm keeping it here for
|
||||
# posterity, in the hopes that we might find a way to make it work one day.
|
||||
ALMOST_REGEX = re.compile(
|
||||
r"^((?P<date>\d\d\d\d\d\d\d\d\d\d\d\d\d\dZ){separator})?"
|
||||
r"((?P<correspondent>{non_separated_word}+){separator})??"
|
||||
r"(?P<title>{non_separated_word}+)"
|
||||
r"({separator}(?P<tags>[a-z,0-9-]+))?"
|
||||
r"\.(?P<extension>[a-zA-Z.-]+)$".format(
|
||||
separator=r"\s+-\s+",
|
||||
non_separated_word=r"([\w,. ]|([^\s]-))"
|
||||
)
|
||||
)
|
||||
REGEXES = OrderedDict([
|
||||
("created-correspondent-title-tags", re.compile(
|
||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||
r"(?P<correspondent>.*) - "
|
||||
r"(?P<title>.*) - "
|
||||
r"(?P<tags>[a-z0-9\-,]*)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("created-title-tags", re.compile(
|
||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||
r"(?P<title>.*) - "
|
||||
r"(?P<tags>[a-z0-9\-,]*)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("created-correspondent-title", re.compile(
|
||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||
r"(?P<correspondent>.*) - "
|
||||
r"(?P<title>.*)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("created-title", re.compile(
|
||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||
r"(?P<title>.*)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("correspondent-title-tags", re.compile(
|
||||
r"(?P<correspondent>.*) - "
|
||||
r"(?P<title>.*) - "
|
||||
r"(?P<tags>[a-z0-9\-,]*)$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("correspondent-title", re.compile(
|
||||
r"(?P<correspondent>.*) - "
|
||||
r"(?P<title>.*)?$",
|
||||
flags=re.IGNORECASE
|
||||
)),
|
||||
("title", re.compile(
|
||||
r"(?P<title>.*)$",
|
||||
flags=re.IGNORECASE
|
||||
@@ -427,23 +385,10 @@ class FileInfo:
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def _get_correspondent(cls, name):
|
||||
if not name:
|
||||
return None
|
||||
return Correspondent.objects.get_or_create(name=name)[0]
|
||||
|
||||
@classmethod
|
||||
def _get_title(cls, title):
|
||||
return title
|
||||
|
||||
@classmethod
|
||||
def _get_tags(cls, tags):
|
||||
r = []
|
||||
for t in tags.split(","):
|
||||
r.append(Tag.objects.get_or_create(name=t)[0])
|
||||
return tuple(r)
|
||||
|
||||
@classmethod
|
||||
def _mangle_property(cls, properties, name):
|
||||
if name in properties:
|
||||
@@ -453,15 +398,6 @@ class FileInfo:
|
||||
|
||||
@classmethod
|
||||
def from_filename(cls, filename):
|
||||
"""
|
||||
We use a crude naming convention to make handling the correspondent,
|
||||
title, and tags easier:
|
||||
"<date> - <correspondent> - <title> - <tags>"
|
||||
"<correspondent> - <title> - <tags>"
|
||||
"<correspondent> - <title>"
|
||||
"<title>"
|
||||
"""
|
||||
|
||||
# Mutate filename in-place before parsing its components
|
||||
# by applying at most one of the configured transformations.
|
||||
for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS:
|
||||
@@ -492,7 +428,5 @@ class FileInfo:
|
||||
if m:
|
||||
properties = m.groupdict()
|
||||
cls._mangle_property(properties, "created")
|
||||
cls._mangle_property(properties, "correspondent")
|
||||
cls._mangle_property(properties, "title")
|
||||
cls._mangle_property(properties, "tags")
|
||||
return cls(**properties)
|
||||
|
@@ -5,9 +5,11 @@
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>PaperlessUi</title>
|
||||
<title>Paperless-ng</title>
|
||||
<base href="/">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta name="username" content="{{username}}">
|
||||
<meta name="full_name" content="{{full_name}}">
|
||||
<meta name="cookie_prefix" content="{{cookie_prefix}}">
|
||||
<link rel="icon" type="image/x-icon" href="favicon.ico">
|
||||
<link rel="stylesheet" href="{% static 'frontend/styles.css' %}"></head>
|
||||
|
57
src/documents/tests/test_admin.py
Normal file
57
src/documents/tests/test_admin.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from unittest import mock
|
||||
|
||||
from django.contrib.admin.sites import AdminSite
|
||||
from django.test import TestCase
|
||||
from django.utils import timezone
|
||||
|
||||
from documents.admin import DocumentAdmin
|
||||
from documents.models import Document, Tag
|
||||
|
||||
|
||||
class TestDocumentAdmin(TestCase):
|
||||
|
||||
def setUp(self) -> None:
|
||||
self.doc_admin = DocumentAdmin(model=Document, admin_site=AdminSite())
|
||||
|
||||
@mock.patch("documents.admin.index.add_or_update_document")
|
||||
def test_save_model(self, m):
|
||||
doc = Document.objects.create(title="test")
|
||||
doc.title = "new title"
|
||||
self.doc_admin.save_model(None, doc, None, None)
|
||||
self.assertEqual(Document.objects.get(id=doc.id).title, "new title")
|
||||
m.assert_called_once()
|
||||
|
||||
def test_tags(self):
|
||||
doc = Document.objects.create(title="test")
|
||||
doc.tags.create(name="t1")
|
||||
doc.tags.create(name="t2")
|
||||
|
||||
self.assertEqual(self.doc_admin.tags_(doc), "<span >t1, </span><span >t2, </span>")
|
||||
|
||||
def test_tags_empty(self):
|
||||
doc = Document.objects.create(title="test")
|
||||
|
||||
self.assertEqual(self.doc_admin.tags_(doc), "")
|
||||
|
||||
@mock.patch("documents.admin.index.remove_document")
|
||||
def test_delete_model(self, m):
|
||||
doc = Document.objects.create(title="test")
|
||||
self.doc_admin.delete_model(None, doc)
|
||||
self.assertRaises(Document.DoesNotExist, Document.objects.get, id=doc.id)
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.admin.index.remove_document")
|
||||
def test_delete_queryset(self, m):
|
||||
for i in range(42):
|
||||
Document.objects.create(title="Many documents with the same title", checksum=f"{i:02}")
|
||||
|
||||
self.assertEqual(Document.objects.count(), 42)
|
||||
|
||||
self.doc_admin.delete_queryset(None, Document.objects.all())
|
||||
|
||||
self.assertEqual(m.call_count, 42)
|
||||
self.assertEqual(Document.objects.count(), 0)
|
||||
|
||||
def test_created(self):
|
||||
doc = Document.objects.create(title="test", created=timezone.datetime(2020, 4, 12))
|
||||
self.assertEqual(self.doc_admin.created_(doc), "2020-04-12")
|
@@ -352,6 +352,25 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
self.assertEqual(correction, None)
|
||||
|
||||
def test_search_more_like(self):
|
||||
d1=Document.objects.create(title="invoice", content="the thing i bought at a shop and paid with bank account", checksum="A", pk=1)
|
||||
d2=Document.objects.create(title="bank statement 1", content="things i paid for in august", pk=2, checksum="B")
|
||||
d3=Document.objects.create(title="bank statement 3", content="things i paid for in september", pk=3, checksum="C")
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
|
||||
response = self.client.get(f"/api/search/?more_like={d2.id}")
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
|
||||
results = response.data['results']
|
||||
|
||||
self.assertEqual(len(results), 2)
|
||||
self.assertEqual(results[0]['id'], d3.id)
|
||||
self.assertEqual(results[1]['id'], d1.id)
|
||||
|
||||
def test_statistics(self):
|
||||
|
||||
doc1 = Document.objects.create(title="none1", checksum="A")
|
||||
|
@@ -29,81 +29,6 @@ class TestAttributes(TestCase):
|
||||
|
||||
self.assertEqual(tuple([t.name for t in file_info.tags]), tags, filename)
|
||||
|
||||
def test_guess_attributes_from_name0(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Sender - Title.pdf", "Sender", "Title", ())
|
||||
|
||||
def test_guess_attributes_from_name1(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Spaced Sender - Title.pdf", "Spaced Sender", "Title", ())
|
||||
|
||||
def test_guess_attributes_from_name2(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Sender - Spaced Title.pdf", "Sender", "Spaced Title", ())
|
||||
|
||||
def test_guess_attributes_from_name3(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Dashed-Sender - Title.pdf", "Dashed-Sender", "Title", ())
|
||||
|
||||
def test_guess_attributes_from_name4(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Sender - Dashed-Title.pdf", "Sender", "Dashed-Title", ())
|
||||
|
||||
def test_guess_attributes_from_name5(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Sender - Title - tag1,tag2,tag3.pdf",
|
||||
"Sender",
|
||||
"Title",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name6(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Spaced Sender - Title - tag1,tag2,tag3.pdf",
|
||||
"Spaced Sender",
|
||||
"Title",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name7(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Sender - Spaced Title - tag1,tag2,tag3.pdf",
|
||||
"Sender",
|
||||
"Spaced Title",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name8(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Dashed-Sender - Title - tag1,tag2,tag3.pdf",
|
||||
"Dashed-Sender",
|
||||
"Title",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name9(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Sender - Dashed-Title - tag1,tag2,tag3.pdf",
|
||||
"Sender",
|
||||
"Dashed-Title",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name10(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
"Σενδερ - Τιτλε - tag1,tag2,tag3.pdf",
|
||||
"Σενδερ",
|
||||
"Τιτλε",
|
||||
self.TAGS
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name_when_correspondent_empty(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
' - weird empty correspondent but should not break.pdf',
|
||||
None,
|
||||
'weird empty correspondent but should not break',
|
||||
()
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name_when_title_starts_with_dash(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
@@ -121,28 +46,6 @@ class TestAttributes(TestCase):
|
||||
()
|
||||
)
|
||||
|
||||
def test_guess_attributes_from_name_when_title_is_empty(self):
|
||||
self._test_guess_attributes_from_name(
|
||||
'weird correspondent but should not break - .pdf',
|
||||
'weird correspondent but should not break',
|
||||
'',
|
||||
()
|
||||
)
|
||||
|
||||
def test_case_insensitive_tag_creation(self):
|
||||
"""
|
||||
Tags should be detected and created as lower case.
|
||||
:return:
|
||||
"""
|
||||
|
||||
filename = "Title - Correspondent - tAg1,TAG2.pdf"
|
||||
self.assertEqual(len(FileInfo.from_filename(filename).tags), 2)
|
||||
|
||||
path = "Title - Correspondent - tag1,tag2.pdf"
|
||||
self.assertEqual(len(FileInfo.from_filename(filename).tags), 2)
|
||||
|
||||
self.assertEqual(Tag.objects.all().count(), 2)
|
||||
|
||||
|
||||
class TestFieldPermutations(TestCase):
|
||||
|
||||
@@ -199,69 +102,7 @@ class TestFieldPermutations(TestCase):
|
||||
filename = template.format(**spec)
|
||||
self._test_guessed_attributes(filename, **spec)
|
||||
|
||||
def test_title_and_correspondent(self):
|
||||
template = '{correspondent} - {title}.pdf'
|
||||
for correspondent in self.valid_correspondents:
|
||||
for title in self.valid_titles:
|
||||
spec = dict(correspondent=correspondent, title=title)
|
||||
filename = template.format(**spec)
|
||||
self._test_guessed_attributes(filename, **spec)
|
||||
|
||||
def test_title_and_correspondent_and_tags(self):
|
||||
template = '{correspondent} - {title} - {tags}.pdf'
|
||||
for correspondent in self.valid_correspondents:
|
||||
for title in self.valid_titles:
|
||||
for tags in self.valid_tags:
|
||||
spec = dict(correspondent=correspondent, title=title,
|
||||
tags=tags)
|
||||
filename = template.format(**spec)
|
||||
self._test_guessed_attributes(filename, **spec)
|
||||
|
||||
def test_created_and_correspondent_and_title_and_tags(self):
|
||||
|
||||
template = (
|
||||
"{created} - "
|
||||
"{correspondent} - "
|
||||
"{title} - "
|
||||
"{tags}.pdf"
|
||||
)
|
||||
|
||||
for created in self.valid_dates:
|
||||
for correspondent in self.valid_correspondents:
|
||||
for title in self.valid_titles:
|
||||
for tags in self.valid_tags:
|
||||
spec = {
|
||||
"created": created,
|
||||
"correspondent": correspondent,
|
||||
"title": title,
|
||||
"tags": tags,
|
||||
}
|
||||
self._test_guessed_attributes(
|
||||
template.format(**spec), **spec)
|
||||
|
||||
def test_created_and_correspondent_and_title(self):
|
||||
|
||||
template = "{created} - {correspondent} - {title}.pdf"
|
||||
|
||||
for created in self.valid_dates:
|
||||
for correspondent in self.valid_correspondents:
|
||||
for title in self.valid_titles:
|
||||
|
||||
# Skip cases where title looks like a tag as we can't
|
||||
# accommodate such cases.
|
||||
if title.lower() == title:
|
||||
continue
|
||||
|
||||
spec = {
|
||||
"created": created,
|
||||
"correspondent": correspondent,
|
||||
"title": title
|
||||
}
|
||||
self._test_guessed_attributes(
|
||||
template.format(**spec), **spec)
|
||||
|
||||
def test_created_and_title(self):
|
||||
|
||||
template = "{created} - {title}.pdf"
|
||||
|
||||
for created in self.valid_dates:
|
||||
@@ -273,21 +114,6 @@ class TestFieldPermutations(TestCase):
|
||||
self._test_guessed_attributes(
|
||||
template.format(**spec), **spec)
|
||||
|
||||
def test_created_and_title_and_tags(self):
|
||||
|
||||
template = "{created} - {title} - {tags}.pdf"
|
||||
|
||||
for created in self.valid_dates:
|
||||
for title in self.valid_titles:
|
||||
for tags in self.valid_tags:
|
||||
spec = {
|
||||
"created": created,
|
||||
"title": title,
|
||||
"tags": tags
|
||||
}
|
||||
self._test_guessed_attributes(
|
||||
template.format(**spec), **spec)
|
||||
|
||||
def test_invalid_date_format(self):
|
||||
info = FileInfo.from_filename("06112017Z - title.pdf")
|
||||
self.assertEqual(info.title, "title")
|
||||
@@ -336,32 +162,6 @@ class TestFieldPermutations(TestCase):
|
||||
info = FileInfo.from_filename(filename)
|
||||
self.assertEqual(info.title, "anotherall")
|
||||
|
||||
# Complex transformation without date in replacement string
|
||||
with self.settings(
|
||||
FILENAME_PARSE_TRANSFORMS=[(exact_patt, repl1)]):
|
||||
info = FileInfo.from_filename(filename)
|
||||
self.assertEqual(info.title, "0001")
|
||||
self.assertEqual(len(info.tags), 2)
|
||||
self.assertEqual(info.tags[0].name, "tag1")
|
||||
self.assertEqual(info.tags[1].name, "tag2")
|
||||
self.assertIsNone(info.created)
|
||||
|
||||
# Complex transformation with date in replacement string
|
||||
with self.settings(
|
||||
FILENAME_PARSE_TRANSFORMS=[
|
||||
(none_patt, "none.gif"),
|
||||
(exact_patt, repl2), # <-- matches
|
||||
(exact_patt, repl1),
|
||||
(all_patt, "all.gif")]):
|
||||
info = FileInfo.from_filename(filename)
|
||||
self.assertEqual(info.title, "0001")
|
||||
self.assertEqual(len(info.tags), 2)
|
||||
self.assertEqual(info.tags[0].name, "tag1")
|
||||
self.assertEqual(info.tags[1].name, "tag2")
|
||||
self.assertEqual(info.created.year, 2019)
|
||||
self.assertEqual(info.created.month, 9)
|
||||
self.assertEqual(info.created.day, 8)
|
||||
|
||||
|
||||
class DummyParser(DocumentParser):
|
||||
|
||||
@@ -476,15 +276,13 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
|
||||
def testOverrideFilename(self):
|
||||
filename = self.get_test_file()
|
||||
override_filename = "My Bank - Statement for November.pdf"
|
||||
override_filename = "Statement for November.pdf"
|
||||
|
||||
document = self.consumer.try_consume_file(filename, override_filename=override_filename)
|
||||
|
||||
self.assertEqual(document.correspondent.name, "My Bank")
|
||||
self.assertEqual(document.title, "Statement for November")
|
||||
|
||||
def testOverrideTitle(self):
|
||||
|
||||
document = self.consumer.try_consume_file(self.get_test_file(), override_title="Override Title")
|
||||
self.assertEqual(document.title, "Override Title")
|
||||
|
||||
@@ -594,11 +392,10 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
def testFilenameHandling(self):
|
||||
filename = self.get_test_file()
|
||||
|
||||
document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs")
|
||||
document = self.consumer.try_consume_file(filename, override_title="new docs")
|
||||
|
||||
self.assertEqual(document.title, "new docs")
|
||||
self.assertEqual(document.correspondent.name, "Bank")
|
||||
self.assertEqual(document.filename, "Bank/new docs.pdf")
|
||||
self.assertEqual(document.filename, "none/new docs.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||
@mock.patch("documents.signals.handlers.generate_unique_filename")
|
||||
@@ -617,10 +414,9 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
|
||||
Tag.objects.create(name="test", is_inbox_tag=True)
|
||||
|
||||
document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs")
|
||||
document = self.consumer.try_consume_file(filename, override_title="new docs")
|
||||
|
||||
self.assertEqual(document.title, "new docs")
|
||||
self.assertEqual(document.correspondent.name, "Bank")
|
||||
self.assertIsNotNone(os.path.isfile(document.title))
|
||||
self.assertTrue(os.path.isfile(document.source_path))
|
||||
|
||||
@@ -642,3 +438,31 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(document.document_type, dtype)
|
||||
self.assertIn(t1, document.tags.all())
|
||||
self.assertNotIn(t2, document.tags.all())
|
||||
|
||||
@override_settings(CONSUMER_DELETE_DUPLICATES=True)
|
||||
def test_delete_duplicate(self):
|
||||
dst = self.get_test_file()
|
||||
self.assertTrue(os.path.isfile(dst))
|
||||
doc = self.consumer.try_consume_file(dst)
|
||||
|
||||
self.assertFalse(os.path.isfile(dst))
|
||||
self.assertIsNotNone(doc)
|
||||
|
||||
dst = self.get_test_file()
|
||||
self.assertTrue(os.path.isfile(dst))
|
||||
self.assertRaises(ConsumerError, self.consumer.try_consume_file, dst)
|
||||
self.assertFalse(os.path.isfile(dst))
|
||||
|
||||
@override_settings(CONSUMER_DELETE_DUPLICATES=False)
|
||||
def test_no_delete_duplicate(self):
|
||||
dst = self.get_test_file()
|
||||
self.assertTrue(os.path.isfile(dst))
|
||||
doc = self.consumer.try_consume_file(dst)
|
||||
|
||||
self.assertFalse(os.path.isfile(dst))
|
||||
self.assertIsNotNone(doc)
|
||||
|
||||
dst = self.get_test_file()
|
||||
self.assertTrue(os.path.isfile(dst))
|
||||
self.assertRaises(ConsumerError, self.consumer.try_consume_file, dst)
|
||||
self.assertTrue(os.path.isfile(dst))
|
||||
|
@@ -14,7 +14,7 @@ from django.utils import timezone
|
||||
from .utils import DirectoriesMixin
|
||||
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories, \
|
||||
generate_unique_filename
|
||||
from ..models import Document, Correspondent, Tag
|
||||
from ..models import Document, Correspondent, Tag, DocumentType
|
||||
|
||||
|
||||
class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
@@ -190,6 +190,17 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), True)
|
||||
self.assertTrue(os.path.isfile(important_file))
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{document_type} - {title}")
|
||||
def test_document_type(self):
|
||||
dt = DocumentType.objects.create(name="my_doc_type")
|
||||
d = Document.objects.create(title="the_doc", mime_type="application/pdf")
|
||||
|
||||
self.assertEqual(generate_filename(d), "none - the_doc.pdf")
|
||||
|
||||
d.document_type = dt
|
||||
|
||||
self.assertEqual(generate_filename(d), "my_doc_type - the_doc.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
||||
def test_tags_with_underscore(self):
|
||||
document = Document()
|
||||
|
135
src/documents/tests/test_management.py
Normal file
135
src/documents/tests/test_management.py
Normal file
@@ -0,0 +1,135 @@
|
||||
import hashlib
|
||||
import tempfile
|
||||
import filecmp
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
|
||||
from django.core.management import call_command
|
||||
|
||||
from documents.file_handling import generate_filename
|
||||
from documents.management.commands.document_archiver import handle_document
|
||||
from documents.models import Document
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
||||
|
||||
|
||||
class TestArchiver(DirectoriesMixin, TestCase):
|
||||
|
||||
def make_models(self):
|
||||
return Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf")
|
||||
|
||||
def test_archiver(self):
|
||||
|
||||
doc = self.make_models()
|
||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))
|
||||
|
||||
call_command('document_archiver')
|
||||
|
||||
def test_handle_document(self):
|
||||
|
||||
doc = self.make_models()
|
||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))
|
||||
|
||||
handle_document(doc.pk)
|
||||
|
||||
doc = Document.objects.get(id=doc.id)
|
||||
|
||||
self.assertIsNotNone(doc.checksum)
|
||||
self.assertTrue(os.path.isfile(doc.archive_path))
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(filecmp.cmp(sample_file, doc.source_path))
|
||||
|
||||
|
||||
class TestDecryptDocuments(TestCase):
|
||||
|
||||
@override_settings(
|
||||
ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
|
||||
THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
|
||||
PASSPHRASE="test",
|
||||
PAPERLESS_FILENAME_FORMAT=None
|
||||
)
|
||||
@mock.patch("documents.management.commands.decrypt_documents.input")
|
||||
def test_decrypt(self, m):
|
||||
|
||||
media_dir = tempfile.mkdtemp()
|
||||
originals_dir = os.path.join(media_dir, "documents", "originals")
|
||||
thumb_dir = os.path.join(media_dir, "documents", "thumbnails")
|
||||
os.makedirs(originals_dir, exist_ok=True)
|
||||
os.makedirs(thumb_dir, exist_ok=True)
|
||||
|
||||
override_settings(
|
||||
ORIGINALS_DIR=originals_dir,
|
||||
THUMBNAIL_DIR=thumb_dir,
|
||||
PASSPHRASE="test"
|
||||
).enable()
|
||||
|
||||
doc = Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
|
||||
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg"))
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000002.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"))
|
||||
|
||||
call_command('decrypt_documents')
|
||||
|
||||
doc.refresh_from_db()
|
||||
|
||||
self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
|
||||
self.assertEqual(doc.filename, "0000002.pdf")
|
||||
self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf")))
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png")))
|
||||
self.assertTrue(os.path.isfile(doc.thumbnail_path))
|
||||
|
||||
with doc.source_file as f:
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
self.assertEqual(checksum, doc.checksum)
|
||||
|
||||
|
||||
class TestMakeIndex(TestCase):
|
||||
|
||||
@mock.patch("documents.management.commands.document_index.index_reindex")
|
||||
def test_reindex(self, m):
|
||||
call_command("document_index", "reindex")
|
||||
m.assert_called_once()
|
||||
|
||||
@mock.patch("documents.management.commands.document_index.index_optimize")
|
||||
def test_optimize(self, m):
|
||||
call_command("document_index", "optimize")
|
||||
m.assert_called_once()
|
||||
|
||||
|
||||
class TestRenamer(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_rename(self):
|
||||
doc = Document.objects.create(title="test", mime_type="application/pdf")
|
||||
doc.filename = generate_filename(doc)
|
||||
doc.save()
|
||||
|
||||
Path(doc.source_path).touch()
|
||||
|
||||
old_source_path = doc.source_path
|
||||
|
||||
with override_settings(PAPERLESS_FILENAME_FORMAT="{title}"):
|
||||
call_command("document_renamer")
|
||||
|
||||
doc2 = Document.objects.get(id=doc.id)
|
||||
|
||||
self.assertEqual(doc2.filename, "test.pdf")
|
||||
self.assertFalse(os.path.isfile(old_source_path))
|
||||
self.assertFalse(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(os.path.isfile(doc2.source_path))
|
||||
|
||||
|
||||
class TestCreateClassifier(TestCase):
|
||||
|
||||
@mock.patch("documents.management.commands.document_create_classifier.train_classifier")
|
||||
def test_create_classifier(self, m):
|
||||
call_command("document_create_classifier")
|
||||
|
||||
m.assert_called_once()
|
@@ -1,40 +0,0 @@
|
||||
import filecmp
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from django.core.management import call_command
|
||||
from django.test import TestCase
|
||||
|
||||
from documents.management.commands.document_archiver import handle_document
|
||||
from documents.models import Document
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
||||
|
||||
|
||||
class TestArchiver(DirectoriesMixin, TestCase):
|
||||
|
||||
def make_models(self):
|
||||
return Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf")
|
||||
|
||||
def test_archiver(self):
|
||||
|
||||
doc = self.make_models()
|
||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))
|
||||
|
||||
call_command('document_archiver')
|
||||
|
||||
def test_handle_document(self):
|
||||
|
||||
doc = self.make_models()
|
||||
shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"))
|
||||
|
||||
handle_document(doc.pk)
|
||||
|
||||
doc = Document.objects.get(id=doc.id)
|
||||
|
||||
self.assertIsNotNone(doc.checksum)
|
||||
self.assertTrue(os.path.isfile(doc.archive_path))
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(filecmp.cmp(sample_file, doc.source_path))
|
@@ -1,57 +0,0 @@
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
from unittest import mock
|
||||
|
||||
from django.core.management import call_command
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from documents.management.commands import document_exporter
|
||||
from documents.models import Document, Tag, DocumentType, Correspondent
|
||||
|
||||
|
||||
class TestDecryptDocuments(TestCase):
|
||||
|
||||
@override_settings(
|
||||
ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
|
||||
THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
|
||||
PASSPHRASE="test",
|
||||
PAPERLESS_FILENAME_FORMAT=None
|
||||
)
|
||||
@mock.patch("documents.management.commands.decrypt_documents.input")
|
||||
def test_decrypt(self, m):
|
||||
|
||||
media_dir = tempfile.mkdtemp()
|
||||
originals_dir = os.path.join(media_dir, "documents", "originals")
|
||||
thumb_dir = os.path.join(media_dir, "documents", "thumbnails")
|
||||
os.makedirs(originals_dir, exist_ok=True)
|
||||
os.makedirs(thumb_dir, exist_ok=True)
|
||||
|
||||
override_settings(
|
||||
ORIGINALS_DIR=originals_dir,
|
||||
THUMBNAIL_DIR=thumb_dir,
|
||||
PASSPHRASE="test"
|
||||
).enable()
|
||||
|
||||
doc = Document.objects.create(checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
|
||||
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), os.path.join(originals_dir, "0000002.pdf.gpg"))
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", f"0000002.png.gpg"), os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"))
|
||||
|
||||
call_command('decrypt_documents')
|
||||
|
||||
doc.refresh_from_db()
|
||||
|
||||
self.assertEqual(doc.storage_type, Document.STORAGE_TYPE_UNENCRYPTED)
|
||||
self.assertEqual(doc.filename, "0000002.pdf")
|
||||
self.assertTrue(os.path.isfile(os.path.join(originals_dir, "0000002.pdf")))
|
||||
self.assertTrue(os.path.isfile(doc.source_path))
|
||||
self.assertTrue(os.path.isfile(os.path.join(thumb_dir, f"{doc.id:07}.png")))
|
||||
self.assertTrue(os.path.isfile(doc.thumbnail_path))
|
||||
|
||||
with doc.source_file as f:
|
||||
checksum = hashlib.md5(f.read()).hexdigest()
|
||||
self.assertEqual(checksum, doc.checksum)
|
||||
|
@@ -24,11 +24,17 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
||||
|
||||
file = os.path.join(self.dirs.originals_dir, "0000001.pdf")
|
||||
|
||||
Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow", filename="0000001.pdf", mime_type="application/pdf")
|
||||
Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
|
||||
Tag.objects.create(name="t")
|
||||
DocumentType.objects.create(name="dt")
|
||||
Correspondent.objects.create(name="c")
|
||||
d1 = Document.objects.create(content="Content", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", title="wow", filename="0000001.pdf", mime_type="application/pdf")
|
||||
d2 = Document.objects.create(content="Content", checksum="9c9691e51741c1f4f41a20896af31770", title="wow", filename="0000002.pdf.gpg", mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_GPG)
|
||||
t1 = Tag.objects.create(name="t")
|
||||
dt1 = DocumentType.objects.create(name="dt")
|
||||
c1 = Correspondent.objects.create(name="c")
|
||||
|
||||
d1.tags.add(t1)
|
||||
d1.correspondents = c1
|
||||
d1.document_type = dt1
|
||||
d1.save()
|
||||
d2.save()
|
||||
|
||||
target = tempfile.mkdtemp()
|
||||
self.addCleanup(shutil.rmtree, target)
|
||||
@@ -59,11 +65,25 @@ class TestExportImport(DirectoriesMixin, TestCase):
|
||||
self.assertEqual(checksum, element['fields']['archive_checksum'])
|
||||
|
||||
with paperless_environment() as dirs:
|
||||
self.assertEqual(Document.objects.count(), 2)
|
||||
Document.objects.all().delete()
|
||||
Correspondent.objects.all().delete()
|
||||
DocumentType.objects.all().delete()
|
||||
Tag.objects.all().delete()
|
||||
self.assertEqual(Document.objects.count(), 0)
|
||||
|
||||
call_command('document_importer', target)
|
||||
self.assertEqual(Document.objects.count(), 2)
|
||||
messages = check_sanity()
|
||||
# everything is alright after the test
|
||||
self.assertEqual(len(messages), 0, str([str(m) for m in messages]))
|
||||
|
||||
@override_settings(
|
||||
PAPERLESS_FILENAME_FORMAT="{title}"
|
||||
)
|
||||
def test_exporter_with_filename_format(self):
|
||||
self.test_exporter()
|
||||
|
||||
def test_export_missing_files(self):
|
||||
|
||||
target = tempfile.mkdtemp()
|
||||
|
129
src/documents/tests/test_migrations.py
Normal file
129
src/documents/tests/test_migrations.py
Normal file
@@ -0,0 +1,129 @@
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
from django.apps import apps
|
||||
from django.conf import settings
|
||||
from django.db import connection
|
||||
from django.db.migrations.executor import MigrationExecutor
|
||||
from django.test import TestCase, TransactionTestCase, override_settings
|
||||
|
||||
from documents.models import Document
|
||||
from documents.parsers import get_default_file_extension
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
class TestMigrations(TransactionTestCase):
|
||||
|
||||
@property
|
||||
def app(self):
|
||||
return apps.get_containing_app_config(type(self).__module__).name
|
||||
|
||||
migrate_from = None
|
||||
migrate_to = None
|
||||
|
||||
def setUp(self):
|
||||
super(TestMigrations, self).setUp()
|
||||
|
||||
assert self.migrate_from and self.migrate_to, \
|
||||
"TestCase '{}' must define migrate_from and migrate_to properties".format(type(self).__name__)
|
||||
self.migrate_from = [(self.app, self.migrate_from)]
|
||||
self.migrate_to = [(self.app, self.migrate_to)]
|
||||
executor = MigrationExecutor(connection)
|
||||
old_apps = executor.loader.project_state(self.migrate_from).apps
|
||||
|
||||
# Reverse to the original migration
|
||||
executor.migrate(self.migrate_from)
|
||||
|
||||
self.setUpBeforeMigration(old_apps)
|
||||
|
||||
# Run the migration to test
|
||||
executor = MigrationExecutor(connection)
|
||||
executor.loader.build_graph() # reload.
|
||||
executor.migrate(self.migrate_to)
|
||||
|
||||
self.apps = executor.loader.project_state(self.migrate_to).apps
|
||||
|
||||
def setUpBeforeMigration(self, apps):
|
||||
pass
|
||||
|
||||
|
||||
STORAGE_TYPE_UNENCRYPTED = "unencrypted"
|
||||
STORAGE_TYPE_GPG = "gpg"
|
||||
|
||||
|
||||
def source_path_before(self):
|
||||
if self.filename:
|
||||
fname = str(self.filename)
|
||||
else:
|
||||
fname = "{:07}.{}".format(self.pk, self.file_type)
|
||||
if self.storage_type == STORAGE_TYPE_GPG:
|
||||
fname += ".gpg"
|
||||
|
||||
return os.path.join(
|
||||
settings.ORIGINALS_DIR,
|
||||
fname
|
||||
)
|
||||
|
||||
|
||||
def file_type_after(self):
|
||||
return get_default_file_extension(self.mime_type)
|
||||
|
||||
|
||||
def source_path_after(doc):
|
||||
if doc.filename:
|
||||
fname = str(doc.filename)
|
||||
else:
|
||||
fname = "{:07}{}".format(doc.pk, file_type_after(doc))
|
||||
if doc.storage_type == STORAGE_TYPE_GPG:
|
||||
fname += ".gpg" # pragma: no cover
|
||||
|
||||
return os.path.join(
|
||||
settings.ORIGINALS_DIR,
|
||||
fname
|
||||
)
|
||||
|
||||
|
||||
@override_settings(PASSPHRASE="test")
|
||||
class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
|
||||
|
||||
migrate_from = '1002_auto_20201111_1105'
|
||||
migrate_to = '1003_mime_types'
|
||||
|
||||
def setUpBeforeMigration(self, apps):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
doc = Document.objects.create(title="test", file_type="pdf", filename="file1.pdf")
|
||||
self.doc_id = doc.id
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), source_path_before(doc))
|
||||
|
||||
doc2 = Document.objects.create(checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG)
|
||||
self.doc2_id = doc2.id
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf.gpg"), source_path_before(doc2))
|
||||
|
||||
def testMimeTypesMigrated(self):
|
||||
Document = self.apps.get_model('documents', 'Document')
|
||||
|
||||
doc = Document.objects.get(id=self.doc_id)
|
||||
self.assertEqual(doc.mime_type, "application/pdf")
|
||||
|
||||
doc2 = Document.objects.get(id=self.doc2_id)
|
||||
self.assertEqual(doc2.mime_type, "application/pdf")
|
||||
|
||||
|
||||
@override_settings(PASSPHRASE="test")
|
||||
class TestMigrateMimeTypeBackwards(DirectoriesMixin, TestMigrations):
|
||||
|
||||
migrate_from = '1003_mime_types'
|
||||
migrate_to = '1002_auto_20201111_1105'
|
||||
|
||||
def setUpBeforeMigration(self, apps):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
doc = Document.objects.create(title="test", mime_type="application/pdf", filename="file1.pdf")
|
||||
self.doc_id = doc.id
|
||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), source_path_after(doc))
|
||||
|
||||
def testMimeTypesReverted(self):
|
||||
Document = self.apps.get_model('documents', 'Document')
|
||||
|
||||
doc = Document.objects.get(id=self.doc_id)
|
||||
self.assertEqual(doc.file_type, "pdf")
|
@@ -58,6 +58,8 @@ class IndexView(TemplateView):
|
||||
def get_context_data(self, **kwargs):
|
||||
context = super().get_context_data(**kwargs)
|
||||
context['cookie_prefix'] = settings.COOKIE_PREFIX
|
||||
context['username'] = self.request.user.username
|
||||
context['full_name'] = self.request.user.get_full_name()
|
||||
return context
|
||||
|
||||
|
||||
@@ -389,14 +391,27 @@ class SearchView(APIView):
|
||||
}
|
||||
|
||||
def get(self, request, format=None):
|
||||
if 'query' not in request.query_params:
|
||||
|
||||
if 'query' in request.query_params:
|
||||
query = request.query_params['query']
|
||||
else:
|
||||
query = None
|
||||
|
||||
if 'more_like' in request.query_params:
|
||||
more_like_id = request.query_params['more_like']
|
||||
more_like_content = Document.objects.get(id=more_like_id).content
|
||||
else:
|
||||
more_like_id = None
|
||||
more_like_content = None
|
||||
|
||||
if not query and not more_like_id:
|
||||
return Response({
|
||||
'count': 0,
|
||||
'page': 0,
|
||||
'page_count': 0,
|
||||
'corrected_query': None,
|
||||
'results': []})
|
||||
|
||||
query = request.query_params['query']
|
||||
try:
|
||||
page = int(request.query_params.get('page', 1))
|
||||
except (ValueError, TypeError):
|
||||
@@ -406,8 +421,7 @@ class SearchView(APIView):
|
||||
page = 1
|
||||
|
||||
try:
|
||||
with index.query_page(self.ix, query, page) as (result_page,
|
||||
corrected_query):
|
||||
with index.query_page(self.ix, page, query, more_like_id, more_like_content) as (result_page, corrected_query): # NOQA: E501
|
||||
return Response(
|
||||
{'count': len(result_page),
|
||||
'page': result_page.pagenum,
|
||||
|
@@ -13,18 +13,17 @@ writeable_hint = (
|
||||
)
|
||||
|
||||
|
||||
def path_check(env_var):
|
||||
def path_check(var, directory):
|
||||
messages = []
|
||||
directory = os.getenv(env_var)
|
||||
if directory:
|
||||
if not os.path.exists(directory):
|
||||
messages.append(Error(
|
||||
exists_message.format(env_var),
|
||||
exists_message.format(var),
|
||||
exists_hint.format(directory)
|
||||
))
|
||||
elif not os.access(directory, os.W_OK | os.X_OK):
|
||||
messages.append(Error(
|
||||
writeable_message.format(env_var),
|
||||
writeable_message.format(var),
|
||||
writeable_hint.format(directory)
|
||||
))
|
||||
return messages
|
||||
@@ -36,12 +35,9 @@ def paths_check(app_configs, **kwargs):
|
||||
Check the various paths for existence, readability and writeability
|
||||
"""
|
||||
|
||||
check_messages = path_check("PAPERLESS_DATA_DIR") + \
|
||||
path_check("PAPERLESS_MEDIA_ROOT") + \
|
||||
path_check("PAPERLESS_CONSUMPTION_DIR") + \
|
||||
path_check("PAPERLESS_STATICDIR")
|
||||
|
||||
return check_messages
|
||||
return path_check("PAPERLESS_DATA_DIR", settings.DATA_DIR) + \
|
||||
path_check("PAPERLESS_MEDIA_ROOT", settings.MEDIA_ROOT) + \
|
||||
path_check("PAPERLESS_CONSUMPTION_DIR", settings.CONSUMPTION_DIR)
|
||||
|
||||
|
||||
@register()
|
||||
|
@@ -160,13 +160,6 @@ if AUTO_LOGIN_USERNAME:
|
||||
MIDDLEWARE.insert(_index+1, 'paperless.auth.AutoLoginMiddleware')
|
||||
|
||||
|
||||
if DEBUG:
|
||||
X_FRAME_OPTIONS = ''
|
||||
# this should really be 'allow-from uri' but its not supported in any mayor
|
||||
# browser.
|
||||
else:
|
||||
X_FRAME_OPTIONS = 'SAMEORIGIN'
|
||||
|
||||
# We allow CORS from localhost:8080
|
||||
CORS_ALLOWED_ORIGINS = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(","))
|
||||
|
||||
|
54
src/paperless/tests/test_checks.py
Normal file
54
src/paperless/tests/test_checks.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from paperless import binaries_check, paths_check
|
||||
from paperless.checks import debug_mode_check
|
||||
|
||||
|
||||
class TestChecks(DirectoriesMixin, TestCase):
|
||||
|
||||
def test_binaries(self):
|
||||
self.assertEqual(binaries_check(None), [])
|
||||
|
||||
@override_settings(CONVERT_BINARY="uuuhh", OPTIPNG_BINARY="forgot")
|
||||
def test_binaries_fail(self):
|
||||
self.assertEqual(len(binaries_check(None)), 2)
|
||||
|
||||
def test_paths_check(self):
|
||||
self.assertEqual(paths_check(None), [])
|
||||
|
||||
@override_settings(MEDIA_ROOT="uuh",
|
||||
DATA_DIR="whatever",
|
||||
CONSUMPTION_DIR="idontcare")
|
||||
def test_paths_check_dont_exist(self):
|
||||
msgs = paths_check(None)
|
||||
self.assertEqual(len(msgs), 3, str(msgs))
|
||||
|
||||
for msg in msgs:
|
||||
self.assertTrue(msg.msg.endswith("is set but doesn't exist."))
|
||||
|
||||
def test_paths_check_no_access(self):
|
||||
os.chmod(self.dirs.data_dir, 0o000)
|
||||
os.chmod(self.dirs.media_dir, 0o000)
|
||||
os.chmod(self.dirs.consumption_dir, 0o000)
|
||||
|
||||
self.addCleanup(os.chmod, self.dirs.data_dir, 0o777)
|
||||
self.addCleanup(os.chmod, self.dirs.media_dir, 0o777)
|
||||
self.addCleanup(os.chmod, self.dirs.consumption_dir, 0o777)
|
||||
|
||||
msgs = paths_check(None)
|
||||
self.assertEqual(len(msgs), 3)
|
||||
|
||||
for msg in msgs:
|
||||
self.assertTrue(msg.msg.endswith("is not writeable"))
|
||||
|
||||
@override_settings(DEBUG=False)
|
||||
def test_debug_disabled(self):
|
||||
self.assertEqual(debug_mode_check(None), [])
|
||||
|
||||
@override_settings(DEBUG=True)
|
||||
def test_debug_enabled(self):
|
||||
self.assertEqual(len(debug_mode_check(None)), 1)
|
@@ -1,7 +1,7 @@
|
||||
import subprocess
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.checks import Error, register
|
||||
from django.core.checks import Error, Warning, register
|
||||
|
||||
|
||||
def get_tesseract_langs():
|
||||
|
@@ -1,194 +0,0 @@
|
||||
# Thanks to the Library of Congress and some creative use of sed and awk:
|
||||
# http://www.loc.gov/standards/iso639-2/php/English_list.php
|
||||
|
||||
ISO639 = {
|
||||
|
||||
"aa": "aar",
|
||||
"ab": "abk",
|
||||
"ae": "ave",
|
||||
"af": "afr",
|
||||
"ak": "aka",
|
||||
"am": "amh",
|
||||
"an": "arg",
|
||||
"ar": "ara",
|
||||
"as": "asm",
|
||||
"av": "ava",
|
||||
"ay": "aym",
|
||||
"az": "aze",
|
||||
"ba": "bak",
|
||||
"be": "bel",
|
||||
"bg": "bul",
|
||||
"bh": "bih",
|
||||
"bi": "bis",
|
||||
"bm": "bam",
|
||||
"bn": "ben",
|
||||
"bo": "bod",
|
||||
"br": "bre",
|
||||
"bs": "bos",
|
||||
"ca": "cat",
|
||||
"ce": "che",
|
||||
"ch": "cha",
|
||||
"co": "cos",
|
||||
"cr": "cre",
|
||||
"cs": "ces",
|
||||
"cu": "chu",
|
||||
"cv": "chv",
|
||||
"cy": "cym",
|
||||
"da": "dan",
|
||||
"de": "deu",
|
||||
"dv": "div",
|
||||
"dz": "dzo",
|
||||
"ee": "ewe",
|
||||
"el": "ell",
|
||||
"en": "eng",
|
||||
"eo": "epo",
|
||||
"es": "spa",
|
||||
"et": "est",
|
||||
"eu": "eus",
|
||||
"fa": "fas",
|
||||
"ff": "ful",
|
||||
"fi": "fin",
|
||||
"fj": "fij",
|
||||
"fo": "fao",
|
||||
"fr": "fra",
|
||||
"fy": "fry",
|
||||
"ga": "gle",
|
||||
"gd": "gla",
|
||||
"gl": "glg",
|
||||
"gn": "grn",
|
||||
"gu": "guj",
|
||||
"gv": "glv",
|
||||
"ha": "hau",
|
||||
"he": "heb",
|
||||
"hi": "hin",
|
||||
"ho": "hmo",
|
||||
"hr": "hrv",
|
||||
"ht": "hat",
|
||||
"hu": "hun",
|
||||
"hy": "hye",
|
||||
"hz": "her",
|
||||
"ia": "ina",
|
||||
"id": "ind",
|
||||
"ie": "ile",
|
||||
"ig": "ibo",
|
||||
"ii": "iii",
|
||||
"ik": "ipk",
|
||||
"io": "ido",
|
||||
"is": "isl",
|
||||
"it": "ita",
|
||||
"iu": "iku",
|
||||
"ja": "jpn",
|
||||
"jv": "jav",
|
||||
"ka": "kat",
|
||||
"kg": "kon",
|
||||
"ki": "kik",
|
||||
"kj": "kua",
|
||||
"kk": "kaz",
|
||||
"kl": "kal",
|
||||
"km": "khm",
|
||||
"kn": "kan",
|
||||
"ko": "kor",
|
||||
"kr": "kau",
|
||||
"ks": "kas",
|
||||
"ku": "kur",
|
||||
"kv": "kom",
|
||||
"kw": "cor",
|
||||
"ky": "kir",
|
||||
"la": "lat",
|
||||
"lb": "ltz",
|
||||
"lg": "lug",
|
||||
"li": "lim",
|
||||
"ln": "lin",
|
||||
"lo": "lao",
|
||||
"lt": "lit",
|
||||
"lu": "lub",
|
||||
"lv": "lav",
|
||||
"mg": "mlg",
|
||||
"mh": "mah",
|
||||
"mi": "mri",
|
||||
"mk": "mkd",
|
||||
"ml": "mal",
|
||||
"mn": "mon",
|
||||
"mr": "mar",
|
||||
"ms": "msa",
|
||||
"mt": "mlt",
|
||||
"my": "mya",
|
||||
"na": "nau",
|
||||
"nb": "nob",
|
||||
"nd": "nde",
|
||||
"ne": "nep",
|
||||
"ng": "ndo",
|
||||
"nl": "nld",
|
||||
"no": "nor",
|
||||
"nr": "nbl",
|
||||
"nv": "nav",
|
||||
"ny": "nya",
|
||||
"oc": "oci",
|
||||
"oj": "oji",
|
||||
"om": "orm",
|
||||
"or": "ori",
|
||||
"os": "oss",
|
||||
"pa": "pan",
|
||||
"pi": "pli",
|
||||
"pl": "pol",
|
||||
"ps": "pus",
|
||||
"pt": "por",
|
||||
"qu": "que",
|
||||
"rm": "roh",
|
||||
"rn": "run",
|
||||
"ro": "ron",
|
||||
"ru": "rus",
|
||||
"rw": "kin",
|
||||
"sa": "san",
|
||||
"sc": "srd",
|
||||
"sd": "snd",
|
||||
"se": "sme",
|
||||
"sg": "sag",
|
||||
"si": "sin",
|
||||
"sk": "slk",
|
||||
"sl": "slv",
|
||||
"sm": "smo",
|
||||
"sn": "sna",
|
||||
"so": "som",
|
||||
"sq": "sqi",
|
||||
"sr": "srp",
|
||||
"ss": "ssw",
|
||||
"st": "sot",
|
||||
"su": "sun",
|
||||
"sv": "swe",
|
||||
"sw": "swa",
|
||||
"ta": "tam",
|
||||
"te": "tel",
|
||||
"tg": "tgk",
|
||||
"th": "tha",
|
||||
"ti": "tir",
|
||||
"tk": "tuk",
|
||||
"tl": "tgl",
|
||||
"tn": "tsn",
|
||||
"to": "ton",
|
||||
"tr": "tur",
|
||||
"ts": "tso",
|
||||
"tt": "tat",
|
||||
"tw": "twi",
|
||||
"ty": "tah",
|
||||
"ug": "uig",
|
||||
"uk": "ukr",
|
||||
"ur": "urd",
|
||||
"uz": "uzb",
|
||||
"ve": "ven",
|
||||
"vi": "vie",
|
||||
"vo": "vol",
|
||||
"wa": "wln",
|
||||
"wo": "wol",
|
||||
"xh": "xho",
|
||||
"yi": "yid",
|
||||
"yo": "yor",
|
||||
"za": "zha",
|
||||
|
||||
# Tessdata contains two values for Chinese, "chi_sim" and "chi_tra". I
|
||||
# have no idea which one is better, so I just picked the bigger file.
|
||||
"zh": "chi_tra",
|
||||
|
||||
"zu": "zul"
|
||||
|
||||
}
|
26
src/paperless_tesseract/tests/test_checks.py
Normal file
26
src/paperless_tesseract/tests/test_checks.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from unittest import mock
|
||||
|
||||
from django.core.checks import ERROR
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from paperless_tesseract import check_default_language_available
|
||||
|
||||
|
||||
class TestChecks(TestCase):
|
||||
|
||||
def test_default_language(self):
|
||||
msgs = check_default_language_available(None)
|
||||
|
||||
@override_settings(OCR_LANGUAGE="")
|
||||
def test_no_language(self):
|
||||
msgs = check_default_language_available(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
self.assertTrue(msgs[0].msg.startswith("No OCR language has been specified with PAPERLESS_OCR_LANGUAGE"))
|
||||
|
||||
@override_settings(OCR_LANGUAGE="ita")
|
||||
@mock.patch("paperless_tesseract.checks.get_tesseract_langs")
|
||||
def test_invalid_language(self, m):
|
||||
m.return_value = ["deu", "eng"]
|
||||
msgs = check_default_language_available(None)
|
||||
self.assertEqual(len(msgs), 1)
|
||||
self.assertEqual(msgs[0].level, ERROR)
|
@@ -35,15 +35,3 @@ class TextDocumentParser(DocumentParser):
|
||||
def parse(self, document_path, mime_type):
|
||||
with open(document_path, 'r') as f:
|
||||
self.text = f.read()
|
||||
|
||||
|
||||
def run_command(*args):
|
||||
environment = os.environ.copy()
|
||||
if settings.CONVERT_MEMORY_LIMIT:
|
||||
environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
|
||||
if settings.CONVERT_TMPDIR:
|
||||
environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR
|
||||
|
||||
if not subprocess.Popen(' '.join(args), env=environment,
|
||||
shell=True).wait() == 0:
|
||||
raise ParseError("Convert failed at {}".format(args))
|
||||
|
Reference in New Issue
Block a user