Merge remote-tracking branch 'origin/dev' into dev

This commit is contained in:
Jonas Winkler
2021-05-16 01:23:07 +02:00
76 changed files with 1487 additions and 537 deletions

View File

@@ -7,7 +7,7 @@ from dateutil.parser import isoparse
from django.conf import settings
from whoosh import highlight, classify, query
from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME, BOOLEAN
from whoosh.highlight import Formatter, get_text, HtmlFormatter
from whoosh.highlight import HtmlFormatter
from whoosh.index import create_in, exists_in, open_dir
from whoosh.qparser import MultifieldParser
from whoosh.qparser.dateparse import DateParserPlugin
@@ -147,12 +147,10 @@ def remove_document_from_index(document):
class DelayedQuery:
@property
def _query(self):
def _get_query(self):
raise NotImplementedError()
@property
def _query_filter(self):
def _get_query_filter(self):
criterias = []
for k, v in self.query_params.items():
if k == 'correspondent__id':
@@ -185,16 +183,32 @@ class DelayedQuery:
else:
return None
@property
def _query_sortedby(self):
# if not 'ordering' in self.query_params:
return None, False
def _get_query_sortedby(self):
if 'ordering' not in self.query_params:
return None, False
# o: str = self.query_params['ordering']
# if o.startswith('-'):
# return o[1:], True
# else:
# return o, False
field: str = self.query_params['ordering']
sort_fields_map = {
"created": "created",
"modified": "modified",
"added": "added",
"title": "title",
"correspondent__name": "correspondent",
"document_type__name": "type",
"archive_serial_number": "asn"
}
if field.startswith('-'):
field = field[1:]
reverse = True
else:
reverse = False
if field not in sort_fields_map:
return None, False
else:
return sort_fields_map[field], reverse
def __init__(self, searcher: Searcher, query_params, page_size):
self.searcher = searcher
@@ -211,13 +225,13 @@ class DelayedQuery:
if item.start in self.saved_results:
return self.saved_results[item.start]
q, mask = self._query
sortedby, reverse = self._query_sortedby
q, mask = self._get_query()
sortedby, reverse = self._get_query_sortedby()
page: ResultsPage = self.searcher.search_page(
q,
mask=mask,
filter=self._query_filter,
filter=self._get_query_filter(),
pagenum=math.floor(item.start / self.page_size) + 1,
pagelen=self.page_size,
sortedby=sortedby,
@@ -227,14 +241,18 @@ class DelayedQuery:
surround=50)
page.results.formatter = HtmlFormatter(tagname="span", between=" ... ")
if not self.first_score and len(page.results) > 0:
if (not self.first_score and
len(page.results) > 0 and
sortedby is None):
self.first_score = page.results[0].score
if self.first_score:
page.results.top_n = list(map(
lambda hit: (hit[0] / self.first_score, hit[1]),
page.results.top_n
))
page.results.top_n = list(map(
lambda hit: (
(hit[0] / self.first_score) if self.first_score else None,
hit[1]
),
page.results.top_n
))
self.saved_results[item.start] = page
@@ -243,8 +261,7 @@ class DelayedQuery:
class DelayedFullTextQuery(DelayedQuery):
@property
def _query(self):
def _get_query(self):
q_str = self.query_params['query']
qp = MultifieldParser(
["content", "title", "correspondent", "tag", "type"],
@@ -261,8 +278,7 @@ class DelayedFullTextQuery(DelayedQuery):
class DelayedMoreLikeThisQuery(DelayedQuery):
@property
def _query(self):
def _get_query(self):
more_like_doc_id = int(self.query_params['more_like_id'])
content = Document.objects.get(id=more_like_doc_id).content

View File

@@ -106,6 +106,12 @@ class Command(BaseCommand):
help="Specify the ID of a document, and this command will only "
"run on this specific document."
)
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
)
def handle(self, *args, **options):
@@ -140,7 +146,8 @@ class Command(BaseCommand):
handle_document,
document_ids
),
total=len(document_ids)
total=len(document_ids),
disable=options['no_progress_bar']
))
except KeyboardInterrupt:
print("Aborting...")

View File

@@ -57,6 +57,12 @@ class Command(BaseCommand):
"do not belong to the current export, such as files from "
"deleted documents."
)
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
)
def __init__(self, *args, **kwargs):
BaseCommand.__init__(self, *args, **kwargs)
@@ -81,9 +87,9 @@ class Command(BaseCommand):
raise CommandError("That path doesn't appear to be writable")
with FileLock(settings.MEDIA_LOCK):
self.dump()
self.dump(options['no_progress_bar'])
def dump(self):
def dump(self, progress_bar_disable=False):
# 1. Take a snapshot of what files exist in the current export folder
for root, dirs, files in os.walk(self.target):
self.files_in_export_dir.extend(
@@ -124,8 +130,11 @@ class Command(BaseCommand):
"json", User.objects.all()))
# 3. Export files from each document
for index, document_dict in tqdm.tqdm(enumerate(document_manifest),
total=len(document_manifest)):
for index, document_dict in tqdm.tqdm(
enumerate(document_manifest),
total=len(document_manifest),
disable=progress_bar_disable
):
# 3.1. store files unencrypted
document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501

View File

@@ -36,6 +36,12 @@ class Command(BaseCommand):
# Declare the command-line arguments accepted by this management command.
def add_arguments(self, parser):
# Positional argument "source".
parser.add_argument("source")
# Boolean flag: stored as True when given on the command line, False
# otherwise. It is read back as options['no_progress_bar'] and forwarded to
# _import_files_from_manifest(), which passes it to tqdm as `disable`.
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
)
def __init__(self, *args, **kwargs):
BaseCommand.__init__(self, *args, **kwargs)
@@ -70,7 +76,7 @@ class Command(BaseCommand):
# Fill up the database with whatever is in the manifest
call_command("loaddata", manifest_path)
self._import_files_from_manifest()
self._import_files_from_manifest(options['no_progress_bar'])
print("Updating search index...")
call_command('document_index', 'reindex')
@@ -111,7 +117,7 @@ class Command(BaseCommand):
f"does not appear to be in the source directory."
)
def _import_files_from_manifest(self):
def _import_files_from_manifest(self, progress_bar_disable):
os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True)
@@ -123,7 +129,10 @@ class Command(BaseCommand):
lambda r: r["model"] == "documents.document",
self.manifest))
for record in tqdm.tqdm(manifest_documents):
for record in tqdm.tqdm(
manifest_documents,
disable=progress_bar_disable
):
document = Document.objects.get(pk=record["pk"])

View File

@@ -10,10 +10,16 @@ class Command(BaseCommand):
# Declare the command-line arguments accepted by this management command.
def add_arguments(self, parser):
# Positional sub-command; only 'reindex' and 'optimize' are accepted.
parser.add_argument("command", choices=['reindex', 'optimize'])
# Boolean flag: stored as True when given on the command line, False
# otherwise. handle() reads it back as options['no_progress_bar'].
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
)
# Run the selected index operation inside a single database transaction.
def handle(self, *args, **options):
with transaction.atomic():
if options['command'] == 'reindex':
# NOTE(review): this text is a rendered diff without +/- markers; the bare
# index_reindex() call is presumably the pre-change line and the call with
# progress_bar_disable= its replacement - confirm against the real file.
index_reindex()
index_reindex(progress_bar_disable=options['no_progress_bar'])
elif options['command'] == 'optimize':
index_optimize()

View File

@@ -13,9 +13,20 @@ class Command(BaseCommand):
This will rename all documents to match the latest filename format.
""".replace(" ", "")
# Declare the command-line arguments accepted by this management command.
def add_arguments(self, parser):
# Boolean flag: stored as True when given on the command line, False
# otherwise. handle() passes options['no_progress_bar'] to tqdm as `disable`.
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
)
# Re-send the post_save signal for every document. Per this command's help
# text, the effect is that all documents are renamed to the latest filename
# format.
def handle(self, *args, **options):
# Set the first handler of the root logger to ERROR for the duration of the
# run.
logging.getLogger().handlers[0].level = logging.ERROR
# NOTE(review): this text is a rendered diff without +/- markers; the
# single-line loop header is presumably the pre-change line and the
# multi-line header with disable= its replacement - confirm against the
# real file.
for document in tqdm.tqdm(Document.objects.all()):
for document in tqdm.tqdm(
Document.objects.all(),
disable=options['no_progress_bar']
):
post_save.send(Document, instance=document)

View File

@@ -57,6 +57,12 @@ class Command(BaseCommand):
"set correspondent, document and remove correspondents, types"
"and tags that do not match anymore due to changed rules."
)
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
)
def handle(self, *args, **options):
@@ -68,7 +74,10 @@ class Command(BaseCommand):
classifier = load_classifier()
for document in tqdm.tqdm(documents):
for document in tqdm.tqdm(
documents,
disable=options['no_progress_bar']
):
if options['correspondent']:
set_correspondent(

View File

@@ -8,8 +8,16 @@ class Command(BaseCommand):
This command checks your document archive for issues.
""".replace(" ", "")
# Declare the command-line arguments accepted by this management command.
def add_arguments(self, parser):
# Boolean flag: stored as True when given on the command line, False
# otherwise. handle() inverts it to obtain check_sanity()'s `progress`
# argument.
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
)
# Run the sanity check and log the messages it collected.
def handle(self, *args, **options):
# NOTE(review): this text is a rendered diff without +/- markers; the
# progress=True call is presumably the pre-change line and the call below
# its replacement - confirm against the real file.
messages = check_sanity(progress=True)
# The flag is negated because the option means "no progress bar" while
# check_sanity() takes "show progress".
messages = check_sanity(progress=not options['no_progress_bar'])
messages.log_messages()

View File

@@ -47,6 +47,12 @@ class Command(BaseCommand):
help="Specify the ID of a document, and this command will only "
"run on this specific document."
)
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown"
)
def handle(self, *args, **options):
logging.getLogger().handlers[0].level = logging.ERROR
@@ -65,5 +71,7 @@ class Command(BaseCommand):
with multiprocessing.Pool() as pool:
list(tqdm.tqdm(
pool.imap_unordered(_process_document, ids), total=len(ids)
pool.imap_unordered(_process_document, ids),
total=len(ids),
disable=options['no_progress_bar']
))

View File

@@ -60,12 +60,7 @@ def check_sanity(progress=False):
if lockfile in present_files:
present_files.remove(lockfile)
if progress:
docs = tqdm(Document.objects.all())
else:
docs = Document.objects.all()
for doc in docs:
for doc in tqdm(Document.objects.all(), disable=not progress):
# Check sanity of the thumbnail
if not os.path.isfile(doc.thumbnail_path):
messages.error(f"Thumbnail of document {doc.pk} does not exist.")

View File

@@ -42,3 +42,58 @@ body {
border-top-left-radius: 0;
border-top-right-radius: 0;
}
/* Dark-mode overrides; applied only when the user agent reports a dark
   colour-scheme preference. Colours are hard-coded copies of the SCSS
   variables listed below. */
@media (prefers-color-scheme: dark) {
/*
From theme_dark.scss
$primary-dark-mode: #45973a;
$danger-dark-mode: #b71631;
$bg-dark-mode: #161618;
$bg-dark-mode-accent: #21262d;
$bg-light-dark-mode: #1c1c1f;
$text-color-dark-mode: #abb2bf;
$border-color-dark-mode: #47494f;
*/
/* Page background = $bg-dark-mode, text = $text-color-dark-mode. */
body {
background-color: #161618 !important;
color: #abb2bf;
}
/* Logo text uses $text-color-dark-mode. */
svg.logo .text {
fill: #abb2bf!important;
}
/* Border = $border-color-dark-mode; inputs marked .is-invalid are excluded
   by the selector. */
.form-control:not(.is-invalid):not(.btn) {
border-color: #47494f;
}
/* Inputs: background = $bg-dark-mode, text = $text-color-dark-mode. */
.form-control:not(.btn) {
background-color: #161618;
color: #abb2bf;
}
.form-control:not(.btn)::placeholder {
color: #abb2bf;
}
/* Focused inputs: background = $bg-light-dark-mode; the text colour
   #8e97a9 is not one of the variables listed above. */
.form-control:not(.btn):focus {
background-color: #1c1c1f !important;
color: #8e97a9 !important;
}
/* Primary button greens below are not among the variables listed above. */
.btn-primary {
color: #fff;
background-color: #17541f;
border-color: #17541f;
}
.btn-primary:hover, .btn-primary:focus {
background-color: #0f3614;
border-color: #0c2c10;
}
.btn-primary:not(:disabled):not(.disabled):active {
background-color: #0c2c10;
border-color: #09220d;
}
}

View File

@@ -20,13 +20,13 @@ def index_optimize():
writer.commit(optimize=True)
# Recreate the search index and add every document to it.
# NOTE(review): this text is a rendered diff without +/- markers; the
# parameterless `def` and the tqdm call without disable= are presumably the
# pre-change lines, each followed by its replacement - confirm against the
# real file.
def index_reindex():
# progress_bar_disable is forwarded to tqdm as `disable`.
def index_reindex(progress_bar_disable=False):
documents = Document.objects.all()
# recreate=True is passed so the index is opened as a fresh one.
ix = index.open_index(recreate=True)
with AsyncWriter(ix) as writer:
for document in tqdm.tqdm(documents):
for document in tqdm.tqdm(documents, disable=progress_bar_disable):
index.update_document(writer, document)

View File

@@ -7,11 +7,12 @@
<head>
<meta charset="utf-8">
<title>Paperless-ng</title>
<base href="/">
<base href="{% url 'base' %}">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="username" content="{{username}}">
<meta name="full_name" content="{{full_name}}">
<meta name="cookie_prefix" content="{{cookie_prefix}}">
<meta name="robots" content="noindex,nofollow">
<link rel="icon" type="image/x-icon" href="favicon.ico">
<link rel="manifest" href="{% static webmanifest %}">
<link rel="stylesheet" href="{% static styles_css %}">

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -471,6 +471,31 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
self.assertNotIn(d5.id, search_query("&added__date__lt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d")))
self.assertIn(d5.id, search_query("&added__date__gt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d")))
# Check that the `ordering` query parameter sorts full-text search results
# by archive serial number, title and correspondent name, ascending and
# descending.
def test_search_sorting(self):
# Correspondent names sort as c1 ("Ax") < c3 ("Bx") < c2 ("Cx").
c1 = Correspondent.objects.create(name="corres Ax")
c2 = Correspondent.objects.create(name="corres Cx")
c3 = Correspondent.objects.create(name="corres Bx")
# All three documents share the content "test" so each matches the query;
# serial number, title and correspondent each give a different ordering.
d1 = Document.objects.create(checksum="1", correspondent=c1, content="test", archive_serial_number=2, title="3")
d2 = Document.objects.create(checksum="2", correspondent=c2, content="test", archive_serial_number=3, title="2")
d3 = Document.objects.create(checksum="3", correspondent=c3, content="test", archive_serial_number=1, title="1")
# Add the documents to the search index before querying.
with AsyncWriter(index.open_index()) as writer:
for doc in Document.objects.all():
index.update_document(writer, doc)
# Helper: run the search with extra query-string parameters `q` appended and
# return the ids of the hits in response order.
def search_query(q):
r = self.client.get("/api/documents/?query=test" + q)
self.assertEqual(r.status_code, 200)
return [hit['id'] for hit in r.data['results']]
# Serial numbers: d3=1, d1=2, d2=3.
self.assertListEqual(search_query("&ordering=archive_serial_number"), [d3.id, d1.id, d2.id])
self.assertListEqual(search_query("&ordering=-archive_serial_number"), [d2.id, d1.id, d3.id])
# Titles: d3="1", d2="2", d1="3".
self.assertListEqual(search_query("&ordering=title"), [d3.id, d2.id, d1.id])
self.assertListEqual(search_query("&ordering=-title"), [d1.id, d2.id, d3.id])
# Correspondents: d1 (Ax), d3 (Bx), d2 (Cx).
self.assertListEqual(search_query("&ordering=correspondent__name"), [d1.id, d3.id, d2.id])
self.assertListEqual(search_query("&ordering=-correspondent__name"), [d2.id, d3.id, d1.id])
def test_statistics(self):
doc1 = Document.objects.create(title="none1", checksum="A")