mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-04-02 13:45:10 -05:00
Merge pull request #1367 from Eckii24/feat/date-suggestions
Adding date suggestions to the documents details view
This commit is contained in:
commit
d40c13420d
@ -741,6 +741,19 @@ PAPERLESS_FILENAME_DATE_ORDER=<format>
|
||||
|
||||
Defaults to none, which disables this feature.
|
||||
|
||||
PAPERLESS_NUMBER_OF_SUGGESTED_DATES=<num>
|
||||
Paperless searches an entire document for dates. The first date found will
|
||||
be used as the initial value for the created date. When this variable is
|
||||
greater than 0 (or left at its default value), paperless will also suggest
|
||||
other dates found in the document, up to a maximum of this setting. Note that
|
||||
duplicates will be removed, which can result in fewer dates displayed in the
|
||||
frontend than this setting value.
|
||||
|
||||
The task to find all dates can be time-consuming and increases with a higher
|
||||
(maximum) number of suggested dates and slower hardware.
|
||||
|
||||
Defaults to 3. Set to 0 to disable this feature.
|
||||
|
||||
PAPERLESS_THUMBNAIL_FONT_NAME=<filename>
|
||||
Paperless creates thumbnails for plain text files by rendering the content
|
||||
of the file on an image and uses a predefined font for that. This
|
||||
|
@ -69,6 +69,7 @@
|
||||
#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
|
||||
#PAPERLESS_FILENAME_DATE_ORDER=YMD
|
||||
#PAPERLESS_FILENAME_PARSE_TRANSFORMS=[]
|
||||
#PAPERLESS_NUMBER_OF_SUGGESTED_DATES=5
|
||||
#PAPERLESS_THUMBNAIL_FONT_NAME=
|
||||
#PAPERLESS_IGNORE_DATES=
|
||||
#PAPERLESS_ENABLE_UPDATE_CHECK=
|
||||
|
@ -12,4 +12,10 @@
|
||||
</div>
|
||||
<div class="invalid-feedback" i18n>Invalid date.</div>
|
||||
<small *ngIf="hint" class="form-text text-muted">{{hint}}</small>
|
||||
<small *ngIf="getSuggestions().length > 0">
|
||||
<span i18n>Suggestions:</span>
|
||||
<ng-container *ngFor="let s of getSuggestions()">
|
||||
<a (click)="onSuggestionClick(s)" [routerLink]="[]">{{s}}</a>
|
||||
</ng-container>
|
||||
</small>
|
||||
</div>
|
||||
|
@ -1,4 +1,4 @@
|
||||
import { Component, forwardRef, OnInit } from '@angular/core'
|
||||
import { Component, forwardRef, Input, OnInit } from '@angular/core'
|
||||
import { NG_VALUE_ACCESSOR } from '@angular/forms'
|
||||
import {
|
||||
NgbDateAdapter,
|
||||
@ -31,6 +31,28 @@ export class DateComponent
|
||||
super()
|
||||
}
|
||||
|
||||
@Input()
|
||||
suggestions: string[]
|
||||
|
||||
/**
 * Builds the list of date suggestions to display for this input.
 *
 * Every entry of `suggestions` is parsed with `ngbDateParserFormatter`,
 * compared against the current value through `isoDateAdapter.toModel`, and
 * the surviving dates are turned back into strings with
 * `ngbDateParserFormatter.format`.
 *
 * @returns the formatted suggestions; an empty array when `suggestions` is
 *   null or undefined
 */
getSuggestions(): string[] {
  if (this.suggestions == null) {
    return []
  }

  return this.suggestions
    .map((s) => this.ngbDateParserFormatter.parse(s))
    .filter(
      (d) =>
        // parse() may return null for text it cannot read; skip those
        // entries instead of handing null to toModel()/format()
        d != null &&
        // no value set yet (null or undefined): keep every suggestion,
        // otherwise leave out the date that is already selected
        (this.value == null || this.value !== this.isoDateAdapter.toModel(d))
    )
    .map((d) => this.ngbDateParserFormatter.format(d))
}
|
||||
|
||||
/**
 * Applies a clicked suggestion to the input.
 *
 * The displayed string is parsed with `ngbDateParserFormatter`, converted
 * with `isoDateAdapter.toModel`, written via `writeValue`, and `onChange`
 * is then called with the resulting `value`.
 *
 * @param dateString the suggestion text exactly as it was rendered
 */
onSuggestionClick(dateString: string) {
  const modelValue = this.isoDateAdapter.toModel(
    this.ngbDateParserFormatter.parse(dateString)
  )
  this.writeValue(modelValue)
  this.onChange(this.value)
}
|
||||
|
||||
ngOnInit(): void {
|
||||
super.ngOnInit()
|
||||
this.placeholder = this.settings.getLocalizedDateInputFormat()
|
||||
|
@ -74,7 +74,8 @@
|
||||
|
||||
<app-input-text #inputTitle i18n-title title="Title" formControlName="title" (keyup)="titleKeyUp($event)" [error]="error?.title"></app-input-text>
|
||||
<app-input-number i18n-title title="Archive serial number" [error]="error?.archive_serial_number" formControlName='archive_serial_number'></app-input-number>
|
||||
<app-input-date i18n-title title="Date created" formControlName="created_date" [error]="error?.created_date"></app-input-date>
|
||||
<app-input-date i18n-title title="Date created" formControlName="created_date" [suggestions]="suggestions?.dates"
|
||||
[error]="error?.created_date"></app-input-date>
|
||||
<app-input-select [items]="correspondents" i18n-title title="Correspondent" formControlName="correspondent" [allowNull]="true"
|
||||
(createNew)="createCorrespondent($event)" [suggestions]="suggestions?.correspondents"></app-input-select>
|
||||
<app-input-select [items]="documentTypes" i18n-title title="Document type" formControlName="document_type" [allowNull]="true"
|
||||
|
@ -6,4 +6,6 @@ export interface PaperlessDocumentSuggestions {
|
||||
document_types?: number[]
|
||||
|
||||
storage_paths?: number[]
|
||||
|
||||
dates?: string[] // ISO-formatted date string e.g. 2022-11-03
|
||||
}
|
||||
|
@ -6,6 +6,8 @@ import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from typing import Iterator
|
||||
from typing import Match
|
||||
from typing import Optional
|
||||
from typing import Set
|
||||
|
||||
@ -216,6 +218,10 @@ def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None) -> str:
|
||||
|
||||
|
||||
def parse_date(filename, text) -> Optional[datetime.datetime]:
    """
    Returns the first date produced by parse_date_generator for the given
    filename and text, or None when the generator yields nothing.
    """
    for first_date in parse_date_generator(filename, text):
        # Only the first hit is needed; stop iterating immediately
        return first_date
    return None
|
||||
|
||||
|
||||
def parse_date_generator(filename, text) -> Iterator[datetime.datetime]:
|
||||
"""
|
||||
Returns the date of the document.
|
||||
"""
|
||||
@ -246,38 +252,32 @@ def parse_date(filename, text) -> Optional[datetime.datetime]:
|
||||
return date
|
||||
return None
|
||||
|
||||
date = None
|
||||
def __process_match(
    match: Match[str],
    date_order: str,
) -> Optional[datetime.datetime]:
    """
    Parses the full text of a single regex match with __parser using the
    given date order and returns whatever __filter makes of the result.
    A match that raises TypeError or ValueError while parsing is passed to
    __filter as None.
    """
    candidate = match.group(0)

    try:
        parsed = __parser(candidate, date_order)
    except (TypeError, ValueError):
        # The matched text is not a proper date
        parsed = None

    return __filter(parsed)
|
||||
|
||||
def __process_content(content: str, date_order: str) -> Iterator[datetime.datetime]:
    """
    Lazily yields every date found in content: each DATE_REGEX match is run
    through __process_match and results that are None are skipped.
    """
    candidates = (
        __process_match(hit, date_order) for hit in re.finditer(DATE_REGEX, content)
    )
    yield from (found for found in candidates if found is not None)
|
||||
|
||||
# if filename date parsing is enabled, search there first:
|
||||
if settings.FILENAME_DATE_ORDER:
|
||||
for m in re.finditer(DATE_REGEX, filename):
|
||||
date_string = m.group(0)
|
||||
|
||||
try:
|
||||
date = __parser(date_string, settings.FILENAME_DATE_ORDER)
|
||||
except (TypeError, ValueError):
|
||||
# Skip all matches that do not parse to a proper date
|
||||
continue
|
||||
|
||||
date = __filter(date)
|
||||
if date is not None:
|
||||
return date
|
||||
yield from __process_content(filename, settings.FILENAME_DATE_ORDER)
|
||||
|
||||
# Iterate through all regex matches in text and try to parse the date
|
||||
for m in re.finditer(DATE_REGEX, text):
|
||||
date_string = m.group(0)
|
||||
|
||||
try:
|
||||
date = __parser(date_string, settings.DATE_ORDER)
|
||||
except (TypeError, ValueError):
|
||||
# Skip all matches that do not parse to a proper date
|
||||
continue
|
||||
|
||||
date = __filter(date)
|
||||
if date is not None:
|
||||
return date
|
||||
|
||||
return date
|
||||
yield from __process_content(text, settings.DATE_ORDER)
|
||||
|
||||
|
||||
class ParseError(Exception):
|
||||
|
@ -1107,6 +1107,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
"tags": [],
|
||||
"document_types": [],
|
||||
"storage_paths": [],
|
||||
"dates": [],
|
||||
},
|
||||
)
|
||||
|
||||
@ -1118,6 +1119,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
@mock.patch("documents.views.match_document_types")
|
||||
@mock.patch("documents.views.match_tags")
|
||||
@mock.patch("documents.views.match_correspondents")
|
||||
@override_settings(NUMBER_OF_SUGGESTED_DATES=10)
|
||||
def test_get_suggestions(
|
||||
self,
|
||||
match_correspondents,
|
||||
@ -1128,7 +1130,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
doc = Document.objects.create(
|
||||
title="test",
|
||||
mime_type="application/pdf",
|
||||
content="this is an invoice!",
|
||||
content="this is an invoice from 12.04.2022!",
|
||||
)
|
||||
|
||||
match_correspondents.return_value = [Correspondent(id=88), Correspondent(id=2)]
|
||||
@ -1144,6 +1146,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
"tags": [56, 123],
|
||||
"document_types": [23],
|
||||
"storage_paths": [99, 77],
|
||||
"dates": ["2022-04-12"],
|
||||
},
|
||||
)
|
||||
|
||||
|
@ -8,6 +8,7 @@ from django.conf import settings
|
||||
from django.test import override_settings
|
||||
from django.test import TestCase
|
||||
from documents.parsers import parse_date
|
||||
from documents.parsers import parse_date_generator
|
||||
from paperless.settings import DATE_ORDER
|
||||
|
||||
|
||||
@ -161,6 +162,25 @@ class TestDate(TestCase):
|
||||
def test_crazy_date_with_spaces(self, *args):
|
||||
self.assertIsNone(parse_date("", "20 408000l 2475"))
|
||||
|
||||
def test_multiple_dates(self):
    """
    parse_date_generator must yield exactly the three dates contained in the
    sample text, in order of appearance.
    """
    text = """This text has multiple dates.
For example 02.02.2018, 22 July 2022 and Dezember 2021.
But not 24-12-9999 because its in the future..."""
    local_tz = tz.gettz(settings.TIME_ZONE)
    expected = [
        datetime.datetime(2018, 2, 2, 0, 0, tzinfo=local_tz),
        datetime.datetime(2022, 7, 22, 0, 0, tzinfo=local_tz),
        datetime.datetime(2021, 12, 1, 0, 0, tzinfo=local_tz),
    ]

    dates = list(parse_date_generator("", text))

    self.assertEqual(len(dates), 3)
    for found, wanted in zip(dates, expected):
        self.assertEqual(found, wanted)
|
||||
|
||||
@override_settings(FILENAME_DATE_ORDER="YMD")
|
||||
def test_filename_date_parse_valid_ymd(self, *args):
|
||||
"""
|
||||
|
@ -1,3 +1,4 @@
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@ -21,6 +22,7 @@ from django.db.models.functions import Lower
|
||||
from django.http import Http404
|
||||
from django.http import HttpResponse
|
||||
from django.http import HttpResponseBadRequest
|
||||
from django.shortcuts import get_object_or_404
|
||||
from django.utils.decorators import method_decorator
|
||||
from django.utils.translation import get_language
|
||||
from django.views.decorators.cache import cache_control
|
||||
@ -70,6 +72,7 @@ from .models import SavedView
|
||||
from .models import StoragePath
|
||||
from .models import Tag
|
||||
from .parsers import get_parser_class_for_mime_type
|
||||
from .parsers import parse_date_generator
|
||||
from .serialisers import AcknowledgeTasksViewSerializer
|
||||
from .serialisers import BulkDownloadSerializer
|
||||
from .serialisers import BulkEditSerializer
|
||||
@ -330,13 +333,15 @@ class DocumentViewSet(
|
||||
|
||||
@action(methods=["get"], detail=True)
|
||||
def suggestions(self, request, pk=None):
|
||||
try:
|
||||
doc = Document.objects.get(pk=pk)
|
||||
except Document.DoesNotExist:
|
||||
raise Http404()
|
||||
doc = get_object_or_404(Document, pk=pk)
|
||||
|
||||
classifier = load_classifier()
|
||||
|
||||
gen = parse_date_generator(doc.filename, doc.content)
|
||||
dates = sorted(
|
||||
{i for i in itertools.islice(gen, settings.NUMBER_OF_SUGGESTED_DATES)},
|
||||
)
|
||||
|
||||
return Response(
|
||||
{
|
||||
"correspondents": [c.id for c in match_correspondents(doc, classifier)],
|
||||
@ -345,6 +350,9 @@ class DocumentViewSet(
|
||||
dt.id for dt in match_document_types(doc, classifier)
|
||||
],
|
||||
"storage_paths": [dt.id for dt in match_storage_paths(doc, classifier)],
|
||||
"dates": [
|
||||
date.strftime("%Y-%m-%d") for date in dates if date is not None
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
|
@ -588,6 +588,10 @@ POST_CONSUME_SCRIPT = os.getenv("PAPERLESS_POST_CONSUME_SCRIPT")
|
||||
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
|
||||
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
|
||||
|
||||
# Maximum number of dates taken from document start to end to show as suggestions for
|
||||
# `created` date in the frontend. Duplicates are removed, which can result in fewer dates shown.
|
||||
NUMBER_OF_SUGGESTED_DATES = __get_int("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
|
||||
|
||||
# Transformations applied before filename parsing
|
||||
FILENAME_PARSE_TRANSFORMS = []
|
||||
for t in json.loads(os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS", "[]")):
|
||||
|
Loading…
x
Reference in New Issue
Block a user