Merge branch 'dev' into feature-bulk-edit

This commit is contained in:
jonaswinkler 2020-12-13 00:52:36 +01:00
commit 2374506a20
14 changed files with 152 additions and 51 deletions

View File

@ -24,3 +24,7 @@ feature-X branches is for experimental stuff that will eventually be merged into
I'm trying to get most of paperless tested, so please do the same for your code! I know its a hassle, but it makes sure that your code works now and will allow us to detect regressions easily. I'm trying to get most of paperless tested, so please do the same for your code! I know its a hassle, but it makes sure that your code works now and will allow us to detect regressions easily.
To test your code, execute `pytest` in the src/ directory. Executing that in the project root is no good. This also generates a html coverage report, which you can use to see if you missed anything important during testing. To test your code, execute `pytest` in the src/ directory. Executing that in the project root is no good. This also generates a html coverage report, which you can use to see if you missed anything important during testing.
## More info:
... is available in the documentation. https://paperless-ng.readthedocs.io/en/latest/extending.html

View File

@ -2215,6 +2215,11 @@
"integrity": "sha512-UV1/ZJMC+HcP902wWdpC43cAcGu0IQk/I5bXjP2aSuCjsk3cE74mDvFrLKga7oDC170ugOAYBwfT4DSQW3akDA==", "integrity": "sha512-UV1/ZJMC+HcP902wWdpC43cAcGu0IQk/I5bXjP2aSuCjsk3cE74mDvFrLKga7oDC170ugOAYBwfT4DSQW3akDA==",
"dev": true "dev": true
}, },
"@types/pdfjs-dist": {
"version": "2.1.7",
"resolved": "https://registry.npmjs.org/@types/pdfjs-dist/-/pdfjs-dist-2.1.7.tgz",
"integrity": "sha512-nQIwcPUhkAIyn7x9NS0lR/qxYfd5unRtfGkMjvpgF4Sh28IXftRymaNmFKTTdejDNY25NDGSIyjwj/BRwAPexg=="
},
"@types/q": { "@types/q": {
"version": "1.5.4", "version": "1.5.4",
"resolved": "https://registry.npmjs.org/@types/q/-/q-1.5.4.tgz", "resolved": "https://registry.npmjs.org/@types/q/-/q-1.5.4.tgz",
@ -3023,6 +3028,16 @@
"integrity": "sha512-1Yj8h9Q+QDF5FzhMs/c9+6UntbD5MkRfRwac8DoEm9ZfUBZ7tZ55YcGVAzEe4bXsdQHEk+s9S5wsOKVdZrw0tQ==", "integrity": "sha512-1Yj8h9Q+QDF5FzhMs/c9+6UntbD5MkRfRwac8DoEm9ZfUBZ7tZ55YcGVAzEe4bXsdQHEk+s9S5wsOKVdZrw0tQ==",
"dev": true "dev": true
}, },
"bindings": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz",
"integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
"dev": true,
"optional": true,
"requires": {
"file-uri-to-path": "1.0.0"
}
},
"blob": { "blob": {
"version": "0.0.5", "version": "0.0.5",
"resolved": "https://registry.npmjs.org/blob/-/blob-0.0.5.tgz", "resolved": "https://registry.npmjs.org/blob/-/blob-0.0.5.tgz",
@ -5508,6 +5523,13 @@
"schema-utils": "^2.6.5" "schema-utils": "^2.6.5"
} }
}, },
"file-uri-to-path": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
"integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==",
"dev": true,
"optional": true
},
"fill-range": { "fill-range": {
"version": "7.0.1", "version": "7.0.1",
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
@ -8208,6 +8230,13 @@
"integrity": "sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA==", "integrity": "sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA==",
"dev": true "dev": true
}, },
"nan": {
"version": "2.14.2",
"resolved": "https://registry.npmjs.org/nan/-/nan-2.14.2.tgz",
"integrity": "sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ==",
"dev": true,
"optional": true
},
"nanomatch": { "nanomatch": {
"version": "1.2.13", "version": "1.2.13",
"resolved": "https://registry.npmjs.org/nanomatch/-/nanomatch-1.2.13.tgz", "resolved": "https://registry.npmjs.org/nanomatch/-/nanomatch-1.2.13.tgz",
@ -8260,6 +8289,23 @@
"moment": "2.18.1" "moment": "2.18.1"
} }
}, },
"ng2-pdf-viewer": {
"version": "6.3.2",
"resolved": "https://registry.npmjs.org/ng2-pdf-viewer/-/ng2-pdf-viewer-6.3.2.tgz",
"integrity": "sha512-H2tBhDd+Lq6CUzK2g54HsCcZDR2wTn1sDjYqKY3yF0Ydasl2R5ppCKynZBU/zge4EKvmHglJI120FbQMpJKDYQ==",
"requires": {
"@types/pdfjs-dist": "^2.1.4",
"pdfjs-dist": "^2.4.456",
"tslib": "^1.10.0"
},
"dependencies": {
"tslib": {
"version": "1.14.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz",
"integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg=="
}
}
},
"ngx-cookie-service": { "ngx-cookie-service": {
"version": "10.1.1", "version": "10.1.1",
"resolved": "https://registry.npmjs.org/ngx-cookie-service/-/ngx-cookie-service-10.1.1.tgz", "resolved": "https://registry.npmjs.org/ngx-cookie-service/-/ngx-cookie-service-10.1.1.tgz",
@ -9270,6 +9316,11 @@
"sha.js": "^2.4.8" "sha.js": "^2.4.8"
} }
}, },
"pdfjs-dist": {
"version": "2.5.207",
"resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-2.5.207.tgz",
"integrity": "sha512-xGDUhnCYPfHy+unMXCLCJtlpZaaZ17Ew3WIL0tnSgKFUZXHAPD49GO9xScyszSsQMoutNDgRb+rfBXIaX/lJbw=="
},
"performance-now": { "performance-now": {
"version": "2.1.0", "version": "2.1.0",
"resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz", "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz",
@ -13228,7 +13279,11 @@
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.13.tgz", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.13.tgz",
"integrity": "sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==", "integrity": "sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==",
"dev": true, "dev": true,
"optional": true "optional": true,
"requires": {
"bindings": "^1.5.0",
"nan": "^2.12.1"
}
}, },
"glob-parent": { "glob-parent": {
"version": "3.1.0", "version": "3.1.0",
@ -13832,7 +13887,11 @@
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.13.tgz", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.13.tgz",
"integrity": "sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==", "integrity": "sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==",
"dev": true, "dev": true,
"optional": true "optional": true,
"requires": {
"bindings": "^1.5.0",
"nan": "^2.12.1"
}
}, },
"glob-parent": { "glob-parent": {
"version": "3.1.0", "version": "3.1.0",

View File

@ -23,6 +23,7 @@
"@ng-bootstrap/ng-bootstrap": "^8.0.0", "@ng-bootstrap/ng-bootstrap": "^8.0.0",
"bootstrap": "^4.5.0", "bootstrap": "^4.5.0",
"ng-bootstrap": "^1.6.3", "ng-bootstrap": "^1.6.3",
"ng2-pdf-viewer": "^6.3.2",
"ngx-cookie-service": "^10.1.1", "ngx-cookie-service": "^10.1.1",
"ngx-file-drop": "^10.0.0", "ngx-file-drop": "^10.0.0",
"ngx-infinite-scroll": "^9.1.0", "ngx-infinite-scroll": "^9.1.0",

View File

@ -14,7 +14,6 @@ import { LogsComponent } from './components/manage/logs/logs.component';
import { SettingsComponent } from './components/manage/settings/settings.component'; import { SettingsComponent } from './components/manage/settings/settings.component';
import { FormsModule, ReactiveFormsModule } from '@angular/forms'; import { FormsModule, ReactiveFormsModule } from '@angular/forms';
import { DatePipe } from '@angular/common'; import { DatePipe } from '@angular/common';
import { SafePipe } from './pipes/safe.pipe';
import { NotFoundComponent } from './components/not-found/not-found.component'; import { NotFoundComponent } from './components/not-found/not-found.component';
import { CorrespondentListComponent } from './components/manage/correspondent-list/correspondent-list.component'; import { CorrespondentListComponent } from './components/manage/correspondent-list/correspondent-list.component';
import { DeleteDialogComponent } from './components/common/delete-dialog/delete-dialog.component'; import { DeleteDialogComponent } from './components/common/delete-dialog/delete-dialog.component';
@ -45,6 +44,7 @@ import { SavedViewWidgetComponent } from './components/dashboard/widgets/saved-v
import { StatisticsWidgetComponent } from './components/dashboard/widgets/statistics-widget/statistics-widget.component'; import { StatisticsWidgetComponent } from './components/dashboard/widgets/statistics-widget/statistics-widget.component';
import { UploadFileWidgetComponent } from './components/dashboard/widgets/upload-file-widget/upload-file-widget.component'; import { UploadFileWidgetComponent } from './components/dashboard/widgets/upload-file-widget/upload-file-widget.component';
import { WidgetFrameComponent } from './components/dashboard/widgets/widget-frame/widget-frame.component'; import { WidgetFrameComponent } from './components/dashboard/widgets/widget-frame/widget-frame.component';
import { PdfViewerModule } from 'ng2-pdf-viewer';
import { WelcomeWidgetComponent } from './components/dashboard/widgets/welcome-widget/welcome-widget.component'; import { WelcomeWidgetComponent } from './components/dashboard/widgets/welcome-widget/welcome-widget.component';
import { YesNoPipe } from './pipes/yes-no.pipe'; import { YesNoPipe } from './pipes/yes-no.pipe';
import { FileSizePipe } from './pipes/file-size.pipe'; import { FileSizePipe } from './pipes/file-size.pipe';
@ -62,7 +62,6 @@ import { SelectDialogComponent } from './components/common/select-dialog/select-
DocumentTypeListComponent, DocumentTypeListComponent,
LogsComponent, LogsComponent,
SettingsComponent, SettingsComponent,
SafePipe,
NotFoundComponent, NotFoundComponent,
CorrespondentEditDialogComponent, CorrespondentEditDialogComponent,
DeleteDialogComponent, DeleteDialogComponent,
@ -102,7 +101,8 @@ import { SelectDialogComponent } from './components/common/select-dialog/select-
FormsModule, FormsModule,
ReactiveFormsModule, ReactiveFormsModule,
NgxFileDropModule, NgxFileDropModule,
InfiniteScrollModule InfiniteScrollModule,
PdfViewerModule
], ],
providers: [ providers: [
DatePipe, DatePipe,

View File

@ -35,7 +35,7 @@
<div class="row"> <div class="row">
<div class="col-xl"> <div class="col mb-4">
<form [formGroup]='documentForm' (ngSubmit)="save()"> <form [formGroup]='documentForm' (ngSubmit)="save()">
@ -171,11 +171,9 @@
</form> </form>
</div> </div>
<div class="col-xl d-none d-xl-block document-preview"> <div class="col-md-6 col-xl-8 mb-3">
<object [data]="previewUrl | safe" type="application/pdf" width="100%" height="100%"> <div class="pdf-viewer-container" *ngIf="getContentType() == 'application/pdf'">
<p>Your browser does not support PDFs. <pdf-viewer [src]="previewUrl" [original-size]="false" [show-borders]="true"></pdf-viewer>
<a href="previewUrl">Download the PDF</a>.</p> </div>
</object>
</div> </div>
</div> </div>

View File

@ -1,5 +1,6 @@
.document-preview { .pdf-viewer-container {
height: calc(100vh - 180px); height: calc(100vh - 160px);
top: 70px; top: 70px;
position: sticky; position: sticky;
} background-color: gray;
}

View File

@ -59,6 +59,10 @@ export class DocumentDetailComponent implements OnInit {
private documentListViewService: DocumentListViewService, private documentListViewService: DocumentListViewService,
private titleService: Title) { } private titleService: Title) { }
getContentType() {
return this.metadata?.has_archive_version ? 'application/pdf' : this.metadata?.original_mime_type
}
ngOnInit(): void { ngOnInit(): void {
this.documentForm.valueChanges.subscribe(wow => { this.documentForm.valueChanges.subscribe(wow => {
Object.assign(this.document, this.documentForm.value) Object.assign(this.document, this.documentForm.value)

View File

@ -1,8 +0,0 @@
import { SafePipe } from './safe.pipe';
describe('SafePipe', () => {
it('create an instance', () => {
const pipe = new SafePipe();
expect(pipe).toBeTruthy();
});
});

View File

@ -1,19 +0,0 @@
import { Pipe, PipeTransform } from '@angular/core';
import { DomSanitizer } from '@angular/platform-browser';
@Pipe({
name: 'safe'
})
export class SafePipe implements PipeTransform {
constructor(private sanitizer: DomSanitizer) { }
transform(url) {
if (url == null) {
return this.sanitizer.bypassSecurityTrustResourceUrl("")
} else {
return this.sanitizer.bypassSecurityTrustResourceUrl(url);
}
}
}

View File

@ -2,6 +2,7 @@ import logging
import tqdm import tqdm
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.db.models.signals import post_save
from documents.models import Document from documents.models import Document
from ...mixins import Renderable from ...mixins import Renderable
@ -24,5 +25,4 @@ class Command(Renderable, BaseCommand):
logging.getLogger().handlers[0].level = logging.ERROR logging.getLogger().handlers[0].level = logging.ERROR
for document in tqdm.tqdm(Document.objects.all()): for document in tqdm.tqdm(Document.objects.all()):
# Saving the document again will generate a new filename and rename post_save.send(Document, instance=document)
document.save()

View File

@ -7,6 +7,7 @@ from django.contrib.admin.models import ADDITION, LogEntry
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError from django.db import models, DatabaseError
from django.db.models import Q
from django.dispatch import receiver from django.dispatch import receiver
from django.utils import timezone from django.utils import timezone
from filelock import FileLock from filelock import FileLock
@ -121,11 +122,14 @@ def set_tags(sender,
classifier=None, classifier=None,
replace=False, replace=False,
**kwargs): **kwargs):
if replace: if replace:
document.tags.clear() Document.tags.through.objects.filter(document=document).exclude(
current_tags = set([]) Q(tag__is_inbox_tag=True)).exclude(
else: Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO)
current_tags = set(document.tags.all()) ).delete()
current_tags = set(document.tags.all())
matched_tags = matching.match_tags(document.content, classifier) matched_tags = matching.match_tags(document.content, classifier)

View File

@ -14,6 +14,12 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.tag_first = Tag.objects.create(name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY) self.tag_first = Tag.objects.create(name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY)
self.tag_second = Tag.objects.create(name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY) self.tag_second = Tag.objects.create(name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY)
self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
self.tag_no_match = Tag.objects.create(name="test2")
self.d3.tags.add(self.tag_inbox)
self.d3.tags.add(self.tag_no_match)
self.correspondent_first = Correspondent.objects.create( self.correspondent_first = Correspondent.objects.create(
name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY) name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY)
@ -38,7 +44,7 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.assertEqual(d_first.tags.count(), 1) self.assertEqual(d_first.tags.count(), 1)
self.assertEqual(d_second.tags.count(), 1) self.assertEqual(d_second.tags.count(), 1)
self.assertEqual(d_unrelated.tags.count(), 0) self.assertEqual(d_unrelated.tags.count(), 2)
self.assertEqual(d_first.tags.first(), self.tag_first) self.assertEqual(d_first.tags.first(), self.tag_first)
self.assertEqual(d_second.tags.first(), self.tag_second) self.assertEqual(d_second.tags.first(), self.tag_second)
@ -56,3 +62,17 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.assertEqual(d_first.correspondent, self.correspondent_first) self.assertEqual(d_first.correspondent, self.correspondent_first)
self.assertEqual(d_second.correspondent, self.correspondent_second) self.assertEqual(d_second.correspondent, self.correspondent_second)
def test_overwrite_preserve_inbox(self):
self.d1.tags.add(self.tag_second)
call_command('document_retagger', '--tags', '--overwrite')
d_first, d_second, d_unrelated = self.get_updated_docs()
self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))
self.assertCountEqual([tag.id for tag in d_first.tags.all()], [self.tag_first.id])
self.assertCountEqual([tag.id for tag in d_second.tags.all()], [self.tag_second.id])
self.assertCountEqual([tag.id for tag in d_unrelated.tags.all()], [self.tag_inbox.id, self.tag_no_match.id])

View File

@ -110,6 +110,24 @@ class RasterisedDocumentParser(DocumentParser):
f"Error while getting DPI from image {image}: {e}") f"Error while getting DPI from image {image}: {e}")
return None return None
def calculate_a4_dpi(self, image):
try:
with Image.open(image) as im:
width, height = im.size
# divide image width by A4 width (210mm) in inches.
dpi = int(width / (21 / 2.54))
self.log(
'debug',
f"Estimated DPI {dpi} based on image width {width}"
)
return dpi
except Exception as e:
self.log(
'warning',
f"Error while calculating DPI for image {image}: {e}")
return None
def parse(self, document_path, mime_type): def parse(self, document_path, mime_type):
mode = settings.OCR_MODE mode = settings.OCR_MODE
@ -162,6 +180,7 @@ class RasterisedDocumentParser(DocumentParser):
if self.is_image(mime_type): if self.is_image(mime_type):
dpi = self.get_dpi(document_path) dpi = self.get_dpi(document_path)
a4_dpi = self.calculate_a4_dpi(document_path)
if dpi: if dpi:
self.log( self.log(
"debug", "debug",
@ -170,6 +189,8 @@ class RasterisedDocumentParser(DocumentParser):
ocr_args['image_dpi'] = dpi ocr_args['image_dpi'] = dpi
elif settings.OCR_IMAGE_DPI: elif settings.OCR_IMAGE_DPI:
ocr_args['image_dpi'] = settings.OCR_IMAGE_DPI ocr_args['image_dpi'] = settings.OCR_IMAGE_DPI
elif a4_dpi:
ocr_args['image_dpi'] = a4_dpi
else: else:
raise ParseError( raise ParseError(
f"Cannot produce archive PDF for image {document_path}, " f"Cannot produce archive PDF for image {document_path}, "
@ -241,6 +262,9 @@ def strip_excess_whitespace(text):
def get_text_from_pdf(pdf_file): def get_text_from_pdf(pdf_file):
if not os.path.isfile(pdf_file):
return None
with open(pdf_file, "rb") as f: with open(pdf_file, "rb") as f:
try: try:
pdf = pdftotext.PDF(f) pdf = pdftotext.PDF(f)

View File

@ -164,8 +164,21 @@ class TestParser(DirectoriesMixin, TestCase):
self.assertRaises(ParseError, f) self.assertRaises(ParseError, f)
@mock.patch("paperless_tesseract.parsers.ocrmypdf.ocr")
def test_image_calc_a4_dpi(self, m):
parser = RasterisedDocumentParser(None)
def test_image_no_dpi_fail(self): parser.parse(os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), "image/png")
m.assert_called_once()
args, kwargs = m.call_args
self.assertEqual(kwargs['image_dpi'], 62)
@mock.patch("paperless_tesseract.parsers.RasterisedDocumentParser.calculate_a4_dpi")
def test_image_dpi_fail(self, m):
m.return_value = None
parser = RasterisedDocumentParser(None) parser = RasterisedDocumentParser(None)
def f(): def f():