Merge branch 'dev' into feature-bulk-edit

This commit is contained in:
jonaswinkler 2020-12-13 00:52:36 +01:00
commit 2374506a20
14 changed files with 152 additions and 51 deletions

View File

@ -24,3 +24,7 @@ feature-X branches is for experimental stuff that will eventually be merged into
I'm trying to get most of paperless tested, so please do the same for your code! I know its a hassle, but it makes sure that your code works now and will allow us to detect regressions easily.
To test your code, execute `pytest` in the src/ directory. Executing that in the project root is no good. This also generates a html coverage report, which you can use to see if you missed anything important during testing.
## More info:
... is available in the documentation. https://paperless-ng.readthedocs.io/en/latest/extending.html

View File

@ -2215,6 +2215,11 @@
"integrity": "sha512-UV1/ZJMC+HcP902wWdpC43cAcGu0IQk/I5bXjP2aSuCjsk3cE74mDvFrLKga7oDC170ugOAYBwfT4DSQW3akDA==",
"dev": true
},
"@types/pdfjs-dist": {
"version": "2.1.7",
"resolved": "https://registry.npmjs.org/@types/pdfjs-dist/-/pdfjs-dist-2.1.7.tgz",
"integrity": "sha512-nQIwcPUhkAIyn7x9NS0lR/qxYfd5unRtfGkMjvpgF4Sh28IXftRymaNmFKTTdejDNY25NDGSIyjwj/BRwAPexg=="
},
"@types/q": {
"version": "1.5.4",
"resolved": "https://registry.npmjs.org/@types/q/-/q-1.5.4.tgz",
@ -3023,6 +3028,16 @@
"integrity": "sha512-1Yj8h9Q+QDF5FzhMs/c9+6UntbD5MkRfRwac8DoEm9ZfUBZ7tZ55YcGVAzEe4bXsdQHEk+s9S5wsOKVdZrw0tQ==",
"dev": true
},
"bindings": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz",
"integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
"dev": true,
"optional": true,
"requires": {
"file-uri-to-path": "1.0.0"
}
},
"blob": {
"version": "0.0.5",
"resolved": "https://registry.npmjs.org/blob/-/blob-0.0.5.tgz",
@ -5508,6 +5523,13 @@
"schema-utils": "^2.6.5"
}
},
"file-uri-to-path": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
"integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==",
"dev": true,
"optional": true
},
"fill-range": {
"version": "7.0.1",
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
@ -8208,6 +8230,13 @@
"integrity": "sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA==",
"dev": true
},
"nan": {
"version": "2.14.2",
"resolved": "https://registry.npmjs.org/nan/-/nan-2.14.2.tgz",
"integrity": "sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ==",
"dev": true,
"optional": true
},
"nanomatch": {
"version": "1.2.13",
"resolved": "https://registry.npmjs.org/nanomatch/-/nanomatch-1.2.13.tgz",
@ -8260,6 +8289,23 @@
"moment": "2.18.1"
}
},
"ng2-pdf-viewer": {
"version": "6.3.2",
"resolved": "https://registry.npmjs.org/ng2-pdf-viewer/-/ng2-pdf-viewer-6.3.2.tgz",
"integrity": "sha512-H2tBhDd+Lq6CUzK2g54HsCcZDR2wTn1sDjYqKY3yF0Ydasl2R5ppCKynZBU/zge4EKvmHglJI120FbQMpJKDYQ==",
"requires": {
"@types/pdfjs-dist": "^2.1.4",
"pdfjs-dist": "^2.4.456",
"tslib": "^1.10.0"
},
"dependencies": {
"tslib": {
"version": "1.14.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz",
"integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg=="
}
}
},
"ngx-cookie-service": {
"version": "10.1.1",
"resolved": "https://registry.npmjs.org/ngx-cookie-service/-/ngx-cookie-service-10.1.1.tgz",
@ -9270,6 +9316,11 @@
"sha.js": "^2.4.8"
}
},
"pdfjs-dist": {
"version": "2.5.207",
"resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-2.5.207.tgz",
"integrity": "sha512-xGDUhnCYPfHy+unMXCLCJtlpZaaZ17Ew3WIL0tnSgKFUZXHAPD49GO9xScyszSsQMoutNDgRb+rfBXIaX/lJbw=="
},
"performance-now": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz",
@ -13228,7 +13279,11 @@
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.13.tgz",
"integrity": "sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==",
"dev": true,
"optional": true
"optional": true,
"requires": {
"bindings": "^1.5.0",
"nan": "^2.12.1"
}
},
"glob-parent": {
"version": "3.1.0",
@ -13832,7 +13887,11 @@
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.13.tgz",
"integrity": "sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==",
"dev": true,
"optional": true
"optional": true,
"requires": {
"bindings": "^1.5.0",
"nan": "^2.12.1"
}
},
"glob-parent": {
"version": "3.1.0",

View File

@ -23,6 +23,7 @@
"@ng-bootstrap/ng-bootstrap": "^8.0.0",
"bootstrap": "^4.5.0",
"ng-bootstrap": "^1.6.3",
"ng2-pdf-viewer": "^6.3.2",
"ngx-cookie-service": "^10.1.1",
"ngx-file-drop": "^10.0.0",
"ngx-infinite-scroll": "^9.1.0",

View File

@ -14,7 +14,6 @@ import { LogsComponent } from './components/manage/logs/logs.component';
import { SettingsComponent } from './components/manage/settings/settings.component';
import { FormsModule, ReactiveFormsModule } from '@angular/forms';
import { DatePipe } from '@angular/common';
import { SafePipe } from './pipes/safe.pipe';
import { NotFoundComponent } from './components/not-found/not-found.component';
import { CorrespondentListComponent } from './components/manage/correspondent-list/correspondent-list.component';
import { DeleteDialogComponent } from './components/common/delete-dialog/delete-dialog.component';
@ -45,6 +44,7 @@ import { SavedViewWidgetComponent } from './components/dashboard/widgets/saved-v
import { StatisticsWidgetComponent } from './components/dashboard/widgets/statistics-widget/statistics-widget.component';
import { UploadFileWidgetComponent } from './components/dashboard/widgets/upload-file-widget/upload-file-widget.component';
import { WidgetFrameComponent } from './components/dashboard/widgets/widget-frame/widget-frame.component';
import { PdfViewerModule } from 'ng2-pdf-viewer';
import { WelcomeWidgetComponent } from './components/dashboard/widgets/welcome-widget/welcome-widget.component';
import { YesNoPipe } from './pipes/yes-no.pipe';
import { FileSizePipe } from './pipes/file-size.pipe';
@ -62,7 +62,6 @@ import { SelectDialogComponent } from './components/common/select-dialog/select-
DocumentTypeListComponent,
LogsComponent,
SettingsComponent,
SafePipe,
NotFoundComponent,
CorrespondentEditDialogComponent,
DeleteDialogComponent,
@ -102,7 +101,8 @@ import { SelectDialogComponent } from './components/common/select-dialog/select-
FormsModule,
ReactiveFormsModule,
NgxFileDropModule,
InfiniteScrollModule
InfiniteScrollModule,
PdfViewerModule
],
providers: [
DatePipe,

View File

@ -35,7 +35,7 @@
<div class="row">
<div class="col-xl">
<div class="col mb-4">
<form [formGroup]='documentForm' (ngSubmit)="save()">
@ -171,11 +171,9 @@
</form>
</div>
<div class="col-xl d-none d-xl-block document-preview">
<object [data]="previewUrl | safe" type="application/pdf" width="100%" height="100%">
<p>Your browser does not support PDFs.
<a href="previewUrl">Download the PDF</a>.</p>
</object>
<div class="col-md-6 col-xl-8 mb-3">
<div class="pdf-viewer-container" *ngIf="getContentType() == 'application/pdf'">
<pdf-viewer [src]="previewUrl" [original-size]="false" [show-borders]="true"></pdf-viewer>
</div>
</div>
</div>
</div>

View File

@ -1,5 +1,6 @@
.document-preview {
height: calc(100vh - 180px);
.pdf-viewer-container {
height: calc(100vh - 160px);
top: 70px;
position: sticky;
}
background-color: gray;
}

View File

@ -59,6 +59,10 @@ export class DocumentDetailComponent implements OnInit {
private documentListViewService: DocumentListViewService,
private titleService: Title) { }
getContentType() {
return this.metadata?.has_archive_version ? 'application/pdf' : this.metadata?.original_mime_type
}
ngOnInit(): void {
this.documentForm.valueChanges.subscribe(wow => {
Object.assign(this.document, this.documentForm.value)

View File

@ -1,8 +0,0 @@
import { SafePipe } from './safe.pipe';
describe('SafePipe', () => {
it('create an instance', () => {
const pipe = new SafePipe();
expect(pipe).toBeTruthy();
});
});

View File

@ -1,19 +0,0 @@
import { Pipe, PipeTransform } from '@angular/core';
import { DomSanitizer } from '@angular/platform-browser';
@Pipe({
name: 'safe'
})
export class SafePipe implements PipeTransform {
constructor(private sanitizer: DomSanitizer) { }
transform(url) {
if (url == null) {
return this.sanitizer.bypassSecurityTrustResourceUrl("")
} else {
return this.sanitizer.bypassSecurityTrustResourceUrl(url);
}
}
}

View File

@ -2,6 +2,7 @@ import logging
import tqdm
from django.core.management.base import BaseCommand
from django.db.models.signals import post_save
from documents.models import Document
from ...mixins import Renderable
@ -24,5 +25,4 @@ class Command(Renderable, BaseCommand):
logging.getLogger().handlers[0].level = logging.ERROR
for document in tqdm.tqdm(Document.objects.all()):
# Saving the document again will generate a new filename and rename
document.save()
post_save.send(Document, instance=document)

View File

@ -7,6 +7,7 @@ from django.contrib.admin.models import ADDITION, LogEntry
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError
from django.db.models import Q
from django.dispatch import receiver
from django.utils import timezone
from filelock import FileLock
@ -121,11 +122,14 @@ def set_tags(sender,
classifier=None,
replace=False,
**kwargs):
if replace:
document.tags.clear()
current_tags = set([])
else:
current_tags = set(document.tags.all())
Document.tags.through.objects.filter(document=document).exclude(
Q(tag__is_inbox_tag=True)).exclude(
Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO)
).delete()
current_tags = set(document.tags.all())
matched_tags = matching.match_tags(document.content, classifier)

View File

@ -14,6 +14,12 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.tag_first = Tag.objects.create(name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY)
self.tag_second = Tag.objects.create(name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY)
self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
self.tag_no_match = Tag.objects.create(name="test2")
self.d3.tags.add(self.tag_inbox)
self.d3.tags.add(self.tag_no_match)
self.correspondent_first = Correspondent.objects.create(
name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY)
@ -38,7 +44,7 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.assertEqual(d_first.tags.count(), 1)
self.assertEqual(d_second.tags.count(), 1)
self.assertEqual(d_unrelated.tags.count(), 0)
self.assertEqual(d_unrelated.tags.count(), 2)
self.assertEqual(d_first.tags.first(), self.tag_first)
self.assertEqual(d_second.tags.first(), self.tag_second)
@ -56,3 +62,17 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.assertEqual(d_first.correspondent, self.correspondent_first)
self.assertEqual(d_second.correspondent, self.correspondent_second)
def test_overwrite_preserve_inbox(self):
self.d1.tags.add(self.tag_second)
call_command('document_retagger', '--tags', '--overwrite')
d_first, d_second, d_unrelated = self.get_updated_docs()
self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))
self.assertCountEqual([tag.id for tag in d_first.tags.all()], [self.tag_first.id])
self.assertCountEqual([tag.id for tag in d_second.tags.all()], [self.tag_second.id])
self.assertCountEqual([tag.id for tag in d_unrelated.tags.all()], [self.tag_inbox.id, self.tag_no_match.id])

View File

@ -110,6 +110,24 @@ class RasterisedDocumentParser(DocumentParser):
f"Error while getting DPI from image {image}: {e}")
return None
def calculate_a4_dpi(self, image):
try:
with Image.open(image) as im:
width, height = im.size
# divide image width by A4 width (210mm) in inches.
dpi = int(width / (21 / 2.54))
self.log(
'debug',
f"Estimated DPI {dpi} based on image width {width}"
)
return dpi
except Exception as e:
self.log(
'warning',
f"Error while calculating DPI for image {image}: {e}")
return None
def parse(self, document_path, mime_type):
mode = settings.OCR_MODE
@ -162,6 +180,7 @@ class RasterisedDocumentParser(DocumentParser):
if self.is_image(mime_type):
dpi = self.get_dpi(document_path)
a4_dpi = self.calculate_a4_dpi(document_path)
if dpi:
self.log(
"debug",
@ -170,6 +189,8 @@ class RasterisedDocumentParser(DocumentParser):
ocr_args['image_dpi'] = dpi
elif settings.OCR_IMAGE_DPI:
ocr_args['image_dpi'] = settings.OCR_IMAGE_DPI
elif a4_dpi:
ocr_args['image_dpi'] = a4_dpi
else:
raise ParseError(
f"Cannot produce archive PDF for image {document_path}, "
@ -241,6 +262,9 @@ def strip_excess_whitespace(text):
def get_text_from_pdf(pdf_file):
if not os.path.isfile(pdf_file):
return None
with open(pdf_file, "rb") as f:
try:
pdf = pdftotext.PDF(f)

View File

@ -164,8 +164,21 @@ class TestParser(DirectoriesMixin, TestCase):
self.assertRaises(ParseError, f)
@mock.patch("paperless_tesseract.parsers.ocrmypdf.ocr")
def test_image_calc_a4_dpi(self, m):
parser = RasterisedDocumentParser(None)
def test_image_no_dpi_fail(self):
parser.parse(os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), "image/png")
m.assert_called_once()
args, kwargs = m.call_args
self.assertEqual(kwargs['image_dpi'], 62)
@mock.patch("paperless_tesseract.parsers.RasterisedDocumentParser.calculate_a4_dpi")
def test_image_dpi_fail(self, m):
m.return_value = None
parser = RasterisedDocumentParser(None)
def f():