diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index bd6080d35..a8fb1f8e9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -24,3 +24,7 @@ feature-X branches is for experimental stuff that will eventually be merged into
I'm trying to get most of paperless tested, so please do the same for your code! I know its a hassle, but it makes sure that your code works now and will allow us to detect regressions easily.
To test your code, execute `pytest` in the src/ directory. Executing that in the project root is no good. This also generates a html coverage report, which you can use to see if you missed anything important during testing.
+
+## More info:
+
+More information is available in the documentation: https://paperless-ng.readthedocs.io/en/latest/extending.html
diff --git a/src-ui/package-lock.json b/src-ui/package-lock.json
index b6b66e1c6..5eca0b3c0 100644
--- a/src-ui/package-lock.json
+++ b/src-ui/package-lock.json
@@ -2215,6 +2215,11 @@
"integrity": "sha512-UV1/ZJMC+HcP902wWdpC43cAcGu0IQk/I5bXjP2aSuCjsk3cE74mDvFrLKga7oDC170ugOAYBwfT4DSQW3akDA==",
"dev": true
},
+ "@types/pdfjs-dist": {
+ "version": "2.1.7",
+ "resolved": "https://registry.npmjs.org/@types/pdfjs-dist/-/pdfjs-dist-2.1.7.tgz",
+ "integrity": "sha512-nQIwcPUhkAIyn7x9NS0lR/qxYfd5unRtfGkMjvpgF4Sh28IXftRymaNmFKTTdejDNY25NDGSIyjwj/BRwAPexg=="
+ },
"@types/q": {
"version": "1.5.4",
"resolved": "https://registry.npmjs.org/@types/q/-/q-1.5.4.tgz",
@@ -3023,6 +3028,16 @@
"integrity": "sha512-1Yj8h9Q+QDF5FzhMs/c9+6UntbD5MkRfRwac8DoEm9ZfUBZ7tZ55YcGVAzEe4bXsdQHEk+s9S5wsOKVdZrw0tQ==",
"dev": true
},
+ "bindings": {
+ "version": "1.5.0",
+ "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz",
+ "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
+ "dev": true,
+ "optional": true,
+ "requires": {
+ "file-uri-to-path": "1.0.0"
+ }
+ },
"blob": {
"version": "0.0.5",
"resolved": "https://registry.npmjs.org/blob/-/blob-0.0.5.tgz",
@@ -5508,6 +5523,13 @@
"schema-utils": "^2.6.5"
}
},
+ "file-uri-to-path": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
+ "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==",
+ "dev": true,
+ "optional": true
+ },
"fill-range": {
"version": "7.0.1",
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
@@ -8208,6 +8230,13 @@
"integrity": "sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA==",
"dev": true
},
+ "nan": {
+ "version": "2.14.2",
+ "resolved": "https://registry.npmjs.org/nan/-/nan-2.14.2.tgz",
+ "integrity": "sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ==",
+ "dev": true,
+ "optional": true
+ },
"nanomatch": {
"version": "1.2.13",
"resolved": "https://registry.npmjs.org/nanomatch/-/nanomatch-1.2.13.tgz",
@@ -8260,6 +8289,23 @@
"moment": "2.18.1"
}
},
+ "ng2-pdf-viewer": {
+ "version": "6.3.2",
+ "resolved": "https://registry.npmjs.org/ng2-pdf-viewer/-/ng2-pdf-viewer-6.3.2.tgz",
+ "integrity": "sha512-H2tBhDd+Lq6CUzK2g54HsCcZDR2wTn1sDjYqKY3yF0Ydasl2R5ppCKynZBU/zge4EKvmHglJI120FbQMpJKDYQ==",
+ "requires": {
+ "@types/pdfjs-dist": "^2.1.4",
+ "pdfjs-dist": "^2.4.456",
+ "tslib": "^1.10.0"
+ },
+ "dependencies": {
+ "tslib": {
+ "version": "1.14.1",
+ "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz",
+ "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg=="
+ }
+ }
+ },
"ngx-cookie-service": {
"version": "10.1.1",
"resolved": "https://registry.npmjs.org/ngx-cookie-service/-/ngx-cookie-service-10.1.1.tgz",
@@ -9270,6 +9316,11 @@
"sha.js": "^2.4.8"
}
},
+ "pdfjs-dist": {
+ "version": "2.5.207",
+ "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-2.5.207.tgz",
+ "integrity": "sha512-xGDUhnCYPfHy+unMXCLCJtlpZaaZ17Ew3WIL0tnSgKFUZXHAPD49GO9xScyszSsQMoutNDgRb+rfBXIaX/lJbw=="
+ },
"performance-now": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz",
@@ -13228,7 +13279,11 @@
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.13.tgz",
"integrity": "sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==",
"dev": true,
- "optional": true
+ "optional": true,
+ "requires": {
+ "bindings": "^1.5.0",
+ "nan": "^2.12.1"
+ }
},
"glob-parent": {
"version": "3.1.0",
@@ -13832,7 +13887,11 @@
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.13.tgz",
"integrity": "sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==",
"dev": true,
- "optional": true
+ "optional": true,
+ "requires": {
+ "bindings": "^1.5.0",
+ "nan": "^2.12.1"
+ }
},
"glob-parent": {
"version": "3.1.0",
diff --git a/src-ui/package.json b/src-ui/package.json
index af3334db9..6293f2672 100644
--- a/src-ui/package.json
+++ b/src-ui/package.json
@@ -23,6 +23,7 @@
"@ng-bootstrap/ng-bootstrap": "^8.0.0",
"bootstrap": "^4.5.0",
"ng-bootstrap": "^1.6.3",
+ "ng2-pdf-viewer": "^6.3.2",
"ngx-cookie-service": "^10.1.1",
"ngx-file-drop": "^10.0.0",
"ngx-infinite-scroll": "^9.1.0",
diff --git a/src-ui/src/app/app.module.ts b/src-ui/src/app/app.module.ts
index af00fde63..befd26639 100644
--- a/src-ui/src/app/app.module.ts
+++ b/src-ui/src/app/app.module.ts
@@ -14,7 +14,6 @@ import { LogsComponent } from './components/manage/logs/logs.component';
import { SettingsComponent } from './components/manage/settings/settings.component';
import { FormsModule, ReactiveFormsModule } from '@angular/forms';
import { DatePipe } from '@angular/common';
-import { SafePipe } from './pipes/safe.pipe';
import { NotFoundComponent } from './components/not-found/not-found.component';
import { CorrespondentListComponent } from './components/manage/correspondent-list/correspondent-list.component';
import { DeleteDialogComponent } from './components/common/delete-dialog/delete-dialog.component';
@@ -45,6 +44,7 @@ import { SavedViewWidgetComponent } from './components/dashboard/widgets/saved-v
import { StatisticsWidgetComponent } from './components/dashboard/widgets/statistics-widget/statistics-widget.component';
import { UploadFileWidgetComponent } from './components/dashboard/widgets/upload-file-widget/upload-file-widget.component';
import { WidgetFrameComponent } from './components/dashboard/widgets/widget-frame/widget-frame.component';
+import { PdfViewerModule } from 'ng2-pdf-viewer';
import { WelcomeWidgetComponent } from './components/dashboard/widgets/welcome-widget/welcome-widget.component';
import { YesNoPipe } from './pipes/yes-no.pipe';
import { FileSizePipe } from './pipes/file-size.pipe';
@@ -62,7 +62,6 @@ import { SelectDialogComponent } from './components/common/select-dialog/select-
DocumentTypeListComponent,
LogsComponent,
SettingsComponent,
- SafePipe,
NotFoundComponent,
CorrespondentEditDialogComponent,
DeleteDialogComponent,
@@ -102,7 +101,8 @@ import { SelectDialogComponent } from './components/common/select-dialog/select-
FormsModule,
ReactiveFormsModule,
NgxFileDropModule,
- InfiniteScrollModule
+ InfiniteScrollModule,
+ PdfViewerModule
],
providers: [
DatePipe,
diff --git a/src-ui/src/app/components/document-detail/document-detail.component.html b/src-ui/src/app/components/document-detail/document-detail.component.html
index e0b5c6da9..f9f6e57ef 100644
--- a/src-ui/src/app/components/document-detail/document-detail.component.html
+++ b/src-ui/src/app/components/document-detail/document-detail.component.html
@@ -35,7 +35,7 @@
-
+
-
\ No newline at end of file
+
diff --git a/src-ui/src/app/components/document-detail/document-detail.component.scss b/src-ui/src/app/components/document-detail/document-detail.component.scss
index b1e9fddfb..998653bab 100644
--- a/src-ui/src/app/components/document-detail/document-detail.component.scss
+++ b/src-ui/src/app/components/document-detail/document-detail.component.scss
@@ -1,5 +1,6 @@
-.document-preview {
- height: calc(100vh - 180px);
+.pdf-viewer-container {
+ height: calc(100vh - 160px);
top: 70px;
position: sticky;
-}
\ No newline at end of file
+ background-color: gray;
+}
diff --git a/src-ui/src/app/components/document-detail/document-detail.component.ts b/src-ui/src/app/components/document-detail/document-detail.component.ts
index 329077693..c80a8b1ce 100644
--- a/src-ui/src/app/components/document-detail/document-detail.component.ts
+++ b/src-ui/src/app/components/document-detail/document-detail.component.ts
@@ -59,6 +59,10 @@ export class DocumentDetailComponent implements OnInit {
private documentListViewService: DocumentListViewService,
private titleService: Title) { }
+ getContentType() {
+ return this.metadata?.has_archive_version ? 'application/pdf' : this.metadata?.original_mime_type
+ }
+
ngOnInit(): void {
this.documentForm.valueChanges.subscribe(wow => {
Object.assign(this.document, this.documentForm.value)
diff --git a/src-ui/src/app/pipes/safe.pipe.spec.ts b/src-ui/src/app/pipes/safe.pipe.spec.ts
deleted file mode 100644
index 49ee0ad14..000000000
--- a/src-ui/src/app/pipes/safe.pipe.spec.ts
+++ /dev/null
@@ -1,8 +0,0 @@
-import { SafePipe } from './safe.pipe';
-
-describe('SafePipe', () => {
- it('create an instance', () => {
- const pipe = new SafePipe();
- expect(pipe).toBeTruthy();
- });
-});
diff --git a/src-ui/src/app/pipes/safe.pipe.ts b/src-ui/src/app/pipes/safe.pipe.ts
deleted file mode 100644
index f2d77a72d..000000000
--- a/src-ui/src/app/pipes/safe.pipe.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-import { Pipe, PipeTransform } from '@angular/core';
-import { DomSanitizer } from '@angular/platform-browser';
-
-@Pipe({
- name: 'safe'
-})
-export class SafePipe implements PipeTransform {
-
- constructor(private sanitizer: DomSanitizer) { }
-
- transform(url) {
- if (url == null) {
- return this.sanitizer.bypassSecurityTrustResourceUrl("")
- } else {
- return this.sanitizer.bypassSecurityTrustResourceUrl(url);
- }
- }
-
-}
\ No newline at end of file
diff --git a/src/documents/management/commands/document_renamer.py b/src/documents/management/commands/document_renamer.py
index 5d7d0d90c..745d2d03d 100644
--- a/src/documents/management/commands/document_renamer.py
+++ b/src/documents/management/commands/document_renamer.py
@@ -2,6 +2,7 @@ import logging
import tqdm
from django.core.management.base import BaseCommand
+from django.db.models.signals import post_save
from documents.models import Document
from ...mixins import Renderable
@@ -24,5 +25,4 @@ class Command(Renderable, BaseCommand):
logging.getLogger().handlers[0].level = logging.ERROR
for document in tqdm.tqdm(Document.objects.all()):
- # Saving the document again will generate a new filename and rename
- document.save()
+ post_save.send(Document, instance=document)
diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py
index 4fbbe8f8a..586897585 100755
--- a/src/documents/signals/handlers.py
+++ b/src/documents/signals/handlers.py
@@ -7,6 +7,7 @@ from django.contrib.admin.models import ADDITION, LogEntry
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.db import models, DatabaseError
+from django.db.models import Q
from django.dispatch import receiver
from django.utils import timezone
from filelock import FileLock
@@ -121,11 +122,14 @@ def set_tags(sender,
classifier=None,
replace=False,
**kwargs):
+
if replace:
- document.tags.clear()
- current_tags = set([])
- else:
- current_tags = set(document.tags.all())
+ Document.tags.through.objects.filter(document=document).exclude(
+ Q(tag__is_inbox_tag=True)).exclude(
+ Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO)
+ ).delete()
+
+ current_tags = set(document.tags.all())
matched_tags = matching.match_tags(document.content, classifier)
diff --git a/src/documents/tests/test_management_retagger.py b/src/documents/tests/test_management_retagger.py
index 2346b6527..907a23d09 100644
--- a/src/documents/tests/test_management_retagger.py
+++ b/src/documents/tests/test_management_retagger.py
@@ -14,6 +14,12 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.tag_first = Tag.objects.create(name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY)
self.tag_second = Tag.objects.create(name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY)
+ self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
+ self.tag_no_match = Tag.objects.create(name="test2")
+
+ self.d3.tags.add(self.tag_inbox)
+ self.d3.tags.add(self.tag_no_match)
+
self.correspondent_first = Correspondent.objects.create(
name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY)
@@ -38,7 +44,7 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.assertEqual(d_first.tags.count(), 1)
self.assertEqual(d_second.tags.count(), 1)
- self.assertEqual(d_unrelated.tags.count(), 0)
+ self.assertEqual(d_unrelated.tags.count(), 2)
self.assertEqual(d_first.tags.first(), self.tag_first)
self.assertEqual(d_second.tags.first(), self.tag_second)
@@ -56,3 +62,17 @@ class TestRetagger(DirectoriesMixin, TestCase):
self.assertEqual(d_first.correspondent, self.correspondent_first)
self.assertEqual(d_second.correspondent, self.correspondent_second)
+
+ def test_overwrite_preserve_inbox(self):
+ self.d1.tags.add(self.tag_second)
+
+ call_command('document_retagger', '--tags', '--overwrite')
+
+ d_first, d_second, d_unrelated = self.get_updated_docs()
+
+ self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))
+
+ self.assertCountEqual([tag.id for tag in d_first.tags.all()], [self.tag_first.id])
+ self.assertCountEqual([tag.id for tag in d_second.tags.all()], [self.tag_second.id])
+ self.assertCountEqual([tag.id for tag in d_unrelated.tags.all()], [self.tag_inbox.id, self.tag_no_match.id])
+
diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py
index 1cf6a769c..80e200f27 100644
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -110,6 +110,24 @@ class RasterisedDocumentParser(DocumentParser):
f"Error while getting DPI from image {image}: {e}")
return None
+ def calculate_a4_dpi(self, image):
+ try:
+ with Image.open(image) as im:
+ width, height = im.size
+ # Assume an A4 page: divide the pixel width by the A4 width in inches (21 cm / 2.54 cm per inch).
+ dpi = int(width / (21 / 2.54))
+ self.log(
+ 'debug',
+ f"Estimated DPI {dpi} based on image width {width}"
+ )
+ return dpi
+
+ except Exception as e:
+ self.log(
+ 'warning',
+ f"Error while calculating DPI for image {image}: {e}")
+ return None
+
def parse(self, document_path, mime_type):
mode = settings.OCR_MODE
@@ -162,6 +180,7 @@ class RasterisedDocumentParser(DocumentParser):
if self.is_image(mime_type):
dpi = self.get_dpi(document_path)
+ a4_dpi = self.calculate_a4_dpi(document_path)
if dpi:
self.log(
"debug",
@@ -170,6 +189,8 @@ class RasterisedDocumentParser(DocumentParser):
ocr_args['image_dpi'] = dpi
elif settings.OCR_IMAGE_DPI:
ocr_args['image_dpi'] = settings.OCR_IMAGE_DPI
+ elif a4_dpi:
+ ocr_args['image_dpi'] = a4_dpi
else:
raise ParseError(
f"Cannot produce archive PDF for image {document_path}, "
@@ -241,6 +262,9 @@ def strip_excess_whitespace(text):
def get_text_from_pdf(pdf_file):
+ if not os.path.isfile(pdf_file):
+ return None
+
with open(pdf_file, "rb") as f:
try:
pdf = pdftotext.PDF(f)
diff --git a/src/paperless_tesseract/tests/test_parser.py b/src/paperless_tesseract/tests/test_parser.py
index 8834ec755..7be176663 100644
--- a/src/paperless_tesseract/tests/test_parser.py
+++ b/src/paperless_tesseract/tests/test_parser.py
@@ -164,8 +164,21 @@ class TestParser(DirectoriesMixin, TestCase):
self.assertRaises(ParseError, f)
+ @mock.patch("paperless_tesseract.parsers.ocrmypdf.ocr")
+ def test_image_calc_a4_dpi(self, m):
+ parser = RasterisedDocumentParser(None)
- def test_image_no_dpi_fail(self):
+ parser.parse(os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), "image/png")
+
+ m.assert_called_once()
+
+ args, kwargs = m.call_args
+
+ self.assertEqual(kwargs['image_dpi'], 62)
+
+ @mock.patch("paperless_tesseract.parsers.RasterisedDocumentParser.calculate_a4_dpi")
+ def test_image_dpi_fail(self, m):
+ m.return_value = None
parser = RasterisedDocumentParser(None)
def f():