diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bd6080d35..a8fb1f8e9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -24,3 +24,7 @@ feature-X branches is for experimental stuff that will eventually be merged into I'm trying to get most of paperless tested, so please do the same for your code! I know its a hassle, but it makes sure that your code works now and will allow us to detect regressions easily. To test your code, execute `pytest` in the src/ directory. Executing that in the project root is no good. This also generates a html coverage report, which you can use to see if you missed anything important during testing. + +## More info: + +... is available in the documentation. https://paperless-ng.readthedocs.io/en/latest/extending.html diff --git a/src-ui/package-lock.json b/src-ui/package-lock.json index b6b66e1c6..5eca0b3c0 100644 --- a/src-ui/package-lock.json +++ b/src-ui/package-lock.json @@ -2215,6 +2215,11 @@ "integrity": "sha512-UV1/ZJMC+HcP902wWdpC43cAcGu0IQk/I5bXjP2aSuCjsk3cE74mDvFrLKga7oDC170ugOAYBwfT4DSQW3akDA==", "dev": true }, + "@types/pdfjs-dist": { + "version": "2.1.7", + "resolved": "https://registry.npmjs.org/@types/pdfjs-dist/-/pdfjs-dist-2.1.7.tgz", + "integrity": "sha512-nQIwcPUhkAIyn7x9NS0lR/qxYfd5unRtfGkMjvpgF4Sh28IXftRymaNmFKTTdejDNY25NDGSIyjwj/BRwAPexg==" + }, "@types/q": { "version": "1.5.4", "resolved": "https://registry.npmjs.org/@types/q/-/q-1.5.4.tgz", @@ -3023,6 +3028,16 @@ "integrity": "sha512-1Yj8h9Q+QDF5FzhMs/c9+6UntbD5MkRfRwac8DoEm9ZfUBZ7tZ55YcGVAzEe4bXsdQHEk+s9S5wsOKVdZrw0tQ==", "dev": true }, + "bindings": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", + "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", + "dev": true, + "optional": true, + "requires": { + "file-uri-to-path": "1.0.0" + } + }, "blob": { "version": "0.0.5", "resolved": "https://registry.npmjs.org/blob/-/blob-0.0.5.tgz", @@ -5508,6 +5523,13 @@ "schema-utils": "^2.6.5" } }, + "file-uri-to-path": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", + "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==", + "dev": true, + "optional": true + }, "fill-range": { "version": "7.0.1", "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", @@ -8208,6 +8230,13 @@ "integrity": "sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA==", "dev": true }, + "nan": { + "version": "2.14.2", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.14.2.tgz", + "integrity": "sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ==", + "dev": true, + "optional": true + }, "nanomatch": { "version": "1.2.13", "resolved": "https://registry.npmjs.org/nanomatch/-/nanomatch-1.2.13.tgz", @@ -8260,6 +8289,23 @@ "moment": "2.18.1" } }, + "ng2-pdf-viewer": { + "version": "6.3.2", + "resolved": "https://registry.npmjs.org/ng2-pdf-viewer/-/ng2-pdf-viewer-6.3.2.tgz", + "integrity": "sha512-H2tBhDd+Lq6CUzK2g54HsCcZDR2wTn1sDjYqKY3yF0Ydasl2R5ppCKynZBU/zge4EKvmHglJI120FbQMpJKDYQ==", + "requires": { + "@types/pdfjs-dist": "^2.1.4", + "pdfjs-dist": "^2.4.456", + "tslib": "^1.10.0" + }, + "dependencies": { + "tslib": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", + "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==" + } + } + }, "ngx-cookie-service": { "version": "10.1.1", "resolved": "https://registry.npmjs.org/ngx-cookie-service/-/ngx-cookie-service-10.1.1.tgz", @@ -9270,6 +9316,11 @@ "sha.js": "^2.4.8" } }, + "pdfjs-dist": { + "version": "2.5.207", + "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-2.5.207.tgz", + "integrity": "sha512-xGDUhnCYPfHy+unMXCLCJtlpZaaZ17Ew3WIL0tnSgKFUZXHAPD49GO9xScyszSsQMoutNDgRb+rfBXIaX/lJbw==" + }, "performance-now": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz", @@ -13228,7 +13279,11 @@ "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.13.tgz", "integrity": "sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==", "dev": true, - "optional": true + "optional": true, + "requires": { + "bindings": "^1.5.0", + "nan": "^2.12.1" + } }, "glob-parent": { "version": "3.1.0", @@ -13832,7 +13887,11 @@ "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.13.tgz", "integrity": "sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==", "dev": true, - "optional": true + "optional": true, + "requires": { + "bindings": "^1.5.0", + "nan": "^2.12.1" + } }, "glob-parent": { "version": "3.1.0", diff --git a/src-ui/package.json b/src-ui/package.json index af3334db9..6293f2672 100644 --- a/src-ui/package.json +++ b/src-ui/package.json @@ -23,6 +23,7 @@ "@ng-bootstrap/ng-bootstrap": "^8.0.0", "bootstrap": "^4.5.0", "ng-bootstrap": "^1.6.3", + "ng2-pdf-viewer": "^6.3.2", "ngx-cookie-service": "^10.1.1", "ngx-file-drop": "^10.0.0", "ngx-infinite-scroll": "^9.1.0", diff --git a/src-ui/src/app/app.module.ts b/src-ui/src/app/app.module.ts index af00fde63..befd26639 100644 --- a/src-ui/src/app/app.module.ts +++ b/src-ui/src/app/app.module.ts @@ -14,7 +14,6 @@ import { LogsComponent } from './components/manage/logs/logs.component'; import { SettingsComponent } from './components/manage/settings/settings.component'; import { FormsModule, ReactiveFormsModule } from '@angular/forms'; import { DatePipe } from '@angular/common'; -import { SafePipe } from './pipes/safe.pipe'; import { NotFoundComponent } from './components/not-found/not-found.component'; import { CorrespondentListComponent } from './components/manage/correspondent-list/correspondent-list.component'; import { DeleteDialogComponent } from './components/common/delete-dialog/delete-dialog.component'; @@ -45,6 +44,7 @@ import { SavedViewWidgetComponent } from './components/dashboard/widgets/saved-v import { StatisticsWidgetComponent } from './components/dashboard/widgets/statistics-widget/statistics-widget.component'; import { UploadFileWidgetComponent } from './components/dashboard/widgets/upload-file-widget/upload-file-widget.component'; import { WidgetFrameComponent } from './components/dashboard/widgets/widget-frame/widget-frame.component'; +import { PdfViewerModule } from 'ng2-pdf-viewer'; import { WelcomeWidgetComponent } from './components/dashboard/widgets/welcome-widget/welcome-widget.component'; import { YesNoPipe } from './pipes/yes-no.pipe'; import { FileSizePipe } from './pipes/file-size.pipe'; @@ -62,7 +62,6 @@ import { SelectDialogComponent } from './components/common/select-dialog/select- DocumentTypeListComponent, LogsComponent, SettingsComponent, - SafePipe, NotFoundComponent, CorrespondentEditDialogComponent, DeleteDialogComponent, @@ -102,7 +101,8 @@ import { SelectDialogComponent } from './components/common/select-dialog/select- FormsModule, ReactiveFormsModule, NgxFileDropModule, - InfiniteScrollModule + InfiniteScrollModule, + PdfViewerModule ], providers: [ DatePipe, diff --git a/src-ui/src/app/components/document-detail/document-detail.component.html b/src-ui/src/app/components/document-detail/document-detail.component.html index e0b5c6da9..f9f6e57ef 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.html +++ b/src-ui/src/app/components/document-detail/document-detail.component.html @@ -35,7 +35,7 @@
-
+
@@ -171,11 +171,9 @@
-
- -

Your browser does not support PDFs. - Download the PDF.

-
- +
+
+ +
-
\ No newline at end of file +
diff --git a/src-ui/src/app/components/document-detail/document-detail.component.scss b/src-ui/src/app/components/document-detail/document-detail.component.scss index b1e9fddfb..998653bab 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.scss +++ b/src-ui/src/app/components/document-detail/document-detail.component.scss @@ -1,5 +1,6 @@ -.document-preview { - height: calc(100vh - 180px); +.pdf-viewer-container { + height: calc(100vh - 160px); top: 70px; position: sticky; -} \ No newline at end of file + background-color: gray; +} diff --git a/src-ui/src/app/components/document-detail/document-detail.component.ts b/src-ui/src/app/components/document-detail/document-detail.component.ts index 329077693..c80a8b1ce 100644 --- a/src-ui/src/app/components/document-detail/document-detail.component.ts +++ b/src-ui/src/app/components/document-detail/document-detail.component.ts @@ -59,6 +59,10 @@ export class DocumentDetailComponent implements OnInit { private documentListViewService: DocumentListViewService, private titleService: Title) { } + getContentType() { + return this.metadata?.has_archive_version ? 'application/pdf' : this.metadata?.original_mime_type + } + ngOnInit(): void { this.documentForm.valueChanges.subscribe(wow => { Object.assign(this.document, this.documentForm.value) diff --git a/src-ui/src/app/pipes/safe.pipe.spec.ts b/src-ui/src/app/pipes/safe.pipe.spec.ts deleted file mode 100644 index 49ee0ad14..000000000 --- a/src-ui/src/app/pipes/safe.pipe.spec.ts +++ /dev/null @@ -1,8 +0,0 @@ -import { SafePipe } from './safe.pipe'; - -describe('SafePipe', () => { - it('create an instance', () => { - const pipe = new SafePipe(); - expect(pipe).toBeTruthy(); - }); -}); diff --git a/src-ui/src/app/pipes/safe.pipe.ts b/src-ui/src/app/pipes/safe.pipe.ts deleted file mode 100644 index f2d77a72d..000000000 --- a/src-ui/src/app/pipes/safe.pipe.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { Pipe, PipeTransform } from '@angular/core'; -import { DomSanitizer } from '@angular/platform-browser'; - -@Pipe({ - name: 'safe' -}) -export class SafePipe implements PipeTransform { - - constructor(private sanitizer: DomSanitizer) { } - - transform(url) { - if (url == null) { - return this.sanitizer.bypassSecurityTrustResourceUrl("") - } else { - return this.sanitizer.bypassSecurityTrustResourceUrl(url); - } - } - -} \ No newline at end of file diff --git a/src/documents/management/commands/document_renamer.py b/src/documents/management/commands/document_renamer.py index 5d7d0d90c..745d2d03d 100644 --- a/src/documents/management/commands/document_renamer.py +++ b/src/documents/management/commands/document_renamer.py @@ -2,6 +2,7 @@ import logging import tqdm from django.core.management.base import BaseCommand +from django.db.models.signals import post_save from documents.models import Document from ...mixins import Renderable @@ -24,5 +25,4 @@ class Command(Renderable, BaseCommand): logging.getLogger().handlers[0].level = logging.ERROR for document in tqdm.tqdm(Document.objects.all()): - # Saving the document again will generate a new filename and rename - document.save() + post_save.send(Document, instance=document) diff --git a/src/documents/signals/handlers.py b/src/documents/signals/handlers.py index 4fbbe8f8a..586897585 100755 --- a/src/documents/signals/handlers.py +++ b/src/documents/signals/handlers.py @@ -7,6 +7,7 @@ from django.contrib.admin.models import ADDITION, LogEntry from django.contrib.auth.models import User from django.contrib.contenttypes.models import ContentType from django.db import models, DatabaseError +from django.db.models import Q from django.dispatch import receiver from django.utils import timezone from filelock import FileLock @@ -121,11 +122,14 @@ def set_tags(sender, classifier=None, replace=False, **kwargs): + if replace: - document.tags.clear() - current_tags = set([]) - else: - current_tags = set(document.tags.all()) + Document.tags.through.objects.filter(document=document).exclude( + Q(tag__is_inbox_tag=True)).exclude( + Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO) + ).delete() + + current_tags = set(document.tags.all()) matched_tags = matching.match_tags(document.content, classifier) diff --git a/src/documents/tests/test_management_retagger.py b/src/documents/tests/test_management_retagger.py index 2346b6527..907a23d09 100644 --- a/src/documents/tests/test_management_retagger.py +++ b/src/documents/tests/test_management_retagger.py @@ -14,6 +14,12 @@ class TestRetagger(DirectoriesMixin, TestCase): self.tag_first = Tag.objects.create(name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY) self.tag_second = Tag.objects.create(name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY) + self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True) + self.tag_no_match = Tag.objects.create(name="test2") + + self.d3.tags.add(self.tag_inbox) + self.d3.tags.add(self.tag_no_match) + self.correspondent_first = Correspondent.objects.create( name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY) @@ -38,7 +44,7 @@ class TestRetagger(DirectoriesMixin, TestCase): self.assertEqual(d_first.tags.count(), 1) self.assertEqual(d_second.tags.count(), 1) - self.assertEqual(d_unrelated.tags.count(), 0) + self.assertEqual(d_unrelated.tags.count(), 2) self.assertEqual(d_first.tags.first(), self.tag_first) self.assertEqual(d_second.tags.first(), self.tag_second) @@ -56,3 +62,17 @@ class TestRetagger(DirectoriesMixin, TestCase): self.assertEqual(d_first.correspondent, self.correspondent_first) self.assertEqual(d_second.correspondent, self.correspondent_second) + + def test_overwrite_preserve_inbox(self): + self.d1.tags.add(self.tag_second) + + call_command('document_retagger', '--tags', '--overwrite') + + d_first, d_second, d_unrelated = self.get_updated_docs() + + self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id)) + + self.assertCountEqual([tag.id for tag in d_first.tags.all()], [self.tag_first.id]) + self.assertCountEqual([tag.id for tag in d_second.tags.all()], [self.tag_second.id]) + self.assertCountEqual([tag.id for tag in d_unrelated.tags.all()], [self.tag_inbox.id, self.tag_no_match.id]) + diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index 1cf6a769c..80e200f27 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -110,6 +110,24 @@ class RasterisedDocumentParser(DocumentParser): f"Error while getting DPI from image {image}: {e}") return None + def calculate_a4_dpi(self, image): + try: + with Image.open(image) as im: + width, height = im.size + # divide image width by A4 width (210mm) in inches. + dpi = int(width / (21 / 2.54)) + self.log( + 'debug', + f"Estimated DPI {dpi} based on image width {width}" + ) + return dpi + + except Exception as e: + self.log( + 'warning', + f"Error while calculating DPI for image {image}: {e}") + return None + def parse(self, document_path, mime_type): mode = settings.OCR_MODE @@ -162,6 +180,7 @@ class RasterisedDocumentParser(DocumentParser): if self.is_image(mime_type): dpi = self.get_dpi(document_path) + a4_dpi = self.calculate_a4_dpi(document_path) if dpi: self.log( "debug", @@ -170,6 +189,8 @@ class RasterisedDocumentParser(DocumentParser): ocr_args['image_dpi'] = dpi elif settings.OCR_IMAGE_DPI: ocr_args['image_dpi'] = settings.OCR_IMAGE_DPI + elif a4_dpi: + ocr_args['image_dpi'] = a4_dpi else: raise ParseError( f"Cannot produce archive PDF for image {document_path}, " @@ -241,6 +262,9 @@ def strip_excess_whitespace(text): def get_text_from_pdf(pdf_file): + if not os.path.isfile(pdf_file): + return None + with open(pdf_file, "rb") as f: try: pdf = pdftotext.PDF(f) diff --git a/src/paperless_tesseract/tests/test_parser.py b/src/paperless_tesseract/tests/test_parser.py index 8834ec755..7be176663 100644 --- a/src/paperless_tesseract/tests/test_parser.py +++ b/src/paperless_tesseract/tests/test_parser.py @@ -164,8 +164,21 @@ class TestParser(DirectoriesMixin, TestCase): self.assertRaises(ParseError, f) + @mock.patch("paperless_tesseract.parsers.ocrmypdf.ocr") + def test_image_calc_a4_dpi(self, m): + parser = RasterisedDocumentParser(None) - def test_image_no_dpi_fail(self): + parser.parse(os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), "image/png") + + m.assert_called_once() + + args, kwargs = m.call_args + + self.assertEqual(kwargs['image_dpi'], 62) + + @mock.patch("paperless_tesseract.parsers.RasterisedDocumentParser.calculate_a4_dpi") + def test_image_dpi_fail(self, m): + m.return_value = None parser = RasterisedDocumentParser(None) def f():