mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Merge branch 'dev' into feature-bulk-edit
This commit is contained in:
		| @@ -24,3 +24,7 @@ feature-X branches is for experimental stuff that will eventually be merged into | ||||
| I'm trying to get most of paperless tested, so please do the same for your code! I know it's a hassle, but it makes sure that your code works now and will allow us to detect regressions easily. | ||||
|  | ||||
| To test your code, execute `pytest` in the src/ directory. Executing that in the project root is no good. This also generates an HTML coverage report, which you can use to see if you missed anything important during testing. | ||||
|  | ||||
| ## More info: | ||||
|  | ||||
| ... is available in the documentation. https://paperless-ng.readthedocs.io/en/latest/extending.html | ||||
|   | ||||
							
								
								
									
										63
									
								
								src-ui/package-lock.json
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										63
									
								
								src-ui/package-lock.json
									
									
									
										generated
									
									
									
								
							| @@ -2215,6 +2215,11 @@ | ||||
|       "integrity": "sha512-UV1/ZJMC+HcP902wWdpC43cAcGu0IQk/I5bXjP2aSuCjsk3cE74mDvFrLKga7oDC170ugOAYBwfT4DSQW3akDA==", | ||||
|       "dev": true | ||||
|     }, | ||||
|     "@types/pdfjs-dist": { | ||||
|       "version": "2.1.7", | ||||
|       "resolved": "https://registry.npmjs.org/@types/pdfjs-dist/-/pdfjs-dist-2.1.7.tgz", | ||||
|       "integrity": "sha512-nQIwcPUhkAIyn7x9NS0lR/qxYfd5unRtfGkMjvpgF4Sh28IXftRymaNmFKTTdejDNY25NDGSIyjwj/BRwAPexg==" | ||||
|     }, | ||||
|     "@types/q": { | ||||
|       "version": "1.5.4", | ||||
|       "resolved": "https://registry.npmjs.org/@types/q/-/q-1.5.4.tgz", | ||||
| @@ -3023,6 +3028,16 @@ | ||||
|       "integrity": "sha512-1Yj8h9Q+QDF5FzhMs/c9+6UntbD5MkRfRwac8DoEm9ZfUBZ7tZ55YcGVAzEe4bXsdQHEk+s9S5wsOKVdZrw0tQ==", | ||||
|       "dev": true | ||||
|     }, | ||||
|     "bindings": { | ||||
|       "version": "1.5.0", | ||||
|       "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", | ||||
|       "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", | ||||
|       "dev": true, | ||||
|       "optional": true, | ||||
|       "requires": { | ||||
|         "file-uri-to-path": "1.0.0" | ||||
|       } | ||||
|     }, | ||||
|     "blob": { | ||||
|       "version": "0.0.5", | ||||
|       "resolved": "https://registry.npmjs.org/blob/-/blob-0.0.5.tgz", | ||||
| @@ -5508,6 +5523,13 @@ | ||||
|         "schema-utils": "^2.6.5" | ||||
|       } | ||||
|     }, | ||||
|     "file-uri-to-path": { | ||||
|       "version": "1.0.0", | ||||
|       "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", | ||||
|       "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==", | ||||
|       "dev": true, | ||||
|       "optional": true | ||||
|     }, | ||||
|     "fill-range": { | ||||
|       "version": "7.0.1", | ||||
|       "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", | ||||
| @@ -8208,6 +8230,13 @@ | ||||
|       "integrity": "sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA==", | ||||
|       "dev": true | ||||
|     }, | ||||
|     "nan": { | ||||
|       "version": "2.14.2", | ||||
|       "resolved": "https://registry.npmjs.org/nan/-/nan-2.14.2.tgz", | ||||
|       "integrity": "sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ==", | ||||
|       "dev": true, | ||||
|       "optional": true | ||||
|     }, | ||||
|     "nanomatch": { | ||||
|       "version": "1.2.13", | ||||
|       "resolved": "https://registry.npmjs.org/nanomatch/-/nanomatch-1.2.13.tgz", | ||||
| @@ -8260,6 +8289,23 @@ | ||||
|         "moment": "2.18.1" | ||||
|       } | ||||
|     }, | ||||
|     "ng2-pdf-viewer": { | ||||
|       "version": "6.3.2", | ||||
|       "resolved": "https://registry.npmjs.org/ng2-pdf-viewer/-/ng2-pdf-viewer-6.3.2.tgz", | ||||
|       "integrity": "sha512-H2tBhDd+Lq6CUzK2g54HsCcZDR2wTn1sDjYqKY3yF0Ydasl2R5ppCKynZBU/zge4EKvmHglJI120FbQMpJKDYQ==", | ||||
|       "requires": { | ||||
|         "@types/pdfjs-dist": "^2.1.4", | ||||
|         "pdfjs-dist": "^2.4.456", | ||||
|         "tslib": "^1.10.0" | ||||
|       }, | ||||
|       "dependencies": { | ||||
|         "tslib": { | ||||
|           "version": "1.14.1", | ||||
|           "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", | ||||
|           "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==" | ||||
|         } | ||||
|       } | ||||
|     }, | ||||
|     "ngx-cookie-service": { | ||||
|       "version": "10.1.1", | ||||
|       "resolved": "https://registry.npmjs.org/ngx-cookie-service/-/ngx-cookie-service-10.1.1.tgz", | ||||
| @@ -9270,6 +9316,11 @@ | ||||
|         "sha.js": "^2.4.8" | ||||
|       } | ||||
|     }, | ||||
|     "pdfjs-dist": { | ||||
|       "version": "2.5.207", | ||||
|       "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-2.5.207.tgz", | ||||
|       "integrity": "sha512-xGDUhnCYPfHy+unMXCLCJtlpZaaZ17Ew3WIL0tnSgKFUZXHAPD49GO9xScyszSsQMoutNDgRb+rfBXIaX/lJbw==" | ||||
|     }, | ||||
|     "performance-now": { | ||||
|       "version": "2.1.0", | ||||
|       "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz", | ||||
| @@ -13228,7 +13279,11 @@ | ||||
|           "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.13.tgz", | ||||
|           "integrity": "sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==", | ||||
|           "dev": true, | ||||
|           "optional": true | ||||
|           "optional": true, | ||||
|           "requires": { | ||||
|             "bindings": "^1.5.0", | ||||
|             "nan": "^2.12.1" | ||||
|           } | ||||
|         }, | ||||
|         "glob-parent": { | ||||
|           "version": "3.1.0", | ||||
| @@ -13832,7 +13887,11 @@ | ||||
|           "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.13.tgz", | ||||
|           "integrity": "sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==", | ||||
|           "dev": true, | ||||
|           "optional": true | ||||
|           "optional": true, | ||||
|           "requires": { | ||||
|             "bindings": "^1.5.0", | ||||
|             "nan": "^2.12.1" | ||||
|           } | ||||
|         }, | ||||
|         "glob-parent": { | ||||
|           "version": "3.1.0", | ||||
|   | ||||
| @@ -23,6 +23,7 @@ | ||||
|     "@ng-bootstrap/ng-bootstrap": "^8.0.0", | ||||
|     "bootstrap": "^4.5.0", | ||||
|     "ng-bootstrap": "^1.6.3", | ||||
|     "ng2-pdf-viewer": "^6.3.2", | ||||
|     "ngx-cookie-service": "^10.1.1", | ||||
|     "ngx-file-drop": "^10.0.0", | ||||
|     "ngx-infinite-scroll": "^9.1.0", | ||||
|   | ||||
| @@ -14,7 +14,6 @@ import { LogsComponent } from './components/manage/logs/logs.component'; | ||||
| import { SettingsComponent } from './components/manage/settings/settings.component'; | ||||
| import { FormsModule, ReactiveFormsModule } from '@angular/forms'; | ||||
| import { DatePipe } from '@angular/common'; | ||||
| import { SafePipe } from './pipes/safe.pipe'; | ||||
| import { NotFoundComponent } from './components/not-found/not-found.component'; | ||||
| import { CorrespondentListComponent } from './components/manage/correspondent-list/correspondent-list.component'; | ||||
| import { DeleteDialogComponent } from './components/common/delete-dialog/delete-dialog.component'; | ||||
| @@ -45,6 +44,7 @@ import { SavedViewWidgetComponent } from './components/dashboard/widgets/saved-v | ||||
| import { StatisticsWidgetComponent } from './components/dashboard/widgets/statistics-widget/statistics-widget.component'; | ||||
| import { UploadFileWidgetComponent } from './components/dashboard/widgets/upload-file-widget/upload-file-widget.component'; | ||||
| import { WidgetFrameComponent } from './components/dashboard/widgets/widget-frame/widget-frame.component'; | ||||
| import { PdfViewerModule } from 'ng2-pdf-viewer'; | ||||
| import { WelcomeWidgetComponent } from './components/dashboard/widgets/welcome-widget/welcome-widget.component'; | ||||
| import { YesNoPipe } from './pipes/yes-no.pipe'; | ||||
| import { FileSizePipe } from './pipes/file-size.pipe'; | ||||
| @@ -62,7 +62,6 @@ import { SelectDialogComponent } from './components/common/select-dialog/select- | ||||
|     DocumentTypeListComponent, | ||||
|     LogsComponent, | ||||
|     SettingsComponent, | ||||
|     SafePipe, | ||||
|     NotFoundComponent, | ||||
|     CorrespondentEditDialogComponent, | ||||
|     DeleteDialogComponent, | ||||
| @@ -102,7 +101,8 @@ import { SelectDialogComponent } from './components/common/select-dialog/select- | ||||
|     FormsModule, | ||||
|     ReactiveFormsModule, | ||||
|     NgxFileDropModule, | ||||
|     InfiniteScrollModule | ||||
|     InfiniteScrollModule, | ||||
|     PdfViewerModule | ||||
|   ], | ||||
|   providers: [ | ||||
|     DatePipe, | ||||
|   | ||||
| @@ -35,7 +35,7 @@ | ||||
|  | ||||
|  | ||||
| <div class="row"> | ||||
|     <div class="col-xl"> | ||||
|     <div class="col mb-4"> | ||||
|  | ||||
|         <form [formGroup]='documentForm' (ngSubmit)="save()"> | ||||
|  | ||||
| @@ -171,11 +171,9 @@ | ||||
|         </form> | ||||
|     </div> | ||||
|  | ||||
|     <div class="col-xl d-none d-xl-block document-preview"> | ||||
|         <object [data]="previewUrl | safe" type="application/pdf" width="100%" height="100%"> | ||||
|             <p>Your browser does not support PDFs. | ||||
|                 <a href="previewUrl">Download the PDF</a>.</p> | ||||
|         </object> | ||||
|  | ||||
|     <div class="col-md-6 col-xl-8 mb-3"> | ||||
|       <div class="pdf-viewer-container" *ngIf="getContentType() == 'application/pdf'"> | ||||
|         <pdf-viewer [src]="previewUrl" [original-size]="false" [show-borders]="true"></pdf-viewer> | ||||
|       </div> | ||||
|     </div> | ||||
| </div> | ||||
| @@ -1,5 +1,6 @@ | ||||
| .document-preview { | ||||
|   height: calc(100vh - 180px); | ||||
| .pdf-viewer-container { | ||||
|   height: calc(100vh - 160px); | ||||
|   top: 70px; | ||||
|   position: sticky; | ||||
|   background-color: gray; | ||||
| } | ||||
| @@ -59,6 +59,10 @@ export class DocumentDetailComponent implements OnInit { | ||||
|     private documentListViewService: DocumentListViewService, | ||||
|     private titleService: Title) { } | ||||
|  | ||||
|   getContentType() { | ||||
|     return this.metadata?.has_archive_version ? 'application/pdf' : this.metadata?.original_mime_type | ||||
|   } | ||||
|  | ||||
|   ngOnInit(): void { | ||||
|     this.documentForm.valueChanges.subscribe(wow => { | ||||
|       Object.assign(this.document, this.documentForm.value) | ||||
|   | ||||
| @@ -1,8 +0,0 @@ | ||||
| import { SafePipe } from './safe.pipe'; | ||||
|  | ||||
| describe('SafePipe', () => { | ||||
|   it('create an instance', () => { | ||||
|     const pipe = new SafePipe(); | ||||
|     expect(pipe).toBeTruthy(); | ||||
|   }); | ||||
| }); | ||||
| @@ -1,19 +0,0 @@ | ||||
| import { Pipe, PipeTransform } from '@angular/core'; | ||||
| import { DomSanitizer } from '@angular/platform-browser'; | ||||
|  | ||||
| @Pipe({ | ||||
|   name: 'safe' | ||||
| }) | ||||
| export class SafePipe implements PipeTransform { | ||||
|  | ||||
|   constructor(private sanitizer: DomSanitizer) { } | ||||
|  | ||||
|   transform(url) { | ||||
|     if (url == null) { | ||||
|       return this.sanitizer.bypassSecurityTrustResourceUrl("") | ||||
|     } else { | ||||
|       return this.sanitizer.bypassSecurityTrustResourceUrl(url); | ||||
|     } | ||||
|   } | ||||
|  | ||||
| } | ||||
| @@ -2,6 +2,7 @@ import logging | ||||
|  | ||||
| import tqdm | ||||
| from django.core.management.base import BaseCommand | ||||
| from django.db.models.signals import post_save | ||||
|  | ||||
| from documents.models import Document | ||||
| from ...mixins import Renderable | ||||
| @@ -24,5 +25,4 @@ class Command(Renderable, BaseCommand): | ||||
|         logging.getLogger().handlers[0].level = logging.ERROR | ||||
|  | ||||
|         for document in tqdm.tqdm(Document.objects.all()): | ||||
|             # Saving the document again will generate a new filename and rename | ||||
|             document.save() | ||||
|             post_save.send(Document, instance=document) | ||||
|   | ||||
| @@ -7,6 +7,7 @@ from django.contrib.admin.models import ADDITION, LogEntry | ||||
| from django.contrib.auth.models import User | ||||
| from django.contrib.contenttypes.models import ContentType | ||||
| from django.db import models, DatabaseError | ||||
| from django.db.models import Q | ||||
| from django.dispatch import receiver | ||||
| from django.utils import timezone | ||||
| from filelock import FileLock | ||||
| @@ -121,11 +122,14 @@ def set_tags(sender, | ||||
|              classifier=None, | ||||
|              replace=False, | ||||
|              **kwargs): | ||||
|  | ||||
|     if replace: | ||||
|         document.tags.clear() | ||||
|         current_tags = set([]) | ||||
|     else: | ||||
|         current_tags = set(document.tags.all()) | ||||
|         Document.tags.through.objects.filter(document=document).exclude( | ||||
|             Q(tag__is_inbox_tag=True)).exclude( | ||||
|             Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO) | ||||
|         ).delete() | ||||
|  | ||||
|     current_tags = set(document.tags.all()) | ||||
|  | ||||
|     matched_tags = matching.match_tags(document.content, classifier) | ||||
|  | ||||
|   | ||||
| @@ -14,6 +14,12 @@ class TestRetagger(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         self.tag_first = Tag.objects.create(name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY) | ||||
|         self.tag_second = Tag.objects.create(name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY) | ||||
|         self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True) | ||||
|         self.tag_no_match = Tag.objects.create(name="test2") | ||||
|  | ||||
|         self.d3.tags.add(self.tag_inbox) | ||||
|         self.d3.tags.add(self.tag_no_match) | ||||
|  | ||||
|  | ||||
|         self.correspondent_first = Correspondent.objects.create( | ||||
|             name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY) | ||||
| @@ -38,7 +44,7 @@ class TestRetagger(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         self.assertEqual(d_first.tags.count(), 1) | ||||
|         self.assertEqual(d_second.tags.count(), 1) | ||||
|         self.assertEqual(d_unrelated.tags.count(), 0) | ||||
|         self.assertEqual(d_unrelated.tags.count(), 2) | ||||
|  | ||||
|         self.assertEqual(d_first.tags.first(), self.tag_first) | ||||
|         self.assertEqual(d_second.tags.first(), self.tag_second) | ||||
| @@ -56,3 +62,17 @@ class TestRetagger(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         self.assertEqual(d_first.correspondent, self.correspondent_first) | ||||
|         self.assertEqual(d_second.correspondent, self.correspondent_second) | ||||
|  | ||||
|     def test_overwrite_preserve_inbox(self): | ||||
|         self.d1.tags.add(self.tag_second) | ||||
|  | ||||
|         call_command('document_retagger', '--tags', '--overwrite') | ||||
|  | ||||
|         d_first, d_second, d_unrelated = self.get_updated_docs() | ||||
|  | ||||
|         self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id)) | ||||
|  | ||||
|         self.assertCountEqual([tag.id for tag in d_first.tags.all()], [self.tag_first.id]) | ||||
|         self.assertCountEqual([tag.id for tag in d_second.tags.all()], [self.tag_second.id]) | ||||
|         self.assertCountEqual([tag.id for tag in d_unrelated.tags.all()], [self.tag_inbox.id, self.tag_no_match.id]) | ||||
|  | ||||
|   | ||||
| @@ -110,6 +110,24 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|                 f"Error while getting DPI from image {image}: {e}") | ||||
|             return None | ||||
|  | ||||
|     def calculate_a4_dpi(self, image): | ||||
|         try: | ||||
|             with Image.open(image) as im: | ||||
|                 width, height = im.size | ||||
|                 # divide image width by A4 width (210mm) in inches. | ||||
|                 dpi = int(width / (21 / 2.54)) | ||||
|                 self.log( | ||||
|                     'debug', | ||||
|                     f"Estimated DPI {dpi} based on image width {width}" | ||||
|                 ) | ||||
|                 return dpi | ||||
|  | ||||
|         except Exception as e: | ||||
|             self.log( | ||||
|                 'warning', | ||||
|                 f"Error while calculating DPI for image {image}: {e}") | ||||
|             return None | ||||
|  | ||||
|     def parse(self, document_path, mime_type): | ||||
|         mode = settings.OCR_MODE | ||||
|  | ||||
| @@ -162,6 +180,7 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|  | ||||
|         if self.is_image(mime_type): | ||||
|             dpi = self.get_dpi(document_path) | ||||
|             a4_dpi = self.calculate_a4_dpi(document_path) | ||||
|             if dpi: | ||||
|                 self.log( | ||||
|                     "debug", | ||||
| @@ -170,6 +189,8 @@ class RasterisedDocumentParser(DocumentParser): | ||||
|                 ocr_args['image_dpi'] = dpi | ||||
|             elif settings.OCR_IMAGE_DPI: | ||||
|                 ocr_args['image_dpi'] = settings.OCR_IMAGE_DPI | ||||
|             elif a4_dpi: | ||||
|                 ocr_args['image_dpi'] = a4_dpi | ||||
|             else: | ||||
|                 raise ParseError( | ||||
|                     f"Cannot produce archive PDF for image {document_path}, " | ||||
| @@ -241,6 +262,9 @@ def strip_excess_whitespace(text): | ||||
|  | ||||
| def get_text_from_pdf(pdf_file): | ||||
|  | ||||
|     if not os.path.isfile(pdf_file): | ||||
|         return None | ||||
|  | ||||
|     with open(pdf_file, "rb") as f: | ||||
|         try: | ||||
|             pdf = pdftotext.PDF(f) | ||||
|   | ||||
| @@ -164,8 +164,21 @@ class TestParser(DirectoriesMixin, TestCase): | ||||
|  | ||||
|         self.assertRaises(ParseError, f) | ||||
|  | ||||
|     @mock.patch("paperless_tesseract.parsers.ocrmypdf.ocr") | ||||
|     def test_image_calc_a4_dpi(self, m): | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|  | ||||
|     def test_image_no_dpi_fail(self): | ||||
|         parser.parse(os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), "image/png") | ||||
|  | ||||
|         m.assert_called_once() | ||||
|  | ||||
|         args, kwargs = m.call_args | ||||
|  | ||||
|         self.assertEqual(kwargs['image_dpi'], 62) | ||||
|  | ||||
|     @mock.patch("paperless_tesseract.parsers.RasterisedDocumentParser.calculate_a4_dpi") | ||||
|     def test_image_dpi_fail(self, m): | ||||
|         m.return_value = None | ||||
|         parser = RasterisedDocumentParser(None) | ||||
|  | ||||
|         def f(): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 jonaswinkler
					jonaswinkler