mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-30 03:56:23 -05:00 
			
		
		
		
	Feature: Upgrade Gotenberg to v8 (#7094)
This commit is contained in:
		
							
								
								
									
										6
									
								
								Pipfile.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										6
									
								
								Pipfile.lock
									
									
									
										generated
									
									
									
								
							| @@ -602,12 +602,12 @@ | |||||||
|         }, |         }, | ||||||
|         "gotenberg-client": { |         "gotenberg-client": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|                 "sha256:097151c959d9ad9c6292694dac454a07a511489a353086df924f489190084425", |                 "sha256:94e12d1e2ebaaf8552008c95553f5c635c6b8beef401c65cee6ba0d73bbb146b", | ||||||
|                 "sha256:3d6c0449fd1afb82206bfdc2edacfe1d7d98e9de7207332b696b97a3d4dfba6b" |                 "sha256:b0829ae666ee75fc2bd61cf055c2dea9877f73d52a3b4e1998eea4cee14923f5" | ||||||
|             ], |             ], | ||||||
|             "index": "pypi", |             "index": "pypi", | ||||||
|             "markers": "python_version >= '3.8'", |             "markers": "python_version >= '3.8'", | ||||||
|             "version": "==0.5.0" |             "version": "==0.6.0" | ||||||
|         }, |         }, | ||||||
|         "gunicorn": { |         "gunicorn": { | ||||||
|             "hashes": [ |             "hashes": [ | ||||||
|   | |||||||
| @@ -5,7 +5,7 @@ | |||||||
|  |  | ||||||
| services: | services: | ||||||
|   gotenberg: |   gotenberg: | ||||||
|     image: docker.io/gotenberg/gotenberg:7.10 |     image: docker.io/gotenberg/gotenberg:8.7 | ||||||
|     hostname: gotenberg |     hostname: gotenberg | ||||||
|     container_name: gotenberg |     container_name: gotenberg | ||||||
|     network_mode: host |     network_mode: host | ||||||
|   | |||||||
| @@ -77,7 +77,7 @@ services: | |||||||
|       PAPERLESS_TIKA_ENDPOINT: http://tika:9998 |       PAPERLESS_TIKA_ENDPOINT: http://tika:9998 | ||||||
|  |  | ||||||
|   gotenberg: |   gotenberg: | ||||||
|     image: docker.io/gotenberg/gotenberg:7.10 |     image: docker.io/gotenberg/gotenberg:8.7 | ||||||
|     restart: unless-stopped |     restart: unless-stopped | ||||||
|     # The gotenberg chromium route is used to convert .eml files. We do not |     # The gotenberg chromium route is used to convert .eml files. We do not | ||||||
|     # want to allow external content like tracking pixels or even javascript. |     # want to allow external content like tracking pixels or even javascript. | ||||||
|   | |||||||
| @@ -71,7 +71,7 @@ services: | |||||||
|       PAPERLESS_TIKA_ENDPOINT: http://tika:9998 |       PAPERLESS_TIKA_ENDPOINT: http://tika:9998 | ||||||
|  |  | ||||||
|   gotenberg: |   gotenberg: | ||||||
|     image: docker.io/gotenberg/gotenberg:7.10 |     image: docker.io/gotenberg/gotenberg:8.7 | ||||||
|     restart: unless-stopped |     restart: unless-stopped | ||||||
|  |  | ||||||
|     # The gotenberg chromium route is used to convert .eml files. We do not |     # The gotenberg chromium route is used to convert .eml files. We do not | ||||||
|   | |||||||
| @@ -59,7 +59,7 @@ services: | |||||||
|       PAPERLESS_TIKA_ENDPOINT: http://tika:9998 |       PAPERLESS_TIKA_ENDPOINT: http://tika:9998 | ||||||
|  |  | ||||||
|   gotenberg: |   gotenberg: | ||||||
|     image: docker.io/gotenberg/gotenberg:7.10 |     image: docker.io/gotenberg/gotenberg:8.7 | ||||||
|     restart: unless-stopped |     restart: unless-stopped | ||||||
|  |  | ||||||
|     # The gotenberg chromium route is used to convert .eml files. We do not |     # The gotenberg chromium route is used to convert .eml files. We do not | ||||||
|   | |||||||
| @@ -9,7 +9,9 @@ from django.conf import settings | |||||||
| from django.utils.timezone import is_naive | from django.utils.timezone import is_naive | ||||||
| from django.utils.timezone import make_aware | from django.utils.timezone import make_aware | ||||||
| from gotenberg_client import GotenbergClient | from gotenberg_client import GotenbergClient | ||||||
| from gotenberg_client.options import Margin | from gotenberg_client.options import MarginType | ||||||
|  | from gotenberg_client.options import MarginUnitType | ||||||
|  | from gotenberg_client.options import PageMarginsType | ||||||
| from gotenberg_client.options import PageSize | from gotenberg_client.options import PageSize | ||||||
| from gotenberg_client.options import PdfAFormat | from gotenberg_client.options import PdfAFormat | ||||||
| from humanize import naturalsize | from humanize import naturalsize | ||||||
| @@ -20,6 +22,7 @@ from tika_client import TikaClient | |||||||
| from documents.parsers import DocumentParser | from documents.parsers import DocumentParser | ||||||
| from documents.parsers import ParseError | from documents.parsers import ParseError | ||||||
| from documents.parsers import make_thumbnail_from_pdf | from documents.parsers import make_thumbnail_from_pdf | ||||||
|  | from paperless.models import OutputTypeChoices | ||||||
|  |  | ||||||
|  |  | ||||||
| class MailDocumentParser(DocumentParser): | class MailDocumentParser(DocumentParser): | ||||||
| @@ -30,17 +33,22 @@ class MailDocumentParser(DocumentParser): | |||||||
|  |  | ||||||
|     logging_name = "paperless.parsing.mail" |     logging_name = "paperless.parsing.mail" | ||||||
|  |  | ||||||
|     @staticmethod |     def _settings_to_gotenberg_pdfa(self) -> Optional[PdfAFormat]: | ||||||
|     def _settings_to_gotenberg_pdfa() -> Optional[PdfAFormat]: |  | ||||||
|         """ |         """ | ||||||
|         Converts our requested PDF/A output into the Gotenberg API |         Converts our requested PDF/A output into the Gotenberg API | ||||||
|         format |         format | ||||||
|         """ |         """ | ||||||
|         if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}: |         if settings.OCR_OUTPUT_TYPE in { | ||||||
|  |             OutputTypeChoices.PDF_A, | ||||||
|  |             OutputTypeChoices.PDF_A2, | ||||||
|  |         }: | ||||||
|             return PdfAFormat.A2b |             return PdfAFormat.A2b | ||||||
|         elif settings.OCR_OUTPUT_TYPE == "pdfa-1":  # pragma: no cover |         elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A1:  # pragma: no cover | ||||||
|             return PdfAFormat.A1a |             self.log.warn( | ||||||
|         elif settings.OCR_OUTPUT_TYPE == "pdfa-3":  # pragma: no cover |                 "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead", | ||||||
|  |             ) | ||||||
|  |             return PdfAFormat.A2b | ||||||
|  |         elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A3:  # pragma: no cover | ||||||
|             return PdfAFormat.A3b |             return PdfAFormat.A3b | ||||||
|         return None |         return None | ||||||
|  |  | ||||||
| @@ -329,7 +337,14 @@ class MailDocumentParser(DocumentParser): | |||||||
|                 response = ( |                 response = ( | ||||||
|                     route.index(email_html_file) |                     route.index(email_html_file) | ||||||
|                     .resource(css_file) |                     .resource(css_file) | ||||||
|                     .margins(Margin(top=0.1, bottom=0.1, left=0.1, right=0.1)) |                     .margins( | ||||||
|  |                         PageMarginsType( | ||||||
|  |                             top=MarginType(0.1, MarginUnitType.Inches), | ||||||
|  |                             bottom=MarginType(0.1, MarginUnitType.Inches), | ||||||
|  |                             left=MarginType(0.1, MarginUnitType.Inches), | ||||||
|  |                             right=MarginType(0.1, MarginUnitType.Inches), | ||||||
|  |                         ), | ||||||
|  |                     ) | ||||||
|                     .size(PageSize(height=11.7, width=8.27)) |                     .size(PageSize(height=11.7, width=8.27)) | ||||||
|                     .scale(1.0) |                     .scale(1.0) | ||||||
|                     .run() |                     .run() | ||||||
| @@ -404,7 +419,14 @@ class MailDocumentParser(DocumentParser): | |||||||
|             route.index(html_clean_file) |             route.index(html_clean_file) | ||||||
|  |  | ||||||
|             # Set page size, margins |             # Set page size, margins | ||||||
|             route.margins(Margin(top=0.1, bottom=0.1, left=0.1, right=0.1)).size( |             route.margins( | ||||||
|  |                 PageMarginsType( | ||||||
|  |                     top=MarginType(0.1, MarginUnitType.Inches), | ||||||
|  |                     bottom=MarginType(0.1, MarginUnitType.Inches), | ||||||
|  |                     left=MarginType(0.1, MarginUnitType.Inches), | ||||||
|  |                     right=MarginType(0.1, MarginUnitType.Inches), | ||||||
|  |                 ), | ||||||
|  |             ).size( | ||||||
|                 PageSize(height=11.7, width=8.27), |                 PageSize(height=11.7, width=8.27), | ||||||
|             ).scale(1.0) |             ).scale(1.0) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -102,7 +102,10 @@ class TikaDocumentParser(DocumentParser): | |||||||
|             }: |             }: | ||||||
|                 route.pdf_format(PdfAFormat.A2b) |                 route.pdf_format(PdfAFormat.A2b) | ||||||
|             elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A1: |             elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A1: | ||||||
|                 route.pdf_format(PdfAFormat.A1a) |                 self.log.warn( | ||||||
|  |                     "Gotenberg does not support PDF/A-1a, choosing PDF/A-2b instead", | ||||||
|  |                 ) | ||||||
|  |                 route.pdf_format(PdfAFormat.A2b) | ||||||
|             elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A3: |             elif settings.OCR_OUTPUT_TYPE == OutputTypeChoices.PDF_A3: | ||||||
|                 route.pdf_format(PdfAFormat.A3b) |                 route.pdf_format(PdfAFormat.A3b) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -109,7 +109,7 @@ class TestTikaParser(HttpxMockMixin, TestCase): | |||||||
|         for setting, expected_key in [ |         for setting, expected_key in [ | ||||||
|             ("pdfa", "PDF/A-2b"), |             ("pdfa", "PDF/A-2b"), | ||||||
|             ("pdfa-2", "PDF/A-2b"), |             ("pdfa-2", "PDF/A-2b"), | ||||||
|             ("pdfa-1", "PDF/A-1a"), |             ("pdfa-1", "PDF/A-2b"), | ||||||
|             ("pdfa-3", "PDF/A-3b"), |             ("pdfa-3", "PDF/A-3b"), | ||||||
|         ]: |         ]: | ||||||
|             with override_settings(OCR_OUTPUT_TYPE=setting): |             with override_settings(OCR_OUTPUT_TYPE=setting): | ||||||
| @@ -124,9 +124,10 @@ class TestTikaParser(HttpxMockMixin, TestCase): | |||||||
|                 request = self.httpx_mock.get_request() |                 request = self.httpx_mock.get_request() | ||||||
|                 found = False |                 found = False | ||||||
|                 for field in request.stream.fields: |                 for field in request.stream.fields: | ||||||
|                     if isinstance(field, DataField) and field.name == "pdfFormat": |                     if isinstance(field, DataField) and field.name == "pdfa": | ||||||
|                         self.assertEqual(field.value, expected_key) |                         self.assertEqual(field.value, expected_key) | ||||||
|                         found = True |                         found = True | ||||||
|  |                         break | ||||||
|                 self.assertTrue(found) |                 self.assertTrue(found) | ||||||
|  |  | ||||||
|                 self.httpx_mock.reset(assert_all_responses_were_requested=False) |                 self.httpx_mock.reset(assert_all_responses_were_requested=False) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Trenton H
					Trenton H