diff --git a/src/paperless_mail/parsers.py b/src/paperless_mail/parsers.py index cc5d4e3c8..f1ee263aa 100644 --- a/src/paperless_mail/parsers.py +++ b/src/paperless_mail/parsers.py @@ -271,6 +271,16 @@ class MailDocumentParser(DocumentParser): "paperHeight": "11.7", "scale": "1.0", } + + # Set the output format of the resulting PDF + # Valid inputs: https://gotenberg.dev/docs/modules/pdf-engines#uno + if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}: + data["pdfFormat"] = "PDF/A-2b" + elif settings.OCR_OUTPUT_TYPE == "pdfa-1": + data["pdfFormat"] = "PDF/A-1a" + elif settings.OCR_OUTPUT_TYPE == "pdfa-3": + data["pdfFormat"] = "PDF/A-3b" + try: response = requests.post( url, diff --git a/src/paperless_tika/parsers.py b/src/paperless_tika/parsers.py index 1cfb1eecb..f34ecbbab 100644 --- a/src/paperless_tika/parsers.py +++ b/src/paperless_tika/parsers.py @@ -95,9 +95,19 @@ class TikaDocumentParser(DocumentParser): ), } headers = {} + data = {} + + # Set the output format of the resulting PDF + # Valid inputs: https://gotenberg.dev/docs/modules/pdf-engines#uno + if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}: + data["pdfFormat"] = "PDF/A-2b" + elif settings.OCR_OUTPUT_TYPE == "pdfa-1": + data["pdfFormat"] = "PDF/A-1a" + elif settings.OCR_OUTPUT_TYPE == "pdfa-3": + data["pdfFormat"] = "PDF/A-3b" try: - response = requests.post(url, files=files, headers=headers) + response = requests.post(url, files=files, headers=headers, data=data) response.raise_for_status() # ensure we notice bad responses except Exception as err: raise ParseError(