Feature: Switches to a new client to handle communication with Gotenberg (#4391)

Switches to a new client to handle communication with Gotenberg for merging and generating PDFs
This commit is contained in:
Trenton H 2023-10-19 17:27:29 -07:00 committed by GitHub
parent 5f0eba694c
commit 999ae678c2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 198 additions and 178 deletions

View File

@ -51,6 +51,7 @@ flower = "*"
bleach = "*" bleach = "*"
zxing-cpp = {version = "*", platform_machine = "== 'x86_64'"} zxing-cpp = {version = "*", platform_machine = "== 'x86_64'"}
django-multiselectfield = "*" django-multiselectfield = "*"
gotenberg-client = "*"
[dev-packages] [dev-packages]
# Linting # Linting

39
Pipfile.lock generated
View File

@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "3025da2940433d347b2fd2ac222852c21f4aa73eeefbd1ee9152cbfd7a7a48e9" "sha256": "505bd6b18d31ed64988ef307c12a5acb70f611cafd932a391e985a11bbbc8000"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": {}, "requires": {},
@ -539,6 +539,15 @@
"markers": "python_version >= '3.7'", "markers": "python_version >= '3.7'",
"version": "==2.0.1" "version": "==2.0.1"
}, },
"gotenberg-client": {
"hashes": [
"sha256:4508ecb913ef2d553dd2ceb78e32cee001000ba08c910ba1f9ace38350d1589e",
"sha256:7a3f8a02caee768391373b3610c6ec25a853cccf391ed6b5d5a1292c3ed15e7e"
],
"index": "pypi",
"markers": "python_version >= '3.8'",
"version": "==0.3.0"
},
"gunicorn": { "gunicorn": {
"hashes": [ "hashes": [
"sha256:3213aa5e8c24949e792bcacfc176fef362e7aac80b76c56f6b5122bf350722f0", "sha256:3213aa5e8c24949e792bcacfc176fef362e7aac80b76c56f6b5122bf350722f0",
@ -556,6 +565,13 @@
"markers": "python_version >= '3.7'", "markers": "python_version >= '3.7'",
"version": "==0.14.0" "version": "==0.14.0"
}, },
"h2": {
"hashes": [
"sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d",
"sha256:a83aca08fbe7aacb79fec788c9c0bac936343560ed9ec18b82a13a12c28d2abb"
],
"version": "==4.1.0"
},
"hiredis": { "hiredis": {
"hashes": [ "hashes": [
"sha256:071c5814b850574036506a8118034f97c3cbf2fe9947ff45a27b07a48da56240", "sha256:071c5814b850574036506a8118034f97c3cbf2fe9947ff45a27b07a48da56240",
@ -650,6 +666,14 @@
], ],
"version": "==2.2.3" "version": "==2.2.3"
}, },
"hpack": {
"hashes": [
"sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c",
"sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"
],
"markers": "python_full_version >= '3.6.1'",
"version": "==4.0.0"
},
"httpcore": { "httpcore": {
"hashes": [ "hashes": [
"sha256:13b5e5cd1dca1a6636a6aaea212b19f4f85cd88c366a2b82304181b769aab3c9", "sha256:13b5e5cd1dca1a6636a6aaea212b19f4f85cd88c366a2b82304181b769aab3c9",
@ -699,6 +723,9 @@
"version": "==0.6.0" "version": "==0.6.0"
}, },
"httpx": { "httpx": {
"extras": [
"http2"
],
"hashes": [ "hashes": [
"sha256:181ea7f8ba3a82578be86ef4171554dd45fec26a02556a744db029a0a27b7100", "sha256:181ea7f8ba3a82578be86ef4171554dd45fec26a02556a744db029a0a27b7100",
"sha256:47ecda285389cb32bb2691cc6e069e3ab0205956f681c5b2ad2325719751d875" "sha256:47ecda285389cb32bb2691cc6e069e3ab0205956f681c5b2ad2325719751d875"
@ -714,6 +741,14 @@
"markers": "python_version >= '3.8'", "markers": "python_version >= '3.8'",
"version": "==4.8.0" "version": "==4.8.0"
}, },
"hyperframe": {
"hashes": [
"sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15",
"sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"
],
"markers": "python_full_version >= '3.6.1'",
"version": "==6.0.1"
},
"idna": { "idna": {
"hashes": [ "hashes": [
"sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4", "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4",
@ -1782,7 +1817,7 @@
"sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0", "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0",
"sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef" "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"
], ],
"markers": "python_version < '3.11'", "markers": "python_version < '3.10'",
"version": "==4.8.0" "version": "==4.8.0"
}, },
"tzdata": { "tzdata": {

View File

@ -1,13 +1,17 @@
import re import re
from html import escape from html import escape
from pathlib import Path from pathlib import Path
from typing import Optional
import httpx
from bleach import clean from bleach import clean
from bleach import linkify from bleach import linkify
from django.conf import settings from django.conf import settings
from django.utils.timezone import is_naive from django.utils.timezone import is_naive
from django.utils.timezone import make_aware from django.utils.timezone import make_aware
from gotenberg_client import GotenbergClient
from gotenberg_client.options import Margin
from gotenberg_client.options import PageSize
from gotenberg_client.options import PdfAFormat
from humanize import naturalsize from humanize import naturalsize
from imap_tools import MailAttachment from imap_tools import MailAttachment
from imap_tools import MailMessage from imap_tools import MailMessage
@ -24,11 +28,22 @@ class MailDocumentParser(DocumentParser):
Gotenberg and sends the html part to a Tika server for text extraction. Gotenberg and sends the html part to a Tika server for text extraction.
""" """
gotenberg_server = settings.TIKA_GOTENBERG_ENDPOINT
tika_server = settings.TIKA_ENDPOINT
logging_name = "paperless.parsing.mail" logging_name = "paperless.parsing.mail"
@staticmethod
def _settings_to_gotenberg_pdfa() -> Optional[PdfAFormat]:
    """
    Translate the configured OCR_OUTPUT_TYPE setting into the PDF/A format
    value understood by the Gotenberg client.

    Returns None when the setting does not request one of the known PDF/A
    variants, in which case no PDF/A format should be configured.
    """
    # Plain "pdfa" is treated the same as "pdfa-2"
    pdfa_by_output_type = {
        "pdfa": PdfAFormat.A2b,
        "pdfa-2": PdfAFormat.A2b,
        "pdfa-1": PdfAFormat.A1a,
        "pdfa-3": PdfAFormat.A3b,
    }
    return pdfa_by_output_type.get(settings.OCR_OUTPUT_TYPE)
def get_thumbnail(self, document_path: Path, mime_type: str, file_name=None): def get_thumbnail(self, document_path: Path, mime_type: str, file_name=None):
if not self.archive_path: if not self.archive_path:
self.archive_path = self.generate_pdf( self.archive_path = self.generate_pdf(
@ -173,7 +188,7 @@ class MailDocumentParser(DocumentParser):
self.log.info("Sending content to Tika server") self.log.info("Sending content to Tika server")
try: try:
with TikaClient(tika_url=self.tika_server) as client: with TikaClient(tika_url=settings.TIKA_ENDPOINT) as client:
parsed = client.tika.as_text.from_buffer(html, "text/html") parsed = client.tika.as_text.from_buffer(html, "text/html")
if parsed.content is not None: if parsed.content is not None:
@ -182,7 +197,7 @@ class MailDocumentParser(DocumentParser):
except Exception as err: except Exception as err:
raise ParseError( raise ParseError(
f"Could not parse content with tika server at " f"Could not parse content with tika server at "
f"{self.tika_server}: {err}", f"{settings.TIKA_ENDPOINT}: {err}",
) from err ) from err
def generate_pdf(self, mail_message: MailMessage) -> Path: def generate_pdf(self, mail_message: MailMessage) -> Path:
@ -195,45 +210,29 @@ class MailDocumentParser(DocumentParser):
if not mail_message.html: if not mail_message.html:
archive_path.write_bytes(mail_pdf_file.read_bytes()) archive_path.write_bytes(mail_pdf_file.read_bytes())
else: else:
url_merge = self.gotenberg_server + "/forms/pdfengines/merge"
pdf_of_html_content = self.generate_pdf_from_html( pdf_of_html_content = self.generate_pdf_from_html(
mail_message.html, mail_message.html,
mail_message.attachments, mail_message.attachments,
) )
pdf_collection = { with GotenbergClient(
"1_mail.pdf": ("1_mail.pdf", mail_pdf_file, "application/pdf"), host=settings.TIKA_GOTENBERG_ENDPOINT,
"2_html.pdf": ("2_html.pdf", pdf_of_html_content, "application/pdf"), timeout=settings.CELERY_TASK_TIME_LIMIT,
} ) as client, client.merge.merge() as route:
# Configure requested PDF/A formatting, if any
pdf_a_format = self._settings_to_gotenberg_pdfa()
if pdf_a_format is not None:
route.pdf_format(pdf_a_format)
try: route.merge([mail_pdf_file, pdf_of_html_content])
# Open a handle to each file, replacing the tuple
for filename in pdf_collection:
file_multi_part = pdf_collection[filename]
pdf_collection[filename] = (
file_multi_part[0],
file_multi_part[1].open("rb"),
file_multi_part[2],
)
response = httpx.post( try:
url_merge, response = route.run()
files=pdf_collection, archive_path.write_bytes(response.content)
timeout=settings.CELERY_TASK_TIME_LIMIT, except Exception as err:
) raise ParseError(
response.raise_for_status() # ensure we notice bad responses f"Error while merging email HTML into PDF: {err}",
) from err
archive_path.write_bytes(response.content)
except Exception as err:
raise ParseError(
f"Error while merging email HTML into PDF: {err}",
) from err
finally:
for filename in pdf_collection:
file_multi_part_handle = pdf_collection[filename][1]
file_multi_part_handle.close()
return archive_path return archive_path
@ -299,48 +298,29 @@ class MailDocumentParser(DocumentParser):
Creates a PDF based on the given email, using the email's values in Creates a PDF based on the given email, using the email's values in
an HTML template an HTML template
""" """
url = self.gotenberg_server + "/forms/chromium/convert/html"
self.log.info("Converting mail to PDF") self.log.info("Converting mail to PDF")
css_file = Path(__file__).parent / "templates" / "output.css" css_file = Path(__file__).parent / "templates" / "output.css"
email_html_file = self.mail_to_html(mail) email_html_file = self.mail_to_html(mail)
with css_file.open("rb") as css_handle, email_html_file.open( with GotenbergClient(
"rb", host=settings.TIKA_GOTENBERG_ENDPOINT,
) as email_html_handle: timeout=settings.CELERY_TASK_TIME_LIMIT,
files = { ) as client, client.chromium.html_to_pdf() as route:
"html": ("index.html", email_html_handle, "text/html"), # Configure requested PDF/A formatting, if any
"css": ("output.css", css_handle, "text/css"), pdf_a_format = self._settings_to_gotenberg_pdfa()
} if pdf_a_format is not None:
headers = {} route.pdf_format(pdf_a_format)
data = {
"marginTop": "0.1",
"marginBottom": "0.1",
"marginLeft": "0.1",
"marginRight": "0.1",
"paperWidth": "8.27",
"paperHeight": "11.7",
"scale": "1.0",
}
# Set the output format of the resulting PDF
# Valid inputs: https://gotenberg.dev/docs/modules/pdf-engines#uno
if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}:
data["pdfFormat"] = "PDF/A-2b"
elif settings.OCR_OUTPUT_TYPE == "pdfa-1":
data["pdfFormat"] = "PDF/A-1a"
elif settings.OCR_OUTPUT_TYPE == "pdfa-3":
data["pdfFormat"] = "PDF/A-3b"
try: try:
response = httpx.post( response = (
url, route.index(email_html_file)
files=files, .resource(css_file)
headers=headers, .margins(Margin(top=0.1, bottom=0.1, left=0.1, right=0.1))
data=data, .size(PageSize(height=11.7, width=8.27))
timeout=settings.CELERY_TASK_TIME_LIMIT, .scale(1.0)
.run()
) )
response.raise_for_status() # ensure we notice bad responses
except Exception as err: except Exception as err:
raise ParseError( raise ParseError(
f"Error while converting email to PDF: {err}", f"Error while converting email to PDF: {err}",
@ -368,69 +348,57 @@ class MailDocumentParser(DocumentParser):
text = compiled_close.sub("</div", text) text = compiled_close.sub("</div", text)
return text return text
url = self.gotenberg_server + "/forms/chromium/convert/html"
self.log.info("Converting html to PDF") self.log.info("Converting html to PDF")
tempdir = Path(self.tempdir) tempdir = Path(self.tempdir)
html_clean = clean_html_script(orig_html) html_clean = clean_html_script(orig_html)
files = {}
for attachment in attachments:
# Clean the attachment name to be valid
name_cid = f"cid:{attachment.content_id}"
name_clean = "".join(e for e in name_cid if e.isalnum())
# Write attachment payload to a temp file
temp_file = tempdir / name_clean
temp_file.write_bytes(attachment.payload)
# Store the attachment for upload
files[name_clean] = (name_clean, temp_file, attachment.content_type)
# Replace as needed the name with the clean name
html_clean = html_clean.replace(name_cid, name_clean)
# Now store the cleaned up HTML version
html_clean_file = tempdir / "index.html" html_clean_file = tempdir / "index.html"
html_clean_file.write_text(html_clean) html_clean_file.write_text(html_clean)
files["index.html"] = ("index.html", html_clean_file, "text/html") with GotenbergClient(
host=settings.TIKA_GOTENBERG_ENDPOINT,
timeout=settings.CELERY_TASK_TIME_LIMIT,
) as client, client.chromium.html_to_pdf() as route:
# Configure requested PDF/A formatting, if any
pdf_a_format = self._settings_to_gotenberg_pdfa()
if pdf_a_format is not None:
route.pdf_format(pdf_a_format)
data = { # Add attachments as resources, cleaning the filename and replacing
"marginTop": "0.1", # it in the index file for inclusion
"marginBottom": "0.1", for attachment in attachments:
"marginLeft": "0.1", # Clean the attachment name to be valid
"marginRight": "0.1", name_cid = f"cid:{attachment.content_id}"
"paperWidth": "8.27", name_clean = "".join(e for e in name_cid if e.isalnum())
"paperHeight": "11.7",
"scale": "1.0",
}
try:
# Open a handle to each file, replacing the tuple
for filename in files:
file_multi_part = files[filename]
files[filename] = (
file_multi_part[0],
file_multi_part[1].open("rb"),
file_multi_part[2],
)
response = httpx.post( # Write attachment payload to a temp file
url, temp_file = tempdir / name_clean
files=files, temp_file.write_bytes(attachment.payload)
data=data,
timeout=settings.CELERY_TASK_TIME_LIMIT, route.resource(temp_file)
)
response.raise_for_status() # ensure we notice bad responses # Replace as needed the name with the clean name
except Exception as err: html_clean = html_clean.replace(name_cid, name_clean)
raise ParseError(f"Error while converting document to PDF: {err}") from err
finally: # Now store the cleaned up HTML version
# Ensure all file handles as closed html_clean_file = tempdir / "index.html"
for filename in files: html_clean_file.write_text(html_clean)
file_multi_part_handle = files[filename][1] # This is our index file, the main page basically
file_multi_part_handle.close() route.index(html_clean_file)
# Set page size, margins
route.margins(Margin(top=0.1, bottom=0.1, left=0.1, right=0.1)).size(
PageSize(height=11.7, width=8.27),
).scale(1.0)
try:
response = route.run()
except Exception as err:
raise ParseError(
f"Error while converting document to PDF: {err}",
) from err
html_pdf = tempdir / "html.pdf" html_pdf = tempdir / "html.pdf"
html_pdf.write_bytes(response.content) html_pdf.write_bytes(response.content)

View File

@ -341,7 +341,7 @@ class TestTikaHtmlParse(HttpxMockMixin, BaseMailParserTestCase):
) )
parsed = self.parser.tika_parse(html) parsed = self.parser.tika_parse(html)
self.assertEqual(expected_text, parsed.strip()) self.assertEqual(expected_text, parsed.strip())
self.assertIn(self.parser.tika_server, str(self.httpx_mock.get_request().url)) self.assertIn("http://localhost:9998", str(self.httpx_mock.get_request().url))
def test_tika_parse_exception(self): def test_tika_parse_exception(self):
""" """
@ -653,5 +653,5 @@ class TestParser(FileSystemAssertsMixin, HttpxMockMixin, BaseMailParserTestCase)
self.assertEqual( self.assertEqual(
str(request.url), str(request.url),
self.parser.gotenberg_server + "/forms/chromium/convert/html", "http://localhost:3000/forms/chromium/convert/html",
) )

View File

@ -1,11 +1,14 @@
import os import os
import shutil
import subprocess
import tempfile
from pathlib import Path
from unittest import mock from unittest import mock
import httpx import httpx
import pytest import pytest
from django.test import TestCase from django.test import TestCase
from imagehash import average_hash from imagehash import average_hash
from pdfminer.high_level import extract_text
from PIL import Image from PIL import Image
from documents.tests.utils import FileSystemAssertsMixin from documents.tests.utils import FileSystemAssertsMixin
@ -13,6 +16,29 @@ from documents.tests.utils import util_call_with_backoff
from paperless_mail.tests.test_parsers import BaseMailParserTestCase from paperless_mail.tests.test_parsers import BaseMailParserTestCase
def extract_text(pdf_path: Path) -> str:
    """
    Using pdftotext from poppler, extracts the text of a PDF into a file,
    then reads the file contents and returns it

    Raises:
        FileNotFoundError: if no pdftotext executable is found on the PATH
        subprocess.CalledProcessError: if pdftotext exits with a non-zero status
    """
    pdftotext = shutil.which("pdftotext")
    if pdftotext is None:
        # Without this check, None ends up in the argument list and
        # subprocess fails with a TypeError that hides the real problem
        raise FileNotFoundError("pdftotext executable was not found on the PATH")

    # pdftotext is told to write UTF-8 (-enc UTF-8), so the file must be decoded
    # as UTF-8 too, instead of whatever the locale default happens to be
    with tempfile.NamedTemporaryFile(mode="w+", encoding="utf-8") as tmp:
        subprocess.run(
            [
                pdftotext,
                "-q",
                "-layout",
                "-enc",
                "UTF-8",
                str(pdf_path),
                tmp.name,
            ],
            check=True,
        )
        return tmp.read()
class MailAttachmentMock: class MailAttachmentMock:
def __init__(self, payload, content_id): def __init__(self, payload, content_id):
self.payload = payload self.payload = payload
@ -150,7 +176,7 @@ class TestParserLive(FileSystemAssertsMixin, BaseMailParserTestCase):
extracted = extract_text(pdf_path) extracted = extract_text(pdf_path)
expected = ( expected = (
"first\tPDF\tto\tbe\tmerged.\n\n\x0csecond\tPDF\tto\tbe\tmerged.\n\n\x0c" "first PDF to be merged.\n\x0csecond PDF to be merged.\n\x0c"
) )
self.assertEqual(expected, extracted) self.assertEqual(expected, extracted)

View File

@ -1,9 +1,10 @@
import os
from pathlib import Path from pathlib import Path
import httpx import httpx
from django.conf import settings from django.conf import settings
from django.utils import timezone from django.utils import timezone
from gotenberg_client import GotenbergClient
from gotenberg_client.options import PdfAFormat
from tika_client import TikaClient from tika_client import TikaClient
from documents.parsers import DocumentParser from documents.parsers import DocumentParser
@ -80,47 +81,33 @@ class TikaDocumentParser(DocumentParser):
self.archive_path = self.convert_to_pdf(document_path, file_name) self.archive_path = self.convert_to_pdf(document_path, file_name)
def convert_to_pdf(self, document_path, file_name): def convert_to_pdf(self, document_path: Path, file_name):
pdf_path = os.path.join(self.tempdir, "convert.pdf") pdf_path = Path(self.tempdir) / "convert.pdf"
gotenberg_server = settings.TIKA_GOTENBERG_ENDPOINT
url = gotenberg_server + "/forms/libreoffice/convert"
self.log.info(f"Converting {document_path} to PDF as {pdf_path}") self.log.info(f"Converting {document_path} to PDF as {pdf_path}")
with open(document_path, "rb") as document_handle:
files = {
"files": (
"convert" + os.path.splitext(document_path)[-1],
document_handle,
),
}
headers = {}
data = {}
with GotenbergClient(
host=settings.TIKA_GOTENBERG_ENDPOINT,
timeout=settings.CELERY_TASK_TIME_LIMIT,
) as client, client.libre_office.to_pdf() as route:
# Set the output format of the resulting PDF # Set the output format of the resulting PDF
# Valid inputs: https://gotenberg.dev/docs/modules/pdf-engines#uno
if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}: if settings.OCR_OUTPUT_TYPE in {"pdfa", "pdfa-2"}:
data["pdfFormat"] = "PDF/A-2b" route.pdf_format(PdfAFormat.A2b)
elif settings.OCR_OUTPUT_TYPE == "pdfa-1": elif settings.OCR_OUTPUT_TYPE == "pdfa-1":
data["pdfFormat"] = "PDF/A-1a" route.pdf_format(PdfAFormat.A1a)
elif settings.OCR_OUTPUT_TYPE == "pdfa-3": elif settings.OCR_OUTPUT_TYPE == "pdfa-3":
data["pdfFormat"] = "PDF/A-3b" route.pdf_format(PdfAFormat.A3b)
route.convert(document_path)
try: try:
response = httpx.post( response = route.run()
url,
files=files, pdf_path.write_bytes(response.content)
headers=headers,
data=data, return pdf_path
timeout=settings.CELERY_TASK_TIME_LIMIT,
)
response.raise_for_status() # ensure we notice bad responses
except Exception as err: except Exception as err:
raise ParseError( raise ParseError(
f"Error while converting document to PDF: {err}", f"Error while converting document to PDF: {err}",
) from err ) from err
with open(pdf_path, "wb") as file:
file.write(response.content)
file.close()
return pdf_path

View File

@ -2,12 +2,11 @@ import datetime
import os import os
import zoneinfo import zoneinfo
from pathlib import Path from pathlib import Path
from unittest import mock
from django.test import TestCase from django.test import TestCase
from django.test import override_settings from django.test import override_settings
from httpx import Request from httpx import codes
from httpx import Response from httpx._multipart import DataField
from rest_framework import status from rest_framework import status
from documents.parsers import ParseError from documents.parsers import ParseError
@ -95,8 +94,7 @@ class TestTikaParser(HttpxMockMixin, TestCase):
with self.assertRaises(ParseError): with self.assertRaises(ParseError):
self.parser.convert_to_pdf(file, None) self.parser.convert_to_pdf(file, None)
@mock.patch("paperless_tika.parsers.httpx.post") def test_request_pdf_a_format(self):
def test_request_pdf_a_format(self, post: mock.Mock):
""" """
GIVEN: GIVEN:
- Document needs to be converted to PDF - Document needs to be converted to PDF
@ -108,10 +106,6 @@ class TestTikaParser(HttpxMockMixin, TestCase):
file = Path(os.path.join(self.parser.tempdir, "input.odt")) file = Path(os.path.join(self.parser.tempdir, "input.odt"))
file.touch() file.touch()
response = Response(status_code=status.HTTP_200_OK)
response.request = Request("POST", "/somewhere/")
post.return_value = response
for setting, expected_key in [ for setting, expected_key in [
("pdfa", "PDF/A-2b"), ("pdfa", "PDF/A-2b"),
("pdfa-2", "PDF/A-2b"), ("pdfa-2", "PDF/A-2b"),
@ -119,11 +113,20 @@ class TestTikaParser(HttpxMockMixin, TestCase):
("pdfa-3", "PDF/A-3b"), ("pdfa-3", "PDF/A-3b"),
]: ]:
with override_settings(OCR_OUTPUT_TYPE=setting): with override_settings(OCR_OUTPUT_TYPE=setting):
self.httpx_mock.add_response(
status_code=codes.OK,
content=b"PDF document",
method="POST",
)
self.parser.convert_to_pdf(file, None) self.parser.convert_to_pdf(file, None)
post.assert_called_once() request = self.httpx_mock.get_request()
_, kwargs = post.call_args found = False
for field in request.stream.fields:
if isinstance(field, DataField) and field.name == "pdfFormat":
self.assertEqual(field.value, expected_key)
found = True
self.assertTrue(found)
self.assertEqual(kwargs["data"]["pdfFormat"], expected_key) self.httpx_mock.reset(assert_all_responses_were_requested=False)
post.reset_mock()

View File

@ -7,7 +7,7 @@ max-line-length = 88
[tool:pytest] [tool:pytest]
DJANGO_SETTINGS_MODULE=paperless.settings DJANGO_SETTINGS_MODULE=paperless.settings
addopts = --pythonwarnings=all --cov --cov-report=html --cov-report=xml --numprocesses auto --quiet --durations=50 addopts = --pythonwarnings=all --cov --cov-report=html --cov-report=xml --numprocesses auto --maxprocesses=16 --quiet --durations=50
env = env =
PAPERLESS_DISABLE_DBHANDLER=true PAPERLESS_DISABLE_DBHANDLER=true