mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-07-30 18:27:45 -05:00
Feature: Switches to a new client to handle communication with Gotenberg (#4391)
Switches to a new client to handle communication with Gotenberg for merging and generating PDFs
This commit is contained in:
@@ -341,7 +341,7 @@ class TestTikaHtmlParse(HttpxMockMixin, BaseMailParserTestCase):
|
||||
)
|
||||
parsed = self.parser.tika_parse(html)
|
||||
self.assertEqual(expected_text, parsed.strip())
|
||||
self.assertIn(self.parser.tika_server, str(self.httpx_mock.get_request().url))
|
||||
self.assertIn("http://localhost:9998", str(self.httpx_mock.get_request().url))
|
||||
|
||||
def test_tika_parse_exception(self):
|
||||
"""
|
||||
@@ -653,5 +653,5 @@ class TestParser(FileSystemAssertsMixin, HttpxMockMixin, BaseMailParserTestCase)
|
||||
|
||||
self.assertEqual(
|
||||
str(request.url),
|
||||
self.parser.gotenberg_server + "/forms/chromium/convert/html",
|
||||
"http://localhost:3000/forms/chromium/convert/html",
|
||||
)
|
||||
|
@@ -1,11 +1,14 @@
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from django.test import TestCase
|
||||
from imagehash import average_hash
|
||||
from pdfminer.high_level import extract_text
|
||||
from PIL import Image
|
||||
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
@@ -13,6 +16,29 @@ from documents.tests.utils import util_call_with_backoff
|
||||
from paperless_mail.tests.test_parsers import BaseMailParserTestCase
|
||||
|
||||
|
||||
def extract_text(pdf_path: Path) -> str:
|
||||
"""
|
||||
Using pdftotext from poppler, extracts the text of a PDF into a file,
|
||||
then reads the file contents and returns it
|
||||
"""
|
||||
with tempfile.NamedTemporaryFile(
|
||||
mode="w+",
|
||||
) as tmp:
|
||||
subprocess.run(
|
||||
[
|
||||
shutil.which("pdftotext"),
|
||||
"-q",
|
||||
"-layout",
|
||||
"-enc",
|
||||
"UTF-8",
|
||||
str(pdf_path),
|
||||
tmp.name,
|
||||
],
|
||||
check=True,
|
||||
)
|
||||
return tmp.read()
|
||||
|
||||
|
||||
class MailAttachmentMock:
|
||||
def __init__(self, payload, content_id):
|
||||
self.payload = payload
|
||||
@@ -150,7 +176,7 @@ class TestParserLive(FileSystemAssertsMixin, BaseMailParserTestCase):
|
||||
|
||||
extracted = extract_text(pdf_path)
|
||||
expected = (
|
||||
"first\tPDF\tto\tbe\tmerged.\n\n\x0csecond\tPDF\tto\tbe\tmerged.\n\n\x0c"
|
||||
"first PDF to be merged.\n\x0csecond PDF to be merged.\n\x0c"
|
||||
)
|
||||
|
||||
self.assertEqual(expected, extracted)
|
||||
|
Reference in New Issue
Block a user