mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-01-28 22:59:03 -06:00
Chore: Use a local http server instead of external to reduce flakiness (#11916)
This commit is contained in:
@@ -34,3 +34,13 @@ services:
|
|||||||
ports:
|
ports:
|
||||||
- "3143:3143" # IMAP
|
- "3143:3143" # IMAP
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
nginx:
|
||||||
|
image: docker.io/nginx:1.29-alpine
|
||||||
|
hostname: nginx
|
||||||
|
container_name: nginx
|
||||||
|
ports:
|
||||||
|
- "8080:8080"
|
||||||
|
restart: unless-stopped
|
||||||
|
volumes:
|
||||||
|
- ../../docs/assets:/usr/share/nginx/html/assets:ro
|
||||||
|
- ./test-nginx.conf:/etc/nginx/conf.d/default.conf:ro
|
||||||
|
|||||||
14
docker/compose/test-nginx.conf
Normal file
14
docker/compose/test-nginx.conf
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
server {
|
||||||
|
listen 8080;
|
||||||
|
server_name localhost;
|
||||||
|
|
||||||
|
root /usr/share/nginx/html;
|
||||||
|
|
||||||
|
# Enable CORS for test requests
|
||||||
|
add_header 'Access-Control-Allow-Origin' '*' always;
|
||||||
|
add_header 'Access-Control-Allow-Methods' 'GET, HEAD, OPTIONS' always;
|
||||||
|
|
||||||
|
location / {
|
||||||
|
try_files $uri $uri/ =404;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -300,6 +300,14 @@ norecursedirs = [ "src/locale/", ".venv/", "src-ui/" ]
|
|||||||
|
|
||||||
DJANGO_SETTINGS_MODULE = "paperless.settings"
|
DJANGO_SETTINGS_MODULE = "paperless.settings"
|
||||||
|
|
||||||
|
markers = [
|
||||||
|
"live: Integration tests requiring external services (Gotenberg, Tika, nginx, etc)",
|
||||||
|
"nginx: Tests that make HTTP requests to the local nginx service",
|
||||||
|
"gotenberg: Tests requiring Gotenberg service",
|
||||||
|
"tika: Tests requiring Tika service",
|
||||||
|
"greenmail: Tests requiring Greenmail service",
|
||||||
|
]
|
||||||
|
|
||||||
[tool.pytest_env]
|
[tool.pytest_env]
|
||||||
PAPERLESS_DISABLE_DBHANDLER = "true"
|
PAPERLESS_DISABLE_DBHANDLER = "true"
|
||||||
PAPERLESS_CACHE_BACKEND = "django.core.cache.backends.locmem.LocMemCache"
|
PAPERLESS_CACHE_BACKEND = "django.core.cache.backends.locmem.LocMemCache"
|
||||||
|
|||||||
@@ -816,7 +816,7 @@ class TestCommandWatch:
|
|||||||
f.flush()
|
f.flush()
|
||||||
sleep(0.05)
|
sleep(0.05)
|
||||||
|
|
||||||
sleep(0.5)
|
sleep(0.8)
|
||||||
|
|
||||||
if thread.exception:
|
if thread.exception:
|
||||||
raise thread.exception
|
raise thread.exception
|
||||||
@@ -837,7 +837,7 @@ class TestCommandWatch:
|
|||||||
(consumption_dir / "._document.pdf").write_bytes(b"test")
|
(consumption_dir / "._document.pdf").write_bytes(b"test")
|
||||||
shutil.copy(sample_pdf, consumption_dir / "valid.pdf")
|
shutil.copy(sample_pdf, consumption_dir / "valid.pdf")
|
||||||
|
|
||||||
sleep(0.5)
|
sleep(0.8)
|
||||||
|
|
||||||
if thread.exception:
|
if thread.exception:
|
||||||
raise thread.exception
|
raise thread.exception
|
||||||
|
|||||||
@@ -89,3 +89,11 @@ def greenmail_mail_account(db: None) -> Generator[MailAccount, None, None]:
|
|||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
def mail_account_handler() -> MailAccountHandler:
|
def mail_account_handler() -> MailAccountHandler:
|
||||||
return MailAccountHandler()
|
return MailAccountHandler()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def nginx_base_url() -> Generator[str, None, None]:
|
||||||
|
"""
|
||||||
|
The base URL for the nginx HTTP server we expect to be alive
|
||||||
|
"""
|
||||||
|
yield "http://localhost:8080"
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ Content-Transfer-Encoding: 7bit
|
|||||||
<p>Some Text</p>
|
<p>Some Text</p>
|
||||||
<p>
|
<p>
|
||||||
<img src="cid:part1.pNdUSz0s.D3NqVtPg@example.de" alt="Has to be rewritten to work..">
|
<img src="cid:part1.pNdUSz0s.D3NqVtPg@example.de" alt="Has to be rewritten to work..">
|
||||||
<img src="https://docs.paperless-ngx.com/assets/logo_full_white.svg" alt="This image should not be shown.">
|
<img src="http://localhost:8080/assets/logo_full_white.svg" alt="This image should not be shown.">
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p>and an embedded image.<br>
|
<p>and an embedded image.<br>
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
<p>Some Text</p>
|
<p>Some Text</p>
|
||||||
<p>
|
<p>
|
||||||
<img src="cid:part1.pNdUSz0s.D3NqVtPg@example.de" alt="Has to be rewritten to work..">
|
<img src="cid:part1.pNdUSz0s.D3NqVtPg@example.de" alt="Has to be rewritten to work..">
|
||||||
<img src="https://docs.paperless-ngx.com/assets/logo_full_white.svg" alt="This image should not be shown.">
|
<img src="http://localhost:8080/assets/logo_full_white.svg" alt="This image should not be shown.">
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p>and an embedded image.<br>
|
<p>and an embedded image.<br>
|
||||||
|
|||||||
@@ -6,6 +6,8 @@ from paperless_mail.models import MailAccount
|
|||||||
from paperless_mail.models import MailRule
|
from paperless_mail.models import MailRule
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.live
|
||||||
|
@pytest.mark.greenmail
|
||||||
@pytest.mark.django_db
|
@pytest.mark.django_db
|
||||||
class TestMailGreenmail:
|
class TestMailGreenmail:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ from paperless_mail.parsers import MailDocumentParser
|
|||||||
def extract_text(pdf_path: Path) -> str:
|
def extract_text(pdf_path: Path) -> str:
|
||||||
"""
|
"""
|
||||||
Using pdftotext from poppler, extracts the text of a PDF into a file,
|
Using pdftotext from poppler, extracts the text of a PDF into a file,
|
||||||
then reads the file contents and returns it
|
then reads the file contents and returns it.
|
||||||
"""
|
"""
|
||||||
with tempfile.NamedTemporaryFile(
|
with tempfile.NamedTemporaryFile(
|
||||||
mode="w+",
|
mode="w+",
|
||||||
@@ -38,71 +38,107 @@ def extract_text(pdf_path: Path) -> str:
|
|||||||
|
|
||||||
|
|
||||||
class MailAttachmentMock:
|
class MailAttachmentMock:
|
||||||
def __init__(self, payload, content_id):
|
def __init__(self, payload: bytes, content_id: str) -> None:
|
||||||
self.payload = payload
|
self.payload = payload
|
||||||
self.content_id = content_id
|
self.content_id = content_id
|
||||||
self.content_type = "image/png"
|
self.content_type = "image/png"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.live
|
||||||
|
@pytest.mark.nginx
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
"PAPERLESS_CI_TEST" not in os.environ,
|
"PAPERLESS_CI_TEST" not in os.environ,
|
||||||
reason="No Gotenberg/Tika servers to test with",
|
reason="No Gotenberg/Tika servers to test with",
|
||||||
)
|
)
|
||||||
class TestUrlCanary:
|
class TestNginxService:
|
||||||
"""
|
"""
|
||||||
Verify certain URLs are still available so testing is valid still
|
Verify the local nginx server is responding correctly.
|
||||||
|
These tests validate that the test infrastructure is working properly
|
||||||
|
before running the actual parser tests that depend on HTTP resources.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def test_online_image_exception_on_not_available(self):
|
def test_non_existent_resource_returns_404(
|
||||||
|
self,
|
||||||
|
nginx_base_url: str,
|
||||||
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- Fresh start
|
- Local nginx server is running
|
||||||
WHEN:
|
WHEN:
|
||||||
- nonexistent image is requested
|
- A non-existent resource is requested
|
||||||
THEN:
|
THEN:
|
||||||
- An exception shall be thrown
|
- An HTTP 404 status code shall be returned
|
||||||
"""
|
|
||||||
"""
|
|
||||||
A public image is used in the html sample file. We have no control
|
|
||||||
whether this image stays online forever, so here we check if we can detect if is not
|
|
||||||
available anymore.
|
|
||||||
"""
|
"""
|
||||||
resp = httpx.get(
|
resp = httpx.get(
|
||||||
"https://docs.paperless-ngx.com/assets/non-existent.png",
|
f"{nginx_base_url}/assets/non-existent.png",
|
||||||
|
timeout=5.0,
|
||||||
)
|
)
|
||||||
with pytest.raises(httpx.HTTPStatusError) as exec_info:
|
with pytest.raises(httpx.HTTPStatusError) as exec_info:
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
|
||||||
assert exec_info.value.response.status_code == httpx.codes.NOT_FOUND
|
assert exec_info.value.response.status_code == httpx.codes.NOT_FOUND
|
||||||
|
|
||||||
def test_is_online_image_still_available(self):
|
def test_valid_resource_is_available(
|
||||||
|
self,
|
||||||
|
nginx_base_url: str,
|
||||||
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- Fresh start
|
- Local nginx server is running
|
||||||
WHEN:
|
WHEN:
|
||||||
- A public image used in the html sample file is requested
|
- A valid test fixture resource is requested
|
||||||
THEN:
|
THEN:
|
||||||
- No exception shall be thrown
|
- The resource shall be returned with HTTP 200 status code
|
||||||
|
- The response shall contain the expected content type
|
||||||
"""
|
"""
|
||||||
"""
|
|
||||||
A public image is used in the html sample file. We have no control
|
|
||||||
whether this image stays online forever, so here we check if it is still there
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Now check the URL used in samples/sample.html
|
|
||||||
resp = httpx.get(
|
resp = httpx.get(
|
||||||
"https://docs.paperless-ngx.com/assets/logo_full_white.svg",
|
f"{nginx_base_url}/assets/logo_full_white.svg",
|
||||||
|
timeout=5.0,
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
|
||||||
|
assert resp.status_code == httpx.codes.OK
|
||||||
|
assert "svg" in resp.headers.get("content-type", "").lower()
|
||||||
|
|
||||||
|
def test_server_connectivity(
|
||||||
|
self,
|
||||||
|
nginx_base_url: str,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Local nginx server is running
|
||||||
|
WHEN:
|
||||||
|
- A request is made to the server root
|
||||||
|
THEN:
|
||||||
|
- The server shall respond without connection errors
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
resp = httpx.get(
|
||||||
|
nginx_base_url,
|
||||||
|
timeout=5.0,
|
||||||
|
follow_redirects=True,
|
||||||
|
)
|
||||||
|
# The exact status is unimportant: any of these codes proves nginx answered the request
|
||||||
|
assert resp.status_code in {200, 404, 403}
|
||||||
|
except httpx.ConnectError as e:
|
||||||
|
pytest.fail(
|
||||||
|
f"Cannot connect to nginx server at {nginx_base_url}. "
|
||||||
|
f"Ensure the nginx container is running via docker-compose.ci-test.yml. "
|
||||||
|
f"Error: {e}",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.live
|
||||||
|
@pytest.mark.gotenberg
|
||||||
|
@pytest.mark.tika
|
||||||
|
@pytest.mark.nginx
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
"PAPERLESS_CI_TEST" not in os.environ,
|
"PAPERLESS_CI_TEST" not in os.environ,
|
||||||
reason="No Gotenberg/Tika servers to test with",
|
reason="No Gotenberg/Tika servers to test with",
|
||||||
)
|
)
|
||||||
class TestParserLive:
|
class TestParserLive:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def imagehash(file, hash_size=18):
|
def imagehash(file: Path, hash_size: int = 18) -> str:
|
||||||
return f"{average_hash(Image.open(file), hash_size)}"
|
return f"{average_hash(Image.open(file), hash_size)}"
|
||||||
|
|
||||||
def test_get_thumbnail(
|
def test_get_thumbnail(
|
||||||
@@ -112,14 +148,15 @@ class TestParserLive:
|
|||||||
simple_txt_email_file: Path,
|
simple_txt_email_file: Path,
|
||||||
simple_txt_email_pdf_file: Path,
|
simple_txt_email_pdf_file: Path,
|
||||||
simple_txt_email_thumbnail_file: Path,
|
simple_txt_email_thumbnail_file: Path,
|
||||||
):
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- Fresh start
|
- A simple text email file
|
||||||
|
- Mocked PDF generation returning a known PDF
|
||||||
WHEN:
|
WHEN:
|
||||||
- The Thumbnail is requested
|
- The thumbnail is requested
|
||||||
THEN:
|
THEN:
|
||||||
- The returned thumbnail image file is as expected
|
- The returned thumbnail image file shall match the expected hash
|
||||||
"""
|
"""
|
||||||
mock_generate_pdf = mocker.patch(
|
mock_generate_pdf = mocker.patch(
|
||||||
"paperless_mail.parsers.MailDocumentParser.generate_pdf",
|
"paperless_mail.parsers.MailDocumentParser.generate_pdf",
|
||||||
@@ -134,22 +171,28 @@ class TestParserLive:
|
|||||||
assert self.imagehash(thumb) == self.imagehash(
|
assert self.imagehash(thumb) == self.imagehash(
|
||||||
simple_txt_email_thumbnail_file,
|
simple_txt_email_thumbnail_file,
|
||||||
), (
|
), (
|
||||||
f"Created Thumbnail {thumb} differs from expected file {simple_txt_email_thumbnail_file}"
|
f"Created thumbnail {thumb} differs from expected file "
|
||||||
|
f"{simple_txt_email_thumbnail_file}"
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_tika_parse_successful(self, mail_parser: MailDocumentParser):
|
def test_tika_parse_successful(self, mail_parser: MailDocumentParser) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- Fresh start
|
- HTML content to parse
|
||||||
|
- Tika server is running
|
||||||
WHEN:
|
WHEN:
|
||||||
- tika parsing is called
|
- Tika parsing is called
|
||||||
THEN:
|
THEN:
|
||||||
- a web request to tika shall be done and the reply es returned
|
- A web request to Tika shall be made
|
||||||
|
- The parsed text content shall be returned
|
||||||
"""
|
"""
|
||||||
html = '<html><head><meta http-equiv="content-type" content="text/html; charset=UTF-8"></head><body><p>Some Text</p></body></html>'
|
html = (
|
||||||
|
'<html><head><meta http-equiv="content-type" '
|
||||||
|
'content="text/html; charset=UTF-8"></head>'
|
||||||
|
"<body><p>Some Text</p></body></html>"
|
||||||
|
)
|
||||||
expected_text = "Some Text"
|
expected_text = "Some Text"
|
||||||
|
|
||||||
# Check successful parsing
|
|
||||||
parsed = mail_parser.tika_parse(html)
|
parsed = mail_parser.tika_parse(html)
|
||||||
assert expected_text == parsed.strip()
|
assert expected_text == parsed.strip()
|
||||||
|
|
||||||
@@ -160,14 +203,17 @@ class TestParserLive:
|
|||||||
html_email_file: Path,
|
html_email_file: Path,
|
||||||
merged_pdf_first: Path,
|
merged_pdf_first: Path,
|
||||||
merged_pdf_second: Path,
|
merged_pdf_second: Path,
|
||||||
):
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- Intermediary pdfs to be merged
|
- Intermediary PDFs to be merged
|
||||||
|
- An HTML email file
|
||||||
WHEN:
|
WHEN:
|
||||||
- pdf generation is requested with html file requiring merging of pdfs
|
- PDF generation is requested with HTML file requiring merging
|
||||||
THEN:
|
THEN:
|
||||||
- gotenberg is called to merge files and the resulting file is returned
|
- Gotenberg shall be called to merge files
|
||||||
|
- The resulting merged PDF shall be returned
|
||||||
|
- The merged PDF shall contain text from both source PDFs
|
||||||
"""
|
"""
|
||||||
mock_generate_pdf_from_html = mocker.patch(
|
mock_generate_pdf_from_html = mocker.patch(
|
||||||
"paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html",
|
"paperless_mail.parsers.MailDocumentParser.generate_pdf_from_html",
|
||||||
@@ -200,16 +246,17 @@ class TestParserLive:
|
|||||||
html_email_file: Path,
|
html_email_file: Path,
|
||||||
html_email_pdf_file: Path,
|
html_email_pdf_file: Path,
|
||||||
html_email_thumbnail_file: Path,
|
html_email_thumbnail_file: Path,
|
||||||
):
|
) -> None:
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- Fresh start
|
- An HTML email file
|
||||||
WHEN:
|
WHEN:
|
||||||
- pdf generation from simple eml file is requested
|
- PDF generation from the email file is requested
|
||||||
THEN:
|
THEN:
|
||||||
- Gotenberg is called and the resulting file is returned and look as expected.
|
- Gotenberg shall be called to generate the PDF
|
||||||
|
- The archive PDF shall contain the expected content
|
||||||
|
- The generated thumbnail shall match the expected image hash
|
||||||
"""
|
"""
|
||||||
|
|
||||||
util_call_with_backoff(mail_parser.parse, [html_email_file, "message/rfc822"])
|
util_call_with_backoff(mail_parser.parse, [html_email_file, "message/rfc822"])
|
||||||
|
|
||||||
# Check the archive PDF
|
# Check the archive PDF
|
||||||
@@ -217,7 +264,7 @@ class TestParserLive:
|
|||||||
archive_text = extract_text(archive_path)
|
archive_text = extract_text(archive_path)
|
||||||
expected_archive_text = extract_text(html_email_pdf_file)
|
expected_archive_text = extract_text(html_email_pdf_file)
|
||||||
|
|
||||||
# Archive includes the HTML content, so use in
|
# Archive includes the HTML content
|
||||||
assert expected_archive_text in archive_text
|
assert expected_archive_text in archive_text
|
||||||
|
|
||||||
# Check the thumbnail
|
# Check the thumbnail
|
||||||
@@ -227,9 +274,12 @@ class TestParserLive:
|
|||||||
)
|
)
|
||||||
generated_thumbnail_hash = self.imagehash(generated_thumbnail)
|
generated_thumbnail_hash = self.imagehash(generated_thumbnail)
|
||||||
|
|
||||||
# The created pdf is not reproducible. But the converted image should always look the same.
|
# The created PDF is not reproducible, but the converted image
|
||||||
|
# should always look the same
|
||||||
expected_hash = self.imagehash(html_email_thumbnail_file)
|
expected_hash = self.imagehash(html_email_thumbnail_file)
|
||||||
|
|
||||||
assert generated_thumbnail_hash == expected_hash, (
|
assert generated_thumbnail_hash == expected_hash, (
|
||||||
f"PDF looks different. Check if {generated_thumbnail} looks weird."
|
f"PDF thumbnail differs from expected. "
|
||||||
|
f"Generated: {generated_thumbnail}, "
|
||||||
|
f"Hash: {generated_thumbnail_hash} vs {expected_hash}"
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user