add unittest for external images

This commit is contained in:
phail 2022-10-22 00:44:32 +02:00
parent 09b5bd17f2
commit f1f5227ccd
7 changed files with 52 additions and 6 deletions

View File

@ -80,6 +80,13 @@ services:
image: docker.io/gotenberg/gotenberg:7.4 image: docker.io/gotenberg/gotenberg:7.4
restart: unless-stopped restart: unless-stopped
# The Gotenberg Chromium route is used to convert .eml files. We do not
# want to allow external content like tracking pixels or even JavaScript.
command:
- "gotenberg"
- "--chromium-disable-javascript=true"
- "--chromium-allow-list=file:///tmp/.*"
tika: tika:
image: ghcr.io/paperless-ngx/tika:latest image: ghcr.io/paperless-ngx/tika:latest
restart: unless-stopped restart: unless-stopped

View File

@ -68,6 +68,13 @@ services:
image: docker.io/gotenberg/gotenberg:7.4 image: docker.io/gotenberg/gotenberg:7.4
restart: unless-stopped restart: unless-stopped
# The Gotenberg Chromium route is used to convert .eml files. We do not
# want to allow external content like tracking pixels or even JavaScript.
command:
- "gotenberg"
- "--chromium-disable-javascript=true"
- "--chromium-allow-list=file:///tmp/.*"
tika: tika:
image: ghcr.io/paperless-ngx/tika:latest image: ghcr.io/paperless-ngx/tika:latest
restart: unless-stopped restart: unless-stopped

View File

@ -515,6 +515,13 @@ requires are as follows:
image: gotenberg/gotenberg:7.4 image: gotenberg/gotenberg:7.4
restart: unless-stopped restart: unless-stopped
# The Gotenberg Chromium route is used to convert .eml files. We do not
# want to allow external content like tracking pixels or even JavaScript.
command:
- "gotenberg"
- "--chromium-disable-javascript=true"
- "--chromium-allow-list=file:///tmp/.*"
tika: tika:
image: ghcr.io/paperless-ngx/tika:latest image: ghcr.io/paperless-ngx/tika:latest
restart: unless-stopped restart: unless-stopped

View File

@ -127,9 +127,14 @@ If using docker-compose, this is achieved by the following configuration change
gotenberg: gotenberg:
image: gotenberg/gotenberg:7.4 image: gotenberg/gotenberg:7.4
restart: unless-stopped restart: unless-stopped
# The Gotenberg Chromium route is used to convert .eml files. We do not
# want to allow external content like tracking pixels or even JavaScript.
command: command:
- "gotenberg" - "gotenberg"
- "--api-timeout=60" - "--chromium-disable-javascript=true"
- "--chromium-allow-list=file:///tmp/.*"
- "--api-timeout=60"
Permission denied errors in the consumption directory Permission denied errors in the consumption directory
##################################################### #####################################################

View File

@ -2,5 +2,5 @@
docker run -p 5432:5432 -e POSTGRES_PASSWORD=password -v paperless_pgdata:/var/lib/postgresql/data -d postgres:13 docker run -p 5432:5432 -e POSTGRES_PASSWORD=password -v paperless_pgdata:/var/lib/postgresql/data -d postgres:13
docker run -d -p 6379:6379 redis:latest docker run -d -p 6379:6379 redis:latest
docker run -p 3000:3000 -d gotenberg/gotenberg:7.4 docker run -p 3000:3000 -d gotenberg/gotenberg:7.4 gotenberg --chromium-disable-javascript=true --chromium-allow-list=file:///tmp/.*
docker run -p 9998:9998 -d ghcr.io/paperless-ngx/tika:latest docker run -p 9998:9998 -d ghcr.io/paperless-ngx/tika:latest

View File

@ -4,7 +4,11 @@
</head> </head>
<body> <body>
<p>Some Text</p> <p>Some Text</p>
<p><img src="cid:part1.pNdUSz0s.D3NqVtPg@example.de" alt=""></p> <p>
<img src="cid:part1.pNdUSz0s.D3NqVtPg@example.de" alt="Has to be rewritten to work..">
<img src="https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png" alt="This image should not be shown.">
</p>
<p>and an embedded image.<br> <p>and an embedded image.<br>
</p> </p>
<p id="changeme">Paragraph unchanged.</p> <p id="changeme">Paragraph unchanged.</p>

View File

@ -2,6 +2,8 @@ import datetime
import hashlib import hashlib
import os import os
from unittest import mock from unittest import mock
from urllib.error import HTTPError
from urllib.request import urlopen
import pytest import pytest
from django.test import TestCase from django.test import TestCase
@ -350,10 +352,24 @@ class TestParser(TestCase):
# The created pdf is not reproducible. But the converted image should always look the same. # The created pdf is not reproducible. But the converted image should always look the same.
expected_hash = ( expected_hash = (
"88dee024ec77b1139b77913547717bd7e94f53651d489c54a7084d30a82e389e" "267d61f0ab8f128a037002a424b2cb4bfe18a81e17f0b70f15d241688ed47d1a"
) )
self.assertEqual( self.assertEqual(
thumb_hash, thumb_hash,
expected_hash, expected_hash,
"PDF looks different.", f"PDF looks different. Check if {converted} looks weird. "
f"If Rick Astley is shown, Gotenberg loads from web which is bad for Mail content.",
) )
def test_is_online_image_still_available(self):
    """
    Check that the public image referenced by the HTML sample is still online.

    A public image is used in the html sample file. We have no control
    whether this image stays online forever, so here we check if it is
    still there.

    Raises:
        HTTPError: if the sample image URL no longer resolves successfully.
    """
    # Bound every request so an unreachable host fails the test instead of
    # hanging the whole test run (urlopen blocks indefinitely by default).
    timeout_seconds = 30

    # Start by testing that a nonexistent URL really raises an exception;
    # otherwise the availability check below would prove nothing.
    with pytest.raises(HTTPError):
        urlopen(
            "https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png",
            timeout=timeout_seconds,
        )

    # Now check the URL used in samples/sample.html. The response is opened
    # in a context manager so the underlying connection is always closed.
    with urlopen(
        "https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png",
        timeout=timeout_seconds,
    ):
        pass