diff --git a/docker/compose/docker-compose.postgres-tika.yml b/docker/compose/docker-compose.postgres-tika.yml index ca47ebfdc..216138d7a 100644 --- a/docker/compose/docker-compose.postgres-tika.yml +++ b/docker/compose/docker-compose.postgres-tika.yml @@ -80,6 +80,13 @@ services: image: docker.io/gotenberg/gotenberg:7.4 restart: unless-stopped + # The Gotenberg Chromium route is used to convert .eml files. We do not + # want to allow external content like tracking pixels or even JavaScript. + command: + - "gotenberg" + - "--chromium-disable-javascript=true" + - "--chromium-allow-list=file:///tmp/.*" + tika: image: ghcr.io/paperless-ngx/tika:latest restart: unless-stopped diff --git a/docker/compose/docker-compose.sqlite-tika.yml b/docker/compose/docker-compose.sqlite-tika.yml index 9b6eeae1c..eea7dd5f3 100644 --- a/docker/compose/docker-compose.sqlite-tika.yml +++ b/docker/compose/docker-compose.sqlite-tika.yml @@ -68,6 +68,13 @@ services: image: docker.io/gotenberg/gotenberg:7.4 restart: unless-stopped + # The Gotenberg Chromium route is used to convert .eml files. We do not + # want to allow external content like tracking pixels or even JavaScript. + command: + - "gotenberg" + - "--chromium-disable-javascript=true" + - "--chromium-allow-list=file:///tmp/.*" + tika: image: ghcr.io/paperless-ngx/tika:latest restart: unless-stopped diff --git a/docs/configuration.rst b/docs/configuration.rst index 48ffb3588..fb7ba8505 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -515,6 +515,13 @@ requires are as follows: image: gotenberg/gotenberg:7.4 restart: unless-stopped + # The Gotenberg Chromium route is used to convert .eml files. We do not + # want to allow external content like tracking pixels or even JavaScript.
+ command: + - "gotenberg" + - "--chromium-disable-javascript=true" + - "--chromium-allow-list=file:///tmp/.*" + tika: image: ghcr.io/paperless-ngx/tika:latest restart: unless-stopped diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index e4498bdd9..a092ce6be 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -127,9 +127,14 @@ If using docker-compose, this is achieved by the following configuration change gotenberg: image: gotenberg/gotenberg:7.4 restart: unless-stopped + + # The Gotenberg Chromium route is used to convert .eml files. We do not + # want to allow external content like tracking pixels or even JavaScript. command: - - "gotenberg" - - "--api-timeout=60" + - "gotenberg" + - "--chromium-disable-javascript=true" + - "--chromium-allow-list=file:///tmp/.*" + - "--api-timeout=60" Permission denied errors in the consumption directory ##################################################### diff --git a/scripts/start_services.sh b/scripts/start_services.sh index 3d0addb55..e3f90258f 100755 --- a/scripts/start_services.sh +++ b/scripts/start_services.sh @@ -2,5 +2,7 @@ docker run -p 5432:5432 -e POSTGRES_PASSWORD=password -v paperless_pgdata:/var/lib/postgresql/data -d postgres:13 docker run -d -p 6379:6379 redis:latest -docker run -p 3000:3000 -d gotenberg/gotenberg:7.4 +# Gotenberg converts .eml files via Chromium: disable JavaScript and only allow +# local files. The allow-list regex is quoted so the shell cannot glob-expand ".*". +docker run -p 3000:3000 -d gotenberg/gotenberg:7.4 gotenberg --chromium-disable-javascript=true "--chromium-allow-list=file:///tmp/.*" docker run -p 9998:9998 -d ghcr.io/paperless-ngx/tika:latest diff --git a/src/paperless_mail/tests/samples/sample.html b/src/paperless_mail/tests/samples/sample.html index 3aa5f615d..584cd5d64 100644 --- a/src/paperless_mail/tests/samples/sample.html +++ b/src/paperless_mail/tests/samples/sample.html @@ -4,7 +4,11 @@

Some Text

-

+

+ Has to be rewritten to work.. + This image should not be shown. +

+

and an embedded image.

Paragraph unchanged.

diff --git a/src/paperless_mail/tests/test_eml.py b/src/paperless_mail/tests/test_eml.py index d6fce2bda..da868ef56 100644 --- a/src/paperless_mail/tests/test_eml.py +++ b/src/paperless_mail/tests/test_eml.py @@ -2,6 +2,8 @@ import datetime import hashlib import os from unittest import mock +from urllib.error import HTTPError +from urllib.request import urlopen import pytest from django.test import TestCase @@ -350,10 +352,34 @@ class TestParser(TestCase): # The created pdf is not reproducible. But the converted image should always look the same. expected_hash = ( - "88dee024ec77b1139b77913547717bd7e94f53651d489c54a7084d30a82e389e" + "267d61f0ab8f128a037002a424b2cb4bfe18a81e17f0b70f15d241688ed47d1a" ) self.assertEqual( thumb_hash, expected_hash, - "PDF looks different.", + f"PDF looks different. Check if {converted} looks weird. " + f"If Rick Astley is shown, Gotenberg loads from web which is bad for Mail content.", ) + + def test_is_online_image_still_available(self): + """ + A public image is referenced by the html sample file. We have no control + over whether this image stays online forever, so check that it is still + reachable. This test needs network access. + """ + + # Sanity check: a nonexistent URL on the same host must raise HTTPError. + # The timeout keeps an unresponsive server from hanging the test run. + with pytest.raises(HTTPError): + urlopen( + "https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png", + timeout=10, + ) + + # Now check the URL used in samples/sample.html. The context manager + # closes the response instead of leaking the connection. + with urlopen( + "https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png", + timeout=10, + ) as response: + self.assertEqual(response.status, 200)