mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-09-22 00:52:42 -05:00
Merge branch 'dev' into feature-ai
This commit is contained in:
@@ -41,6 +41,7 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
title="B",
|
||||
correspondent=self.c1,
|
||||
document_type=self.dt1,
|
||||
page_count=5,
|
||||
)
|
||||
self.doc3 = Document.objects.create(
|
||||
checksum="C",
|
||||
@@ -1369,6 +1370,218 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
|
||||
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
|
||||
self.assertIn(b"pages must be a list of integers", response.content)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
def test_edit_pdf(self, m):
    """
    GIVEN:
        - A bulk edit request using the edit_pdf method for one document
    WHEN:
        - API is called
    THEN:
        - The response is HTTP 200
        - edit_pdf is called once with the document id, the requested
          operations and self.user
    """
    self.setup_mock(m, "edit_pdf")
    payload = {
        "documents": [self.doc2.id],
        "method": "edit_pdf",
        "parameters": {"operations": [{"page": 1}]},
    }

    response = self.client.post(
        "/api/documents/bulk_edit/",
        json.dumps(payload),
        content_type="application/json",
    )

    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Inspect the single recorded call to the patched edit_pdf
    m.assert_called_once()
    call_args, call_kwargs = m.call_args
    self.assertCountEqual(call_args[0], [self.doc2.id])
    self.assertEqual(call_kwargs["operations"], [{"page": 1}])
    self.assertEqual(call_kwargs["user"], self.user)
|
||||
|
||||
def test_edit_pdf_invalid_params(self):
    """
    GIVEN:
        - Bulk edit requests for edit_pdf, each invalid in one respect
    WHEN:
        - API is called with each request in turn
    THEN:
        - Every response is HTTP 400 and contains the matching message
    """
    single_doc = [self.doc2.id]

    # Each entry: (documents, parameters, fragment expected in the body).
    # Requests are sent in this order; the first failing check aborts the
    # test.
    rejected_requests = [
        # multiple documents
        (
            [self.doc2.id, self.doc3.id],
            {"operations": [{"page": 1}]},
            b"Edit PDF method only supports one document",
        ),
        # no operations specified
        (
            single_doc,
            {},
            b"operations not specified",
        ),
        # operations not a list
        (
            single_doc,
            {"operations": "not_a_list"},
            b"operations must be a list",
        ),
        # invalid operation
        (
            single_doc,
            {"operations": ["invalid_operation"]},
            b"invalid operation entry",
        ),
        # page not an int
        (
            single_doc,
            {"operations": [{"page": "not_an_int"}]},
            b"page must be an integer",
        ),
        # rotate not an int
        (
            single_doc,
            {"operations": [{"page": 1, "rotate": "not_an_int"}]},
            b"rotate must be an integer",
        ),
        # doc not an int
        (
            single_doc,
            {"operations": [{"page": 1, "doc": "not_an_int"}]},
            b"doc must be an integer",
        ),
        # update_document not a boolean
        (
            single_doc,
            {
                "update_document": "not_a_bool",
                "operations": [{"page": 1}],
            },
            b"update_document must be a boolean",
        ),
        # include_metadata not a boolean
        (
            single_doc,
            {
                "include_metadata": "not_a_bool",
                "operations": [{"page": 1}],
            },
            b"include_metadata must be a boolean",
        ),
        # update_document True but output would be multiple documents
        (
            single_doc,
            {
                "update_document": True,
                "operations": [{"page": 1, "doc": 1}, {"page": 2, "doc": 2}],
            },
            b"update_document only allowed with a single output document",
        ),
    ]

    for documents, parameters, expected_message in rejected_requests:
        response = self.client.post(
            "/api/documents/bulk_edit/",
            json.dumps(
                {
                    "documents": documents,
                    "method": "edit_pdf",
                    "parameters": parameters,
                },
            ),
            content_type="application/json",
        )
        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
        self.assertIn(expected_message, response.content)
|
||||
|
||||
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
def test_edit_pdf_page_out_of_bounds(self, m):
    """
    GIVEN:
        - An edit_pdf bulk edit request for a single document
        - The requested page number (99) is out of bounds
    WHEN:
        - API is called
    THEN:
        - The response is HTTP 400 and mentions "out of bounds"
    """
    self.setup_mock(m, "edit_pdf")
    payload = {
        "documents": [self.doc2.id],
        "method": "edit_pdf",
        "parameters": {"operations": [{"page": 99}]},
    }

    response = self.client.post(
        "/api/documents/bulk_edit/",
        json.dumps(payload),
        content_type="application/json",
    )

    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
    self.assertIn(b"out of bounds", response.content)
|
||||
|
||||
@override_settings(AUDIT_LOG_ENABLED=True)
|
||||
def test_bulk_edit_audit_log_enabled_simple_field(self):
|
||||
"""
|
||||
|
@@ -909,3 +909,156 @@ class TestPDFActions(DirectoriesMixin, TestCase):
|
||||
expected_str = "Error deleting pages from document"
|
||||
self.assertIn(expected_str, error_str)
|
||||
mock_update_archive_file.assert_not_called()
|
||||
|
||||
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_basic_operations(self, mock_consume_file, mock_group):
    """
    GIVEN:
        - Existing document
    WHEN:
        - edit_pdf is called with two operations that place the pages in
          different output docs, one of them rotated by 90
    THEN:
        - "OK" is returned and delay() is called once on the task group
    """
    mock_group.return_value.delay.return_value = None
    split_and_rotate = [
        {"page": 1, "doc": 0},
        {"page": 2, "doc": 1, "rotate": 90},
    ]

    outcome = bulk_edit.edit_pdf([self.doc2.id], split_and_rotate)

    self.assertEqual(outcome, "OK")
    mock_group.return_value.delay.assert_called_once()
|
||||
|
||||
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_with_user_override(self, mock_consume_file, mock_group):
    """
    GIVEN:
        - Existing document
        - A newly created user named "editor"
    WHEN:
        - edit_pdf is called with that user passed as user
    THEN:
        - "OK" is returned and delay() is called once on the task group
    """
    mock_group.return_value.delay.return_value = None
    editor = User.objects.create(username="editor")
    split_ops = [{"page": 1, "doc": 0}, {"page": 2, "doc": 1}]

    outcome = bulk_edit.edit_pdf([self.doc2.id], split_ops, user=editor)

    self.assertEqual(outcome, "OK")
    mock_group.return_value.delay.assert_called_once()
|
||||
|
||||
@mock.patch("documents.bulk_edit.chord")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_with_delete_original(self, mock_consume_file, mock_chord):
    """
    GIVEN:
        - Existing document
    WHEN:
        - edit_pdf is called with delete_original=True
    THEN:
        - "OK" is returned and the patched chord is called once
    """
    mock_chord.return_value.delay.return_value = None
    page_ops = [{"page": 1}, {"page": 2}]

    outcome = bulk_edit.edit_pdf(
        [self.doc2.id],
        page_ops,
        delete_original=True,
    )

    self.assertEqual(outcome, "OK")
    mock_chord.assert_called_once()
|
||||
|
||||
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
def test_edit_pdf_with_update_document(self, mock_update_document):
    """
    GIVEN:
        - A single existing document
    WHEN:
        - edit_pdf is called with update_document=True and operations
          that do not assign pages to separate output docs
    THEN:
        - "OK" is returned
        - The document's checksum and page count differ from before
        - update_document_content_maybe_archive_file.delay is called once
          with the document's id
    """
    # Snapshot the values expected to change before running the edit
    checksum_before = self.doc2.checksum
    page_count_before = self.doc2.page_count

    outcome = bulk_edit.edit_pdf(
        [self.doc2.id],
        operations=[{"page": 1}, {"page": 2}],
        update_document=True,
        delete_original=False,
    )

    self.assertEqual(outcome, "OK")

    # Reload from the database so the comparisons see the persisted state
    self.doc2.refresh_from_db()
    self.assertNotEqual(self.doc2.checksum, checksum_before)
    self.assertNotEqual(self.doc2.page_count, page_count_before)
    mock_update_document.assert_called_once_with(document_id=self.doc2.id)
|
||||
|
||||
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_without_metadata(self, mock_consume_file, mock_group):
    """
    GIVEN:
        - Existing document
    WHEN:
        - edit_pdf is called with include_metadata=False
    THEN:
        - "OK" is returned and delay() is called once on the task group
    """
    mock_group.return_value.delay.return_value = None

    outcome = bulk_edit.edit_pdf(
        [self.doc2.id],
        [{"page": 1}],
        include_metadata=False,
    )

    self.assertEqual(outcome, "OK")
    mock_group.return_value.delay.assert_called_once()
|
||||
|
||||
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_open_failure(self, mock_consume_file, mock_group):
    """
    GIVEN:
        - Existing document
    WHEN:
        - edit_pdf is called with an operation for page 9999
    THEN:
        - An exception is raised and an error is logged to
          paperless.bulk_edit
        - Neither the task group nor consume_file.s is called
    """
    # invalid page, forces error during PDF load
    invalid_ops = [{"page": 9999}]

    error_logged = self.assertLogs("paperless.bulk_edit", level="ERROR")
    call_fails = self.assertRaises(Exception)
    with error_logged, call_fails:
        bulk_edit.edit_pdf([self.doc2.id], invalid_ops)

    mock_group.assert_not_called()
    mock_consume_file.assert_not_called()
|
||||
|
||||
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_multiple_outputs_with_update_flag_errors(
    self,
    mock_consume_file,
    mock_group,
):
    """
    GIVEN:
        - Existing document
    WHEN:
        - edit_pdf is called with update_document=True and operations
          that assign pages to two different output docs
    THEN:
        - ValueError is raised and an error is logged to
          paperless.bulk_edit
        - Neither the task group nor consume_file.s is called
    """
    two_output_ops = [{"page": 1, "doc": 0}, {"page": 2, "doc": 1}]

    error_logged = self.assertLogs("paperless.bulk_edit", level="ERROR")
    call_fails = self.assertRaises(ValueError)
    with error_logged, call_fails:
        bulk_edit.edit_pdf(
            [self.doc2.id],
            two_output_ops,
            update_document=True,
        )

    mock_group.assert_not_called()
    mock_consume_file.assert_not_called()
|
||||
|
@@ -1,8 +1,10 @@
|
||||
import shutil
|
||||
import socket
|
||||
from datetime import timedelta
|
||||
from typing import TYPE_CHECKING
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from django.contrib.auth.models import Group
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import override_settings
|
||||
@@ -10,6 +12,7 @@ from django.utils import timezone
|
||||
from guardian.shortcuts import assign_perm
|
||||
from guardian.shortcuts import get_groups_with_perms
|
||||
from guardian.shortcuts import get_users_with_perms
|
||||
from httpx import HTTPError
|
||||
from httpx import HTTPStatusError
|
||||
from pytest_httpx import HTTPXMock
|
||||
from rest_framework.test import APITestCase
|
||||
@@ -2825,6 +2828,8 @@ class TestWorkflows(
|
||||
content="Test message",
|
||||
headers={},
|
||||
files=None,
|
||||
follow_redirects=False,
|
||||
timeout=5,
|
||||
)
|
||||
|
||||
expected_str = "Webhook sent to http://paperless-ngx.com"
|
||||
@@ -2842,6 +2847,8 @@ class TestWorkflows(
|
||||
data={"message": "Test message"},
|
||||
headers={},
|
||||
files=None,
|
||||
follow_redirects=False,
|
||||
timeout=5,
|
||||
)
|
||||
|
||||
@mock.patch("httpx.post")
|
||||
@@ -2962,3 +2969,164 @@ class TestWebhookSend:
|
||||
as_json=True,
|
||||
)
|
||||
assert httpx_mock.get_request().headers["Content-Type"] == "application/json"
|
||||
|
||||
|
||||
@pytest.fixture
def resolve_to(monkeypatch):
    """
    Provide a callable that pins socket.getaddrinfo to a single IP.

    Calling the returned function with an IP string monkeypatches
    socket.getaddrinfo so that every lookup, whatever the hostname,
    yields one AF_INET entry for that IP with port 0.
    """

    def _pin(ip: str):
        canned_result = (socket.AF_INET, None, None, "", (ip, 0))

        def fake_getaddrinfo(host, *_args, **_kwargs):
            # A fresh list per call, so callers cannot affect each other
            return [canned_result]

        monkeypatch.setattr(socket, "getaddrinfo", fake_getaddrinfo)

    return _pin
|
||||
|
||||
|
||||
class TestWebhookSecurity:
    """Checks of send_webhook against unsafe URLs, ports, hosts and headers."""

    def test_blocks_invalid_scheme_or_hostname(self, httpx_mock: HTTPXMock):
        """
        GIVEN:
            - A URL with an ftp scheme and a URL without a hostname
        WHEN:
            - send_webhook is called with each of them
        THEN:
            - ValueError is raised both times
        """
        for rejected_url in ("ftp://example.com", "http:///nohost"):
            with pytest.raises(ValueError):
                send_webhook(
                    rejected_url,
                    data="",
                    headers={},
                    files=None,
                    as_json=False,
                )

    @override_settings(WEBHOOKS_ALLOWED_PORTS=[80, 443])
    def test_blocks_disallowed_port(self, httpx_mock: HTTPXMock):
        """
        GIVEN:
            - WEBHOOKS_ALLOWED_PORTS is [80, 443]
            - A URL that targets port 8080
        WHEN:
            - send_webhook is called with that URL
        THEN:
            - ValueError is raised
            - No request is recorded by the HTTP mock
        """
        target = "http://paperless-ngx.com:8080"

        with pytest.raises(ValueError):
            send_webhook(
                target,
                data="",
                headers={},
                files=None,
                as_json=False,
            )

        assert httpx_mock.get_request() is None

    @override_settings(WEBHOOKS_ALLOW_INTERNAL_REQUESTS=False)
    def test_blocks_private_loopback_linklocal(self, httpx_mock: HTTPXMock, resolve_to):
        """
        GIVEN:
            - WEBHOOKS_ALLOW_INTERNAL_REQUESTS is False
            - The hostname resolves to the loopback address 127.0.0.1
        WHEN:
            - send_webhook is called
        THEN:
            - ValueError is raised
        """
        resolve_to("127.0.0.1")

        with pytest.raises(ValueError):
            send_webhook(
                "http://paperless-ngx.com",
                data="",
                headers={},
                files=None,
                as_json=False,
            )

    def test_allows_public_ip_and_sends(self, httpx_mock: HTTPXMock, resolve_to):
        """
        GIVEN:
            - The hostname resolves to 52.207.186.75
        WHEN:
            - send_webhook is called
        THEN:
            - A request is recorded whose URL host is the original hostname
        """
        resolve_to("52.207.186.75")
        httpx_mock.add_response(content=b"ok")

        send_webhook(
            url="http://paperless-ngx.com",
            data="hi",
            headers={},
            files=None,
            as_json=False,
        )

        sent = httpx_mock.get_request()
        assert sent.url.host == "paperless-ngx.com"

    def test_follow_redirects_disabled(self, httpx_mock: HTTPXMock, resolve_to):
        """
        GIVEN:
            - A URL whose response is a 302 redirect
        WHEN:
            - send_webhook is called
        THEN:
            - HTTPError is raised
            - Exactly one request is recorded, so the redirect was not
              followed
        """
        resolve_to("52.207.186.75")
        # Answer with a redirect; following it would record a second request
        httpx_mock.add_response(
            status_code=302,
            headers={"location": "http://internal-service.local"},
            content=b"",
        )

        with pytest.raises(HTTPError):
            send_webhook(
                "http://paperless-ngx.com",
                data="",
                headers={},
                files=None,
                as_json=False,
            )

        recorded = httpx_mock.get_requests()
        assert len(recorded) == 1

    def test_strips_user_supplied_host_header(self, httpx_mock: HTTPXMock, resolve_to):
        """
        GIVEN:
            - A caller-supplied Host header of "evil.test"
        WHEN:
            - send_webhook is called with that header
        THEN:
            - The recorded request's Host header is the URL's hostname
            - "evil.test" does not appear in that header
        """
        resolve_to("52.207.186.75")
        httpx_mock.add_response(content=b"ok")
        spoofed_headers = {"Host": "evil.test"}

        send_webhook(
            url="http://paperless-ngx.com",
            data="ok",
            headers=spoofed_headers,
            files=None,
            as_json=False,
        )

        sent = httpx_mock.get_request()
        assert sent.headers["Host"] == "paperless-ngx.com"
        assert "evil.test" not in sent.headers.get("Host", "")
|
||||
|
Reference in New Issue
Block a user