Merge branch 'dev' into feature-ai

This commit is contained in:
shamoon
2025-08-11 10:50:15 -07:00
committed by GitHub
29 changed files with 1549 additions and 912 deletions

View File

@@ -41,6 +41,7 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
title="B",
correspondent=self.c1,
document_type=self.dt1,
page_count=5,
)
self.doc3 = Document.objects.create(
checksum="C",
@@ -1369,6 +1370,218 @@ class TestBulkEditAPI(DirectoriesMixin, APITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"pages must be a list of integers", response.content)
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
def test_edit_pdf(self, m):
self.setup_mock(m, "edit_pdf")
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
m.assert_called_once()
args, kwargs = m.call_args
self.assertCountEqual(args[0], [self.doc2.id])
self.assertEqual(kwargs["operations"], [{"page": 1}])
self.assertEqual(kwargs["user"], self.user)
def test_edit_pdf_invalid_params(self):
# multiple documents
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id, self.doc3.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"Edit PDF method only supports one document", response.content)
# no operations specified
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"operations not specified", response.content)
# operations not a list
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": "not_a_list"},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"operations must be a list", response.content)
# invalid operation
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": ["invalid_operation"]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"invalid operation entry", response.content)
# page not an int
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": "not_an_int"}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"page must be an integer", response.content)
# rotate not an int
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1, "rotate": "not_an_int"}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"rotate must be an integer", response.content)
# doc not an int
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 1, "doc": "not_an_int"}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"doc must be an integer", response.content)
# update_document not a boolean
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {
"update_document": "not_a_bool",
"operations": [{"page": 1}],
},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"update_document must be a boolean", response.content)
# include_metadata not a boolean
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {
"include_metadata": "not_a_bool",
"operations": [{"page": 1}],
},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"include_metadata must be a boolean", response.content)
# update_document True but output would be multiple documents
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {
"update_document": True,
"operations": [{"page": 1, "doc": 1}, {"page": 2, "doc": 2}],
},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(
b"update_document only allowed with a single output document",
response.content,
)
@mock.patch("documents.serialisers.bulk_edit.edit_pdf")
def test_edit_pdf_page_out_of_bounds(self, m):
"""
GIVEN:
- API data for editing PDF is called
- The page number is out of bounds
WHEN:
- API is called
THEN:
- The API fails with a correct error code
"""
self.setup_mock(m, "edit_pdf")
response = self.client.post(
"/api/documents/bulk_edit/",
json.dumps(
{
"documents": [self.doc2.id],
"method": "edit_pdf",
"parameters": {"operations": [{"page": 99}]},
},
),
content_type="application/json",
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertIn(b"out of bounds", response.content)
@override_settings(AUDIT_LOG_ENABLED=True)
def test_bulk_edit_audit_log_enabled_simple_field(self):
"""

View File

@@ -909,3 +909,156 @@ class TestPDFActions(DirectoriesMixin, TestCase):
expected_str = "Error deleting pages from document"
self.assertIn(expected_str, error_str)
mock_update_archive_file.assert_not_called()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_basic_operations(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with two operations to split the doc and rotate pages
THEN:
- A grouped task is generated and delay() is called
"""
mock_group.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1, "doc": 0}, {"page": 2, "doc": 1, "rotate": 90}]
result = bulk_edit.edit_pdf(doc_ids, operations)
self.assertEqual(result, "OK")
mock_group.return_value.delay.assert_called_once()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_with_user_override(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with user override
THEN:
- Task is created with user context
"""
mock_group.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1, "doc": 0}, {"page": 2, "doc": 1}]
user = User.objects.create(username="editor")
result = bulk_edit.edit_pdf(doc_ids, operations, user=user)
self.assertEqual(result, "OK")
mock_group.return_value.delay.assert_called_once()
@mock.patch("documents.bulk_edit.chord")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_with_delete_original(self, mock_consume_file, mock_chord):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with delete_original=True
THEN:
- Task group is triggered
"""
mock_chord.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1}, {"page": 2}]
result = bulk_edit.edit_pdf(doc_ids, operations, delete_original=True)
self.assertEqual(result, "OK")
mock_chord.assert_called_once()
@mock.patch("documents.tasks.update_document_content_maybe_archive_file.delay")
def test_edit_pdf_with_update_document(self, mock_update_document):
"""
GIVEN:
- A single existing PDF document
WHEN:
- edit_pdf is called with update_document=True and a single output
THEN:
- The original document is updated in-place
- The update_document_content_maybe_archive_file task is triggered
"""
doc_ids = [self.doc2.id]
operations = [{"page": 1}, {"page": 2}]
original_checksum = self.doc2.checksum
original_page_count = self.doc2.page_count
result = bulk_edit.edit_pdf(
doc_ids,
operations=operations,
update_document=True,
delete_original=False,
)
self.assertEqual(result, "OK")
self.doc2.refresh_from_db()
self.assertNotEqual(self.doc2.checksum, original_checksum)
self.assertNotEqual(self.doc2.page_count, original_page_count)
mock_update_document.assert_called_once_with(document_id=self.doc2.id)
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_without_metadata(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with include_metadata=False
THEN:
- Tasks are created with empty metadata
"""
mock_group.return_value.delay.return_value = None
doc_ids = [self.doc2.id]
operations = [{"page": 1}]
result = bulk_edit.edit_pdf(doc_ids, operations, include_metadata=False)
self.assertEqual(result, "OK")
mock_group.return_value.delay.assert_called_once()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_open_failure(self, mock_consume_file, mock_group):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf fails to open PDF
THEN:
- Task group is not called
"""
doc_ids = [self.doc2.id]
operations = [
{"page": 9999}, # invalid page, forces error during PDF load
]
with self.assertLogs("paperless.bulk_edit", level="ERROR"):
with self.assertRaises(Exception):
bulk_edit.edit_pdf(doc_ids, operations)
mock_group.assert_not_called()
mock_consume_file.assert_not_called()
@mock.patch("documents.bulk_edit.group")
@mock.patch("documents.tasks.consume_file.s")
def test_edit_pdf_multiple_outputs_with_update_flag_errors(
self,
mock_consume_file,
mock_group,
):
"""
GIVEN:
- Existing document
WHEN:
- edit_pdf is called with multiple outputs and update_document=True
THEN:
- An error is logged and task group is not called
"""
doc_ids = [self.doc2.id]
operations = [
{"page": 1, "doc": 0},
{"page": 2, "doc": 1},
]
with self.assertLogs("paperless.bulk_edit", level="ERROR"):
with self.assertRaises(ValueError):
bulk_edit.edit_pdf(doc_ids, operations, update_document=True)
mock_group.assert_not_called()
mock_consume_file.assert_not_called()

View File

@@ -1,8 +1,10 @@
import shutil
import socket
from datetime import timedelta
from typing import TYPE_CHECKING
from unittest import mock
import pytest
from django.contrib.auth.models import Group
from django.contrib.auth.models import User
from django.test import override_settings
@@ -10,6 +12,7 @@ from django.utils import timezone
from guardian.shortcuts import assign_perm
from guardian.shortcuts import get_groups_with_perms
from guardian.shortcuts import get_users_with_perms
from httpx import HTTPError
from httpx import HTTPStatusError
from pytest_httpx import HTTPXMock
from rest_framework.test import APITestCase
@@ -2825,6 +2828,8 @@ class TestWorkflows(
content="Test message",
headers={},
files=None,
follow_redirects=False,
timeout=5,
)
expected_str = "Webhook sent to http://paperless-ngx.com"
@@ -2842,6 +2847,8 @@ class TestWorkflows(
data={"message": "Test message"},
headers={},
files=None,
follow_redirects=False,
timeout=5,
)
@mock.patch("httpx.post")
@@ -2962,3 +2969,164 @@ class TestWebhookSend:
as_json=True,
)
assert httpx_mock.get_request().headers["Content-Type"] == "application/json"
@pytest.fixture
def resolve_to(monkeypatch):
"""
Force DNS resolution to a specific IP for any hostname.
"""
def _set(ip: str):
def fake_getaddrinfo(host, *_args, **_kwargs):
return [(socket.AF_INET, None, None, "", (ip, 0))]
monkeypatch.setattr(socket, "getaddrinfo", fake_getaddrinfo)
return _set
class TestWebhookSecurity:
def test_blocks_invalid_scheme_or_hostname(self, httpx_mock: HTTPXMock):
"""
GIVEN:
- Invalid URL schemes or hostnames
WHEN:
- send_webhook is called with such URLs
THEN:
- ValueError is raised
"""
with pytest.raises(ValueError):
send_webhook(
"ftp://example.com",
data="",
headers={},
files=None,
as_json=False,
)
with pytest.raises(ValueError):
send_webhook(
"http:///nohost",
data="",
headers={},
files=None,
as_json=False,
)
@override_settings(WEBHOOKS_ALLOWED_PORTS=[80, 443])
def test_blocks_disallowed_port(self, httpx_mock: HTTPXMock):
"""
GIVEN:
- URL with a disallowed port
WHEN:
- send_webhook is called with such URL
THEN:
- ValueError is raised
"""
with pytest.raises(ValueError):
send_webhook(
"http://paperless-ngx.com:8080",
data="",
headers={},
files=None,
as_json=False,
)
assert httpx_mock.get_request() is None
@override_settings(WEBHOOKS_ALLOW_INTERNAL_REQUESTS=False)
def test_blocks_private_loopback_linklocal(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- URL with a private, loopback, or link-local IP address
- WEBHOOKS_ALLOW_INTERNAL_REQUESTS is False
WHEN:
- send_webhook is called with such URL
THEN:
- ValueError is raised
"""
resolve_to("127.0.0.1")
with pytest.raises(ValueError):
send_webhook(
"http://paperless-ngx.com",
data="",
headers={},
files=None,
as_json=False,
)
def test_allows_public_ip_and_sends(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- URL with a public IP address
WHEN:
- send_webhook is called with such URL
THEN:
- Request is sent successfully
"""
resolve_to("52.207.186.75")
httpx_mock.add_response(content=b"ok")
send_webhook(
url="http://paperless-ngx.com",
data="hi",
headers={},
files=None,
as_json=False,
)
req = httpx_mock.get_request()
assert req.url.host == "paperless-ngx.com"
def test_follow_redirects_disabled(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- A URL that redirects
WHEN:
- send_webhook is called with follow_redirects=False
THEN:
- Request is made to the original URL and does not follow the redirect
"""
resolve_to("52.207.186.75")
# Return a redirect and ensure we don't follow it (only one request recorded)
httpx_mock.add_response(
status_code=302,
headers={"location": "http://internal-service.local"},
content=b"",
)
with pytest.raises(HTTPError):
send_webhook(
"http://paperless-ngx.com",
data="",
headers={},
files=None,
as_json=False,
)
assert len(httpx_mock.get_requests()) == 1
def test_strips_user_supplied_host_header(self, httpx_mock: HTTPXMock, resolve_to):
"""
GIVEN:
- A URL with a user-supplied Host header
WHEN:
- send_webhook is called with a malicious Host header
THEN:
- The Host header is stripped and replaced with the resolved hostname
"""
resolve_to("52.207.186.75")
httpx_mock.add_response(content=b"ok")
send_webhook(
url="http://paperless-ngx.com",
data="ok",
headers={"Host": "evil.test"},
files=None,
as_json=False,
)
req = httpx_mock.get_request()
assert req.headers["Host"] == "paperless-ngx.com"
assert "evil.test" not in req.headers.get("Host", "")