mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-11-17 04:16:54 -06:00
Chore: add backoff to handle 429 Wikimedia requests in tests (#11364)
This commit is contained in:
@@ -2,6 +2,7 @@ import os
|
|||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
@@ -53,14 +54,33 @@ class TestUrlCanary:
|
|||||||
Verify certain URLs are still available so testing is valid still
|
Verify certain URLs are still available so testing is valid still
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Wikimedia rejects requests without a browser-like User-Agent header and returns 403.
|
@classmethod
|
||||||
_WIKIMEDIA_HEADERS = {
|
def _fetch_wikimedia(cls, url: str) -> httpx.Response:
|
||||||
"User-Agent": (
|
"""
|
||||||
"Mozilla/5.0 (X11; Linux x86_64) "
|
Wikimedia occasionally throttles automated requests (HTTP 429). Retry a few
|
||||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
times with a short backoff so the tests stay stable, and skip if throttling
|
||||||
"Chrome/123.0.0.0 Safari/537.36"
|
persists.
|
||||||
),
|
"""
|
||||||
}
|
last_resp: httpx.Response | None = None
|
||||||
|
# Wikimedia rejects requests without a browser-like User-Agent header and returns 403.
|
||||||
|
headers = {
|
||||||
|
"User-Agent": (
|
||||||
|
"Mozilla/5.0 (X11; Linux x86_64) "
|
||||||
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||||
|
"Chrome/123.0.0.0 Safari/537.36"
|
||||||
|
),
|
||||||
|
}
|
||||||
|
for delay in (0, 1, 2):
|
||||||
|
resp = httpx.get(url, headers=headers, timeout=30.0)
|
||||||
|
if resp.status_code != httpx.codes.TOO_MANY_REQUESTS:
|
||||||
|
return resp
|
||||||
|
last_resp = resp
|
||||||
|
time.sleep(delay)
|
||||||
|
|
||||||
|
pytest.skip(
|
||||||
|
"Wikimedia throttled the canary request with HTTP 429; try rerunning later.",
|
||||||
|
)
|
||||||
|
return last_resp # pragma: no cover
|
||||||
|
|
||||||
def test_online_image_exception_on_not_available(self):
|
def test_online_image_exception_on_not_available(self):
|
||||||
"""
|
"""
|
||||||
@@ -76,11 +96,10 @@ class TestUrlCanary:
|
|||||||
whether this image stays online forever, so here we check if we can detect if is not
|
whether this image stays online forever, so here we check if we can detect if is not
|
||||||
available anymore.
|
available anymore.
|
||||||
"""
|
"""
|
||||||
|
resp = self._fetch_wikimedia(
|
||||||
|
"https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png",
|
||||||
|
)
|
||||||
with pytest.raises(httpx.HTTPStatusError) as exec_info:
|
with pytest.raises(httpx.HTTPStatusError) as exec_info:
|
||||||
resp = httpx.get(
|
|
||||||
"https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png",
|
|
||||||
headers=self._WIKIMEDIA_HEADERS,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
|
||||||
assert exec_info.value.response.status_code == httpx.codes.NOT_FOUND
|
assert exec_info.value.response.status_code == httpx.codes.NOT_FOUND
|
||||||
@@ -100,9 +119,8 @@ class TestUrlCanary:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# Now check the URL used in samples/sample.html
|
# Now check the URL used in samples/sample.html
|
||||||
resp = httpx.get(
|
resp = self._fetch_wikimedia(
|
||||||
"https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png",
|
"https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png",
|
||||||
headers=self._WIKIMEDIA_HEADERS,
|
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user