Chore: add backoff ro handle 429 Wikimedia requests in tests (#11364)

This commit is contained in:
shamoon
2025-11-14 15:58:29 -08:00
committed by GitHub
parent dd6f7fad32
commit 69514d8d70

View File

@@ -2,6 +2,7 @@ import os
import shutil import shutil
import subprocess import subprocess
import tempfile import tempfile
import time
from pathlib import Path from pathlib import Path
import httpx import httpx
@@ -53,14 +54,33 @@ class TestUrlCanary:
Verify certain URLs are still available so testing is valid still Verify certain URLs are still available so testing is valid still
""" """
@classmethod
def _fetch_wikimedia(cls, url: str) -> httpx.Response:
"""
Wikimedia occasionally throttles automated requests (HTTP 429). Retry a few
times with a short backoff so the tests stay stable, and skip if throttling
persists.
"""
last_resp: httpx.Response | None = None
# Wikimedia rejects requests without a browser-like User-Agent header and returns 403. # Wikimedia rejects requests without a browser-like User-Agent header and returns 403.
_WIKIMEDIA_HEADERS = { headers = {
"User-Agent": ( "User-Agent": (
"Mozilla/5.0 (X11; Linux x86_64) " "Mozilla/5.0 (X11; Linux x86_64) "
"AppleWebKit/537.36 (KHTML, like Gecko) " "AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/123.0.0.0 Safari/537.36" "Chrome/123.0.0.0 Safari/537.36"
), ),
} }
for delay in (0, 1, 2):
resp = httpx.get(url, headers=headers, timeout=30.0)
if resp.status_code != httpx.codes.TOO_MANY_REQUESTS:
return resp
last_resp = resp
time.sleep(delay)
pytest.skip(
"Wikimedia throttled the canary request with HTTP 429; try rerunning later.",
)
return last_resp # pragma: no cover
def test_online_image_exception_on_not_available(self): def test_online_image_exception_on_not_available(self):
""" """
@@ -76,11 +96,10 @@ class TestUrlCanary:
whether this image stays online forever, so here we check if we can detect if is not whether this image stays online forever, so here we check if we can detect if is not
available anymore. available anymore.
""" """
with pytest.raises(httpx.HTTPStatusError) as exec_info: resp = self._fetch_wikimedia(
resp = httpx.get(
"https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png", "https://upload.wikimedia.org/wikipedia/en/f/f7/nonexistent.png",
headers=self._WIKIMEDIA_HEADERS,
) )
with pytest.raises(httpx.HTTPStatusError) as exec_info:
resp.raise_for_status() resp.raise_for_status()
assert exec_info.value.response.status_code == httpx.codes.NOT_FOUND assert exec_info.value.response.status_code == httpx.codes.NOT_FOUND
@@ -100,9 +119,8 @@ class TestUrlCanary:
""" """
# Now check the URL used in samples/sample.html # Now check the URL used in samples/sample.html
resp = httpx.get( resp = self._fetch_wikimedia(
"https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png", "https://upload.wikimedia.org/wikipedia/en/f/f7/RickRoll.png",
headers=self._WIKIMEDIA_HEADERS,
) )
resp.raise_for_status() resp.raise_for_status()