Files
JobSourceAgent/jobsource/http.py
2026-06-17 08:38:15 -04:00

98 lines
3.2 KiB
Python

"""Shared httpx client factory and a small bounded-retry helper.
Every outbound HTTP call in the pipeline should go through a client built here
so timeouts, headers, and bounded retries are applied consistently. Connection-
level retries are handled by the transport; request_with_retries adds bounded
retries for transient HTTP status codes.
"""
from __future__ import annotations
import logging
import time
from collections.abc import Iterable
import httpx
from .config import get_settings
# Module-level logger; request_with_retries logs a warning before each retry.
logger = logging.getLogger(__name__)
# HTTP status codes retried by default (429 rate limiting plus 5xx server-side
# errors); this is the default ``retry_status`` of request_with_retries.
_RETRY_STATUS = frozenset({429, 500, 502, 503, 504})
def default_headers() -> dict[str, str]:
    """Return the baseline headers attached to every outbound request.

    The ``User-Agent`` value is read from settings; the ``Accept`` and
    ``Accept-Language`` values are fixed. A fresh dict is returned on each
    call, so callers may mutate the result freely.
    """
    user_agent = get_settings().user_agent
    headers: dict[str, str] = {}
    headers["User-Agent"] = user_agent
    headers["Accept"] = (
        "text/html,application/xhtml+xml,application/json;q=0.9,*/*;q=0.8"
    )
    headers["Accept-Language"] = "en-US,en;q=0.9"
    return headers
def build_client(**overrides: object) -> httpx.Client:
    """Create a configured sync httpx client.

    Timeout and connection-level retries come from settings. Callers may pass
    any ``httpx.Client`` keyword argument as an override (e.g. ``base_url``).

    Two kinds of override get special handling so they behave as expected:

    * ``headers`` is merged on top of :func:`default_headers` instead of
      replacing it, so callers can add or change individual headers without
      losing the shared User-Agent.
    * ``verify``, ``cert``, ``trust_env``, ``http1``, ``http2`` and ``limits``
      are also forwarded to the default transport. ``httpx.Client`` ignores
      these options whenever an explicit ``transport`` is supplied, which this
      factory always does in order to enable connection-level retries.

    Passing ``transport`` yourself replaces the default transport entirely
    (including its retry setting).

    Args:
        **overrides: Keyword arguments forwarded to ``httpx.Client``.

    Returns:
        A new ``httpx.Client``. The caller is responsible for closing it.
    """
    settings = get_settings()

    # httpx.Headers is case-insensitive, so an override such as "user-agent"
    # replaces the default "User-Agent" rather than being sent alongside it.
    headers = httpx.Headers(default_headers())
    extra_headers = overrides.pop("headers", None)
    if extra_headers is not None:
        headers.update(extra_headers)  # type: ignore[arg-type]

    kwargs: dict[str, object] = {
        "timeout": httpx.Timeout(settings.http_timeout),
        "headers": headers,
        "follow_redirects": True,
    }

    # Only build the default transport when the caller did not bring their
    # own; constructing one sets up a connection pool and TLS context.
    if "transport" not in overrides:
        transport_options = {
            key: overrides[key]
            for key in ("verify", "cert", "trust_env", "http1", "http2", "limits")
            if key in overrides
        }
        kwargs["transport"] = httpx.HTTPTransport(
            retries=settings.http_max_retries,
            **transport_options,  # type: ignore[arg-type]
        )

    kwargs.update(overrides)
    return httpx.Client(**kwargs)  # type: ignore[arg-type]
def request_with_retries(
    client: httpx.Client,
    method: str,
    url: str,
    *,
    max_retries: int | None = None,
    retry_status: Iterable[int] = _RETRY_STATUS,
    **kwargs: object,
) -> httpx.Response:
    """Issue a request, retrying transient failures with exponential backoff.

    A request is retried when it raises ``httpx.HTTPError`` or when the
    response status is in ``retry_status``. The delay before retry number
    ``k`` (0-based) is ``http_backoff_factor * 2**k`` seconds. The request is
    always attempted at least once; a negative retry count is treated as 0.

    Note that retries are applied regardless of ``method``, so callers issuing
    non-idempotent requests should pass ``max_retries=0`` if a repeat is unsafe.

    Args:
        client: The httpx client used to send the request.
        method: HTTP method name passed to ``client.request``.
        url: Target URL passed to ``client.request``.
        max_retries: Number of retries after the first attempt. Defaults to
            ``settings.http_max_retries`` when ``None``.
        retry_status: Status codes that trigger a retry.
        **kwargs: Extra keyword arguments forwarded to ``client.request``.

    Returns:
        The first response whose status is not in ``retry_status``, or the
        response of the final attempt whatever its status.

    Raises:
        httpx.HTTPError: If the final attempt fails with a request error.
    """
    settings = get_settings()
    configured = settings.http_max_retries if max_retries is None else max_retries
    # Clamp so a negative value still sends the request once instead of
    # skipping the loop entirely.
    retries = max(0, configured)
    total_attempts = retries + 1
    backoff = settings.http_backoff_factor
    statuses = frozenset(retry_status)

    for attempt in range(total_attempts):
        is_last_attempt = attempt == retries
        sleep_for = backoff * (2**attempt)
        try:
            response = client.request(method, url, **kwargs)  # type: ignore[arg-type]
        except httpx.HTTPError as exc:
            if is_last_attempt:
                raise
            logger.warning(
                "HTTP error on %s (attempt %d/%d): %s; retrying in %.1fs",
                url,
                attempt + 1,
                total_attempts,
                exc,
                sleep_for,
            )
        else:
            # Hand back the final response even if its status is retryable so
            # the caller can inspect it or call raise_for_status().
            if is_last_attempt or response.status_code not in statuses:
                return response
            logger.warning(
                "HTTP %s on %s (attempt %d/%d); retrying in %.1fs",
                response.status_code,
                url,
                attempt + 1,
                total_attempts,
                sleep_for,
            )
        time.sleep(sleep_for)

    # Unreachable: the last loop iteration always returns or raises.
    raise RuntimeError(  # pragma: no cover - defensive
        "request_with_retries exhausted without a response"
    )