phase2-ATS + heuristic careers finding
This commit is contained in:
@@ -18,6 +18,7 @@ from .config import get_settings
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# NOTE(review): presumably the HTTP status codes that request_with_retries
# treats as transient and retries; its body is outside this view -- confirm.
_RETRY_STATUS = frozenset({429, 500, 502, 503, 504})
|
||||
# Status codes on which probe_url gives up on HEAD and retries with GET:
# 405 (Method Not Allowed) and 501 (Not Implemented).
_HEAD_NOT_SUPPORTED = frozenset({405, 501})
|
||||
|
||||
|
||||
def default_headers() -> dict[str, str]:
|
||||
@@ -95,3 +96,21 @@ def request_with_retries(
|
||||
if last_exc is not None: # pragma: no cover - defensive
|
||||
raise last_exc
|
||||
raise RuntimeError("request_with_retries exhausted without a response")
|
||||
|
||||
|
||||
def probe_url(client: httpx.Client, url: str) -> str | None:
    """Probe a URL with HEAD (fallback GET on 405/501); return final URL or None.

    Args:
        client: The ``httpx.Client`` used to issue the probe requests.
        url: The URL to probe.

    Returns:
        The string form of the response's final URL (after any redirects the
        client followed) when the server answers with a non-error status
        (<400). ``None`` when the final status is >= 400 or when the request
        raises.

    This is a best-effort probe and never raises: any exception is logged at
    debug level (with traceback) and reported to the caller as ``None``.
    """
    try:
        resp = request_with_retries(client, "HEAD", url, max_retries=1)
        if resp.status_code in _HEAD_NOT_SUPPORTED:
            # The server refuses HEAD outright; ask again with GET before
            # deciding the URL is unusable.
            resp = request_with_retries(client, "GET", url, max_retries=1)
        if resp.status_code < 400:
            return str(resp.url)
        return None
    except Exception:
        # Deliberately broad: callers rely on "None on any failure". Keep the
        # failure visible for debugging instead of swallowing it silently.
        logger.debug("probe_url: request failed for %s", url, exc_info=True)
        return None
|
||||
|
||||
Reference in New Issue
Block a user