phase2-ATS + heuristic careers finding

This commit is contained in:
ldy
2026-06-17 17:33:11 -04:00
parent cd9ab9b95e
commit 113a4ced36
11 changed files with 2836 additions and 39 deletions

View File

@@ -18,6 +18,7 @@ from .config import get_settings
logger = logging.getLogger(__name__)
# HTTP status codes treated as retryable (429 plus several 5xx codes).
# NOTE(review): presumably consulted by request_with_retries; confirm there.
_RETRY_STATUS = frozenset({429, 500, 502, 503, 504})
# Statuses (405 Method Not Allowed, 501 Not Implemented) on which probe_url
# repeats its HEAD probe as a GET.
_HEAD_NOT_SUPPORTED = frozenset({405, 501})
def default_headers() -> dict[str, str]:
@@ -95,3 +96,21 @@ def request_with_retries(
if last_exc is not None: # pragma: no cover - defensive
raise last_exc
raise RuntimeError("request_with_retries exhausted without a response")
def probe_url(client: httpx.Client, url: str) -> str | None:
    """Probe a URL with HEAD (fallback GET on 405/501); return final URL or None.

    This is a best-effort check: it never raises. Failures are logged at
    debug level so they can be diagnosed without changing the return contract.

    Args:
        client: HTTP client used to issue the probe requests.
        url: URL to probe.

    Returns:
        The str representation of the response's URL (the post-redirect URL
        when the client follows redirects) if the server responds with a
        non-error status (<400). None on an error status or if any exception
        is raised while probing.
    """
    try:
        resp = request_with_retries(client, "HEAD", url, max_retries=1)
        if resp.status_code in _HEAD_NOT_SUPPORTED:
            # The server rejects the HEAD method itself, so repeat with GET.
            resp = request_with_retries(client, "GET", url, max_retries=1)
        if resp.status_code < 400:
            return str(resp.url)
        return None
    except Exception:
        # Deliberately broad: a probe must not abort the caller. Record the
        # cause instead of discarding it so failed probes stay debuggable.
        logger.debug("probe_url failed for %s", url, exc_info=True)
        return None