750 lines
29 KiB
Python
750 lines
29 KiB
Python
"""Tests for jobsource/careers/ats.py — all network-free via monkeypatching."""
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from jobsource.careers.ats import (
|
|
ATSBoard,
|
|
ATSFetch,
|
|
ATSResult,
|
|
_board_from_slug,
|
|
_domain_stem,
|
|
_fetch_ashby,
|
|
_fetch_greenhouse,
|
|
_fetch_lever,
|
|
_fetch_workday,
|
|
_loose_name_match,
|
|
_slug_candidates,
|
|
detect_and_fetch,
|
|
detect_ats_in_html,
|
|
detect_ats_in_url,
|
|
recover_via_slug_guess,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tiny fake HTTP response for monkeypatching request_with_retries
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class FakeResponse:
    """Canned HTTP response returned by the patched ``request_with_retries``.

    Exposes only what the tests in this module set up: ``status_code``,
    ``url``, ``text`` and ``json()``.
    """

    def __init__(self, status_code: int, body: object, url: str = "https://example.com"):
        self.status_code, self.url = status_code, url
        self._body = body
        # ``text`` is simply ``str()`` of the canned body, not serialized JSON.
        self.text = str(body)

    def json(self) -> object:
        """Return the canned body object exactly as it was supplied."""
        payload = self._body
        return payload
|
|
|
|
|
|
class FakeClient:
    """Placeholder handed over where an httpx.Client is expected; no network call goes through it here."""
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# detect_ats_in_html — Greenhouse
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestDetectATSInHtmlGreenhouse:
    """``detect_ats_in_html`` on markup that references Greenhouse boards."""

    def test_boards_greenhouse_script_tag(self):
        """The embed script's ``for=`` value is reported as slug and board URL."""
        markup = '<script src="https://boards.greenhouse.io/embed/job_board?for=airbnb"></script>'
        found = detect_ats_in_html(markup)
        assert found is not None
        assert (found.ats_name, found.slug) == ("greenhouse", "airbnb")
        assert found.careers_url == "https://boards.greenhouse.io/airbnb"

    def test_boards_greenhouse_direct_link(self):
        """A plain anchor to ``boards.greenhouse.io/<slug>`` is detected."""
        markup = '<a href="https://boards.greenhouse.io/acme">Jobs</a>'
        found = detect_ats_in_html(markup)
        assert found is not None
        assert (found.ats_name, found.slug) == ("greenhouse", "acme")

    def test_job_boards_subdomain(self):
        """The ``job-boards`` subdomain is treated as Greenhouse too."""
        markup = '<a href="https://job-boards.greenhouse.io/stripe">Jobs</a>'
        found = detect_ats_in_html(markup)
        assert found is not None
        assert (found.ats_name, found.slug) == ("greenhouse", "stripe")

    def test_no_match_returns_none(self):
        """Markup without any ATS reference yields ``None``."""
        page = "<html><body>Nothing here</body></html>"
        assert detect_ats_in_html(page) is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# detect_ats_in_html — Lever
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestDetectATSInHtmlLever:
    """``detect_ats_in_html`` on markup that references Lever boards."""

    def test_jobs_lever_link(self):
        """An anchor to ``jobs.lever.co/<slug>`` gives the slug and the same URL back."""
        markup = '<a href="https://jobs.lever.co/leverdemo">Open roles</a>'
        found = detect_ats_in_html(markup)
        assert found is not None
        assert (found.ats_name, found.slug) == ("lever", "leverdemo")
        assert found.careers_url == "https://jobs.lever.co/leverdemo"

    def test_lever_embed_script(self):
        """A scheme-less host/slug inside script text is still detected."""
        snippet = 'var lever = "jobs.lever.co/acme-corp";'
        found = detect_ats_in_html(snippet)
        assert found is not None
        assert (found.ats_name, found.slug) == ("lever", "acme-corp")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# detect_ats_in_html — Ashby
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestDetectATSInHtmlAshby:
    """``detect_ats_in_html`` on markup that references Ashby boards."""

    def test_jobs_ashbyhq_link(self):
        """The slug keeps its original capitalisation ("Ramp")."""
        markup = '<a href="https://jobs.ashbyhq.com/Ramp">Careers</a>'
        found = detect_ats_in_html(markup)
        assert found is not None
        assert (found.ats_name, found.slug) == ("ashby", "Ramp")
        assert found.careers_url == "https://jobs.ashbyhq.com/Ramp"

    def test_lowercase_slug(self):
        """An all-lowercase slug is returned unchanged."""
        markup = '<a href="https://jobs.ashbyhq.com/linear">Join us</a>'
        found = detect_ats_in_html(markup)
        assert found is not None
        assert found.slug == "linear"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# detect_ats_in_html — Workday
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestDetectATSInHtmlWorkday:
    """``detect_ats_in_html`` on markup that references Workday career sites."""

    def test_myworkdayjobs_link(self):
        """Host, tenant and site are all extracted; the locale stays in the URL."""
        expected_url = "https://nvidia.wd5.myworkdayjobs.com/en-US/NVIDIAExternalCareerSite"
        markup = '<a href="https://nvidia.wd5.myworkdayjobs.com/en-US/NVIDIAExternalCareerSite">Jobs</a>'
        found = detect_ats_in_html(markup)
        assert found is not None
        assert found.ats_name == "workday"
        assert (found.wd_host, found.wd_tenant, found.wd_site) == (
            "nvidia.wd5.myworkdayjobs.com",
            "nvidia",
            "NVIDIAExternalCareerSite",
        )
        assert "en-US" in found.careers_url
        assert found.careers_url == expected_url

    def test_workday_without_locale(self):
        """A URL with no locale segment still yields the site name."""
        markup = '<a href="https://acme.wd1.myworkdayjobs.com/AcmeCareers">Careers</a>'
        found = detect_ats_in_html(markup)
        assert found is not None
        assert (found.ats_name, found.wd_site) == ("workday", "AcmeCareers")

    def test_workday_missing_site_returns_none(self):
        """A bare Workday host with no path cannot form a board."""
        bare_host = 'https://acme.wd1.myworkdayjobs.com'
        assert detect_ats_in_html(bare_host) is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# detect_ats_in_url
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestDetectATSInUrl:
    """``detect_ats_in_url`` classifies a single URL by ATS vendor."""

    def test_greenhouse_url(self):
        """A Greenhouse board URL reports vendor and slug."""
        found = detect_ats_in_url("https://boards.greenhouse.io/stripe")
        assert found is not None
        assert (found.ats_name, found.slug) == ("greenhouse", "stripe")

    def test_lever_url(self):
        """A Lever board URL reports the ``lever`` vendor."""
        found = detect_ats_in_url("https://jobs.lever.co/leverdemo")
        assert found is not None
        assert found.ats_name == "lever"

    def test_ashby_url(self):
        """An Ashby board URL reports the ``ashby`` vendor."""
        found = detect_ats_in_url("https://jobs.ashbyhq.com/linear")
        assert found is not None
        assert found.ats_name == "ashby"

    def test_workday_url(self):
        """A Workday career-site URL reports the ``workday`` vendor."""
        target = "https://nvidia.wd5.myworkdayjobs.com/en-US/NVIDIAExternalCareerSite"
        found = detect_ats_in_url(target)
        assert found is not None
        assert found.ats_name == "workday"

    def test_non_ats_url_returns_none(self):
        """A company's own careers page is not an ATS URL."""
        own_site = "https://www.acme.com/careers"
        assert detect_ats_in_url(own_site) is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _fetch_greenhouse
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestFetchGreenhouse:
    """``_fetch_greenhouse`` driven by canned ``request_with_retries`` replies."""

    def _board(self, slug: str = "airbnb") -> ATSBoard:
        """Build a Greenhouse board whose careers URL is derived from *slug*."""
        careers = f"https://boards.greenhouse.io/{slug}"
        return ATSBoard(ats_name="greenhouse", slug=slug, careers_url=careers)

    def test_extracts_absolute_url(self, monkeypatch):
        """First job URL, ``meta.total`` and company name are surfaced."""
        first_job = {
            "absolute_url": "https://careers.airbnb.com/positions/123",
            "company_name": "Airbnb",
        }
        payload = {"jobs": [first_job], "meta": {"total": 42}}

        def respond(client, method, url, **kw):
            return FakeResponse(200, payload)

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", respond)
        outcome = _fetch_greenhouse(self._board(), FakeClient())
        assert outcome.first_url == "https://careers.airbnb.com/positions/123"
        assert outcome.job_count == 42
        assert outcome.org_name == "Airbnb"

    def test_empty_jobs_list(self, monkeypatch):
        """An empty job list gives no URL, a zero count and no org name."""

        def respond(*a, **kw):
            return FakeResponse(200, {"jobs": [], "meta": {"total": 0}})

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", respond)
        outcome = _fetch_greenhouse(self._board(), FakeClient())
        assert outcome.first_url is None
        assert outcome.job_count == 0
        assert outcome.org_name is None

    def test_non_200_returns_none(self, monkeypatch):
        """A 404 from the API is reported as an empty fetch."""

        def respond(*a, **kw):
            return FakeResponse(404, {})

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", respond)
        outcome = _fetch_greenhouse(self._board(), FakeClient())
        assert outcome.first_url is None
        assert outcome.job_count == 0

    def test_network_error_returns_none(self, monkeypatch):
        """An exception raised by the request layer is reported as an empty fetch."""

        def explode(*a, **kw):
            raise RuntimeError("network down")

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", explode)
        outcome = _fetch_greenhouse(self._board(), FakeClient())
        assert outcome.first_url is None
        assert outcome.job_count == 0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _fetch_lever
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestFetchLever:
    """``_fetch_lever`` driven by canned ``request_with_retries`` replies."""

    def _board(self, slug: str = "leverdemo") -> ATSBoard:
        """Build a Lever board whose careers URL is derived from *slug*."""
        careers = f"https://jobs.lever.co/{slug}"
        return ATSBoard(ats_name="lever", slug=slug, careers_url=careers)

    def test_extracts_hosted_url(self, monkeypatch):
        """The first posting's ``hostedUrl`` is returned and postings are counted."""
        postings = [{"hostedUrl": "https://jobs.lever.co/leverdemo/abc-123"}]

        def respond(*a, **kw):
            return FakeResponse(200, postings)

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", respond)
        outcome = _fetch_lever(self._board(), FakeClient())
        assert outcome.first_url == "https://jobs.lever.co/leverdemo/abc-123"
        assert outcome.job_count == 1

    def test_empty_list(self, monkeypatch):
        """An empty posting list gives no URL and a zero count."""

        def respond(*a, **kw):
            return FakeResponse(200, [])

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", respond)
        outcome = _fetch_lever(self._board(), FakeClient())
        assert outcome.first_url is None
        assert outcome.job_count == 0

    def test_non_list_response(self, monkeypatch):
        """A dict body (instead of the expected list) is treated as no postings."""

        def respond(*a, **kw):
            return FakeResponse(200, {"error": "not found"})

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", respond)
        outcome = _fetch_lever(self._board(), FakeClient())
        assert outcome.first_url is None
        assert outcome.job_count == 0

    def test_non_200_returns_none(self, monkeypatch):
        """A 404 from the API yields no first URL."""

        def respond(*a, **kw):
            return FakeResponse(404, [])

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", respond)
        outcome = _fetch_lever(self._board(), FakeClient())
        assert outcome.first_url is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _fetch_ashby
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestFetchAshby:
    """``_fetch_ashby`` driven by canned ``request_with_retries`` replies."""

    def _board(self, slug: str = "Ramp") -> ATSBoard:
        """Build an Ashby board whose careers URL is derived from *slug*."""
        return ATSBoard(
            ats_name="ashby",
            slug=slug,
            careers_url=f"https://jobs.ashbyhq.com/{slug}",
        )

    def test_extracts_job_url(self, monkeypatch):
        """The first job's ``jobUrl`` is returned and jobs are counted."""
        fake_body = {
            "jobs": [{"jobUrl": "https://jobs.ashbyhq.com/Ramp/abc-def"}],
            "apiVersion": "1",
        }
        monkeypatch.setattr(
            "jobsource.careers.ats.request_with_retries",
            lambda *a, **kw: FakeResponse(200, fake_body),
        )
        fetch = _fetch_ashby(self._board(), FakeClient())
        assert fetch.first_url == "https://jobs.ashbyhq.com/Ramp/abc-def"
        assert fetch.job_count == 1

    def test_empty_jobs(self, monkeypatch):
        """An empty job list gives no URL and a zero count."""
        monkeypatch.setattr(
            "jobsource.careers.ats.request_with_retries",
            lambda *a, **kw: FakeResponse(200, {"jobs": []}),
        )
        fetch = _fetch_ashby(self._board(), FakeClient())
        assert fetch.first_url is None
        assert fetch.job_count == 0

    def test_network_error_returns_none(self, monkeypatch):
        """An exception raised by the request layer yields no first URL."""

        # A named raising function, as the Greenhouse tests use, instead of
        # the generator-throw trick needed to raise from inside a lambda.
        def boom(*a, **kw):
            raise RuntimeError("timeout")

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", boom)
        fetch = _fetch_ashby(self._board(), FakeClient())
        assert fetch.first_url is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _fetch_workday
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestFetchWorkday:
    """``_fetch_workday`` driven by canned ``request_with_retries`` replies."""

    def _board(self) -> ATSBoard:
        """Build a fully-specified Workday board (host, tenant and site all set)."""
        coords = {
            "wd_host": "nvidia.wd5.myworkdayjobs.com",
            "wd_tenant": "nvidia",
            "wd_site": "NVIDIAExternalCareerSite",
        }
        return ATSBoard(
            ats_name="workday",
            slug="nvidia/NVIDIAExternalCareerSite",
            careers_url="https://nvidia.wd5.myworkdayjobs.com/en-US/NVIDIAExternalCareerSite",
            **coords,
        )

    def test_builds_full_job_url(self, monkeypatch):
        """``externalPath`` is appended to the careers URL; ``total`` is the count."""
        payload = {
            "total": 2000,
            "jobPostings": [{"externalPath": "/job/US/SWE_JR123"}],
        }

        def respond(*a, **kw):
            return FakeResponse(200, payload)

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", respond)
        outcome = _fetch_workday(self._board(), FakeClient())
        assert outcome.first_url == "https://nvidia.wd5.myworkdayjobs.com/en-US/NVIDIAExternalCareerSite/job/US/SWE_JR123"
        assert outcome.job_count == 2000

    def test_empty_postings(self, monkeypatch):
        """No postings gives no URL and a zero count."""

        def respond(*a, **kw):
            return FakeResponse(200, {"total": 0, "jobPostings": []})

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", respond)
        outcome = _fetch_workday(self._board(), FakeClient())
        assert outcome.first_url is None
        assert outcome.job_count == 0

    def test_missing_wd_coords_returns_none(self):
        """A board lacking host/tenant/site yields an empty fetch (nothing is patched here)."""
        incomplete = ATSBoard(
            ats_name="workday", slug="x", careers_url="https://x.wd1.myworkdayjobs.com"
        )
        outcome = _fetch_workday(incomplete, FakeClient())
        assert outcome.first_url is None
        assert outcome.job_count == 0

    def test_non_200_returns_none(self, monkeypatch):
        """A 403 from the API yields no first URL."""

        def respond(*a, **kw):
            return FakeResponse(403, {})

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", respond)
        outcome = _fetch_workday(self._board(), FakeClient())
        assert outcome.first_url is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# detect_and_fetch orchestration
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestDetectAndFetch:
    """End-to-end ``detect_and_fetch``: detect the ATS in HTML, then query its API."""

    def test_greenhouse_full_flow(self, monkeypatch):
        """Detection plus a successful API call fills every result field."""
        homepage = '<a href="https://boards.greenhouse.io/airbnb">Jobs</a>'
        api_payload = {
            "jobs": [{"absolute_url": "https://careers.airbnb.com/positions/1"}],
            "meta": {"total": 5},
        }

        def respond(*a, **kw):
            return FakeResponse(200, api_payload)

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", respond)
        outcome = detect_and_fetch("https://www.airbnb.com", FakeClient(), homepage_html=homepage)
        assert outcome is not None
        assert outcome.ats_name == "greenhouse"
        assert outcome.careers_url == "https://boards.greenhouse.io/airbnb"
        assert outcome.position_url == "https://careers.airbnb.com/positions/1"
        assert outcome.job_count == 5

    def test_no_ats_returns_none(self, monkeypatch):
        """HTML with no ATS reference produces no result."""
        homepage = "<html><body>No ATS here</body></html>"
        outcome = detect_and_fetch("https://www.example.com", FakeClient(), homepage_html=homepage)
        assert outcome is None

    def test_api_failure_returns_result_without_position_url(self, monkeypatch):
        """A failing API call still reports the detected board, minus a position URL."""
        homepage = '<a href="https://jobs.lever.co/acme">Jobs</a>'

        def respond(*a, **kw):
            return FakeResponse(500, [])

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", respond)
        outcome = detect_and_fetch("https://www.acme.com", FakeClient(), homepage_html=homepage)
        assert outcome is not None
        assert outcome.ats_name == "lever"
        assert outcome.careers_url == "https://jobs.lever.co/acme"
        assert outcome.position_url is None

    def test_homepage_fetch_failure_returns_none(self, monkeypatch):
        """When homepage_html is None and the fetch fails, return None."""

        def explode(*a, **kw):
            raise RuntimeError("connection refused")

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", explode)
        outcome = detect_and_fetch("https://www.example.com", FakeClient())
        assert outcome is None

    def test_uses_provided_html_without_fetching(self, monkeypatch):
        """If homepage_html is provided, request_with_retries is only called for the API."""
        homepage = '<a href="https://jobs.ashbyhq.com/linear">Jobs</a>'
        api_payload = {"jobs": [{"jobUrl": "https://jobs.ashbyhq.com/linear/xyz"}]}
        seen_urls: list[str] = []

        def recording_request(client, method, url, **kw):
            seen_urls.append(url)
            return FakeResponse(200, api_payload)

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", recording_request)
        outcome = detect_and_fetch("https://www.linear.app", FakeClient(), homepage_html=homepage)
        assert outcome is not None
        # Exactly one request was made, and it went to the Ashby API rather
        # than to the homepage.
        assert len(seen_urls) == 1
        assert "ashby" in seen_urls[0]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Pure-unit helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestDomainStem:
    """``_domain_stem`` reduces a URL to the bare company label of its host."""

    def test_strips_www(self):
        """A leading ``www.`` and the TLD are dropped."""
        stem = _domain_stem("https://www.anthropic.com")
        assert stem == "anthropic"

    def test_no_www(self):
        """A host without ``www.`` works the same way."""
        stem = _domain_stem("https://linear.app")
        assert stem == "linear"

    def test_with_path(self):
        """Any path after the host is ignored."""
        stem = _domain_stem("https://www.figma.com/careers/")
        assert stem == "figma"

    def test_invalid_returns_none(self):
        """An empty URL must give back either ``None`` or a string."""
        # NOTE(review): despite the test name, this only checks the return
        # type (str or None), not that the value is None — confirm what
        # _domain_stem("") should return and tighten if appropriate.
        stem = _domain_stem("")
        assert isinstance(stem, (str, type(None)))
|
|
|
|
|
|
class TestSlugCandidates:
    """Ordering, de-duplication and size limit of ``_slug_candidates``."""

    def test_domain_stem_first(self):
        """The domain stem is the first candidate."""
        candidates = _slug_candidates("https://www.anthropic.com", "Anthropic")
        assert candidates[0] == "anthropic"

    def test_deduplicates_stem_and_name(self):
        """When stem and normalized name coincide, the slug appears once."""
        candidates = _slug_candidates("https://www.anthropic.com", "Anthropic")
        assert candidates.count("anthropic") == 1

    def test_different_stem_and_name(self):
        """The stem slug is offered when the company name carries extra words."""
        candidates = _slug_candidates("https://www.acmecorp.com", "Acme Corp Inc")
        # One membership check is enough: the original repeated it as a
        # tautological `X or X`.
        # NOTE(review): the original comments disagree — one said the stem
        # differs from the normalized name, the other that the name
        # normalizes to "acmecorp" too. Confirm against _slug_candidates.
        assert "acmecorp" in candidates

    def test_name_only_candidate_when_stem_equal(self):
        """Identical stem and name slug collapse to a single candidate."""
        candidates = _slug_candidates("https://ramp.com", "Ramp")
        assert len(candidates) == 1
        assert candidates[0] == "ramp"

    def test_no_company_name_uses_stem_only(self):
        """Without a company name only the stem is proposed."""
        candidates = _slug_candidates("https://www.anthropic.com", None)
        assert candidates == ["anthropic"]

    def test_max_three_candidates(self):
        """Never more than three candidates are produced."""
        candidates = _slug_candidates("https://www.x.com", "X Corp Inc")
        assert len(candidates) <= 3
|
|
|
|
|
|
class TestLooseNameMatch:
    """``_loose_name_match`` compares a requested company name with an ATS org name."""

    def test_exact_match(self):
        """Identical names match."""
        verdict = _loose_name_match("Anthropic", "Anthropic")
        assert verdict is True

    def test_one_substring_of_other(self):
        """A name contained in the longer org name matches."""
        verdict = _loose_name_match("Acme", "Acme Corp Inc")
        assert verdict is True

    def test_clear_mismatch(self):
        """Unrelated names do not match."""
        verdict = _loose_name_match("Acme", "Globex")
        assert verdict is False

    def test_empty_input_returns_true(self):
        """An empty requested name is treated as a match."""
        verdict = _loose_name_match("", "Acme")
        assert verdict is True

    def test_empty_org_returns_true(self):
        """An empty org name is treated as a match."""
        verdict = _loose_name_match("Acme", "")
        assert verdict is True

    def test_case_insensitive(self):
        """Letter case is ignored."""
        verdict = _loose_name_match("ANTHROPIC", "anthropic")
        assert verdict is True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# recover_via_slug_guess
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestRecoverViaSlugGuess:
    """All tests drive the real _fetch_* via a URL-dispatching fake request_with_retries."""

    # ----- Canned payload builders -----

    def _gh_resp(self, slug: str, company_name: str, count: int = 5) -> dict:
        """Canned Greenhouse response with jobs."""
        return {
            "jobs": [{"absolute_url": f"https://boards.greenhouse.io/{slug}/jobs/1",
                      "company_name": company_name}],
            "meta": {"total": count},
        }

    def _gh_empty(self) -> dict:
        """Canned Greenhouse response with no jobs."""
        return {"jobs": [], "meta": {"total": 0}}

    def _lever_resp(self, slug: str, count: int = 3) -> list:
        """Canned Lever response: *count* identical postings."""
        return [{"hostedUrl": f"https://jobs.lever.co/{slug}/abc"}] * count

    def _ashby_resp(self, slug: str, count: int = 2) -> dict:
        """Canned Ashby response: *count* identical jobs."""
        return {"jobs": [{"jobUrl": f"https://jobs.ashbyhq.com/{slug}/xyz"}] * count}

    def _ashby_empty(self) -> dict:
        """Canned Ashby response with no jobs."""
        return {"jobs": []}

    def _lever_empty(self) -> list:
        """Canned Lever response with no postings."""
        return []

    def _respond(self, url: str, routes: list) -> FakeResponse:
        """Answer *url* from an ordered ``(substring, body)`` table.

        The first substring found in *url* wins and its body is returned with
        status 200; when nothing matches a 404 with an empty dict is returned.
        """
        for needle, body in routes:
            if needle in url:
                return FakeResponse(200, body)
        return FakeResponse(404, {})

    # ----- Domain-stem hit (Anthropic-style) -----

    def test_domain_stem_greenhouse_hit(self, monkeypatch):
        """Greenhouse slug derived from domain stem → returns ATSResult."""
        def fake_req(client, method, url, **kw):
            return self._respond(url, [
                ("boards-api.greenhouse.io/v1/boards/anthropic",
                 self._gh_resp("anthropic", "Anthropic", count=370)),
                # All other probes empty
                ("lever.co", self._lever_empty()),
                ("ashby", self._ashby_empty()),
            ])

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", fake_req)
        result = recover_via_slug_guess(
            "https://www.anthropic.com", "Anthropic", FakeClient()
        )
        assert result is not None
        assert result.ats_name == "greenhouse"
        assert result.careers_url == "https://boards.greenhouse.io/anthropic"
        assert result.position_url is not None
        assert result.job_count == 370

    # ----- Name-candidate fallback when stem misses -----

    def test_name_candidate_fallback(self, monkeypatch):
        """Stem misses; _slug(company_name) slug hits on Lever."""
        def fake_req(client, method, url, **kw):
            # Order matters: "acme" is a substring of "acmecorp", so the
            # stem-slug branches must be checked before the name-slug ones.
            # Stem slug "acmecorp" → greenhouse empty, lever empty, ashby empty
            if "acmecorp" in url and "boards-api" in url:
                return FakeResponse(200, self._gh_empty())
            if "acmecorp" in url and "lever" in url:
                return FakeResponse(200, self._lever_empty())
            if "acmecorp" in url and "ashby" in url:
                return FakeResponse(200, self._ashby_empty())
            # Name slug "acme" → lever hit
            if "acme" in url and "lever" in url:
                return FakeResponse(200, self._lever_resp("acme"))
            if "acme" in url and "boards-api" in url:
                return FakeResponse(200, self._gh_empty())
            return FakeResponse(404, {})

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", fake_req)
        result = recover_via_slug_guess(
            "https://www.acmecorp.com", "Acme", FakeClient()
        )
        assert result is not None
        assert result.ats_name == "lever"
        assert "acme" in result.careers_url

    # ----- 0-jobs reject -----

    def test_zero_jobs_rejected(self, monkeypatch):
        """All slugs resolve but job_count==0 everywhere → None."""
        def fake_req(client, method, url, **kw):
            return self._respond(url, [
                ("boards-api", self._gh_empty()),
                ("lever", self._lever_empty()),
                ("ashby", self._ashby_empty()),
            ])

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", fake_req)
        result = recover_via_slug_guess(
            "https://www.acme.com", "Acme", FakeClient()
        )
        assert result is None

    # ----- Org-name mismatch reject (collision guard) -----

    def test_org_name_mismatch_rejected(self, monkeypatch):
        """Greenhouse returns jobs but org_name is a different company → skip to lever/ashby → miss."""
        def fake_req(client, method, url, **kw):
            return self._respond(url, [
                # Returns jobs but for wrong company
                ("boards-api", self._gh_resp("acme", "Globex Corporation", count=10)),
                ("lever", self._lever_empty()),
                ("ashby", self._ashby_empty()),
            ])

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", fake_req)
        result = recover_via_slug_guess(
            "https://www.acme.com", "Acme Corp", FakeClient()
        )
        assert result is None

    # ----- Lever hit (no org_name) accepted on job_count alone -----

    def test_lever_hit_without_org_name_accepted(self, monkeypatch):
        """Lever doesn't expose org_name → cross-check is skipped; job_count>0 wins."""
        def fake_req(client, method, url, **kw):
            return self._respond(url, [
                ("boards-api", self._gh_empty()),
                ("lever", self._lever_resp("acme")),
                ("ashby", self._ashby_empty()),
            ])

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", fake_req)
        result = recover_via_slug_guess(
            "https://www.acme.com", "Completely Different Name", FakeClient()
        )
        # Lever returns jobs, no org_name → cross-check skipped → accepted
        assert result is not None
        assert result.ats_name == "lever"

    # ----- Short-circuit: greenhouse hit stops remaining probes -----

    def test_short_circuits_on_first_hit(self, monkeypatch):
        """Once Greenhouse hits, Lever and Ashby are NOT probed."""
        probed: list[str] = []

        def fake_req(client, method, url, **kw):
            probed.append(url)
            return self._respond(url, [
                ("boards-api.greenhouse.io/v1/boards/acme",
                 self._gh_resp("acme", "Acme", count=5)),
                ("lever", self._lever_resp("acme")),
                ("ashby", self._ashby_resp("acme")),
            ])

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", fake_req)
        result = recover_via_slug_guess(
            "https://www.acme.com", "Acme", FakeClient()
        )
        assert result is not None
        assert result.ats_name == "greenhouse"
        # Lever and Ashby URLs must not have been probed
        assert not any("lever" in u for u in probed)
        assert not any("ashby" in u for u in probed)

    # ----- company_name=None: stem-only, cross-check skipped -----

    def test_no_company_name_uses_stem_and_skips_crosscheck(self, monkeypatch):
        """With company_name=None, use domain stem only; org_name cross-check skipped."""
        def fake_req(client, method, url, **kw):
            return self._respond(url, [
                ("boards-api.greenhouse.io/v1/boards/acme",
                 self._gh_resp("acme", "Some Other Company")),
                ("lever", self._lever_empty()),
                ("ashby", self._ashby_empty()),
            ])

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", fake_req)
        # company_name=None → cross-check disabled even if org_name differs
        result = recover_via_slug_guess(
            "https://www.acme.com", None, FakeClient()
        )
        assert result is not None
        assert result.ats_name == "greenhouse"

    # ----- All-miss -----

    def test_all_miss_returns_none(self, monkeypatch):
        """Every probe answering 404 means nothing is recovered."""
        monkeypatch.setattr(
            "jobsource.careers.ats.request_with_retries",
            lambda *a, **kw: FakeResponse(404, {}),
        )
        result = recover_via_slug_guess(
            "https://www.nobody.com", "Nobody Inc", FakeClient()
        )
        assert result is None

    # ----- Network error on one probe falls through -----

    def test_single_probe_error_falls_through(self, monkeypatch):
        """A probe that raises should not abort recovery; others are still tried."""
        def fake_req(client, method, url, **kw):
            # The Greenhouse probe blows up; the remaining vendors answer normally.
            if "boards-api" in url:
                raise RuntimeError("greenhouse down")
            return self._respond(url, [
                ("lever", self._lever_resp("acme")),
                ("ashby", self._ashby_empty()),
            ])

        monkeypatch.setattr("jobsource.careers.ats.request_with_retries", fake_req)
        result = recover_via_slug_guess(
            "https://www.acme.com", None, FakeClient()
        )
        assert result is not None
        assert result.ats_name == "lever"
|