scaffold
This commit is contained in:
1
jobsource/careers/__init__.py
Normal file
1
jobsource/careers/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Careers page discovery sub-package (Stage 2 cascade)."""
|
||||
17
jobsource/careers/ats.py
Normal file
17
jobsource/careers/ats.py
Normal file
@@ -0,0 +1,17 @@
|
||||
"""ATS detection and public JSON API fetching (Stage 2, tier 1).
|
||||
|
||||
Scaffold stub -- not implemented yet.
|
||||
"""
|
||||
# TODO (Stage 2, tier 1): implement per CLAUDE.md "Stage 2 — ATS detection".
|
||||
# Detect Greenhouse / Lever / Ashby / Workday from the company website, then call
|
||||
# their public JSON APIs (no login needed). On success, return both the careers page URL
|
||||
# AND the first job posting URL (so Stage 3 can skip its own cascade for ATS companies).
|
||||
#
|
||||
# Confirmed ATS JSON field shapes (verify live before trusting — see CLAUDE.md Gotchas):
|
||||
# Greenhouse: GET https://boards-api.greenhouse.io/v1/boards/{slug}/jobs
|
||||
# → {"jobs": [{"absolute_url": "...", ...}, ...]}
|
||||
# Lever: GET https://api.lever.co/v0/postings/{company}?mode=json
|
||||
# → [{"hostedUrl": "...", ...}, ...]
|
||||
# Ashby: GET https://api.ashbyhq.com/posting-api/job-board/{slug}
|
||||
# → {"jobs": [{"jobUrl": "...", ...}, ...]}
|
||||
# Workday: varies by tenant — needs per-tenant discovery logic
|
||||
13
jobsource/careers/cascade.py
Normal file
13
jobsource/careers/cascade.py
Normal file
@@ -0,0 +1,13 @@
|
||||
"""find_careers_page(): orchestrate the Stage 2 tier cascade.
|
||||
|
||||
Scaffold stub -- not implemented yet.
|
||||
"""
|
||||
# TODO (Stage 2): implement per CLAUDE.md "Stage 2 — Find careers page (cascade, return on first hit)".
|
||||
# Cascade order (return early on first success):
|
||||
# 1. ATS detection → ats.detect_and_fetch()
|
||||
# 2. URL patterns → heuristics.probe_url_patterns()
|
||||
# 3. Homepage scan → heuristics.scan_homepage_links()
|
||||
# 4. Sitemap → heuristics.parse_sitemap()
|
||||
# 5. Cheap-LLM → classify_llm.classify_careers_link()
|
||||
# 6. Browser agent → agent_fallback.run_fused_agent() (also handles Stage 3)
|
||||
# Returns (careers_url: str | None, method: str, ats_name: str | None).
|
||||
13
jobsource/careers/classify_llm.py
Normal file
13
jobsource/careers/classify_llm.py
Normal file
@@ -0,0 +1,13 @@
|
||||
"""Cheap-LLM link classification for careers page and job links (Stage 2, tier 5 / Stage 3, tier 4).
|
||||
|
||||
Scaffold stub -- not implemented yet.
|
||||
"""
|
||||
# TODO (Stage 2 tier 5 / Stage 3 tier 4): implement per CLAUDE.md "Cheap-LLM classification".
|
||||
# Uses Pydantic AI (model-agnostic) with the `classifier_model` from config.
|
||||
# Two typed tasks:
|
||||
# 1. classify_careers_link(anchors: list[Anchor]) -> CareerLinkResult
|
||||
# Given extracted <a> tags from a page, pick the careers/jobs page URL.
|
||||
# 2. classify_job_link(anchors: list[Anchor]) -> JobLinkResult
|
||||
# Given extracted <a> tags from a careers page, pick one open-position URL.
|
||||
# Both return a typed Pydantic result including the chosen URL and confidence.
|
||||
# Graceful degradation: if llm_api_key is placeholder or call fails, return None.
|
||||
11
jobsource/careers/heuristics.py
Normal file
11
jobsource/careers/heuristics.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""Deterministic careers-page heuristics: URL probing, homepage scan, sitemap (Stage 2, tiers 2–4).
|
||||
|
||||
Scaffold stub -- not implemented yet.
|
||||
"""
|
||||
# TODO (Stage 2, tiers 2–4): implement per CLAUDE.md "Stage 2 — URL patterns / homepage / sitemap".
|
||||
# Tier 2 — URL patterns: probe /careers, /career, /jobs, /join-us, /join,
|
||||
# careers.{domain}, jobs.{domain} via HTTP HEAD (or GET if HEAD fails).
|
||||
# Tier 3 — Homepage link scan: fetch homepage HTML, parse with BeautifulSoup + lxml,
|
||||
# rank <a> anchors by career/job keywords in href/text, return highest-ranked.
|
||||
# Tier 4 — Sitemap: fetch sitemap.xml (and sitemap index if present), scan for career/job URLs.
|
||||
# Each function returns (url: str | None) so cascade.py can return early on first hit.
|
||||
Reference in New Issue
Block a user