This commit is contained in:
ldy
2026-06-17 08:38:15 -04:00
commit f13b8fc1ca
28 changed files with 894 additions and 0 deletions

View File

@@ -0,0 +1 @@
"""Job source provider package."""

View File

@@ -0,0 +1,8 @@
"""Apify ingestion provider (alternative, paid) — implements JobSource.
Scaffold stub -- not implemented yet.
"""
# TODO (Stage 1): implement ApifySource per CLAUDE.md "Stage 1 — Ingest".
# Drop-in alternative to JobSpySource; same JobSource interface.
# Uses apify-client; actor ID from config (APIFY_ACTOR env var).
# Map Apify actor output fields → RawJob; same dedup key (LinkedIn jobPostingId).

16
jobsource/sources/base.py Normal file
View File

@@ -0,0 +1,16 @@
"""JobSource interface: every ingestion provider must implement fetch_recent_jobs().
Scaffold stub -- not implemented yet.
"""
# TODO (Stage 1): define the JobSource ABC per CLAUDE.md "Stage 1 — Ingest (deterministic)".
# Interface:
#   class JobSource(ABC):
#       @abstractmethod
#       def fetch_recent_jobs(
#           self,
#           search_terms: list[str],
#           location: str,
#           hours_old: int,
#           results_wanted: int,
#       ) -> list[RawJob]: ...
# Implementations: jobspy_source.JobSpySource, apify_source.ApifySource.

View File

@@ -0,0 +1,10 @@
"""JobSpy ingestion provider (default, free) — implements JobSource.
Scaffold stub -- not implemented yet.
"""
# TODO (Stage 1): implement JobSpySource per CLAUDE.md "Stage 1 — Ingest".
# Uses the python-jobspy package (imported as `jobspy`). Key notes:
# - Search LinkedIn via JobSpy; parse LinkedIn numeric jobPostingId from the job URL.
# - Map JobSpy result fields → RawJob (company, website from company_url_direct if present).
# - Strip tracking query params from linkedin_url; keep only /jobs/view/{id}.
# - Log observed fill rate of company_url_direct (see CLAUDE.md Gotchas).