Files
JobSourceAgent/jobsource/config.py
2026-06-17 08:38:15 -04:00

65 lines
2.5 KiB
Python

"""Application configuration, loaded from the environment via pydantic-settings.
Every setting is env-driven. Model identifiers and API keys are read from the
environment with inert placeholder defaults — the operator supplies real values
in `.env`. Never hardcode real model IDs or secrets in this file.
"""
from __future__ import annotations
from functools import lru_cache
from pathlib import Path
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Typed, environment-driven configuration for the application.

    Each field is filled from the environment variable of the same name
    (matched case-insensitively) or from the ``.env`` file; the default
    declared here applies when neither supplies a value. Keys that do not
    correspond to a field are ignored.

    Credentials and model identifiers default to inert ``PLACEHOLDER_*``
    strings. Real values belong in the environment, never in this file.

    Numeric settings carry lower bounds so that a mistyped value (a negative
    retry count, a zero timeout, ...) is rejected with a validation error when
    the settings object is built, instead of surfacing later as an unrelated
    runtime failure.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
        case_sensitive=False,
    )

    # -- Job source / ingestion --------------------------------------------
    job_source: str = Field(default="jobspy", description="Ingestion provider: 'jobspy' | 'apify'.")
    # default_factory gives every instance its own list object.
    search_terms: list[str] = Field(default_factory=lambda: ["software engineer"])
    location: str = "United States"
    # Negative ages/counts are never meaningful; zero is deliberately still accepted.
    hours_old: int = Field(default=72, ge=0)
    # A batch has to hold at least one item.
    batch_size: int = Field(default=20, ge=1)
    results_wanted: int = Field(default=50, ge=0)

    # -- Apify (only used when job_source == 'apify') ----------------------
    apify_token: str = "PLACEHOLDER_APIFY_TOKEN"
    apify_actor: str = "PLACEHOLDER_APIFY_ACTOR"

    # -- Website resolution (optional search API) --------------------------
    search_api_enabled: bool = False
    search_api_key: str = "PLACEHOLDER_SEARCH_API_KEY"

    # -- LLM / agent models (placeholders -- set real IDs in .env) ---------
    # NEVER hardcode real model identifiers. These are inert placeholders.
    llm_api_key: str = "PLACEHOLDER_LLM_API_KEY"
    classifier_model: str = "PLACEHOLDER_CLASSIFIER_MODEL"  # cheap model: link classification
    agent_model: str = "PLACEHOLDER_AGENT_MODEL"  # stronger model: browser agent

    # -- HTTP client -------------------------------------------------------
    # The timeout must be strictly positive; retries and backoff may be zero
    # (meaning "no retries" / "no delay") but not negative.
    http_timeout: float = Field(default=20.0, gt=0)
    http_max_retries: int = Field(default=3, ge=0)
    http_backoff_factor: float = Field(default=0.5, ge=0)
    user_agent: str = "JobSourceAgent/0.1 (+https://example.com)"

    # -- Storage / output --------------------------------------------------
    # Relative paths; this class does not anchor them to any directory.
    db_path: Path = Path("output/jobsource.db")
    output_csv: Path = Path("output/results.csv")

    # -- Browser agent (fallback tier) -------------------------------------
    enable_browser_agent: bool = True
    browser_headless: bool = True
@lru_cache
def get_settings() -> Settings:
    """Build the application settings once and hand back the same object afterwards.

    The function takes no arguments, so ``lru_cache`` memoises a single
    ``Settings`` instance: the first call constructs it and every later call
    returns that cached object.

    Returns:
        The shared ``Settings`` instance.

    Note:
        Tests that change the environment should call
        ``get_settings.cache_clear()`` so the next call constructs a fresh
        instance.
    """
    settings = Settings()
    return settings