scaffold
This commit is contained in:
88
jobsource/models.py
Normal file
88
jobsource/models.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""Pydantic data models shared across the pipeline.
|
||||
|
||||
RawJob is the normalized output of any job source (Stage 1). JobResult is the
per-job record that flows through the cascade and becomes one CSV row. The CSV
contract is exactly three columns: company_name, career_page_url,
open_position_url.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class JobStatus(str, Enum):
    """Lifecycle state of a single job record.

    A record counts as complete exactly when its status is
    ``position_found``. Because of the ``str`` mixin, every member is also a
    plain string and compares equal to its value (``JobStatus.new == "new"``).
    """

    new = "new"
    website_resolved = "website_resolved"
    careers_found = "careers_found"
    position_found = "position_found"
    failed = "failed"
    needs_review = "needs_review"
|
||||
|
||||
|
||||
class RawJob(BaseModel):
    """Normalized job posting from a source provider (Stage 1 output).

    ``job_id``, ``company`` and ``linkedin_url`` are required; every other
    field is optional and defaults to ``None`` when the source omits it.
    """

    # Required identity fields.
    job_id: str = Field(..., description="LinkedIn numeric jobPostingId, parsed from the job URL.")
    company: str = Field(..., description="Company name as reported by the source.")
    linkedin_url: str = Field(..., description="Canonical LinkedIn job-view URL.")

    # Optional metadata; absent values stay None.
    website: str | None = Field(default=None, description="Company's own site, if provided.")
    listed_at: datetime | None = Field(default=None, description="When the job was posted, if known.")
    title: str | None = Field(default=None, description="Job title, if provided.")
    location: str | None = Field(default=None, description="Job location, if provided.")
|
||||
|
||||
|
||||
class JobResult(BaseModel):
    """Per-job record carried through the cascade; serializes to one CSV row.

    Only ``job_id`` and ``company_name`` are required. Every other field
    defaults to ``None``, except ``status``, which starts at
    ``JobStatus.new``. ``to_csv_row`` exposes just the three CSV contract
    columns; the remaining fields are not part of that row.
    """

    job_id: str
    company_name: str
    company_key: str | None = Field(
        default=None, description="Normalized domain, else lowercased name."
    )
    website: str | None = None
    career_page_url: str | None = None
    open_position_url: str | None = None
    status: JobStatus = JobStatus.new
    linkedin_url: str | None = None
    listed_at: datetime | None = None
    title: str | None = None
    location: str | None = None
    # Observability: which cascade tier/method resolved each stage.
    careers_method: str | None = None
    position_method: str | None = None

    @property
    def is_complete(self) -> bool:
        """Return True when ``status`` is ``JobStatus.position_found``.

        Only ``status`` is consulted; ``open_position_url`` is not checked.
        """
        return self.status == JobStatus.position_found

    @classmethod
    def from_raw(cls, raw: RawJob) -> "JobResult":
        """Seed a result from a raw job (status starts at `new`).

        Copies ``job_id``, ``website``, ``linkedin_url``, ``listed_at``,
        ``title`` and ``location`` from ``raw``, and maps ``raw.company`` to
        ``company_name``. Fields not listed here keep their defaults.
        """
        return cls(
            job_id=raw.job_id,
            company_name=raw.company,
            website=raw.website,
            linkedin_url=raw.linkedin_url,
            listed_at=raw.listed_at,
            title=raw.title,
            location=raw.location,
            status=JobStatus.new,
        )

    def to_csv_row(self) -> dict[str, str]:
        """Return exactly the three contract columns (empty string for None)."""
        return {
            "company_name": self.company_name or "",
            "career_page_url": self.career_page_url or "",
            "open_position_url": self.open_position_url or "",
        }
|
||||
|
||||
|
||||
# The CSV output contract — exactly these columns, in this order.
CSV_COLUMNS: tuple[str, str, str] = ("company_name", "career_page_url", "open_position_url")
|
||||
Reference in New Issue
Block a user