Source code for ncaa_eval.cli.export

"""Kaggle submission export orchestration.

Loads a trained model, builds a pairwise probability matrix for all
teams in a season, and writes a Kaggle-format CSV.
"""

from __future__ import annotations

import sys
from pathlib import Path

from rich.console import Console

from ncaa_eval.evaluation.bracket import MatchupContext
from ncaa_eval.evaluation.kaggle_export import KAGGLE_NEUTRAL_DAY_NUM, format_kaggle_submission
from ncaa_eval.evaluation.providers import EloProvider, build_probability_matrix
from ncaa_eval.ingest import ParquetRepository
from ncaa_eval.model.base import StatefulModel
from ncaa_eval.model.tracking import RunStore


def build_kaggle_submission(*, run_id: str, season: int, data_dir: Path) -> str:
    """Assemble the Kaggle submission CSV text for one season.

    Steps, in order: load the model stored under ``run_id``, gather every
    team ID that appears in the season's games, compute the pairwise
    probability matrix for those teams on a neutral court, and hand the
    result to ``format_kaggle_submission``.  Nothing is written by this
    function; the caller decides where the returned text goes.

    Args:
        run_id: Model run identifier.
        season: Tournament season year (e.g. 2026).
        data_dir: Path to the local data directory (used for both the run
            store and the game repository).

    Returns:
        The CSV string produced by ``format_kaggle_submission``.

    Raises:
        FileNotFoundError: If no model is stored for ``run_id`` or the
            season has no games.
        TypeError: If the loaded model is not a ``StatefulModel``.
    """
    loaded_model = RunStore(base_path=data_dir).load_model(run_id)
    if loaded_model is None:
        raise FileNotFoundError(f"No model found for run {run_id!r}")
    if not isinstance(loaded_model, StatefulModel):
        raise TypeError(
            "Kaggle export currently supports stateful (Elo) models only. "
            "Stateless model export requires a feature server and is not yet implemented."
        )

    season_games = ParquetRepository(base_path=data_dir).get_games(season)
    if not season_games:
        raise FileNotFoundError(f"No games found for season {season}")

    # Every team that played (as winner or loser) this season, in ascending ID order.
    ordered_team_ids = sorted(
        {
            team_id
            for game in season_games
            for team_id in (game.w_team_id, game.l_team_id)
        }
    )

    elo_provider = EloProvider(loaded_model)
    neutral_context = MatchupContext(
        season=season,
        day_num=KAGGLE_NEUTRAL_DAY_NUM,
        is_neutral=True,
    )
    pairwise_probs = build_probability_matrix(elo_provider, ordered_team_ids, neutral_context)
    return format_kaggle_submission(season, ordered_team_ids, pairwise_probs)
def run_export(
    *,
    run_id: str,
    season: int,
    data_dir: Path,
    output: Path | None,
    console: Console | None = None,
) -> str:
    """Load a model and produce a Kaggle submission CSV.

    Thin CLI wrapper around ``build_kaggle_submission`` that handles
    progress output and writing to a file or stdout.

    Args:
        run_id: Model run identifier.
        season: Tournament season year (e.g. 2026).
        data_dir: Path to the local data directory.
        output: File path to write the CSV. ``None`` means stdout.
        console: Rich Console instance for status output.  When omitted, a
            default console is created; it targets stderr if the CSV itself
            is going to stdout so the two streams never mix.

    Returns:
        The CSV string.

    Raises:
        FileNotFoundError: If the run, model, or season games cannot be loaded.
        TypeError: If the model type is not supported for export.
    """
    if console is not None:
        con = console
    else:
        # In stdout mode the CSV owns stdout; route status messages to stderr
        # so `ncaa_eval export ... | gzip > submission.csv.gz` receives only CSV.
        con = Console(stderr=output is None)

    con.print(f"Building Kaggle submission for season {season}...")
    csv_str = build_kaggle_submission(run_id=run_id, season=season, data_dir=data_dir)

    if output is not None:
        # Explicit encoding: do not depend on the platform's locale default.
        output.write_text(csv_str, encoding="utf-8")
        con.print(f"[green]Kaggle submission written to {output}[/green]")
    else:
        # Write raw CSV to stdout (no Rich formatting) so the output is
        # pipe-safe.
        sys.stdout.write(csv_str)
    return csv_str