# Source code for ncaa_eval.cli.export

"""Kaggle submission export orchestration.

Loads a trained model, builds a pairwise probability matrix for all
teams in a season, and writes a Kaggle-format CSV.
"""

from __future__ import annotations

import sys
from pathlib import Path

from rich.console import Console

from ncaa_eval.evaluation.bracket import MatchupContext
from ncaa_eval.evaluation.kaggle_export import KAGGLE_NEUTRAL_DAY_NUM, format_kaggle_submission
from ncaa_eval.evaluation.providers import EloProvider, build_probability_matrix
from ncaa_eval.ingest import ParquetRepository
from ncaa_eval.model.base import StatefulModel
from ncaa_eval.model.tracking import RunStore



# [docs]
def build_kaggle_submission(*, run_id: str, season: int, data_dir: Path) -> str:
    """Build the Kaggle submission CSV text for one season from a stored model.

    Orchestration only: the model is fetched from the run store, every team
    that appears in the season's games is collected, an all-pairs
    probability matrix is built for those teams, and the result is handed
    to the Kaggle formatter. Nothing is written here — the caller chooses
    the destination for the returned text.

    Args:
        run_id: Model run identifier.
        season: Tournament season year (e.g. 2026).
        data_dir: Path to the local data directory.

    Returns:
        CSV string with ``ID,Pred`` header and C(n,2) data rows.

    Raises:
        FileNotFoundError: If the run, model, or season games cannot be loaded.
        TypeError: If the model type is not supported for Kaggle export.
    """
    loaded_model = RunStore(base_path=data_dir).load_model(run_id)
    if loaded_model is None:
        not_found = f"No model found for run {run_id!r}"
        raise FileNotFoundError(not_found)

    # Only stateful models can be wrapped by EloProvider below.
    if not isinstance(loaded_model, StatefulModel):
        unsupported = (
            "Kaggle export currently supports stateful (Elo) models only. "
            "Stateless model export requires a feature server and is not yet implemented."
        )
        raise TypeError(unsupported)

    season_games = ParquetRepository(base_path=data_dir).get_games(season)
    if not season_games:
        no_games = f"No games found for season {season}"
        raise FileNotFoundError(no_games)

    # Every team that won or lost at least one game this season, in
    # ascending ID order.
    ordered_team_ids = sorted(
        {
            team_id
            for game in season_games
            for team_id in (game.w_team_id, game.l_team_id)
        }
    )

    neutral_context = MatchupContext(
        season=season,
        day_num=KAGGLE_NEUTRAL_DAY_NUM,
        is_neutral=True,
    )
    matrix = build_probability_matrix(
        EloProvider(loaded_model),
        ordered_team_ids,
        neutral_context,
    )
    return format_kaggle_submission(season, ordered_team_ids, matrix)




# [docs]
def run_export(
    *,
    run_id: str,
    season: int,
    data_dir: Path,
    output: Path | None,
    console: Console | None = None,
) -> str:
    """Load a model and produce a Kaggle submission CSV.

    Thin CLI wrapper around ``build_kaggle_submission`` that handles
    progress output and writing to a file or stdout.

    Args:
        run_id: Model run identifier.
        season: Tournament season year (e.g. 2026).
        data_dir: Path to the local data directory.
        output: File path to write the CSV. ``None`` means stdout.
        console: Rich Console instance for status output. When omitted, a
            default console is created; it targets stderr if the CSV itself
            goes to stdout, so status text never mixes with the CSV stream.

    Returns:
        The CSV string.

    Raises:
        FileNotFoundError: If the run, model, or season games cannot be loaded.
        TypeError: If the model type is not supported for export.
    """
    if console is not None:
        con = console
    else:
        # A default Console prints to stdout. When the CSV is also headed to
        # stdout, route status messages to stderr instead so that piping the
        # command's output yields only the CSV.
        con = Console(stderr=output is None)
    con.print(f"Building Kaggle submission for season {season}...")

    csv_str = build_kaggle_submission(run_id=run_id, season=season, data_dir=data_dir)

    if output is not None:
        # Explicit encoding keeps the file independent of the locale default.
        output.write_text(csv_str, encoding="utf-8")
        con.print(f"[green]Kaggle submission written to {output}[/green]")
    else:
        # Write raw CSV to stdout (no Rich formatting) so the output is
        # pipe-safe: `ncaa_eval export ... | gzip > submission.csv.gz`
        sys.stdout.write(csv_str)

    return csv_str