Source code for ncaa_eval.evaluation.providers

"""Probability provider protocols and implementations.

Provides the :class:`ProbabilityProvider` protocol and concrete
implementations for pairwise win probability computation:

* :class:`MatrixProvider` — wraps a pre-computed probability matrix.
* :class:`EloProvider` — wraps a stateful model's ``predict_matchup`` method.
* :class:`EnsembleProvider` — wraps a ``StackedEnsemble`` as a provider.
* :func:`build_probability_matrix` — builds an n×n pairwise matrix.
"""

from __future__ import annotations

from collections.abc import Sequence
from pathlib import Path
from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable

if TYPE_CHECKING:
    from ncaa_eval.model.ensemble import StackedEnsemble

import numpy as np
import numpy.typing as npt

from ncaa_eval.evaluation.bracket import MatchupContext


@runtime_checkable
class ProbabilityProvider(Protocol):
    """Structural interface for anything that yields pairwise win probabilities.

    Implementations are expected to honour the complementarity contract:
    for every pair ``(A, B)``, ``P(A beats B) + P(B beats A) = 1``.

    The protocol is ``runtime_checkable``, so ``isinstance`` checks succeed
    for any object exposing both methods (signatures are not verified at
    runtime).
    """

    def matchup_probability(
        self,
        team_a_id: int,
        team_b_id: int,
        context: MatchupContext,
    ) -> float:
        """Compute the probability that ``team_a_id`` beats ``team_b_id``.

        Args:
            team_a_id: Canonical ID of the first team.
            team_b_id: Canonical ID of the second team.
            context: Matchup context (season, day_num, neutral).

        Returns:
            A probability in ``[0, 1]``.
        """
        ...

    def batch_matchup_probabilities(
        self,
        team_a_ids: Sequence[int],
        team_b_ids: Sequence[int],
        context: MatchupContext,
    ) -> npt.NDArray[np.float64]:
        """Compute ``P(a_i beats b_i)`` for each aligned pair of IDs.

        Args:
            team_a_ids: First-team IDs, one per matchup.
            team_b_ids: Second-team IDs; must have the same length as
                ``team_a_ids``.
            context: Matchup context shared by every pair.

        Returns:
            1-D float64 array of shape ``(len(team_a_ids),)``.
        """
        ...
class MatrixProvider:
    """Wraps a pre-computed probability matrix as a :class:`ProbabilityProvider`.

    Entry ``[i, j]`` of the matrix is returned as ``P(team i beats team j)``,
    where ``i`` and ``j`` are the positions of the teams in ``team_ids``.

    Args:
        prob_matrix: n×n pairwise probability matrix.
        team_ids: Sequence of team IDs matching matrix indices.
    """

    def __init__(
        self,
        prob_matrix: npt.NDArray[np.float64],
        team_ids: Sequence[int],
    ) -> None:
        self._P = prob_matrix
        # Map each team ID to its row/column position in the matrix.
        self._index = {tid: i for i, tid in enumerate(team_ids)}

    def matchup_probability(
        self,
        team_a_id: int,
        team_b_id: int,
        context: MatchupContext,
    ) -> float:
        """Return P(team_a beats team_b) from the stored matrix.

        Args:
            team_a_id: First team's ID (selects the matrix row).
            team_b_id: Second team's ID (selects the matrix column).
            context: Unused; accepted to satisfy the provider protocol.

        Returns:
            The stored matrix entry as a Python ``float``.

        Raises:
            KeyError: If either team ID was not in ``team_ids``.
        """
        i = self._index[team_a_id]
        j = self._index[team_b_id]
        return float(self._P[i, j])

    def batch_matchup_probabilities(
        self,
        team_a_ids: Sequence[int],
        team_b_ids: Sequence[int],
        context: MatchupContext,
    ) -> npt.NDArray[np.float64]:
        """Return batch probabilities from the stored matrix.

        Translates both ID sequences to matrix positions and performs a
        single vectorized lookup.

        Args:
            team_a_ids: First-team IDs (matrix rows).
            team_b_ids: Second-team IDs (matrix columns), same length.
            context: Unused; accepted to satisfy the provider protocol.

        Returns:
            1-D float64 array with one probability per pair. Empty input
            yields an empty array.

        Raises:
            KeyError: If any team ID was not in ``team_ids``.
        """
        # Force an integer dtype: an empty list would otherwise become a
        # float64 array, which NumPy rejects as an index (IndexError).
        rows = np.array([self._index[a] for a in team_a_ids], dtype=np.intp)
        cols = np.array([self._index[b] for b in team_b_ids], dtype=np.intp)
        result: npt.NDArray[np.float64] = self._P[rows, cols].astype(np.float64)
        return result
class EloProvider:
    """Wraps a :class:`StatefulModel` as a :class:`ProbabilityProvider`.

    Uses the model's ``predict_matchup`` method for probability computation.

    Args:
        model: Any :class:`StatefulModel` instance with ``predict_matchup``.

    Raises:
        TypeError: If ``model`` has no callable ``predict_matchup`` attribute.
    """

    def __init__(self, model: Any) -> None:
        # Require a callable, not just an attribute, so a misconfigured
        # model fails here rather than on the first probability query.
        if not callable(getattr(model, "predict_matchup", None)):
            msg = "model must have a predict_matchup(team_a_id, team_b_id) method"
            raise TypeError(msg)
        self._model: Any = model

    def matchup_probability(
        self,
        team_a_id: int,
        team_b_id: int,
        context: MatchupContext,
    ) -> float:
        """Return P(team_a beats team_b) via the model's ``predict_matchup``.

        Args:
            team_a_id: First team's ID.
            team_b_id: Second team's ID.
            context: Unused; accepted to satisfy the provider protocol.

        Returns:
            The model's prediction coerced to a Python ``float``.
        """
        # Coerce so callers always get a plain float even if the model
        # returns a NumPy scalar or an int.
        return float(self._model.predict_matchup(team_a_id, team_b_id))

    def batch_matchup_probabilities(
        self,
        team_a_ids: Sequence[int],
        team_b_ids: Sequence[int],
        context: MatchupContext,
    ) -> npt.NDArray[np.float64]:
        """Return batch probabilities by looping ``predict_matchup``.

        Calls ``predict_matchup`` once per aligned pair. Pairs are formed
        with ``zip``, so surplus IDs in the longer sequence are ignored.

        Args:
            team_a_ids: First-team IDs.
            team_b_ids: Second-team IDs.
            context: Unused; accepted to satisfy the provider protocol.

        Returns:
            1-D float64 array with one probability per pair.
        """
        return np.array(
            [self._model.predict_matchup(a, b) for a, b in zip(team_a_ids, team_b_ids)],
            dtype=np.float64,
        )
class EnsembleProvider:
    """Wraps a :class:`StackedEnsemble` as a :class:`ProbabilityProvider`.

    The first probability query calls
    ``ensemble.predict_bracket(data_dir, season)``; the returned frame is
    converted into a :class:`MatrixProvider` and kept for all later
    lookups. This lets a ``StackedEnsemble`` be handed to
    :func:`build_probability_matrix` and the Monte Carlo bracket simulator
    exactly like a single-model provider.

    Args:
        ensemble: A trained ``StackedEnsemble`` instance.
        data_dir: Path to the local Parquet data store.
        season: Target season year.
    """

    def __init__(
        self,
        ensemble: StackedEnsemble,
        data_dir: Path,
        season: int,
    ) -> None:
        self._ensemble = ensemble
        self._data_dir = data_dir
        self._season = season
        # Populated lazily by _get_delegate on first use.
        self._delegate: MatrixProvider | None = None

    def _get_delegate(self) -> MatrixProvider:
        """Return the cached matrix-backed provider, building it if needed."""
        cached = self._delegate
        if cached is not None:
            return cached

        bracket_frame = self._ensemble.predict_bracket(self._data_dir, self._season)
        matrix = bracket_frame.to_numpy().astype(np.float64)
        # The frame's index supplies the team IDs for the matrix positions.
        ids = list(bracket_frame.index)
        self._delegate = MatrixProvider(matrix, ids)
        return self._delegate

    def matchup_probability(
        self,
        team_a_id: int,
        team_b_id: int,
        context: MatchupContext,
    ) -> float:
        """Return P(team_a beats team_b) from the ensemble probability matrix.

        The first call triggers ensemble bracket prediction; later calls
        read from the cached matrix.
        """
        delegate = self._get_delegate()
        return delegate.matchup_probability(team_a_id, team_b_id, context)

    def batch_matchup_probabilities(
        self,
        team_a_ids: Sequence[int],
        team_b_ids: Sequence[int],
        context: MatchupContext,
    ) -> npt.NDArray[np.float64]:
        """Return batch probabilities from the cached ensemble matrix.

        The first call triggers ensemble bracket prediction; later calls
        read from the cached matrix.
        """
        delegate = self._get_delegate()
        return delegate.batch_matchup_probabilities(team_a_ids, team_b_ids, context)
def build_probability_matrix(
    provider: ProbabilityProvider,
    team_ids: Sequence[int],
    context: MatchupContext,
) -> npt.NDArray[np.float64]:
    """Build n×n pairwise win probability matrix.

    Queries the provider once for every pair above the diagonal, then
    mirrors the result below the diagonal as ``P[j, i] = 1 - P[i, j]``,
    relying on the complementarity contract.

    Args:
        provider: Probability provider implementing the protocol.
        team_ids: Team IDs in bracket order.
        context: Matchup context.

    Returns:
        Float64 array of shape ``(n, n)``. Diagonal is zero.
    """
    size = len(team_ids)
    # Index pairs (i, j) with i < j, i.e. the strict upper triangle.
    upper_i, upper_j = np.triu_indices(size, k=1)

    first_ids = [team_ids[pos] for pos in upper_i.tolist()]
    second_ids = [team_ids[pos] for pos in upper_j.tolist()]
    upper_probs = provider.batch_matchup_probabilities(first_ids, second_ids, context)

    matrix = np.zeros((size, size), dtype=np.float64)
    matrix[upper_i, upper_j] = upper_probs
    # Lower triangle is the complement of the mirrored upper entry.
    matrix[upper_j, upper_i] = 1.0 - upper_probs
    return matrix