# Source code for ncaa_eval.transform.calibration
"""Probability calibration for NCAA basketball model predictions.
Provides calibration wrappers for adjusting model-output probabilities
so they are well-calibrated (when the model says 70%, the event happens
~70% of the time).
* :class:`IsotonicCalibrator` — non-parametric monotonic calibration via
``sklearn.isotonic.IsotonicRegression``. Best with >=1000 calibration
samples.
* :class:`SigmoidCalibrator` — parametric Platt scaling via logistic
regression on log-odds. Better for small folds.
Design invariants:
- **In-fold only**: ``fit()`` on training fold predictions, ``transform()``
on test fold predictions. Never fit on the data being calibrated.
- ``goto_conversion`` was assessed and found **not applicable** — it removes
bookmaker overround from betting odds, which is a fundamentally different
problem from calibrating model-predicted probabilities. See Story 4.7
Dev Notes for the full assessment.
"""
from __future__ import annotations
import logging
from typing import Any, Protocol, runtime_checkable
import numpy as np
import numpy.typing as npt
logger = logging.getLogger(__name__)
@runtime_checkable
class Calibrator(Protocol):
    """Structural protocol for probability calibration transforms.

    The only member this protocol declares is ``fit(y_true, y_prob)``.
    Because the class is decorated with ``@runtime_checkable``, conformance
    can be tested with ``isinstance(obj, Calibrator)``; that check only
    looks for the presence of a ``fit`` attribute, not its signature.

    Both :class:`IsotonicCalibrator` and :class:`SigmoidCalibrator`
    structurally satisfy this protocol.
    """

    def fit(
        self,
        y_true: npt.NDArray[np.float64],
        y_prob: npt.NDArray[np.float64],
    ) -> None:
        """Fit the calibrator on observed labels and predicted probabilities.

        Args:
            y_true: Observed labels.
            y_prob: Model-predicted probabilities for the same samples.
        """
        ...
class IsotonicCalibrator:
    """Non-parametric monotonic probability calibration.

    Wraps ``sklearn.isotonic.IsotonicRegression`` with ``y_min=0.0``,
    ``y_max=1.0``, and ``out_of_bounds="clip"`` for probability bounds.

    Example::

        cal = IsotonicCalibrator()
        cal.fit(y_true_train, y_prob_train)
        calibrated = cal.transform(y_prob_test)
    """

    def __init__(self) -> None:
        # ``_fitted`` flips to True at the end of ``fit()`` and guards
        # ``transform()``; ``_model`` holds the fitted sklearn estimator.
        self._fitted = False
        self._model: Any = None

    def fit(
        self,
        y_true: npt.NDArray[np.float64],
        y_prob: npt.NDArray[np.float64],
    ) -> None:
        """Fit the isotonic regression on training fold predictions.

        Any exception raised by scikit-learn while fitting propagates
        unchanged, in which case the calibrator stays in its previous state.

        Args:
            y_true: Binary labels (0 or 1) from the training fold.
            y_prob: Model-predicted probabilities from the training fold.
        """
        # Imported lazily so importing this module does not require sklearn.
        from sklearn.isotonic import IsotonicRegression  # type: ignore[import-untyped]

        ir = IsotonicRegression(y_min=0.0, y_max=1.0, out_of_bounds="clip")
        # Predicted probability is the regressor input; the label is the target.
        ir.fit(y_prob, y_true)
        self._model = ir
        self._fitted = True

    def transform(
        self,
        y_prob: npt.NDArray[np.float64],
    ) -> npt.NDArray[np.float64]:
        """Map raw probabilities through the fitted isotonic regression.

        Args:
            y_prob: Model-predicted probabilities to calibrate.

        Returns:
            The fitted model's ``predict(y_prob)`` output as a ``float64``
            array.

        Raises:
            RuntimeError: If called before :meth:`fit`.
        """
        if not self._fitted:
            raise RuntimeError("IsotonicCalibrator.transform() called before fit()")
        return np.asarray(self._model.predict(y_prob), dtype=np.float64)
class SigmoidCalibrator:
    """Parametric Platt scaling for probability calibration.

    Uses logistic regression to fit a sigmoid function mapping raw
    probabilities to calibrated probabilities. More robust than isotonic
    regression for small samples (<1000).

    The fitted mapping is ``sigmoid(a * logit(p) + b)``, where ``a`` and
    ``b`` are the coefficient and intercept of the logistic regression.

    Example::

        cal = SigmoidCalibrator()
        cal.fit(y_true_train, y_prob_train)
        calibrated = cal.transform(y_prob_test)
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] so the log-odds stay finite.
    _EPS = 1e-15

    def __init__(self) -> None:
        self._fitted = False
        self._a: float = 0.0  # slope applied to the log-odds
        self._b: float = 0.0  # intercept

    @classmethod
    def _log_odds(cls, y_prob: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Return ``log(p / (1 - p))`` after clipping ``p`` away from 0 and 1."""
        clipped = np.clip(np.asarray(y_prob, dtype=np.float64), cls._EPS, 1.0 - cls._EPS)
        return np.log(clipped / (1.0 - clipped))

    def fit(
        self,
        y_true: npt.NDArray[np.float64],
        y_prob: npt.NDArray[np.float64],
    ) -> None:
        """Fit Platt scaling parameters on training fold predictions.

        Any exception raised by scikit-learn while fitting propagates
        unchanged, in which case the calibrator stays in its previous state.

        Args:
            y_true: Binary labels (0 or 1) from the training fold.
            y_prob: Model-predicted probabilities from the training fold.
        """
        # Imported lazily so importing this module does not require sklearn.
        from sklearn.linear_model import LogisticRegression  # type: ignore[import-untyped]

        # sklearn expects a 2-D feature matrix: one column of log-odds.
        features = self._log_odds(y_prob).reshape(-1, 1)
        # NOTE(review): LogisticRegression is used with its default
        # regularisation settings — confirm that is intended for Platt scaling.
        lr = LogisticRegression(solver="lbfgs", max_iter=1000)
        lr.fit(features, y_true)
        self._a = float(lr.coef_[0, 0])
        self._b = float(lr.intercept_[0])
        self._fitted = True

    def transform(
        self,
        y_prob: npt.NDArray[np.float64],
    ) -> npt.NDArray[np.float64]:
        """Apply the fitted sigmoid to raw probabilities.

        Args:
            y_prob: Model-predicted probabilities to calibrate.

        Returns:
            Calibrated probabilities, same shape as ``y_prob``.

        Raises:
            RuntimeError: If called before :meth:`fit`.
        """
        if not self._fitted:
            raise RuntimeError("SigmoidCalibrator.transform() called before fit()")
        z = self._a * self._log_odds(y_prob) + self._b
        # sigmoid(z) == exp(-log(1 + exp(-z))); logaddexp avoids overflow
        # for large negative z.
        return np.exp(-np.logaddexp(0.0, -z))