# Source code for ncaa_eval.model.logistic_regression

"""Minimal logistic regression model — test fixture for the Model contract.

This is NOT a production model.  It exists solely to demonstrate and
test the stateless ``Model`` interface in ~30 lines of logic.
"""

from __future__ import annotations

import json
from pathlib import Path
from typing import Literal, Self

import joblib  # type: ignore[import-untyped]
import numpy as np
import pandas as pd  # type: ignore[import-untyped]
from sklearn.linear_model import LogisticRegression  # type: ignore[import-untyped]

from ncaa_eval.model._feature_config_io import load_feature_config, save_feature_config
from ncaa_eval.model.base import Model, ModelConfig
from ncaa_eval.model.registry import register_model
from ncaa_eval.transform.feature_serving import (
    BatchRatingType,
    FeatureConfig,
    OrdinalCompositeMethod,
)


class LogisticRegressionConfig(ModelConfig):
    """Hyperparameters for the logistic regression test fixture."""

    # Registry discriminator: pins this config to the "logistic_regression" model.
    model_name: Literal["logistic_regression"] = "logistic_regression"
    # Inverse regularization strength, forwarded verbatim to sklearn.
    C: float = 1.0  # noqa: N815 — sklearn convention
    # Solver iteration cap, forwarded verbatim to sklearn.
    max_iter: int = 200
@register_model("logistic_regression")
class LogisticRegressionModel(Model):
    """Thin wrapper around sklearn ``LogisticRegression``."""

    def __init__(
        self,
        config: LogisticRegressionConfig | None = None,
        *,
        batch_rating_types: tuple[BatchRatingType, ...] = ("srs",),
        graph_features_enabled: bool = False,
        ordinal_composite: OrdinalCompositeMethod | None = None,
    ) -> None:
        """Initialize logistic regression model with optional configuration.

        Args:
            config: Pydantic config; defaults to
                :class:`LogisticRegressionConfig` when ``None``.
            batch_rating_types: Which batch rating systems to include.
            graph_features_enabled: Whether to compute graph centrality features.
            ordinal_composite: Composite method for ordinal systems.
        """
        if config is None:
            config = LogisticRegressionConfig()
        self._config = config
        self._clf = LogisticRegression(C=config.C, max_iter=config.max_iter)
        self.feature_config = FeatureConfig(
            batch_rating_types=batch_rating_types,
            graph_features_enabled=graph_features_enabled,
            ordinal_composite=ordinal_composite,
        )
        # Filled in by fit(); persisted by save() so importances survive a round-trip.
        self.feature_names_: list[str] = []

    def fit(self, X: pd.DataFrame, y: pd.Series) -> None:
        """Train the model on feature matrix *X* and labels *y*."""
        # Record the training column order before delegating to sklearn.
        self.feature_names_ = X.columns.tolist()
        self._clf.fit(X, y)

    def predict_proba(self, X: pd.DataFrame) -> pd.Series:
        """Return P(team_a wins) in [0, 1] for each row of *X*."""
        # Column 1 of sklearn's (n, 2) output is the positive class.
        positive_class = self._clf.predict_proba(X)[:, 1]
        return pd.Series(positive_class, index=X.index)

    def save(self, path: Path) -> None:
        """Persist the trained classifier, config, and feature config to *path*."""
        path.mkdir(parents=True, exist_ok=True)
        joblib.dump(self._clf, path / "model.joblib")
        (path / "config.json").write_text(self._config.model_dump_json())
        (path / "feature_names.json").write_text(json.dumps(self.feature_names_))
        save_feature_config(self.feature_config, path)

    @classmethod
    def load(cls, path: Path) -> Self:
        """Load a previously-saved model from *path*."""
        raw_config = (path / "config.json").read_text()
        instance = cls(LogisticRegressionConfig.model_validate_json(raw_config))
        instance._clf = joblib.load(path / "model.joblib")
        # Feature names are optional on disk (older saves may lack them).
        names_file = path / "feature_names.json"
        if names_file.exists():
            instance.feature_names_ = json.loads(names_file.read_text())
        # Likewise the feature config: keep __init__'s default when absent.
        restored_fc = load_feature_config(path)
        if restored_fc is not None:
            instance.feature_config = restored_fc
        return instance

    def get_feature_importances(self) -> list[tuple[str, float]] | None:
        """Return absolute coefficient values as feature importance."""
        # Unavailable before fit(): no column names or no learned coefficients.
        if not (self.feature_names_ and hasattr(self._clf, "coef_")):
            return None
        magnitudes = np.abs(self._clf.coef_[0]).tolist()
        return list(zip(self.feature_names_, magnitudes))

    def get_config(self) -> LogisticRegressionConfig:
        """Return the Pydantic-validated configuration."""
        return self._config