# Source code for ncaa_eval.ingest.fuzzy
"""Centralized fuzzy team-name matching utility.
Provides a single public function for resolving team names against a
candidate mapping, used by both the ESPN connector and the sync engine.
"""
from __future__ import annotations
from rapidfuzz import fuzz
# [docs] -- appears to be a documentation-link marker left over from the rendered source page
def fuzzy_match_team(
    name: str,
    candidates: dict[str, int],
    threshold: int = 80,
) -> int | None:
    """Resolve a team name to a team ID via fuzzy comparison.

    Every key of *candidates* is scored against *name* with
    ``rapidfuzz.fuzz.token_set_ratio`` after both sides are lower-cased.
    The ID of the highest-scoring key is returned, provided that score is
    strictly positive and at least *threshold*. When several keys share
    the top score, the one encountered first while iterating *candidates*
    wins.

    Exact-match lookups are not attempted here; callers are expected to
    try those before falling back to this function.

    Args:
        name: Team name to resolve.
        candidates: Mapping of known team names to team IDs.
        threshold: Minimum acceptable token_set_ratio score (0-100).

    Returns:
        The team ID of the best-scoring candidate, or ``None`` when the
        mapping is empty or no candidate scores high enough.
    """
    needle = name.lower()

    # Lazy (score, team_id) pairs: nothing is scored for an empty mapping.
    scored = (
        (fuzz.token_set_ratio(needle, known_name.lower()), team_id)
        for known_name, team_id in candidates.items()
    )

    # ``max`` keeps the first of several equally-scored pairs, so ties go
    # to the earliest candidate in iteration order.
    top_score, top_id = max(
        scored,
        key=lambda pair: pair[0],
        default=(0.0, None),
    )

    # A score of zero never counts as a match, whatever the threshold is.
    if top_score <= 0 or top_score < threshold:
        return None
    return top_id