Source code for ncaa_eval.ingest.fuzzy

"""Centralized fuzzy team-name matching utility.

Provides a single public function for resolving team names against a
candidate mapping, used by both the ESPN connector and the sync engine.
"""

from __future__ import annotations

from rapidfuzz import fuzz


def fuzzy_match_team(
    name: str,
    candidates: dict[str, int],
    threshold: int = 80,
) -> int | None:
    """Resolve a team name to a team ID via fuzzy string comparison.

    Every key of *candidates* is scored against *name* with
    ``rapidfuzz.fuzz.token_set_ratio``; both strings are lower-cased first
    so the comparison is case-insensitive. The ID of the highest-scoring
    candidate is returned when that score reaches *threshold*. If several
    candidates share the top score, the one that comes first in the
    mapping's iteration order wins. A candidate that scores zero is never
    treated as a match, whatever the threshold.

    Exact-match lookups are not attempted here; callers should try those
    before falling back to this function.

    Args:
        name: Team name to resolve.
        candidates: Mapping of known team names to team IDs.
        threshold: Minimum ``token_set_ratio`` score (0-100) a candidate
            must reach to be accepted.

    Returns:
        The team ID of the best-scoring candidate, or ``None`` when the
        mapping is empty or no candidate reaches the threshold.
    """
    needle = name.lower()

    # Lazily score each candidate; nothing is computed for an empty mapping.
    scored = (
        (fuzz.token_set_ratio(needle, known_name.lower()), team_id)
        for known_name, team_id in candidates.items()
    )

    # ``max`` keeps the first of several equal maxima, so ties resolve to
    # the earliest candidate in iteration order.
    top = max(scored, key=lambda pair: pair[0], default=None)
    if top is None:
        return None

    top_score, top_id = top
    # A zero score means nothing in common; reject it even if threshold <= 0.
    if top_score <= 0 or top_score < threshold:
        return None
    return top_id