"""Inference helpers for ClimbingBoardGPT demos. This module is intentionally small and webapp-friendly: it loads trained checkpoints once, keeps them in memory, and exposes route generation helpers. """ from __future__ import annotations from dataclasses import dataclass from pathlib import Path import torch from .config import BoardConfig, load_board_config from .generation import generate_one from .grades import to_grouped_v from .models import JointRouteGPT, JointRouteTransformerRegressor from .tokenization import ( angle_token, board_token, canonicalize_holds, hold_token, parse_frames, ) @dataclass class LoadedGenerator: """A loaded GPT-style route generator plus vocabulary metadata.""" model: JointRouteGPT stoi: dict[str, int] itos: dict[int, str] device: torch.device checkpoint_path: Path @dataclass class LoadedGradePredictor: """A loaded transformer grade predictor plus vocabulary metadata.""" model: JointRouteTransformerRegressor stoi: dict[str, int] itos: dict[int, str] device: torch.device checkpoint_path: Path max_len: int pad_id: int unk_id: int def load_grade_predictor( checkpoint_path: str | Path, device: str | torch.device | None = None, torch_threads: int | None = None, ) -> LoadedGradePredictor: """Load a trained joint grade-prediction checkpoint. Args: checkpoint_path: Path to ``models/joint_transformer_grade_predictor.pth``. device: ``"cpu"``, ``"cuda"``, or None for auto-detection. torch_threads: Optional CPU thread cap for small VPS demos. Returns: LoadedGradePredictor containing the PyTorch model and tokenizer maps. """ if torch_threads is not None: torch.set_num_threads(int(torch_threads)) checkpoint_path = Path(checkpoint_path) if not checkpoint_path.exists(): raise FileNotFoundError(f"Could not find grade predictor checkpoint: {checkpoint_path}") resolved_device = torch.device(device or ("cuda" if torch.cuda.is_available() else "cpu")) try: checkpoint = torch.load(checkpoint_path, map_location=resolved_device, weights_only=False) except TypeError: checkpoint = torch.load(checkpoint_path, map_location=resolved_device) cfg = checkpoint["config"] stoi = {str(k): int(v) for k, v in checkpoint["stoi"].items()} itos = {int(k): str(v) for k, v in checkpoint["itos"].items()} coord_features = checkpoint["coord_features"] if not isinstance(coord_features, torch.Tensor): coord_features = torch.tensor(coord_features, dtype=torch.float32) model = JointRouteTransformerRegressor( vocab_size=cfg["vocab_size"], max_len=cfg["max_len"], coord_features=coord_features, d_model=cfg.get("d_model", 128), nhead=cfg.get("nhead", 4), num_layers=cfg.get("num_layers", 4), dim_feedforward=cfg.get("dim_feedforward", 256), dropout=cfg.get("dropout", 0.10), pad_id=cfg.get("pad_id", stoi[""]), ).to(resolved_device) model.load_state_dict(checkpoint["model_state_dict"]) model.eval() return LoadedGradePredictor( model=model, stoi=stoi, itos=itos, device=resolved_device, checkpoint_path=checkpoint_path, max_len=int(cfg["max_len"]), pad_id=int(cfg.get("pad_id", stoi[""])), unk_id=int(stoi[""]), ) def predict_route_grade( grade_predictor: LoadedGradePredictor, tokens: list[str], ) -> dict[str, object]: """Predict the grade of a route-token sequence. The grade token is removed before scoring, because the predictor should infer the grade from the board, angle, and hold-role tokens rather than reading the requested grade. """ model_tokens = [token for token in tokens if not str(token).startswith("": model_tokens = [""] + model_tokens[1:] else: model_tokens = [""] + model_tokens ids = [grade_predictor.stoi.get(token, grade_predictor.unk_id) for token in model_tokens] ids = ids[: grade_predictor.max_len] mask = [1] * len(ids) if len(ids) < grade_predictor.max_len: pad_n = grade_predictor.max_len - len(ids) ids += [grade_predictor.pad_id] * pad_n mask += [0] * pad_n with torch.no_grad(): input_ids = torch.tensor([ids], dtype=torch.long, device=grade_predictor.device) attention_mask = torch.tensor([mask], dtype=torch.bool, device=grade_predictor.device) pred_display_difficulty = float(grade_predictor.model(input_ids, attention_mask).cpu().item()) return { "predicted_display_difficulty": pred_display_difficulty, "predicted_grouped_v": int(to_grouped_v(pred_display_difficulty)), } def load_route_generator( checkpoint_path: str | Path, device: str | torch.device | None = None, torch_threads: int | None = None, ) -> LoadedGenerator: """Load a trained joint route generator checkpoint. Args: checkpoint_path: Path to ``models/joint_route_gpt_generator.pth``. device: ``"cpu"``, ``"cuda"``, or None for auto-detection. torch_threads: Optional CPU thread cap for small VPS demos. Returns: LoadedGenerator containing the PyTorch model and tokenizer maps. """ if torch_threads is not None: torch.set_num_threads(int(torch_threads)) checkpoint_path = Path(checkpoint_path) if not checkpoint_path.exists(): raise FileNotFoundError(f"Could not find generator checkpoint: {checkpoint_path}") resolved_device = torch.device(device or ("cuda" if torch.cuda.is_available() else "cpu")) try: checkpoint = torch.load(checkpoint_path, map_location=resolved_device, weights_only=False) except TypeError: checkpoint = torch.load(checkpoint_path, map_location=resolved_device) cfg = checkpoint["config"] stoi = {str(k): int(v) for k, v in checkpoint["stoi"].items()} itos = {int(k): str(v) for k, v in checkpoint["itos"].items()} model = JointRouteGPT( vocab_size=cfg["vocab_size"], block_size=cfg["block_size"], n_embd=cfg.get("n_embd", 128), n_head=cfg.get("n_head", 4), n_layer=cfg.get("n_layer", 4), dropout=cfg.get("dropout", 0.10), pad_id=cfg.get("pad_id", stoi[""]), ).to(resolved_device) model.load_state_dict(checkpoint["model_state_dict"]) model.eval() return LoadedGenerator( model=model, stoi=stoi, itos=itos, device=resolved_device, checkpoint_path=checkpoint_path, ) def generate_route( generator: LoadedGenerator, board_config: BoardConfig, angle: int, grade: int, temperature: float = 0.9, top_k: int | None = 50, max_new_tokens: int = 40, ) -> dict[str, object]: """Generate a single route for a board/angle/grade condition.""" return { "board_key": board_config.board_key, "board_display_name": board_config.display_name, **generate_one( model=generator.model, stoi=generator.stoi, itos=generator.itos, device=generator.device, board_prefix=board_config.token_prefix, angle=int(angle), grouped_v=int(grade), role_name_to_id=board_config.role_definitions, temperature=float(temperature), top_k=top_k, max_new_tokens=int(max_new_tokens), ), } def load_board_for_demo(board: str, config_dir: str | Path | None = None) -> BoardConfig: """Load a board config by key, with a clearer demo error message.""" try: return load_board_config(board, config_dir=config_dir) except FileNotFoundError as exc: raise FileNotFoundError( f"Unknown board '{board}'. Expected one of the JSON configs in configs/." ) from exc def build_placement_lookup_from_token_metadata(df_token_meta) -> dict[tuple[str, int], dict]: """Build the placement lookup expected by tokenization helpers. The training-time tokenization code canonicalizes holds using a lookup keyed by ``(board_key, placement_id)``. At inference/demo time, we usually have ``token_metadata.csv`` rather than the raw database, so this reconstructs the necessary coordinate lookup from token metadata. """ hold_meta = df_token_meta[df_token_meta["kind"] == "hold"].dropna(subset=["placement_id"]).copy() lookup: dict[tuple[str, int], dict] = {} for _, row in hold_meta.drop_duplicates(["board_key", "placement_id"]).iterrows(): key = (str(row["board_key"]), int(row["placement_id"])) lookup[key] = { "board_key": str(row["board_key"]), "board_token_prefix": str(row["board_token_prefix"]), "placement_id": int(row["placement_id"]), "x": float(row["x"]), "y": float(row["y"]), "x_norm": float(row.get("x_norm", 0.0)), "y_norm": float(row.get("y_norm", 0.0)), } return lookup def frames_to_grade_model_tokens( frames: str, angle: int, board_config: BoardConfig, df_token_meta, ) -> list[str]: """Convert a user-provided frames string into grade-predictor tokens. Output format matches training for the grade predictor: `` ... `` The route is canonicalized using the same role/y/x ordering used during tokenization. No grade token is included. """ placement_lookup = build_placement_lookup_from_token_metadata(df_token_meta) holds = parse_frames(frames) holds = canonicalize_holds(holds, board_config, placement_lookup) tokens = [ "", board_token(board_config), angle_token(angle), ] tokens.extend( hold_token(placement_id, role_id, board_config) for placement_id, role_id in holds ) tokens.append("") return tokens def predict_frames_grade( grade_predictor: LoadedGradePredictor, frames: str, angle: int, board_config: BoardConfig, df_token_meta, ) -> dict[str, object]: """Predict grade from board, angle, and a BoardLib frames string.""" tokens = frames_to_grade_model_tokens( frames=frames, angle=angle, board_config=board_config, df_token_meta=df_token_meta, ) # predict_route_grade accepts either -style generated tokens or # already-prepared -style model tokens. It will leave the leading # intact through the fallback branch. pred = predict_route_grade(grade_predictor, tokens) return { **pred, "tokens": tokens, "sequence": " ".join(tokens), "board_key": board_config.board_key, "board_display_name": board_config.display_name, "requested_angle": int(angle), "frames": frames, }