from __future__ import annotations

import ast
import re
from typing import Iterable

import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist

# Hold token layout: <BOARDPREFIX_p<placement id>_<role>>
HOLD_TOKEN_PATTERN = re.compile(r"^<([A-Z0-9_]+)_p(\d+)_(start|middle|finish|foot|unknown)>$")


def parse_token_list(value: object) -> list[str]:
    """Coerce a stored token sequence into a list.

    Args:
        value: A list (returned unchanged), a string holding either a Python
            list literal or whitespace-separated tokens, or anything else.

    Returns:
        The list itself, the list parsed from the literal, the
        whitespace-split string, or ``[]`` for inputs that are neither a list
        nor a string. Elements of a parsed literal are not type-checked, so
        they are not guaranteed to be strings.
    """
    if isinstance(value, list):
        return value
    if not isinstance(value, str):
        return []
    try:
        parsed = ast.literal_eval(value)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Not a Python literal (the normal case for space-separated tokens);
        # fall through to whitespace splitting below.
        parsed = None
    if isinstance(parsed, list):
        return parsed
    return value.split()


def tokens_to_hold_records(tokens: Iterable[str]) -> list[dict[str, object]]:
    """Extract one record per token that matches ``HOLD_TOKEN_PATTERN``.

    Non-string items and strings that do not match the pattern are skipped.

    Args:
        tokens: Candidate tokens, in order.

    Returns:
        Records with keys ``token``, ``board_token_prefix``,
        ``placement_id`` (int) and ``role``, in input order.
    """
    records: list[dict[str, object]] = []
    for token in tokens:
        # A parsed list literal may contain non-strings; regex matching would
        # raise TypeError on those, so treat them like any non-hold token.
        if not isinstance(token, str):
            continue
        match = HOLD_TOKEN_PATTERN.match(token)
        if match is None:
            continue
        prefix, placement, role = match.groups()
        records.append(
            {
                "token": token,
                "board_token_prefix": prefix,
                "placement_id": int(placement),
                "role": role,
            }
        )
    return records


def validity_from_records(
    records: list[dict[str, object]],
    requested_board_prefix: str | None = None,
) -> dict[str, object]:
    """Summarise hold counts and validity flags for a list of hold records.

    Args:
        records: Records carrying ``placement_id``, ``role`` and
            ``board_token_prefix`` keys.
        requested_board_prefix: When given, every record's prefix is compared
            against it for ``matches_requested_board_eval``.

    Returns:
        A dict of ``*_eval`` counts and flags. ``basic_valid_eval`` requires a
        single board prefix, at least 3 holds, no repeated placement, a start
        and a finish. ``strict_valid_eval`` additionally requires a middle
        hold and at least 4 holds.
    """
    placements = [int(record["placement_id"]) for record in records]
    roles = [str(record["role"]) for record in records]
    prefixes = [str(record["board_token_prefix"]) for record in records]

    n_holds = len(records)
    n_unique = len(set(placements))
    one_board_only = len(set(prefixes)) <= 1
    matches_requested_board = requested_board_prefix is None or all(
        prefix == requested_board_prefix for prefix in prefixes
    )

    out: dict[str, object] = {
        "n_holds_eval": n_holds,
        "n_unique_placements_eval": n_unique,
        "has_duplicate_placements_eval": n_holds != n_unique,
        "one_board_only_eval": one_board_only,
        "matches_requested_board_eval": matches_requested_board,
        "n_start_eval": roles.count("start"),
        "n_middle_eval": roles.count("middle"),
        "n_foot_eval": roles.count("foot"),
        "n_finish_eval": roles.count("finish"),
        "has_start_eval": "start" in roles,
        "has_middle_eval": "middle" in roles,
        "has_finish_eval": "finish" in roles,
    }
    out["basic_valid_eval"] = (
        one_board_only
        and n_holds >= 3
        and n_holds == n_unique
        and out["has_start_eval"]
        and out["has_finish_eval"]
    )
    out["strict_valid_eval"] = (
        out["basic_valid_eval"] and out["has_middle_eval"] and n_holds >= 4
    )
    return out


def frames_to_holds(frames: str | None) -> list[tuple[int, int]]:
    """Parse every ``p<digits>r<digits>`` pair found in a frames string.

    Args:
        frames: The frames string; any non-string value yields ``[]``.

    Returns:
        ``(p, r)`` integer pairs in order of appearance.
    """
    if not isinstance(frames, str):
        return []
    return [(int(p), int(r)) for p, r in re.findall(r"p(\d+)r(\d+)", frames)]


def holds_to_placement_set(holds: Iterable[tuple[int, int]]) -> frozenset[int]:
    """Return the set of first elements (placement ids) of the given pairs."""
    return frozenset(int(placement_id) for placement_id, _ in holds)


def jaccard(a: frozenset[int], b: frozenset[int]) -> float:
    """Return the Jaccard similarity ``|a & b| / |a | b|``.

    Two empty sets count as identical (1.0); exactly one empty set gives 0.0.
    """
    if not a and not b:
        return 1.0
    if not a or not b:
        return 0.0
    return len(a & b) / len(a | b)


def nearest_real_route_same_board(
    generated_set: frozenset[int],
    generated_board_key: str,
    real_df: pd.DataFrame,
) -> dict[str, object]:
    """Find the row on the same board whose hold set is most similar.

    Args:
        generated_set: Placement ids to compare.
        generated_board_key: Only rows whose ``board_key`` equals this value
            are considered.
        real_df: Frame with ``board_key``, ``hold_set``, ``uuid``,
            ``climb_name``, ``grouped_v`` and ``angle`` columns.

    Returns:
        ``nearest_real_*`` fields copied from the best row (the first one wins
        on ties) plus ``novelty_distance`` = 1 - best similarity.

        NOTE: when no row shares the board key, ``nearest_real_jaccard`` stays
        at the -1.0 sentinel and ``novelty_distance`` is therefore 2.0; the
        other fields are ``None``.
    """
    board_frame = real_df[real_df["board_key"] == generated_board_key]
    best: dict[str, object] = {
        "nearest_real_jaccard": -1.0,
        "nearest_real_uuid": None,
        "nearest_real_name": None,
        "nearest_real_grouped_v": None,
        "nearest_real_angle": None,
    }
    for _, row in board_frame.iterrows():
        similarity = jaccard(generated_set, row["hold_set"])
        # Strict comparison keeps the earliest row among equal similarities.
        if similarity > best["nearest_real_jaccard"]:
            best.update(
                {
                    "nearest_real_jaccard": similarity,
                    "nearest_real_uuid": row["uuid"],
                    "nearest_real_name": row["climb_name"],
                    "nearest_real_grouped_v": row["grouped_v"],
                    "nearest_real_angle": row["angle"],
                }
            )
    best["novelty_distance"] = 1.0 - float(best["nearest_real_jaccard"])
    return best


def build_placement_coords(
    df_token_meta: pd.DataFrame,
) -> dict[tuple[str, int], dict[str, float]]:
    """Map ``(board_key, placement_id)`` to ``{"x": ..., "y": ...}``.

    Only rows with ``kind == "hold"`` and a non-null ``placement_id`` are
    used; when a (board_key, placement_id) pair repeats, the first row wins.

    Args:
        df_token_meta: Frame with ``kind``, ``board_key``, ``placement_id``,
            ``x`` and ``y`` columns.

    Returns:
        The coordinate lookup, with coordinates converted to ``float``.
    """
    hold_meta = (
        df_token_meta[df_token_meta["kind"] == "hold"]
        .dropna(subset=["placement_id"])
        .copy()
    )
    unique_holds = hold_meta.drop_duplicates(["board_key", "placement_id"])
    coords: dict[tuple[str, int], dict[str, float]] = {}
    for _, row in unique_holds.iterrows():
        key = (str(row["board_key"]), int(row["placement_id"]))
        coords[key] = {"x": float(row["x"]), "y": float(row["y"])}
    return coords


def simple_route_features(
    board_key: str,
    records: list[dict[str, object]],
    placement_coords: dict[tuple[str, int], dict[str, float]],
) -> dict[str, float]:
    """Compute simple geometric features for the holds of one route.

    Records whose placement has no entry in ``placement_coords`` for this
    board, or whose coordinates are NaN, are ignored.

    Args:
        board_key: Board whose coordinates should be looked up.
        records: Hold records carrying ``placement_id`` and ``role``.
        placement_coords: Lookup keyed by ``(board_key, placement_id)``.

    Returns:
        ``geom_n_holds``, ``geom_height`` / ``geom_width`` (coordinate
        ranges), ``geom_mean_y``, ``geom_mean_x_abs`` and the mean / max
        pairwise Euclidean distance between hand holds (roles start, middle,
        finish). Reach values are NaN with fewer than two hand holds; every
        value except the count is NaN when no hold has usable coordinates.
    """
    hand_roles = {"start", "middle", "finish"}
    rows = []
    for record in records:
        coord = placement_coords.get((str(board_key), int(record["placement_id"])))
        if coord is None:
            continue
        x = float(coord["x"])
        y = float(coord["y"])
        if np.isnan(x) or np.isnan(y):
            continue
        rows.append({"x": x, "y": y, "is_hand": str(record["role"]) in hand_roles})

    if not rows:
        return {
            "geom_n_holds": 0.0,
            "geom_height": np.nan,
            "geom_width": np.nan,
            "geom_mean_y": np.nan,
            "geom_mean_x_abs": np.nan,
            "geom_mean_hand_reach": np.nan,
            "geom_max_hand_reach": np.nan,
        }

    d = pd.DataFrame(rows)
    out = {
        "geom_n_holds": float(len(d)),
        "geom_height": float(d["y"].max() - d["y"].min()),
        "geom_width": float(d["x"].max() - d["x"].min()),
        "geom_mean_y": float(d["y"].mean()),
        "geom_mean_x_abs": float(d["x"].abs().mean()),
    }

    # Sorting fixes the order of the pairwise distances so the floating-point
    # mean does not depend on the order of the input records.
    hands = d[d["is_hand"]].sort_values(["y", "x"])
    if len(hands) >= 2:
        distances = pdist(hands[["x", "y"]].values)
        out["geom_mean_hand_reach"] = float(distances.mean())
        out["geom_max_hand_reach"] = float(distances.max())
    else:
        out["geom_mean_hand_reach"] = np.nan
        out["geom_max_hand_reach"] = np.nan
    return out