198 lines
6.2 KiB
Python
198 lines
6.2 KiB
Python
from __future__ import annotations
|
|
|
|
import ast
|
|
import re
|
|
from typing import Iterable
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from scipy.spatial.distance import pdist
|
|
|
|
# Matches a complete hold token of the form "<PREFIX_p123_role>".
# Group 1: board token prefix (upper-case letters, digits, underscores).
# Group 2: placement id digits.
# Group 3: role name, one of start/middle/finish/foot/unknown.
HOLD_TOKEN_PATTERN = re.compile(r"^<([A-Z0-9_]+)_p(\d+)_(start|middle|finish|foot|unknown)>$")
|
|
|
|
|
|
def parse_token_list(value) -> list[str]:
    """Coerce a token container, or its string form, into a list of tokens.

    Args:
        value: A list (returned unchanged, same object), a string, or anything
            else.

    Returns:
        * the input itself when it is already a list;
        * the parsed elements when the string is a Python list or tuple
          literal (for example ``"['a', 'b']"`` or ``"('a', 'b')"``);
        * ``value.split()`` for any other string (whitespace-separated tokens);
        * an empty list for non-list, non-string input.
    """
    if isinstance(value, list):
        return value
    if not isinstance(value, str):
        return []

    try:
        parsed = ast.literal_eval(value)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # These are the exceptions literal_eval is documented to raise for
        # input that is not a valid literal; treat such text as a plain
        # whitespace-separated token string.
        return value.split()

    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, tuple):
        # A stringified tuple of tokens would otherwise be whitespace-split
        # into fragments that still carry quotes and parentheses.
        return list(parsed)
    # Any other literal (number, dict, bare string, ...) is not a token list.
    return value.split()
|
|
|
|
|
|
def tokens_to_hold_records(tokens: Iterable[str]) -> list[dict[str, object]]:
    """Turn hold tokens into one record dict per recognised token.

    Each token is matched against ``HOLD_TOKEN_PATTERN``; tokens that do not
    match are skipped. Input order is preserved.

    Returns:
        A list of dicts with keys ``token`` (the original text),
        ``board_token_prefix`` (str), ``placement_id`` (int) and ``role`` (str).
    """
    # Pair every token with its match result lazily, then keep only the hits.
    candidates = ((text, HOLD_TOKEN_PATTERN.match(text)) for text in tokens)
    return [
        {
            "token": text,
            "board_token_prefix": found.group(1),
            "placement_id": int(found.group(2)),
            "role": found.group(3),
        }
        for text, found in candidates
        if found is not None
    ]
|
|
|
|
|
|
def validity_from_records(records: list[dict[str, object]], requested_board_prefix: str | None = None) -> dict[str, object]:
    """Summarise structural validity checks for a list of hold records.

    Args:
        records: Dicts carrying ``placement_id``, ``role`` and
            ``board_token_prefix`` entries.
        requested_board_prefix: When given, every record's prefix is compared
            against it; ``None`` makes that check pass trivially.

    Returns:
        A dict of ``*_eval`` counts and flags. ``basic_valid_eval`` requires a
        single board prefix, at least 3 holds, no repeated placement ids, and
        both a start and a finish role. ``strict_valid_eval`` additionally
        requires a middle role and at least 4 holds. The requested-board flag
        is reported but does not feed into either validity flag.
    """
    hold_count = len(records)
    distinct_placements = {int(record["placement_id"]) for record in records}
    role_names = [str(record["role"]) for record in records]
    distinct_prefixes = {str(record["board_token_prefix"]) for record in records}

    single_board = len(distinct_prefixes) < 2
    if requested_board_prefix is None:
        on_requested_board = True
    else:
        on_requested_board = all(
            prefix == requested_board_prefix for prefix in distinct_prefixes
        )

    placement_count = len(distinct_placements)
    start_count = role_names.count("start")
    middle_count = role_names.count("middle")
    foot_count = role_names.count("foot")
    finish_count = role_names.count("finish")

    has_start = start_count > 0
    has_middle = middle_count > 0
    has_finish = finish_count > 0

    basic_ok = (
        single_board
        and hold_count >= 3
        and hold_count == placement_count
        and has_start
        and has_finish
    )
    strict_ok = basic_ok and has_middle and hold_count >= 4

    # Key order is kept stable so downstream tabular output keeps its columns.
    return {
        "n_holds_eval": hold_count,
        "n_unique_placements_eval": placement_count,
        "has_duplicate_placements_eval": hold_count != placement_count,
        "one_board_only_eval": single_board,
        "matches_requested_board_eval": on_requested_board,
        "n_start_eval": start_count,
        "n_middle_eval": middle_count,
        "n_foot_eval": foot_count,
        "n_finish_eval": finish_count,
        "has_start_eval": has_start,
        "has_middle_eval": has_middle,
        "has_finish_eval": has_finish,
        "basic_valid_eval": basic_ok,
        "strict_valid_eval": strict_ok,
    }
|
|
|
|
|
|
def frames_to_holds(frames: str | None) -> list[tuple[int, int]]:
    """Extract every ``p<digits>r<digits>`` pair from a frames string.

    Returns:
        A list of ``(int after "p", int after "r")`` tuples in order of
        appearance; empty when ``frames`` is not a string or contains no pair.
    """
    pairs: list[tuple[int, int]] = []
    if isinstance(frames, str):
        for found in re.finditer(r"p(\d+)r(\d+)", frames):
            pairs.append((int(found.group(1)), int(found.group(2))))
    return pairs
|
|
|
|
|
|
def holds_to_placement_set(holds: Iterable[tuple[int, int]]) -> frozenset[int]:
    """Collect the first element of each two-item hold into a frozenset of ints."""
    placement_ids = set()
    for placement_id, _second in holds:
        placement_ids.add(int(placement_id))
    return frozenset(placement_ids)
|
|
|
|
|
|
def jaccard(a: frozenset[int], b: frozenset[int]) -> float:
    """Return the Jaccard similarity ``|a & b| / |a | b|`` of two sets.

    Two empty sets are treated as identical (1.0); exactly one empty set
    yields 0.0.
    """
    if a and b:
        return len(a & b) / len(a | b)
    # At least one side is empty: similar only if both are.
    return 0.0 if (a or b) else 1.0
|
|
|
|
|
|
def nearest_real_route_same_board(
    generated_set: frozenset[int],
    generated_board_key: str,
    real_df: pd.DataFrame,
) -> dict[str, object]:
    """Find the row of ``real_df`` on the same board most similar to a set.

    Rows whose ``board_key`` equals ``generated_board_key`` are scanned in
    order and scored with ``jaccard(generated_set, row["hold_set"])``; the
    first row reaching the highest score wins.

    Returns:
        A dict with ``nearest_real_jaccard``, ``nearest_real_uuid``,
        ``nearest_real_name`` (from ``climb_name``), ``nearest_real_grouped_v``,
        ``nearest_real_angle`` and ``novelty_distance`` (1 minus the score).
        When no row shares the board key, the score stays at the -1.0
        sentinel, the row fields are ``None`` and ``novelty_distance`` is 2.0.
    """
    same_board = real_df[real_df["board_key"] == generated_board_key]

    top_score = -1.0
    top_row = None
    for _, candidate in same_board.iterrows():
        score = jaccard(generated_set, candidate["hold_set"])
        # Strict comparison keeps the earliest row among ties.
        if score > top_score:
            top_score, top_row = score, candidate

    if top_row is None:
        uuid = name = grouped_v = angle = None
    else:
        uuid = top_row["uuid"]
        name = top_row["climb_name"]
        grouped_v = top_row["grouped_v"]
        angle = top_row["angle"]

    return {
        "nearest_real_jaccard": top_score,
        "nearest_real_uuid": uuid,
        "nearest_real_name": name,
        "nearest_real_grouped_v": grouped_v,
        "nearest_real_angle": angle,
        "novelty_distance": 1.0 - float(top_score),
    }
|
|
|
|
|
|
def build_placement_coords(df_token_meta: pd.DataFrame) -> dict[tuple[str, int], dict[str, float]]:
    """Build a ``(board_key, placement_id) -> {"x", "y"}`` lookup table.

    Only rows whose ``kind`` equals ``"hold"`` and whose ``placement_id`` is
    not missing are used. When several rows share the same board key and
    placement id, the first one encountered supplies the coordinates.
    """
    is_hold = df_token_meta["kind"] == "hold"
    hold_rows = df_token_meta.loc[is_hold]
    hold_rows = hold_rows[hold_rows["placement_id"].notna()]
    first_per_placement = hold_rows.drop_duplicates(subset=["board_key", "placement_id"])

    return {
        (str(entry["board_key"]), int(entry["placement_id"])): {
            "x": float(entry["x"]),
            "y": float(entry["y"]),
        }
        for _, entry in first_per_placement.iterrows()
    }
|
|
|
|
|
|
def simple_route_features(
    board_key: str,
    records: list[dict[str, object]],
    placement_coords: dict[tuple[str, int], dict[str, float]],
) -> dict[str, float]:
    """Compute simple geometric features for the holds of one route.

    Each record's ``placement_id`` is looked up in ``placement_coords`` under
    ``(str(board_key), placement_id)``. Records with no entry, or with a NaN
    ``x`` or ``y``, are ignored.

    Returns:
        A dict with ``geom_n_holds`` (number of located holds), ``geom_height``
        and ``geom_width`` (y and x extents), ``geom_mean_y``,
        ``geom_mean_x_abs`` (mean of absolute x), and ``geom_mean_hand_reach``
        / ``geom_max_hand_reach`` (mean and max pairwise distance between
        holds whose role is start, middle or finish). All features other than
        the count are NaN when no hold is located; the two reach features are
        NaN when fewer than two hand holds are located.
    """
    hand_roles = {"start", "middle", "finish"}
    board = str(board_key)

    # One (x, y, is_hand) tuple per record that resolves to real coordinates.
    located = []
    for record in records:
        coord = placement_coords.get((board, int(record["placement_id"])))
        if coord is None:
            continue
        x, y = float(coord["x"]), float(coord["y"])
        if np.isnan(x) or np.isnan(y):
            continue
        located.append((x, y, str(record["role"]) in hand_roles))

    # Start from all-NaN features so every exit path shares the same keys.
    features = dict.fromkeys(
        (
            "geom_n_holds",
            "geom_height",
            "geom_width",
            "geom_mean_y",
            "geom_mean_x_abs",
            "geom_mean_hand_reach",
            "geom_max_hand_reach",
        ),
        np.nan,
    )
    features["geom_n_holds"] = float(len(located))
    if not located:
        return features

    frame = pd.DataFrame(located, columns=["x", "y", "is_hand"])
    xs, ys = frame["x"], frame["y"]
    features["geom_height"] = float(ys.max() - ys.min())
    features["geom_width"] = float(xs.max() - xs.min())
    features["geom_mean_y"] = float(ys.mean())
    features["geom_mean_x_abs"] = float(xs.abs().mean())

    hand_holds = frame[frame["is_hand"]].sort_values(["y", "x"])
    if len(hand_holds) >= 2:
        reaches = pdist(hand_holds[["x", "y"]].values)
        features["geom_mean_hand_reach"] = float(reaches.mean())
        features["geom_max_hand_reach"] = float(reaches.max())

    return features
|