predict script update
This commit is contained in:
10
README.md
10
README.md
@@ -13,10 +13,10 @@ I recently got into *board climbing*, and have been enjoying using the <a href="
|
|||||||
|
|
||||||
This project analyzes ~130,000 climbs from the Tension Boards in order to do the following.
|
This project analyzes ~130,000 climbs from the Tension Boards in order to do the following.
|
||||||
> 1. **Understand** hold usage patterns and difficulty distributions
|
> 1. **Understand** hold usage patterns and difficulty distributions
|
||||||
> 2. **Quantify** empirical hold difficulty scores (for analysis)
|
> 2. **Quantify** empirical hold difficulty scores
|
||||||
> 3. **Predict** climb grades from spatial and structural features of climbs
|
> 3. **Predict** climb grades from spatial and structural features of climbs
|
||||||
|
|
||||||
Climbing grades are inherently subjective. Different climbers use different beta, setters have different grading standards, and difficulty depends on factors not always captured in data. What makes it harder in the case of the board climbing is that the grade is displayed almost democratically -- it is determined by user input.
|
Climbing grades are inherently subjective. Different climbers use different beta, setters have different grading standards, and difficulty depends on factors not always captured in data. Moreover, on the boards, the displayed grade for any specific climb is based on user input.
|
||||||
|
|
||||||
Using a Tension Board (2) dataset, this project combines:
|
Using a Tension Board (2) dataset, this project combines:
|
||||||
|
|
||||||
@@ -153,7 +153,7 @@ Beyond structural analysis, we can also study how board-climbers behave over tim
|
|||||||

|

|
||||||
|
|
||||||
* Hold difficulty is estimated from climb data
|
* Hold difficulty is estimated from climb data
|
||||||
* We averaged (pre-role/per-angle) difficulty for each hold (with Bayesian smoothing)
|
* We averaged (per-role/per-angle) difficulty for each hold (with Bayesian smoothing)
|
||||||
* Took advantage of the mirrored layout to increase the amount of data per hold
|
* Took advantage of the mirrored layout to increase the amount of data per hold
|
||||||
|
|
||||||
### Key technique: Bayesian smoothing
|
### Key technique: Bayesian smoothing
|
||||||
@@ -273,7 +273,7 @@ Models tested:
|
|||||||
|
|
||||||
### Feature importance
|
### Feature importance
|
||||||
|
|
||||||

|

|
||||||

|

|
||||||
|
|
||||||
Key drivers:
|
Key drivers:
|
||||||
@@ -289,7 +289,7 @@ This confirms that **difficulty is strongly tied to spatial arrangement and move
|
|||||||
|
|
||||||
## 10. Model Performance
|
## 10. Model Performance
|
||||||
|
|
||||||

|

|
||||||

|

|
||||||
|
|
||||||
### Results (in terms of V-grade)
|
### Results (in terms of V-grade)
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 84 KiB After Width: | Height: | Size: 84 KiB |
@@ -866,8 +866,7 @@
|
|||||||
"print(\"\"\"\\nInterpretation:\n",
|
"print(\"\"\"\\nInterpretation:\n",
|
||||||
"- Each row is a climb-angle observation.\n",
|
"- Each row is a climb-angle observation.\n",
|
||||||
"- The target is `display_difficulty`.\n",
|
"- The target is `display_difficulty`.\n",
|
||||||
"- The predictors combine geometry, hold statistics, and aggregate difficulty information.\n",
|
"- The predictors combine geometry and structure\n",
|
||||||
"- Hold-difficulty-based features use Bayesian-smoothed hold scores from Notebook 03.\n",
|
|
||||||
"- The next notebook tests how much predictive signal these engineered features actually contain.\n",
|
"- The next notebook tests how much predictive signal these engineered features actually contain.\n",
|
||||||
"\"\"\")\n"
|
"\"\"\")\n"
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -706,7 +706,7 @@
|
|||||||
"ax.invert_yaxis()\n",
|
"ax.invert_yaxis()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"plt.tight_layout()\n",
|
"plt.tight_layout()\n",
|
||||||
"plt.savefig('../images/05_predictive_modelling/random_forest_importance.png', dpi=150, bbox_inches='tight')\n",
|
"plt.savefig('../images/05_predictive_modelling/random_forest_feature_importance.png', dpi=150, bbox_inches='tight')\n",
|
||||||
"plt.show()"
|
"plt.show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -23,7 +23,6 @@ ROOT = Path(__file__).resolve().parents[1]
|
|||||||
|
|
||||||
SCALER_PATH = ROOT / "models" / "feature_scaler.pkl"
|
SCALER_PATH = ROOT / "models" / "feature_scaler.pkl"
|
||||||
FEATURE_NAMES_PATH = ROOT / "models" / "feature_names.txt"
|
FEATURE_NAMES_PATH = ROOT / "models" / "feature_names.txt"
|
||||||
HOLD_DIFFICULTY_PATH = ROOT / "data" / "03_hold_difficulty" / "hold_difficulty_scores.csv"
|
|
||||||
PLACEMENTS_PATH = ROOT / "data" / "placements.csv" # adjust if needed
|
PLACEMENTS_PATH = ROOT / "data" / "placements.csv" # adjust if needed
|
||||||
|
|
||||||
|
|
||||||
@@ -164,7 +163,6 @@ scaler = joblib.load(SCALER_PATH)
|
|||||||
with open(FEATURE_NAMES_PATH, "r") as f:
|
with open(FEATURE_NAMES_PATH, "r") as f:
|
||||||
FEATURE_NAMES = [line.strip() for line in f if line.strip()]
|
FEATURE_NAMES = [line.strip() for line in f if line.strip()]
|
||||||
|
|
||||||
df_hold_difficulty = pd.read_csv(HOLD_DIFFICULTY_PATH, index_col="placement_id")
|
|
||||||
df_placements = pd.read_csv(PLACEMENTS_PATH)
|
df_placements = pd.read_csv(PLACEMENTS_PATH)
|
||||||
|
|
||||||
placement_coords = {
|
placement_coords = {
|
||||||
@@ -260,46 +258,14 @@ def parse_frames(frames: str):
|
|||||||
return [(int(p), int(r)) for p, r in matches]
|
return [(int(p), int(r)) for p, r in matches]
|
||||||
|
|
||||||
|
|
||||||
def lookup_hold_difficulty(placement_id, angle, role_type, is_hand, is_foot):
|
|
||||||
"""
|
|
||||||
Preference order:
|
|
||||||
1. role-specific per-angle
|
|
||||||
2. aggregate hand/foot per-angle
|
|
||||||
3. overall_difficulty fallback
|
|
||||||
"""
|
|
||||||
if placement_id not in df_hold_difficulty.index:
|
|
||||||
return np.nan
|
|
||||||
|
|
||||||
row = df_hold_difficulty.loc[placement_id]
|
|
||||||
|
|
||||||
diff_key = f"{role_type}_diff_{int(angle)}deg"
|
|
||||||
hand_diff_key = f"hand_diff_{int(angle)}deg"
|
|
||||||
foot_diff_key = f"foot_diff_{int(angle)}deg"
|
|
||||||
|
|
||||||
difficulty = np.nan
|
|
||||||
|
|
||||||
if diff_key in row.index:
|
|
||||||
difficulty = row[diff_key]
|
|
||||||
|
|
||||||
if pd.isna(difficulty):
|
|
||||||
if is_hand and hand_diff_key in row.index:
|
|
||||||
difficulty = row[hand_diff_key]
|
|
||||||
elif is_foot and foot_diff_key in row.index:
|
|
||||||
difficulty = row[foot_diff_key]
|
|
||||||
|
|
||||||
if pd.isna(difficulty) and "overall_difficulty" in row.index:
|
|
||||||
difficulty = row["overall_difficulty"]
|
|
||||||
|
|
||||||
return difficulty
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# Feature extraction
|
# Feature extraction
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
|
||||||
def extract_features_from_raw(angle, frames, is_nomatch=0, description=""):
|
def extract_features_from_raw(angle, frames, is_nomatch=0, description=""):
|
||||||
features = {}
|
"""
|
||||||
|
Extract the clean, leakage-free feature set used by the updated models.
|
||||||
|
"""
|
||||||
holds = parse_frames(frames)
|
holds = parse_frames(frames)
|
||||||
if not holds:
|
if not holds:
|
||||||
raise ValueError("Could not parse any holds from frames.")
|
raise ValueError("Could not parse any holds from frames.")
|
||||||
@@ -311,26 +277,16 @@ def extract_features_from_raw(angle, frames, is_nomatch=0, description=""):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
role_type = get_role_type(role_id)
|
role_type = get_role_type(role_id)
|
||||||
is_hand = role_id in HAND_ROLE_IDS
|
is_hand_role = role_id in HAND_ROLE_IDS
|
||||||
is_foot = role_id in FOOT_ROLE_IDS
|
is_foot_role = role_id in FOOT_ROLE_IDS
|
||||||
|
|
||||||
difficulty = lookup_hold_difficulty(
|
|
||||||
placement_id=placement_id,
|
|
||||||
angle=angle,
|
|
||||||
role_type=role_type,
|
|
||||||
is_hand=is_hand,
|
|
||||||
is_foot=is_foot,
|
|
||||||
)
|
|
||||||
|
|
||||||
hold_data.append({
|
hold_data.append({
|
||||||
"placement_id": placement_id,
|
"placement_id": placement_id,
|
||||||
"x": coords[0],
|
"x": coords[0],
|
||||||
"y": coords[1],
|
"y": coords[1],
|
||||||
"role_id": role_id,
|
|
||||||
"role_type": role_type,
|
"role_type": role_type,
|
||||||
"is_hand": is_hand,
|
"is_hand": is_hand_role,
|
||||||
"is_foot": is_foot,
|
"is_foot": is_foot_role,
|
||||||
"difficulty": difficulty,
|
|
||||||
})
|
})
|
||||||
|
|
||||||
if not hold_data:
|
if not hold_data:
|
||||||
@@ -344,389 +300,157 @@ def extract_features_from_raw(angle, frames, is_nomatch=0, description=""):
|
|||||||
finish_holds = df_holds[df_holds["role_type"] == "finish"]
|
finish_holds = df_holds[df_holds["role_type"] == "finish"]
|
||||||
middle_holds = df_holds[df_holds["role_type"] == "middle"]
|
middle_holds = df_holds[df_holds["role_type"] == "middle"]
|
||||||
|
|
||||||
xs = df_holds["x"].values
|
xs = df_holds["x"].to_numpy()
|
||||||
ys = df_holds["y"].values
|
ys = df_holds["y"].to_numpy()
|
||||||
|
|
||||||
features["angle"] = angle
|
|
||||||
|
|
||||||
features["total_holds"] = len(df_holds)
|
|
||||||
features["hand_holds"] = len(hand_holds)
|
|
||||||
features["foot_holds"] = len(foot_holds)
|
|
||||||
features["start_holds"] = len(start_holds)
|
|
||||||
features["finish_holds"] = len(finish_holds)
|
|
||||||
features["middle_holds"] = len(middle_holds)
|
|
||||||
|
|
||||||
desc = str(description) if description is not None else ""
|
desc = str(description) if description is not None else ""
|
||||||
|
if pd.isna(desc):
|
||||||
|
desc = ""
|
||||||
|
|
||||||
|
center_x = (x_min + x_max) / 2
|
||||||
|
features = {}
|
||||||
|
|
||||||
|
# Core / counts
|
||||||
|
features["angle"] = float(angle)
|
||||||
|
features["angle_squared"] = float(angle) ** 2
|
||||||
|
features["total_holds"] = int(len(df_holds))
|
||||||
|
features["hand_holds"] = int(len(hand_holds))
|
||||||
|
features["foot_holds"] = int(len(foot_holds))
|
||||||
|
features["start_holds"] = int(len(start_holds))
|
||||||
|
features["finish_holds"] = int(len(finish_holds))
|
||||||
|
features["middle_holds"] = int(len(middle_holds))
|
||||||
features["is_nomatch"] = int(
|
features["is_nomatch"] = int(
|
||||||
(is_nomatch == 1) or
|
(is_nomatch == 1) or
|
||||||
bool(re.search(r"\bno\s*match(ing)?\b", desc, flags=re.IGNORECASE))
|
bool(re.search(r"\bno\s*match(ing)?\b", desc, flags=re.IGNORECASE))
|
||||||
)
|
)
|
||||||
|
|
||||||
features["mean_x"] = np.mean(xs)
|
# Spatial
|
||||||
features["mean_y"] = np.mean(ys)
|
features["mean_y"] = float(np.mean(ys))
|
||||||
features["std_x"] = np.std(xs) if len(xs) > 1 else 0
|
features["std_x"] = float(np.std(xs)) if len(xs) > 1 else 0.0
|
||||||
features["std_y"] = np.std(ys) if len(ys) > 1 else 0
|
features["std_y"] = float(np.std(ys)) if len(ys) > 1 else 0.0
|
||||||
features["range_x"] = np.max(xs) - np.min(xs)
|
features["range_x"] = float(np.max(xs) - np.min(xs))
|
||||||
features["range_y"] = np.max(ys) - np.min(ys)
|
features["range_y"] = float(np.max(ys) - np.min(ys))
|
||||||
features["min_y"] = np.min(ys)
|
features["min_y"] = float(np.min(ys))
|
||||||
features["max_y"] = np.max(ys)
|
features["max_y"] = float(np.max(ys))
|
||||||
|
|
||||||
if len(start_holds) > 0:
|
|
||||||
features["start_height"] = start_holds["y"].mean()
|
|
||||||
features["start_height_min"] = start_holds["y"].min()
|
|
||||||
features["start_height_max"] = start_holds["y"].max()
|
|
||||||
else:
|
|
||||||
features["start_height"] = np.nan
|
|
||||||
features["start_height_min"] = np.nan
|
|
||||||
features["start_height_max"] = np.nan
|
|
||||||
|
|
||||||
if len(finish_holds) > 0:
|
|
||||||
features["finish_height"] = finish_holds["y"].mean()
|
|
||||||
features["finish_height_min"] = finish_holds["y"].min()
|
|
||||||
features["finish_height_max"] = finish_holds["y"].max()
|
|
||||||
else:
|
|
||||||
features["finish_height"] = np.nan
|
|
||||||
features["finish_height_min"] = np.nan
|
|
||||||
features["finish_height_max"] = np.nan
|
|
||||||
|
|
||||||
features["height_gained"] = features["max_y"] - features["min_y"]
|
features["height_gained"] = features["max_y"] - features["min_y"]
|
||||||
|
|
||||||
if pd.notna(features["finish_height"]) and pd.notna(features["start_height"]):
|
start_height = float(start_holds["y"].mean()) if len(start_holds) > 0 else np.nan
|
||||||
features["height_gained_start_finish"] = features["finish_height"] - features["start_height"]
|
finish_height = float(finish_holds["y"].mean()) if len(finish_holds) > 0 else np.nan
|
||||||
else:
|
features["height_gained_start_finish"] = (
|
||||||
features["height_gained_start_finish"] = np.nan
|
finish_height - start_height
|
||||||
|
if pd.notna(start_height) and pd.notna(finish_height)
|
||||||
|
else np.nan
|
||||||
|
)
|
||||||
|
|
||||||
bbox_width = features["range_x"]
|
# Density / symmetry
|
||||||
bbox_height = features["range_y"]
|
bbox_area = features["range_x"] * features["range_y"]
|
||||||
features["bbox_area"] = bbox_width * bbox_height
|
features["bbox_area"] = float(bbox_area)
|
||||||
features["bbox_aspect_ratio"] = bbox_width / bbox_height if bbox_height > 0 else 0
|
features["hold_density"] = float(features["total_holds"] / bbox_area) if bbox_area > 0 else 0.0
|
||||||
features["bbox_normalized_area"] = features["bbox_area"] / (board_width * board_height)
|
features["holds_per_vertical_foot"] = float(features["total_holds"] / max(features["range_y"], 1))
|
||||||
|
|
||||||
features["hold_density"] = features["total_holds"] / features["bbox_area"] if features["bbox_area"] > 0 else 0
|
left_holds = int((df_holds["x"] < center_x).sum())
|
||||||
features["holds_per_vertical_foot"] = features["total_holds"] / max(features["range_y"], 1)
|
features["left_ratio"] = left_holds / features["total_holds"] if features["total_holds"] > 0 else 0.5
|
||||||
|
|
||||||
center_x = (x_min + x_max) / 2
|
|
||||||
features["left_holds"] = (df_holds["x"] < center_x).sum()
|
|
||||||
features["right_holds"] = (df_holds["x"] >= center_x).sum()
|
|
||||||
features["left_ratio"] = features["left_holds"] / features["total_holds"] if features["total_holds"] > 0 else 0.5
|
|
||||||
features["symmetry_score"] = 1 - abs(features["left_ratio"] - 0.5) * 2
|
features["symmetry_score"] = 1 - abs(features["left_ratio"] - 0.5) * 2
|
||||||
|
|
||||||
if len(hand_holds) > 0:
|
|
||||||
hand_left = (hand_holds["x"] < center_x).sum()
|
|
||||||
features["hand_left_ratio"] = hand_left / len(hand_holds)
|
|
||||||
features["hand_symmetry"] = 1 - abs(features["hand_left_ratio"] - 0.5) * 2
|
|
||||||
else:
|
|
||||||
features["hand_left_ratio"] = np.nan
|
|
||||||
features["hand_symmetry"] = np.nan
|
|
||||||
|
|
||||||
y_median = np.median(ys)
|
y_median = np.median(ys)
|
||||||
features["upper_holds"] = (df_holds["y"] > y_median).sum()
|
upper_holds = int((df_holds["y"] > y_median).sum())
|
||||||
features["lower_holds"] = (df_holds["y"] <= y_median).sum()
|
features["upper_ratio"] = upper_holds / features["total_holds"]
|
||||||
features["upper_ratio"] = features["upper_holds"] / features["total_holds"]
|
|
||||||
|
|
||||||
|
# Hand reach
|
||||||
if len(hand_holds) >= 2:
|
if len(hand_holds) >= 2:
|
||||||
hand_xs = hand_holds["x"].values
|
hand_points = hand_holds[["x", "y"]].to_numpy()
|
||||||
hand_ys = hand_holds["y"].values
|
hand_distances = pdist(hand_points)
|
||||||
|
hand_xs = hand_holds["x"].to_numpy()
|
||||||
|
hand_ys = hand_holds["y"].to_numpy()
|
||||||
|
|
||||||
hand_distances = []
|
features["mean_hand_reach"] = float(np.mean(hand_distances))
|
||||||
for i in range(len(hand_holds)):
|
features["max_hand_reach"] = float(np.max(hand_distances))
|
||||||
for j in range(i + 1, len(hand_holds)):
|
features["std_hand_reach"] = float(np.std(hand_distances))
|
||||||
dx = hand_xs[i] - hand_xs[j]
|
features["hand_spread_x"] = float(hand_xs.max() - hand_xs.min())
|
||||||
dy = hand_ys[i] - hand_ys[j]
|
features["hand_spread_y"] = float(hand_ys.max() - hand_ys.min())
|
||||||
hand_distances.append(np.sqrt(dx**2 + dy**2))
|
|
||||||
|
|
||||||
features["max_hand_reach"] = max(hand_distances)
|
|
||||||
features["min_hand_reach"] = min(hand_distances)
|
|
||||||
features["mean_hand_reach"] = np.mean(hand_distances)
|
|
||||||
features["std_hand_reach"] = np.std(hand_distances)
|
|
||||||
features["hand_spread_x"] = hand_xs.max() - hand_xs.min()
|
|
||||||
features["hand_spread_y"] = hand_ys.max() - hand_ys.min()
|
|
||||||
else:
|
else:
|
||||||
features["max_hand_reach"] = 0
|
features["mean_hand_reach"] = 0.0
|
||||||
features["min_hand_reach"] = 0
|
features["max_hand_reach"] = 0.0
|
||||||
features["mean_hand_reach"] = 0
|
features["std_hand_reach"] = 0.0
|
||||||
features["std_hand_reach"] = 0
|
features["hand_spread_x"] = 0.0
|
||||||
features["hand_spread_x"] = 0
|
features["hand_spread_y"] = 0.0
|
||||||
features["hand_spread_y"] = 0
|
|
||||||
|
|
||||||
if len(foot_holds) >= 2:
|
|
||||||
foot_xs = foot_holds["x"].values
|
|
||||||
foot_ys = foot_holds["y"].values
|
|
||||||
|
|
||||||
foot_distances = []
|
|
||||||
for i in range(len(foot_holds)):
|
|
||||||
for j in range(i + 1, len(foot_holds)):
|
|
||||||
dx = foot_xs[i] - foot_xs[j]
|
|
||||||
dy = foot_ys[i] - foot_ys[j]
|
|
||||||
foot_distances.append(np.sqrt(dx**2 + dy**2))
|
|
||||||
|
|
||||||
features["max_foot_spread"] = max(foot_distances)
|
|
||||||
features["mean_foot_spread"] = np.mean(foot_distances)
|
|
||||||
features["foot_spread_x"] = foot_xs.max() - foot_xs.min()
|
|
||||||
features["foot_spread_y"] = foot_ys.max() - foot_ys.min()
|
|
||||||
else:
|
|
||||||
features["max_foot_spread"] = 0
|
|
||||||
features["mean_foot_spread"] = 0
|
|
||||||
features["foot_spread_x"] = 0
|
|
||||||
features["foot_spread_y"] = 0
|
|
||||||
|
|
||||||
|
# Hand-foot distances
|
||||||
if len(hand_holds) > 0 and len(foot_holds) > 0:
|
if len(hand_holds) > 0 and len(foot_holds) > 0:
|
||||||
h2f_distances = []
|
hand_points = hand_holds[["x", "y"]].to_numpy()
|
||||||
for _, h in hand_holds.iterrows():
|
foot_points = foot_holds[["x", "y"]].to_numpy()
|
||||||
for _, f in foot_holds.iterrows():
|
dists = []
|
||||||
dx = h["x"] - f["x"]
|
for hx, hy in hand_points:
|
||||||
dy = h["y"] - f["y"]
|
for fx, fy in foot_points:
|
||||||
h2f_distances.append(np.sqrt(dx**2 + dy**2))
|
dists.append(np.sqrt((hx - fx) ** 2 + (hy - fy) ** 2))
|
||||||
|
dists = np.asarray(dists, dtype=float)
|
||||||
|
|
||||||
features["max_hand_to_foot"] = max(h2f_distances)
|
features["min_hand_to_foot"] = float(np.min(dists))
|
||||||
features["min_hand_to_foot"] = min(h2f_distances)
|
features["mean_hand_to_foot"] = float(np.mean(dists))
|
||||||
features["mean_hand_to_foot"] = np.mean(h2f_distances)
|
features["std_hand_to_foot"] = float(np.std(dists))
|
||||||
features["std_hand_to_foot"] = np.std(h2f_distances)
|
|
||||||
else:
|
else:
|
||||||
features["max_hand_to_foot"] = 0
|
features["min_hand_to_foot"] = 0.0
|
||||||
features["min_hand_to_foot"] = 0
|
features["mean_hand_to_foot"] = 0.0
|
||||||
features["mean_hand_to_foot"] = 0
|
features["std_hand_to_foot"] = 0.0
|
||||||
features["std_hand_to_foot"] = 0
|
|
||||||
|
|
||||||
difficulties = df_holds["difficulty"].dropna().values
|
# Global geometry
|
||||||
|
points = np.column_stack([xs, ys])
|
||||||
if len(difficulties) > 0:
|
|
||||||
features["mean_hold_difficulty"] = np.mean(difficulties)
|
|
||||||
features["max_hold_difficulty"] = np.max(difficulties)
|
|
||||||
features["min_hold_difficulty"] = np.min(difficulties)
|
|
||||||
features["std_hold_difficulty"] = np.std(difficulties)
|
|
||||||
features["median_hold_difficulty"] = np.median(difficulties)
|
|
||||||
features["difficulty_range"] = features["max_hold_difficulty"] - features["min_hold_difficulty"]
|
|
||||||
else:
|
|
||||||
features["mean_hold_difficulty"] = np.nan
|
|
||||||
features["max_hold_difficulty"] = np.nan
|
|
||||||
features["min_hold_difficulty"] = np.nan
|
|
||||||
features["std_hold_difficulty"] = np.nan
|
|
||||||
features["median_hold_difficulty"] = np.nan
|
|
||||||
features["difficulty_range"] = np.nan
|
|
||||||
|
|
||||||
hand_diffs = hand_holds["difficulty"].dropna().values if len(hand_holds) > 0 else np.array([])
|
|
||||||
if len(hand_diffs) > 0:
|
|
||||||
features["mean_hand_difficulty"] = np.mean(hand_diffs)
|
|
||||||
features["max_hand_difficulty"] = np.max(hand_diffs)
|
|
||||||
features["std_hand_difficulty"] = np.std(hand_diffs)
|
|
||||||
else:
|
|
||||||
features["mean_hand_difficulty"] = np.nan
|
|
||||||
features["max_hand_difficulty"] = np.nan
|
|
||||||
features["std_hand_difficulty"] = np.nan
|
|
||||||
|
|
||||||
foot_diffs = foot_holds["difficulty"].dropna().values if len(foot_holds) > 0 else np.array([])
|
|
||||||
if len(foot_diffs) > 0:
|
|
||||||
features["mean_foot_difficulty"] = np.mean(foot_diffs)
|
|
||||||
features["max_foot_difficulty"] = np.max(foot_diffs)
|
|
||||||
features["std_foot_difficulty"] = np.std(foot_diffs)
|
|
||||||
else:
|
|
||||||
features["mean_foot_difficulty"] = np.nan
|
|
||||||
features["max_foot_difficulty"] = np.nan
|
|
||||||
features["std_foot_difficulty"] = np.nan
|
|
||||||
|
|
||||||
start_diffs = start_holds["difficulty"].dropna().values if len(start_holds) > 0 else np.array([])
|
|
||||||
finish_diffs = finish_holds["difficulty"].dropna().values if len(finish_holds) > 0 else np.array([])
|
|
||||||
features["start_difficulty"] = np.mean(start_diffs) if len(start_diffs) > 0 else np.nan
|
|
||||||
features["finish_difficulty"] = np.mean(finish_diffs) if len(finish_diffs) > 0 else np.nan
|
|
||||||
|
|
||||||
features["hand_foot_ratio"] = features["hand_holds"] / max(features["foot_holds"], 1)
|
|
||||||
features["movement_density"] = features["total_holds"] / max(features["height_gained"], 1)
|
|
||||||
features["hold_com_x"] = np.average(xs)
|
|
||||||
features["hold_com_y"] = np.average(ys)
|
|
||||||
|
|
||||||
if len(difficulties) > 0 and len(ys) >= len(difficulties):
|
|
||||||
weights = (ys[:len(difficulties)] - ys.min()) / max(ys.max() - ys.min(), 1) + 0.5
|
|
||||||
features["weighted_difficulty"] = np.average(difficulties, weights=weights)
|
|
||||||
else:
|
|
||||||
features["weighted_difficulty"] = features["mean_hold_difficulty"]
|
|
||||||
|
|
||||||
if len(df_holds) >= 3:
|
if len(df_holds) >= 3:
|
||||||
try:
|
try:
|
||||||
points = np.column_stack([xs, ys])
|
|
||||||
hull = ConvexHull(points)
|
hull = ConvexHull(points)
|
||||||
features["convex_hull_area"] = hull.volume
|
features["convex_hull_area"] = float(hull.volume)
|
||||||
features["convex_hull_perimeter"] = hull.area
|
features["hull_area_to_bbox_ratio"] = float(features["convex_hull_area"] / max(bbox_area, 1))
|
||||||
features["hull_area_to_bbox_ratio"] = features["convex_hull_area"] / max(features["bbox_area"], 1)
|
|
||||||
except Exception:
|
except Exception:
|
||||||
features["convex_hull_area"] = np.nan
|
features["convex_hull_area"] = np.nan
|
||||||
features["convex_hull_perimeter"] = np.nan
|
|
||||||
features["hull_area_to_bbox_ratio"] = np.nan
|
features["hull_area_to_bbox_ratio"] = np.nan
|
||||||
else:
|
else:
|
||||||
features["convex_hull_area"] = 0
|
features["convex_hull_area"] = 0.0
|
||||||
features["convex_hull_perimeter"] = 0
|
features["hull_area_to_bbox_ratio"] = 0.0
|
||||||
features["hull_area_to_bbox_ratio"] = 0
|
|
||||||
|
|
||||||
if len(df_holds) >= 2:
|
if len(df_holds) >= 2:
|
||||||
points = np.column_stack([xs, ys])
|
pairwise = pdist(points)
|
||||||
distances = pdist(points)
|
features["mean_pairwise_distance"] = float(np.mean(pairwise))
|
||||||
features["min_nn_distance"] = np.min(distances)
|
features["std_pairwise_distance"] = float(np.std(pairwise))
|
||||||
features["mean_nn_distance"] = np.mean(distances)
|
|
||||||
features["max_nn_distance"] = np.max(distances)
|
|
||||||
features["std_nn_distance"] = np.std(distances)
|
|
||||||
else:
|
else:
|
||||||
features["min_nn_distance"] = 0
|
features["mean_pairwise_distance"] = 0.0
|
||||||
features["mean_nn_distance"] = 0
|
features["std_pairwise_distance"] = 0.0
|
||||||
features["max_nn_distance"] = 0
|
|
||||||
features["std_nn_distance"] = 0
|
|
||||||
|
|
||||||
if len(df_holds) >= 3:
|
|
||||||
points = np.column_stack([xs, ys])
|
|
||||||
dist_matrix = squareform(pdist(points))
|
|
||||||
threshold = 12.0
|
|
||||||
neighbors_count = (dist_matrix < threshold).sum(axis=1) - 1
|
|
||||||
features["mean_neighbors_12in"] = np.mean(neighbors_count)
|
|
||||||
features["max_neighbors_12in"] = np.max(neighbors_count)
|
|
||||||
avg_neighbors = np.mean(neighbors_count)
|
|
||||||
max_possible = len(df_holds) - 1
|
|
||||||
features["clustering_ratio"] = avg_neighbors / max_possible if max_possible > 0 else 0
|
|
||||||
else:
|
|
||||||
features["mean_neighbors_12in"] = 0
|
|
||||||
features["max_neighbors_12in"] = 0
|
|
||||||
features["clustering_ratio"] = 0
|
|
||||||
|
|
||||||
if len(df_holds) >= 2:
|
if len(df_holds) >= 2:
|
||||||
sorted_indices = np.argsort(ys)
|
sorted_idx = np.argsort(ys)
|
||||||
sorted_points = np.column_stack([xs[sorted_indices], ys[sorted_indices]])
|
sorted_points = points[sorted_idx]
|
||||||
|
path_length = 0.0
|
||||||
path_length = 0
|
|
||||||
for i in range(len(sorted_points) - 1):
|
for i in range(len(sorted_points) - 1):
|
||||||
dx = sorted_points[i + 1, 0] - sorted_points[i, 0]
|
dx = sorted_points[i + 1, 0] - sorted_points[i, 0]
|
||||||
dy = sorted_points[i + 1, 1] - sorted_points[i, 1]
|
dy = sorted_points[i + 1, 1] - sorted_points[i, 1]
|
||||||
path_length += np.sqrt(dx ** 2 + dy ** 2)
|
path_length += np.sqrt(dx ** 2 + dy ** 2)
|
||||||
|
|
||||||
features["path_length_vertical"] = path_length
|
features["path_length_vertical"] = float(path_length)
|
||||||
features["path_efficiency"] = features["height_gained"] / max(path_length, 1)
|
features["path_efficiency"] = float(features["height_gained"] / max(path_length, 1))
|
||||||
else:
|
else:
|
||||||
features["path_length_vertical"] = 0
|
features["path_length_vertical"] = 0.0
|
||||||
features["path_efficiency"] = 0
|
features["path_efficiency"] = 0.0
|
||||||
|
|
||||||
if pd.notna(features["finish_difficulty"]) and pd.notna(features["start_difficulty"]):
|
# Normalized / relative
|
||||||
features["difficulty_gradient"] = features["finish_difficulty"] - features["start_difficulty"]
|
features["mean_y_normalized"] = float((features["mean_y"] - y_min) / board_height)
|
||||||
else:
|
features["start_height_normalized"] = float((start_height - y_min) / board_height) if pd.notna(start_height) else np.nan
|
||||||
features["difficulty_gradient"] = np.nan
|
features["finish_height_normalized"] = float((finish_height - y_min) / board_height) if pd.notna(finish_height) else np.nan
|
||||||
|
features["mean_y_relative_to_start"] = float(features["mean_y"] - start_height) if pd.notna(start_height) else np.nan
|
||||||
|
features["spread_x_normalized"] = float(features["range_x"] / board_width)
|
||||||
|
features["spread_y_normalized"] = float(features["range_y"] / board_height)
|
||||||
|
|
||||||
if len(difficulties) > 0:
|
y_q75 = np.percentile(ys, 75)
|
||||||
y_min_val, y_max_val = ys.min(), ys.max()
|
y_q25 = np.percentile(ys, 25)
|
||||||
y_range = y_max_val - y_min_val
|
features["y_q75"] = float(y_q75)
|
||||||
|
features["y_iqr"] = float(y_q75 - y_q25)
|
||||||
|
|
||||||
if y_range > 0:
|
# Engineered clean features
|
||||||
lower_mask = ys <= (y_min_val + y_range / 3)
|
features["complexity_score"] = float(
|
||||||
middle_mask = (ys > y_min_val + y_range / 3) & (ys <= y_min_val + 2 * y_range / 3)
|
features["mean_hand_reach"]
|
||||||
upper_mask = ys > (y_min_val + 2 * y_range / 3)
|
* np.log1p(features["total_holds"])
|
||||||
|
* (1 + features["hold_density"])
|
||||||
df_with_diff = df_holds.copy()
|
)
|
||||||
df_with_diff["lower"] = lower_mask
|
features["angle_x_holds"] = float(features["angle"] * features["total_holds"])
|
||||||
df_with_diff["middle"] = middle_mask
|
|
||||||
df_with_diff["upper"] = upper_mask
|
|
||||||
|
|
||||||
lower_diffs = df_with_diff[df_with_diff["lower"] & df_with_diff["difficulty"].notna()]["difficulty"]
|
|
||||||
middle_diffs = df_with_diff[df_with_diff["middle"] & df_with_diff["difficulty"].notna()]["difficulty"]
|
|
||||||
upper_diffs = df_with_diff[df_with_diff["upper"] & df_with_diff["difficulty"].notna()]["difficulty"]
|
|
||||||
|
|
||||||
features["lower_region_difficulty"] = lower_diffs.mean() if len(lower_diffs) > 0 else np.nan
|
|
||||||
features["middle_region_difficulty"] = middle_diffs.mean() if len(middle_diffs) > 0 else np.nan
|
|
||||||
features["upper_region_difficulty"] = upper_diffs.mean() if len(upper_diffs) > 0 else np.nan
|
|
||||||
|
|
||||||
if pd.notna(features["lower_region_difficulty"]) and pd.notna(features["upper_region_difficulty"]):
|
|
||||||
features["difficulty_progression"] = features["upper_region_difficulty"] - features["lower_region_difficulty"]
|
|
||||||
else:
|
|
||||||
features["difficulty_progression"] = np.nan
|
|
||||||
else:
|
|
||||||
features["lower_region_difficulty"] = features["mean_hold_difficulty"]
|
|
||||||
features["middle_region_difficulty"] = features["mean_hold_difficulty"]
|
|
||||||
features["upper_region_difficulty"] = features["mean_hold_difficulty"]
|
|
||||||
features["difficulty_progression"] = 0
|
|
||||||
else:
|
|
||||||
features["lower_region_difficulty"] = np.nan
|
|
||||||
features["middle_region_difficulty"] = np.nan
|
|
||||||
features["upper_region_difficulty"] = np.nan
|
|
||||||
features["difficulty_progression"] = np.nan
|
|
||||||
|
|
||||||
if len(hand_holds) >= 2 and len(hand_diffs) >= 2:
|
|
||||||
hand_sorted = hand_holds.sort_values("y")
|
|
||||||
hand_diff_sorted = hand_sorted["difficulty"].dropna().values
|
|
||||||
|
|
||||||
if len(hand_diff_sorted) >= 2:
|
|
||||||
difficulty_jumps = np.abs(np.diff(hand_diff_sorted))
|
|
||||||
features["max_difficulty_jump"] = np.max(difficulty_jumps) if len(difficulty_jumps) > 0 else 0
|
|
||||||
features["mean_difficulty_jump"] = np.mean(difficulty_jumps) if len(difficulty_jumps) > 0 else 0
|
|
||||||
else:
|
|
||||||
features["max_difficulty_jump"] = 0
|
|
||||||
features["mean_difficulty_jump"] = 0
|
|
||||||
else:
|
|
||||||
features["max_difficulty_jump"] = 0
|
|
||||||
features["mean_difficulty_jump"] = 0
|
|
||||||
|
|
||||||
if len(hand_holds) >= 2 and len(hand_diffs) >= 2:
|
|
||||||
hand_sorted = hand_holds.sort_values("y")
|
|
||||||
xs_sorted = hand_sorted["x"].values
|
|
||||||
ys_sorted = hand_sorted["y"].values
|
|
||||||
diffs_sorted = hand_sorted["difficulty"].fillna(np.mean(hand_diffs)).values
|
|
||||||
|
|
||||||
weighted_reach = []
|
|
||||||
for i in range(len(hand_sorted) - 1):
|
|
||||||
dx = xs_sorted[i + 1] - xs_sorted[i]
|
|
||||||
dy = ys_sorted[i + 1] - ys_sorted[i]
|
|
||||||
dist = np.sqrt(dx**2 + dy**2)
|
|
||||||
avg_diff = (diffs_sorted[i] + diffs_sorted[i + 1]) / 2
|
|
||||||
weighted_reach.append(dist * avg_diff)
|
|
||||||
|
|
||||||
features["difficulty_weighted_reach"] = np.mean(weighted_reach) if weighted_reach else 0
|
|
||||||
features["max_weighted_reach"] = np.max(weighted_reach) if weighted_reach else 0
|
|
||||||
else:
|
|
||||||
features["difficulty_weighted_reach"] = 0
|
|
||||||
features["max_weighted_reach"] = 0
|
|
||||||
|
|
||||||
features["mean_x_normalized"] = (features["mean_x"] - x_min) / board_width
|
|
||||||
features["mean_y_normalized"] = (features["mean_y"] - y_min) / board_height
|
|
||||||
features["std_x_normalized"] = features["std_x"] / board_width
|
|
||||||
features["std_y_normalized"] = features["std_y"] / board_height
|
|
||||||
|
|
||||||
if pd.notna(features["start_height"]):
|
|
||||||
features["start_height_normalized"] = (features["start_height"] - y_min) / board_height
|
|
||||||
else:
|
|
||||||
features["start_height_normalized"] = np.nan
|
|
||||||
|
|
||||||
if pd.notna(features["finish_height"]):
|
|
||||||
features["finish_height_normalized"] = (features["finish_height"] - y_min) / board_height
|
|
||||||
else:
|
|
||||||
features["finish_height_normalized"] = np.nan
|
|
||||||
|
|
||||||
typical_start_y = y_min + board_height * 0.15
|
|
||||||
typical_finish_y = y_min + board_height * 0.85
|
|
||||||
|
|
||||||
if pd.notna(features["start_height"]):
|
|
||||||
features["start_offset_from_typical"] = abs(features["start_height"] - typical_start_y)
|
|
||||||
else:
|
|
||||||
features["start_offset_from_typical"] = np.nan
|
|
||||||
|
|
||||||
if pd.notna(features["finish_height"]):
|
|
||||||
features["finish_offset_from_typical"] = abs(features["finish_height"] - typical_finish_y)
|
|
||||||
else:
|
|
||||||
features["finish_offset_from_typical"] = np.nan
|
|
||||||
|
|
||||||
if len(start_holds) > 0:
|
|
||||||
start_y = start_holds["y"].mean()
|
|
||||||
features["mean_y_relative_to_start"] = features["mean_y"] - start_y
|
|
||||||
features["max_y_relative_to_start"] = features["max_y"] - start_y
|
|
||||||
else:
|
|
||||||
features["mean_y_relative_to_start"] = np.nan
|
|
||||||
features["max_y_relative_to_start"] = np.nan
|
|
||||||
|
|
||||||
features["spread_x_normalized"] = features["range_x"] / board_width
|
|
||||||
features["spread_y_normalized"] = features["range_y"] / board_height
|
|
||||||
features["bbox_coverage_x"] = features["range_x"] / board_width
|
|
||||||
features["bbox_coverage_y"] = features["range_y"] / board_height
|
|
||||||
|
|
||||||
y_quartiles = np.percentile(ys, [25, 50, 75])
|
|
||||||
features["y_q25"] = y_quartiles[0]
|
|
||||||
features["y_q50"] = y_quartiles[1]
|
|
||||||
features["y_q75"] = y_quartiles[2]
|
|
||||||
features["y_iqr"] = y_quartiles[2] - y_quartiles[0]
|
|
||||||
|
|
||||||
features["holds_bottom_quartile"] = (ys < y_quartiles[0]).sum()
|
|
||||||
features["holds_top_quartile"] = (ys >= y_quartiles[2]).sum()
|
|
||||||
|
|
||||||
return features
|
return features
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user