Add web demo polish and smoke-test pipeline
This commit is contained in:
@@ -104,6 +104,12 @@ Examples:
|
||||
default=3,
|
||||
help="Random seed for reproducible splits (default: 3)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-routes-per-board",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Optional smoke-test row limit per board before tokenization.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
@@ -121,6 +127,8 @@ def main() -> None:
|
||||
8. Save all artifacts to disk
|
||||
"""
|
||||
args = parse_args()
|
||||
if args.max_routes_per_board is not None and args.max_routes_per_board < 3:
|
||||
raise ValueError("--max-routes-per-board must be at least 3 so train/val/test splits can exist.")
|
||||
|
||||
# Set random seed for reproducibility
|
||||
# This ensures train/val/test splits are the same across runs
|
||||
@@ -165,7 +173,13 @@ def main() -> None:
|
||||
# placement 369 with role 6 (middle)
|
||||
# placement 603 with role 7 (finish)
|
||||
print("\nLoading data from databases...")
|
||||
df_climbs, df_placements = load_multi_board_data(configs, project_root=REPO_ROOT)
|
||||
if args.max_routes_per_board is not None:
|
||||
print(f" Smoke-test limit: loading at most {args.max_routes_per_board:,} climb-angle rows per board")
|
||||
df_climbs, df_placements = load_multi_board_data(
|
||||
configs,
|
||||
project_root=REPO_ROOT,
|
||||
max_climbs_per_board=args.max_routes_per_board,
|
||||
)
|
||||
placement_lookup = make_placement_lookup(df_placements)
|
||||
|
||||
print(f" Total climb-angle entries: {len(df_climbs):,}")
|
||||
|
||||
@@ -62,8 +62,11 @@ from climbingboardgpt.datasets import RouteGradeDataset
|
||||
from climbingboardgpt.grades import to_grouped_v
|
||||
from climbingboardgpt.metrics import metrics_by_board, print_metrics, regression_metrics
|
||||
from climbingboardgpt.models import JointRouteTransformerRegressor
|
||||
from climbingboardgpt.tokenization import encode as encode_tokens
|
||||
from climbingboardgpt.utils import set_seed, write_json
|
||||
|
||||
# Shared mean-squared-error criterion, built once at module import; run_epoch
# calls it to compute each batch's loss.
MSE_LOSS = nn.MSELoss()
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
"""Parse command-line arguments for grade predictor training.
|
||||
@@ -101,9 +104,31 @@ accuracy (within ±1 V-grade).
|
||||
parser.add_argument("--dropout", type=float, default=0.10, help="Dropout probability")
|
||||
parser.add_argument("--seed", type=int, default=3, help="Random seed")
|
||||
parser.add_argument("--device", type=str, default=None, help="Device (cpu or cuda)")
|
||||
parser.add_argument("--num-workers", type=int, default=0, help="DataLoader worker processes")
|
||||
parser.add_argument(
|
||||
"--smoke-test",
|
||||
action="store_true",
|
||||
help="Use a tiny CPU model and one epoch to exercise the training/evaluation code path.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def apply_smoke_test_defaults(args: argparse.Namespace) -> None:
    """Shrink the parsed options to a minimal CPU run when ``--smoke-test`` is set.

    The namespace is modified in place and nothing is returned. When
    ``args.smoke_test`` is falsy the namespace is left exactly as parsed.

    Args:
        args: Parsed command-line options for grade predictor training.
    """
    if args.smoke_test:
        # One epoch with patience 1 is enough to walk the train/eval path.
        args.epochs = args.patience = 1
        # Cap the batch size at 16 without enlarging a smaller requested one.
        args.batch_size = min(args.batch_size, 16)

        # Smallest transformer shape used for the code-path check.
        tiny_model = (
            ("d_model", 32),
            ("nhead", 2),
            ("num_layers", 1),
            ("dim_feedforward", 64),
            ("dropout", 0.0),
        )
        for option, value in tiny_model:
            setattr(args, option, value)

        # Always run on CPU with in-process data loading.
        args.device, args.num_workers = "cpu", 0
|
||||
|
||||
|
||||
def build_coord_features(df_token_meta: pd.DataFrame, vocab_size: int) -> torch.Tensor:
|
||||
"""Build coordinate feature matrix for the transformer model.
|
||||
|
||||
@@ -148,9 +173,8 @@ def run_epoch(model, loader, device, optimizer=None):
|
||||
"""
|
||||
is_train = optimizer is not None
|
||||
model.train(is_train)
|
||||
criterion = nn.MSELoss()
|
||||
|
||||
losses, preds, targets, uuids, boards = [], [], [], [], []
|
||||
losses, preds, targets, row_ids, uuids, boards = [], [], [], [], [], []
|
||||
|
||||
for batch in loader:
|
||||
input_ids = batch["input_ids"].to(device)
|
||||
@@ -162,7 +186,7 @@ def run_epoch(model, loader, device, optimizer=None):
|
||||
|
||||
# Forward pass: model predicts difficulty from token sequence
|
||||
pred = model(input_ids, attention_mask)
|
||||
loss = criterion(pred, target)
|
||||
loss = MSE_LOSS(pred, target)
|
||||
|
||||
if is_train:
|
||||
# Backward pass: compute gradients and update weights
|
||||
@@ -174,11 +198,12 @@ def run_epoch(model, loader, device, optimizer=None):
|
||||
losses.append(loss.item() * input_ids.size(0))
|
||||
preds.extend(pred.detach().cpu().numpy().tolist())
|
||||
targets.extend(target.detach().cpu().numpy().tolist())
|
||||
row_ids.extend(batch["row_id"].detach().cpu().numpy().tolist())
|
||||
uuids.extend(batch["uuid"])
|
||||
boards.extend(batch["board_key"])
|
||||
|
||||
avg_loss = sum(losses) / max(1, len(loader.dataset))
|
||||
return avg_loss, np.asarray(preds), np.asarray(targets), uuids, boards
|
||||
return avg_loss, np.asarray(preds), np.asarray(targets), row_ids, uuids, boards
|
||||
|
||||
|
||||
def main() -> None:
|
||||
@@ -195,6 +220,7 @@ def main() -> None:
|
||||
8. Save model checkpoint and metrics
|
||||
"""
|
||||
args = parse_args()
|
||||
apply_smoke_test_defaults(args)
|
||||
set_seed(args.seed)
|
||||
args.out_dir.mkdir(parents=True, exist_ok=True)
|
||||
args.model_dir.mkdir(parents=True, exist_ok=True)
|
||||
@@ -215,7 +241,7 @@ def main() -> None:
|
||||
df_token_meta = pd.read_csv(meta_path)
|
||||
|
||||
pad_id = stoi["<PAD>"]
|
||||
unk_id = stoi["<UNK>"]
|
||||
device = torch.device(args.device or ("cuda" if torch.cuda.is_available() else "cpu"))
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# Step 2: Prepare input sequences
|
||||
@@ -223,15 +249,13 @@ def main() -> None:
|
||||
# For grade prediction, we use the "no_grade" version of the sequence
|
||||
# and prepend <CLS> for sequence-level pooling.
|
||||
# The model must PREDICT the grade, not see it in the input!
|
||||
def encode(tokens):
|
||||
return [stoi.get(token, unk_id) for token in tokens]
|
||||
|
||||
df_routes["tokens_no_grade"] = df_routes["sequence_no_grade"].fillna("").str.split()
|
||||
df_routes["model_tokens"] = df_routes["tokens_no_grade"].apply(
|
||||
lambda tokens: ["<CLS>"] + tokens[1:] if tokens else ["<CLS>"]
|
||||
)
|
||||
df_routes["model_ids"] = df_routes["model_tokens"].apply(encode)
|
||||
df_routes["model_ids"] = df_routes["model_tokens"].apply(lambda tokens: encode_tokens(tokens, stoi))
|
||||
df_routes["seq_len"] = df_routes["model_ids"].apply(len)
|
||||
df_routes["row_id"] = np.arange(len(df_routes), dtype=np.int64)
|
||||
max_len = int(df_routes["seq_len"].max())
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
@@ -245,14 +269,17 @@ def main() -> None:
|
||||
val_ds = RouteGradeDataset(val_df, max_len=max_len, pad_id=pad_id)
|
||||
test_ds = RouteGradeDataset(test_df, max_len=max_len, pad_id=pad_id)
|
||||
|
||||
train_loader = DataLoader(train_ds, batch_size=args.batch_size, shuffle=True)
|
||||
val_loader = DataLoader(val_ds, batch_size=args.batch_size, shuffle=False)
|
||||
test_loader = DataLoader(test_ds, batch_size=args.batch_size, shuffle=False)
|
||||
loader_kwargs = {
|
||||
"num_workers": int(args.num_workers),
|
||||
"pin_memory": device.type == "cuda",
|
||||
}
|
||||
train_loader = DataLoader(train_ds, batch_size=args.batch_size, shuffle=True, **loader_kwargs)
|
||||
val_loader = DataLoader(val_ds, batch_size=args.batch_size, shuffle=False, **loader_kwargs)
|
||||
test_loader = DataLoader(test_ds, batch_size=args.batch_size, shuffle=False, **loader_kwargs)
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# Step 4: Initialize model
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
device = torch.device(args.device or ("cuda" if torch.cuda.is_available() else "cpu"))
|
||||
coord_features = build_coord_features(df_token_meta, vocab_size=len(stoi))
|
||||
|
||||
model = JointRouteTransformerRegressor(
|
||||
@@ -286,8 +313,8 @@ def main() -> None:
|
||||
|
||||
print("\nStarting training...")
|
||||
for epoch in range(1, args.epochs + 1):
|
||||
train_loss, train_pred, train_true, _, _ = run_epoch(model, train_loader, device, optimizer)
|
||||
val_loss, val_pred, val_true, _, _ = run_epoch(model, val_loader, device, optimizer=None)
|
||||
train_loss, train_pred, train_true, _, _, _ = run_epoch(model, train_loader, device, optimizer)
|
||||
val_loss, val_pred, val_true, _, _, _ = run_epoch(model, val_loader, device, optimizer=None)
|
||||
|
||||
train_metrics = regression_metrics(train_true, train_pred)
|
||||
val_metrics = regression_metrics(val_true, val_pred)
|
||||
@@ -332,10 +359,11 @@ def main() -> None:
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# Step 6: Test set evaluation
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
test_loss, test_pred, test_true, test_uuid, test_board = run_epoch(model, test_loader, device, optimizer=None)
|
||||
test_loss, test_pred, test_true, test_row_id, test_uuid, test_board = run_epoch(model, test_loader, device, optimizer=None)
|
||||
overall_metrics = regression_metrics(test_true, test_pred)
|
||||
|
||||
pred_df = pd.DataFrame({
|
||||
"row_id": test_row_id,
|
||||
"uuid": test_uuid,
|
||||
"board_key": test_board,
|
||||
"y_true": test_true,
|
||||
@@ -344,13 +372,15 @@ def main() -> None:
|
||||
"true_v": [to_grouped_v(value) for value in test_true],
|
||||
"pred_v": [to_grouped_v(value) for value in test_pred],
|
||||
})
|
||||
pred_df = pred_df.merge(
|
||||
df_routes[["uuid", "climb_name", "angle", "boulder_grade", "sequence_no_grade"]],
|
||||
on="uuid",
|
||||
how="left",
|
||||
)
|
||||
board_metrics_df = metrics_by_board(pred_df)
|
||||
|
||||
pred_df = pred_df.merge(
|
||||
df_routes[["row_id", "climb_name", "angle", "boulder_grade", "sequence_no_grade"]],
|
||||
on="row_id",
|
||||
how="left",
|
||||
validate="one_to_one",
|
||||
)
|
||||
|
||||
print_metrics("Overall joint test performance", overall_metrics)
|
||||
print("\nBoard-specific test performance:")
|
||||
print(board_metrics_df.to_string(index=False))
|
||||
@@ -390,4 +420,4 @@ def main() -> None:
|
||||
|
||||
|
||||
# Script entry point: run the pipeline exactly once, and only when this file
# is executed directly rather than imported.
if __name__ == "__main__":
    main()
|
||||
|
||||
@@ -63,6 +63,7 @@ from climbingboardgpt.config import load_board_configs, parse_board_keys
|
||||
from climbingboardgpt.datasets import RouteGPTDataset
|
||||
from climbingboardgpt.generation import generate_one
|
||||
from climbingboardgpt.models import JointRouteGPT
|
||||
from climbingboardgpt.tokenization import encode as encode_tokens
|
||||
from climbingboardgpt.utils import set_seed
|
||||
|
||||
|
||||
@@ -106,9 +107,32 @@ specific board, or leave unset to generate for all boards.
|
||||
parser.add_argument("--generate-grades", type=str, default=None, help="Comma-separated V-grades")
|
||||
parser.add_argument("--seed", type=int, default=3, help="Random seed")
|
||||
parser.add_argument("--device", type=str, default=None, help="Device (cpu or cuda)")
|
||||
parser.add_argument("--num-workers", type=int, default=0, help="DataLoader worker processes")
|
||||
parser.add_argument(
|
||||
"--smoke-test",
|
||||
action="store_true",
|
||||
help="Use a tiny CPU model, one epoch, and a tiny generation grid to exercise the full code path.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def apply_smoke_test_defaults(args: argparse.Namespace) -> None:
    """Shrink the parsed options to a minimal CPU run when ``--smoke-test`` is set.

    The namespace is modified in place and nothing is returned. When
    ``args.smoke_test`` is falsy the namespace is left exactly as parsed.

    Args:
        args: Parsed command-line options for GPT training and generation.
    """
    if args.smoke_test:
        # One epoch with patience 1 is enough to walk the training path.
        args.epochs = args.patience = 1
        # Cap the batch size at 16 without enlarging a smaller requested one.
        args.batch_size = min(args.batch_size, 16)

        # Smallest GPT shape used for the code-path check.
        tiny_model = (
            ("n_embd", 32),
            ("n_head", 2),
            ("n_layer", 1),
            ("dropout", 0.0),
        )
        for option, value in tiny_model:
            setattr(args, option, value)

        # Keep generation tiny: at most 16 new tokens, one sample per condition.
        args.max_new_tokens = min(args.max_new_tokens, 16)
        args.n_per_condition = 1

        # Always run on CPU with in-process data loading.
        args.device, args.num_workers = "cpu", 0
|
||||
|
||||
|
||||
def evaluate_loss(model, loader, device) -> float:
|
||||
"""Evaluate the model on a data loader, returning average loss.
|
||||
|
||||
@@ -168,6 +192,7 @@ def main() -> None:
|
||||
7. Save model checkpoint and generated routes
|
||||
"""
|
||||
args = parse_args()
|
||||
apply_smoke_test_defaults(args)
|
||||
set_seed(args.seed)
|
||||
args.out_dir.mkdir(parents=True, exist_ok=True)
|
||||
args.model_dir.mkdir(parents=True, exist_ok=True)
|
||||
@@ -185,7 +210,7 @@ def main() -> None:
|
||||
stoi = {str(k): int(v) for k, v in vocab["stoi"].items()}
|
||||
itos = {int(k): str(v) for k, v in vocab["itos"].items()}
|
||||
pad_id = stoi["<PAD>"]
|
||||
unk_id = stoi["<UNK>"]
|
||||
device = torch.device(args.device or ("cuda" if torch.cuda.is_available() else "cpu"))
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# Step 2: Prepare sequences for causal language modeling
|
||||
@@ -198,11 +223,8 @@ def main() -> None:
|
||||
#
|
||||
# The input is shifted right by one position compared to the target.
|
||||
# This is the standard causal language modeling setup.
|
||||
def encode(tokens):
|
||||
return [stoi.get(token, unk_id) for token in tokens]
|
||||
|
||||
df_routes["gpt_tokens"] = df_routes["sequence_with_grade"].fillna("").str.split()
|
||||
df_routes["gpt_ids"] = df_routes["gpt_tokens"].apply(encode)
|
||||
df_routes["gpt_ids"] = df_routes["gpt_tokens"].apply(lambda tokens: encode_tokens(tokens, stoi))
|
||||
df_routes["seq_len"] = df_routes["gpt_ids"].apply(len)
|
||||
max_len = int(df_routes["seq_len"].max())
|
||||
if max_len < 2:
|
||||
@@ -220,14 +242,17 @@ def main() -> None:
|
||||
val_ds = RouteGPTDataset(val_df, max_len=max_len, pad_id=pad_id)
|
||||
test_ds = RouteGPTDataset(test_df, max_len=max_len, pad_id=pad_id)
|
||||
|
||||
train_loader = DataLoader(train_ds, batch_size=args.batch_size, shuffle=True)
|
||||
val_loader = DataLoader(val_ds, batch_size=args.batch_size, shuffle=False)
|
||||
test_loader = DataLoader(test_ds, batch_size=args.batch_size, shuffle=False)
|
||||
loader_kwargs = {
|
||||
"num_workers": int(args.num_workers),
|
||||
"pin_memory": device.type == "cuda",
|
||||
}
|
||||
train_loader = DataLoader(train_ds, batch_size=args.batch_size, shuffle=True, **loader_kwargs)
|
||||
val_loader = DataLoader(val_ds, batch_size=args.batch_size, shuffle=False, **loader_kwargs)
|
||||
test_loader = DataLoader(test_ds, batch_size=args.batch_size, shuffle=False, **loader_kwargs)
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# Step 4: Initialize model
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
device = torch.device(args.device or ("cuda" if torch.cuda.is_available() else "cpu"))
|
||||
model = JointRouteGPT(
|
||||
vocab_size=len(stoi),
|
||||
block_size=block_size,
|
||||
@@ -385,4 +410,4 @@ def main() -> None:
|
||||
|
||||
|
||||
# Script entry point: run the pipeline exactly once, and only when this file
# is executed directly rather than imported.
if __name__ == "__main__":
    main()
|
||||
|
||||
@@ -49,6 +49,7 @@ from climbingboardgpt.evaluation import (
|
||||
tokens_to_hold_records,
|
||||
validity_from_records,
|
||||
)
|
||||
from climbingboardgpt.checkpoints import load_checkpoint
|
||||
from climbingboardgpt.grades import to_grouped_v
|
||||
from climbingboardgpt.models import JointRouteTransformerRegressor
|
||||
|
||||
@@ -86,10 +87,7 @@ def load_grade_critic(model_path: Path, device: torch.device):
|
||||
"""
|
||||
if not model_path.exists():
|
||||
return None
|
||||
try:
|
||||
checkpoint = torch.load(model_path, map_location=device, weights_only=False)
|
||||
except TypeError:
|
||||
checkpoint = torch.load(model_path, map_location=device)
|
||||
checkpoint = load_checkpoint(model_path, map_location=device, trusted=True)
|
||||
|
||||
cfg = checkpoint["config"]
|
||||
stoi = {str(k): int(v) for k, v in checkpoint["stoi"].items()}
|
||||
@@ -333,4 +331,4 @@ def main() -> None:
|
||||
|
||||
|
||||
# Script entry point: run the pipeline exactly once, and only when this file
# is executed directly rather than imported.
if __name__ == "__main__":
    main()
|
||||
|
||||
@@ -88,7 +88,7 @@ def parse_args() -> argparse.Namespace:
|
||||
help=(
|
||||
"Optional board image to draw under the scatter plot. "
|
||||
"If omitted, the script automatically uses images/tb2_board_12x12_composite.png "
|
||||
"for TB2 and images/kilter-original-16x12_compose.png for Kilter when present."
|
||||
"for TB2 and images/kilter-original-16x12_composite.png for Kilter when present."
|
||||
),
|
||||
)
|
||||
return parser.parse_args()
|
||||
@@ -98,7 +98,7 @@ def parse_args() -> argparse.Namespace:
|
||||
def default_background_for_board(board: str) -> Path | None:
    """Return the default background image for ``board`` if it exists on disk.

    Args:
        board: Board key. Only ``"tb2"`` and ``"kilter"`` have a default image.

    Returns:
        The image path under ``REPO_ROOT / "images"`` when the board key is
        known and the file is present; otherwise ``None``.
    """
    # One entry per board. The stale "kilter-original-16x12_compose.png"
    # entry is dropped: it was overwritten by the later "kilter" key anyway.
    candidates = {
        "tb2": REPO_ROOT / "images" / "tb2_board_12x12_composite.png",
        "kilter": REPO_ROOT / "images" / "kilter-original-16x12_composite.png",
    }
    path = candidates.get(board)
    # A missing file is treated the same as an unknown board.
    return path if path is not None and path.exists() else None
|
||||
|
||||
@@ -48,7 +48,7 @@ from climbingboardgpt.visualization import load_token_metadata, visualize_route_
|
||||
def default_background_for_board(board: str) -> Path | None:
    """Return the default background image for ``board`` if it exists on disk.

    Args:
        board: Board key. Only ``"tb2"`` and ``"kilter"`` have a default image.

    Returns:
        The image path under ``REPO_ROOT / "images"`` when the board key is
        known and the file is present; otherwise ``None``.
    """
    # One entry per board. The stale "kilter-original-16x12_compose.png"
    # entry is dropped: it was overwritten by the later "kilter" key anyway.
    candidates = {
        "tb2": REPO_ROOT / "images" / "tb2_board_12x12_composite.png",
        "kilter": REPO_ROOT / "images" / "kilter-original-16x12_composite.png",
    }
    path = candidates.get(board)
    # A missing file is treated the same as an unknown board.
    return path if path is not None and path.exists() else None
|
||||
|
||||
Reference in New Issue
Block a user