From ecc47230137918afcfd61bec79fecb7c90555e01 Mon Sep 17 00:00:00 2001 From: Pawel Date: Fri, 22 May 2026 14:22:54 -0400 Subject: [PATCH] 2026-05-22 minor update --- .gitignore | 3 ++- README.md | 12 ++++++++++++ notebooks/01_unified_route_tokenization.ipynb | 2 +- pyproject.toml | 2 +- Dockerfile.webapp => webapp/Dockerfile | 2 +- webapp/static/index.html | 4 +++- 6 files changed, 20 insertions(+), 5 deletions(-) rename Dockerfile.webapp => webapp/Dockerfile (98%) diff --git a/.gitignore b/.gitignore index 1cadb04..3d49109 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ models/* images/* notebooks/*_executed.ipynb src/climbingboardgpt/__pycache__ -outputs/ \ No newline at end of file +outputs/ +webapp/__pycache__ \ No newline at end of file diff --git a/README.md b/README.md index baff419..dd1d981 100644 --- a/README.md +++ b/README.md @@ -160,6 +160,10 @@ ClimbingBoardGPT/ │ ├── joint_transformer_grade_predictor.pth │ └── joint_route_gpt_generator.pth ├── notebooks/ +│ ├── 01_unified_route_tokenization.ipynb +│ ├── 02_joint_transformer_grade_prediction.ipynb +│ ├── 03_joint_route_generator.ipynb +│ └── 04_generated_route_evaluation.ipynb ├── scripts/ │ ├── 01_tokenize_routes.py │ ├── 02_train_grade_predictor.py @@ -172,6 +176,14 @@ ClimbingBoardGPT/ │ ├── demo_predict_tb2.py │ └── demo_predict_kilter.py ├── src/climbingboardgpt/ +├── webapp/ +│ ├── app.py +│ ├── app.css +│ ├── app.js +│ ├── index.html +│ └── Dockerfile +├── docker-compose.webapp.yml +├── LICENSE ├── README.md ├── requirements.txt └── pyproject.toml diff --git a/notebooks/01_unified_route_tokenization.ipynb b/notebooks/01_unified_route_tokenization.ipynb index 3575001..f3f9a3a 100644 --- a/notebooks/01_unified_route_tokenization.ipynb +++ b/notebooks/01_unified_route_tokenization.ipynb @@ -9,7 +9,7 @@ "\n", "## What is tokenization and why does it matter?\n", "\n", - "In natural language processing, **tokenization** is the process of converting raw text into a sequence of discrete 
symbols (tokens) that a model can process. For example, the sentence \"I love climbing\" might be tokenized as `[\"I\", \" love\", \" climbing\"]` using a subword tokenizer like BPE.\n", + "In natural language processing, **tokenization** is the process of converting raw text into a sequence of discrete symbols (tokens) that a model can process. For example, the sentence \"I climb rocks\" might be tokenized as `[\"I\", \" climb\", \" rocks\"]` using a subword tokenizer like BPE.\n", "\n", "For climbing board routes, we face an analogous problem: how do we convert a climb — which is fundamentally a *set of holds at specific positions with specific roles* — into a sequence of tokens that a transformer can learn from?\n", "\n", diff --git a/pyproject.toml b/pyproject.toml index b0e2c07..48d02c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "climbingboardgpt" version = "0.2.1" description = "Unified TB2/Kilter transformer route modeling, grade prediction, and GPT-style route generation." readme = "README.md" -requires-python = "" +requires-python = ">=3.12" dependencies = [ "numpy", "pandas", diff --git a/Dockerfile.webapp b/webapp/Dockerfile similarity index 98% rename from Dockerfile.webapp rename to webapp/Dockerfile index e1a3ed9..b417790 100644 --- a/Dockerfile.webapp +++ b/webapp/Dockerfile @@ -21,4 +21,4 @@ RUN pip install --no-cache-dir --upgrade pip \ EXPOSE 8055 -CMD ["uvicorn", "webapp.app:app", "--host", "0.0.0.0", "--port", "8055"] +CMD ["uvicorn", "webapp.app:app", "--host", "0.0.0.0", "--port", "8055"] \ No newline at end of file diff --git a/webapp/static/index.html b/webapp/static/index.html index 18eda8f..121b0a8 100644 --- a/webapp/static/index.html +++ b/webapp/static/index.html @@ -109,10 +109,12 @@

Links