import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

# Load the data
housing = fetch_california_housing()
X = housing.data          # shape (20640, 8)
y = housing.target       # shape (20640,)
feature_names = housing.feature_names

# Convert to DataFrame for convenience
df = pd.DataFrame(X, columns=feature_names)
df['MedHouseVal'] = y

print(f"Data shape: {df.shape}")
df.head()

Data shape: (20640, 9)

# Basic statistics
df.describe()

# Illustrate the three-way split
fig, ax = plt.subplots(figsize=(12, 3))

# Create rectangles for each split
ax.barh(0, 60, left=0, height=0.5, color='blue', alpha=0.7, label='Training (60%)')
ax.barh(0, 20, left=60, height=0.5, color='orange', alpha=0.7, label='Validation (20%)')
ax.barh(0, 20, left=80, height=0.5, color='red', alpha=0.7, label='Test (20%)')

# Add labels
ax.text(30, 0, 'Train Model\nParameters', ha='center', va='center', fontsize=10, fontweight='bold')
ax.text(70, 0, 'Tune\nHyperparams', ha='center', va='center', fontsize=10, fontweight='bold')
ax.text(90, 0, 'Final\nEvaluation', ha='center', va='center', fontsize=10, fontweight='bold')

ax.set_xlim(0, 100)
ax.set_ylim(-0.5, 0.5)
ax.set_xlabel('Percentage of Data')
ax.set_yticks([])
ax.set_title('Train/Validation/Test Split')
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=3)

plt.tight_layout()
plt.savefig('../images/train_validation_test_split.png')
plt.show()

RANDOM_STATE=3

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Generate synthetic data to illustrate the concept
np.random.seed(3)
n = 50
X = np.random.uniform(-5,5,n) # synthetic, wider range

# True relationship
a_true = 2.0
c_true = 5.0
noise = np.random.normal(0,3,n)

y = a_true * X**2 + c_true + noise

# Perform train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X,y, test_size=0.3, random_state=3
)

# Sort for plotting
X_curve = np.linspace(X.min(), X. max())
y_true = a_true * X_curve**2 + c_true



# Plot
fig, ax = plt.subplots(figsize=(10,6))
ax.scatter(X_train, y_train, color='blue', s=50, label='Training data', zorder=3)
ax.scatter(X_test, y_test, color='red', s=50, label='Test data', zorder=3)
ax.plot(X_curve, y_true, linewidth=2, label='True relationship', alpha=0.7)

ax.set_xlabel('X')
ax.set_ylabel('y')
ax.set_title('Train/Test Split')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('../images/train_test_split_illustration.png')
plt.show()

print(f"Total samples: {n}")
print(f"Training samples: {len(X_train)} ({len(X_train)/n*100:.0f}%)")
print(f"Test samples: {len(X_test)} ({len(X_test)/n*100:.0f}%)")

Total samples: 50
Training samples: 35 (70%)
Test samples: 15 (30%)

from sklearn.model_selection import train_test_split

# Separate features and target
X = df[feature_names].values
y = df['MedHouseVal'].values

# Split: 80% train, 20% test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)

print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size:     {X_test.shape[0]}")

Training set size: 16512
Test set size:     4128

# Visualize relationships between features and target
fig, axes = plt.subplots(2, 4, figsize=(16, 8))
axes = axes.flatten()

for i, (name, ax) in enumerate(zip(feature_names, axes)):
    ax.scatter(X_train[:, i], y_train, alpha=0.1, s=1)
    ax.set_xlabel(name)
    ax.set_ylabel('MedHouseVal')
    ax.set_title(f'{name} vs Price')

plt.tight_layout()
plt.savefig('../images/california_housing_scatter.png')
plt.show()

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fit linear regression
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Predict on train and test
y_train_pred = lin_reg.predict(X_train)
y_test_pred = lin_reg.predict(X_test)

# Evaluate
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

print(f"Train MSE: {train_mse:.4f},  Train R²: {train_r2:.4f}")
print(f"Test  MSE: {test_mse:.4f},  Test  R²: {test_r2:.4f}")

Train MSE: 0.5229,  Train R²: 0.6079
Test  MSE: 0.5381,  Test  R²: 0.5931

# Generate quadratic data (similar to notebook 02) – using distinct names
np.random.seed(3)
n_synth = 50
x_synth = np.random.uniform(-5, 5, n_synth)
y_true_synth = 2.0 * x_synth**2 + 5.0
noise_synth = np.random.normal(0, 3, n_synth)
y_synth = y_true_synth + noise_synth

# Fit polynomials of degree 1 (underfit), 2 (good), 11 (overfit)
degrees = [1, 2, 11]
x_plot = np.linspace(-5, 5, 200)

fig, axes = plt.subplots(1, 3, figsize=(15, 4))
for idx, d in enumerate(degrees):
    coeff = np.polyfit(x_synth, y_synth, d)
    p = np.poly1d(coeff)
    axes[idx].scatter(x_synth, y_synth, alpha=0.7, label='Data')
    axes[idx].plot(x_plot, p(x_plot), 'r-', linewidth=2, label=f'Degree {d}')
    axes[idx].set_title(f'Degree {d} fit')
    axes[idx].set_xlabel('x')
    axes[idx].set_ylabel('y')
    axes[idx].legend()
    axes[idx].grid(True)
plt.tight_layout()
plt.savefig('../images/underfitting_vs_overfitting.png')
plt.show()

from sklearn.preprocessing import PolynomialFeatures

# Create polynomial features of degree 2 (includes interactions)
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

print(f"Original training features: {X_train.shape[1]}")
print(f"Polynomial training features: {X_train_poly.shape[1]}")

# Condition number of the augmented polynomial design matrix (with intercept added later)
from numpy.linalg import cond

X_train_poly_with_intercept = np.hstack([np.ones((X_train_poly.shape[0], 1)), X_train_poly])
print(f"Condition number of polynomial design matrix: {cond(X_train_poly_with_intercept):.2e}")

Original training features: 8
Polynomial training features: 44
Condition number of polynomial design matrix: 1.55e+11
Condition number of polynomial design matrix: 1.55e+11

# Fit linear regression on polynomial features
poly_reg = LinearRegression()
poly_reg.fit(X_train_poly, y_train)

y_train_pred_poly = poly_reg.predict(X_train_poly)
y_test_pred_poly = poly_reg.predict(X_test_poly)

train_mse_poly = mean_squared_error(y_train, y_train_pred_poly)
test_mse_poly = mean_squared_error(y_test, y_test_pred_poly)

print(f"Polynomial (deg=2) Train MSE: {train_mse_poly:.4f}")
print(f"Polynomial (deg=2) Test  MSE: {test_mse_poly:.4f}")

Polynomial (deg=2) Train MSE: 0.4217
Polynomial (deg=2) Test  MSE: 0.4669

from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

# We'll use the polynomial features because ridge can help with overfitting
# Choose lambda via cross-validation on the training set
alphas = np.logspace(-3, 3, 20)
cv_scores = []

for alpha in alphas:
    ridge = Ridge(alpha=alpha)
    # 5-fold cross-validation, negative MSE (scoring expects higher = better)
    scores = cross_val_score(ridge, X_train_poly, y_train, cv=5, scoring='neg_mean_squared_error')
    cv_scores.append(-scores.mean())

best_alpha = alphas[np.argmin(cv_scores)]
print(f"Best alpha from CV: {best_alpha:.4f}")

# Plot CV error vs alpha
plt.figure(figsize=(8,4))
plt.semilogx(alphas, cv_scores)
plt.xlabel('alpha (λ)')
plt.ylabel('Cross-validated MSE')
plt.title('Ridge Regularisation on Polynomial Features')
plt.grid(True)
plt.savefig('../images/ridge_regularization_polynomial_features_unscaled.png')
plt.show()

/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=5.61091e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.8355e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=6.12863e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=6.2106e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=6.54461e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=5.8756e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.92268e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=6.38803e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=6.47667e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=6.81721e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=6.42347e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=2.1031e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=6.92486e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=7.02729e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=7.38133e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=7.55789e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=2.47656e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=8.03616e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=8.16704e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=8.54886e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=9.90852e-21): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=3.24995e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.03379e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.05273e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.09661e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.47609e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=4.8532e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.51111e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.54201e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.59756e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=2.48243e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=8.18458e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=2.5036e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=2.55854e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=2.63843e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=4.58073e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.5141e-19): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=4.57846e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=4.68043e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=4.81413e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=8.99842e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=2.97942e-19): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=8.95471e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=9.14672e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=9.41068e-20): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.84197e-19): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=6.10261e-19): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.82904e-19): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.8654e-19): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.92511e-19): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=4.2525e-19): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.40749e-18): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=4.23705e-19): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=4.31565e-19): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=4.36802e-19): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=8.62957e-19): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=2.85107e-18): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=8.62987e-19): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=8.72841e-19): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=8.85718e-19): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.79172e-18): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=5.9137e-18): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.79522e-18): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.81032e-18): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.83658e-18): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=3.74634e-18): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.23675e-17): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=3.75365e-18): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=3.78706e-18): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=3.837e-18): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=7.84044e-18): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=2.6169e-17): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=7.83226e-18): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=7.97521e-18): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=8.10899e-18): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.62385e-17): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=5.40767e-17): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.62848e-17): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.64981e-17): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=1.67422e-17): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=3.37154e-17): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=3.37402e-17): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=3.42091e-17): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=3.46597e-17): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=7.00456e-17): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=7.00113e-17): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=7.10119e-17): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)
/usr/lib64/python3.14/site-packages/scipy/_lib/_util.py:1233: LinAlgWarning: Ill-conditioned matrix (rcond=7.18479e-17): result may not be accurate.
  return f(*arrays, *other_args, **kwargs)

Best alpha from CV: 1000.0000

from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

# Add scaler
scaler = StandardScaler()
X_train_poly_scaled = scaler.fit_transform(X_train_poly)
X_test_poly_scaled = scaler.transform(X_test_poly)

# We'll use the polynomial features because ridge can help with overfitting
# Choose lambda via cross-validation on the training set
alphas = np.logspace(-3, 3, 20)
cv_scores = []

for alpha in alphas:
    ridge = Ridge(alpha=alpha)
    scores = cross_val_score(ridge, X_train_poly_scaled, y_train, cv=5, scoring='neg_mean_squared_error')
    cv_scores.append(-scores.mean())

best_alpha = alphas[np.argmin(cv_scores)]
print(f"Best alpha from CV: {best_alpha:.4f}")

# Plot CV error vs alpha
plt.figure(figsize=(8,4))
plt.semilogx(alphas, cv_scores)
plt.xlabel('alpha (λ)')
plt.ylabel('Cross-validated MSE')
plt.title('Ridge Regularisation on Polynomial Features')
plt.grid(True)
plt.savefig('../images/ridge_regularization_polynomial_features_scaled.png')
plt.show()

Best alpha from CV: 233.5721

# Fit ridge with best alpha on SCALED polynomial features
ridge_best = Ridge(alpha=best_alpha)
ridge_best.fit(X_train_poly_scaled, y_train)

y_test_pred_ridge = ridge_best.predict(X_test_poly_scaled)
test_mse_ridge = mean_squared_error(y_test, y_test_pred_ridge)
print(f"Ridge (poly deg=2) Test MSE: {test_mse_ridge:.4f}")
print(f"Ridge improved over plain polynomial (MSE {test_mse_poly:.4f} → {test_mse_ridge:.4f})")

Ridge (poly deg=2) Test MSE: 0.4791
Ridge improved over plain polynomial (MSE 0.4669 → 0.4791)

# Visualize how Ridge shrinks coefficients relative to singular values
from sklearn.preprocessing import StandardScaler

# Use scaled data for clean SVD interpretation
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Compute SVD of centered design matrix
U, s, Vt = np.linalg.svd(X_train_scaled, full_matrices=False)

# For different lambda values, compute the "shrinkage factor" for each singular direction
lambdas = [0, 0.1, 1, 10, 100]

plt.figure(figsize=(10, 5))

for lam in lambdas:
    if lam == 0:
        # OLS: no shrinkage
        shrinkage = np.ones_like(s)
        label = 'OLS (λ=0)'
    else:
        # Ridge shrinkage factor: sigma / (sigma^2 + lambda)
        shrinkage = s / (s**2 + lam)
        # Normalize so we can compare shapes
        shrinkage = shrinkage / shrinkage[0]  # normalize to first component
        label = f'Ridge (λ={lam})'
    
    plt.plot(range(1, len(s)+1), shrinkage, 'o-', label=label, markersize=8)

plt.xlabel('Singular value index (decreasing)')
plt.ylabel('Shrinkage factor (normalized)')
plt.title('Ridge Shrinkage: How λ Dampens Small Singular Directions')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xticks(range(1, len(s)+1))
plt.tight_layout()
plt.savefig('../images/ridge_svd_shrinkage.png')
plt.show()

# Show condition number improvement
print("Singular values:", s.round(2))
print(f"\nCondition number (OLS): {s[0]/s[-1]:.2f}")
for lam in [0.1, 1, 10]:
    effective_cond = (s[0]**2 + lam) / (s[-1]**2 + lam)
    print(f"Effective condition number (λ={lam}): {effective_cond:.2f}")

Singular values: [182.41 176.4  144.85 130.91 128.68 104.03  37.56  28.16]

Condition number (OLS): 6.48
Effective condition number (λ=0.1): 41.96
Effective condition number (λ=1): 41.91
Effective condition number (λ=10): 41.45

from sklearn.linear_model import Lasso

# Lasso also requires tuning of alpha
lasso = Lasso(alpha=0.01, max_iter=10000)  # start with a small alpha
lasso.fit(X_train_poly, y_train)

# Count non-zero coefficients
n_nonzero = np.sum(np.abs(lasso.coef_) > 1e-10)
print(f"Number of non-zero coefficients: {n_nonzero} out of {len(lasso.coef_)}")

y_test_pred_lasso = lasso.predict(X_test_poly)
test_mse_lasso = mean_squared_error(y_test, y_test_pred_lasso)
print(f"Lasso (poly deg=2) Test MSE: {test_mse_lasso:.4f}")

# Cross-validation for Lasso alpha
from sklearn.linear_model import LassoCV

lasso_cv = LassoCV(alphas=np.logspace(-3, 1, 30), cv=5, max_iter=10000, random_state=RANDOM_STATE)
lasso_cv.fit(X_train_poly, y_train)
print(f"Best alpha from LassoCV: {lasso_cv.alpha_:.4f}")
print(f"Number of non-zero coefficients (CV best): {np.sum(np.abs(lasso_cv.coef_) > 1e-10)}")

y_test_pred_lasso_cv = lasso_cv.predict(X_test_poly)
test_mse_lasso_cv = mean_squared_error(y_test, y_test_pred_lasso_cv)
print(f"LassoCV Test MSE: {test_mse_lasso_cv:.4f}")

/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:716: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.725e+03, tolerance: 2.202e+00
  model = cd_fast.enet_coordinate_descent(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.501e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.286e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(

Number of non-zero coefficients: 33 out of 44
Lasso (poly deg=2) Test MSE: 0.4538

/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.247e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.192e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.136e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.106e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.047e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.229e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.927e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.995e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.035e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.017e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.002e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.989e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.977e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.966e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.957e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.949e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.941e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.933e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.926e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.920e+03, tolerance: 1.774e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.902e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.045e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.033e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.987e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.939e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.892e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.049e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.055e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.031e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.009e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.990e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.975e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.964e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.953e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.944e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.936e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.929e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.921e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.913e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.906e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.900e+03, tolerance: 1.759e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 1.612e+03, tolerance: 1.753e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.935e+03, tolerance: 1.753e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.917e+03, tolerance: 1.753e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.900e+03, tolerance: 1.753e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.885e+03, tolerance: 1.753e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.871e+03, tolerance: 1.753e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.857e+03, tolerance: 1.753e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.845e+03, tolerance: 1.753e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.834e+03, tolerance: 1.753e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.826e+03, tolerance: 1.753e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.816e+03, tolerance: 1.753e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.806e+03, tolerance: 1.753e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.798e+03, tolerance: 1.753e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.791e+03, tolerance: 1.753e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.694e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.248e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.219e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.166e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.117e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.075e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.039e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.133e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.112e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.090e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.071e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.056e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.044e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.033e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.022e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.012e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.002e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.991e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.981e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.972e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.965e+03, tolerance: 1.771e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.153e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.201e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.168e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.116e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.067e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.026e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.987e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.057e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.040e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.016e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.995e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.979e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.965e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.952e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.940e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.930e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.922e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.913e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.904e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.896e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(
/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:701: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 2.889e+03, tolerance: 1.752e+00
  model = cd_fast.enet_coordinate_descent_gram(

Best alpha from LassoCV: 0.0067
Number of non-zero coefficients (CV best): 34
LassoCV Test MSE: 0.4587

/home/pks/.local/lib/python3.14/site-packages/sklearn/linear_model/_coordinate_descent.py:716: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 3.709e+03, tolerance: 2.202e+00
  model = cd_fast.enet_coordinate_descent(

from sklearn.linear_model import Lasso, LassoCV

# Lasso with more iterations
lasso = Lasso(alpha=0.01, max_iter=100000, tol=1e-4)
lasso.fit(X_train_poly_scaled, y_train)

# Count non-zero coefficients
n_nonzero = np.sum(np.abs(lasso.coef_) > 1e-10)
print(f"Number of non-zero coefficients: {n_nonzero} out of {len(lasso.coef_)}")

y_test_pred_lasso = lasso.predict(X_test_poly_scaled)
test_mse_lasso = mean_squared_error(y_test, y_test_pred_lasso)
print(f"Lasso Test MSE: {test_mse_lasso:.4f}")

# Cross-validation for Lasso alpha
lasso_cv = LassoCV(
    alphas=np.logspace(-3, 1, 30), 
    cv=5, 
    max_iter=100000, 
    tol=1e-4,
    random_state=RANDOM_STATE
)
lasso_cv.fit(X_train_poly_scaled, y_train)
print(f"Best alpha from LassoCV: {lasso_cv.alpha_:.4f}")
print(f"Number of non-zero coefficients (CV best): {np.sum(np.abs(lasso_cv.coef_) > 1e-10)}")

y_test_pred_lasso_cv = lasso_cv.predict(X_test_poly_scaled)
test_mse_lasso_cv = mean_squared_error(y_test, y_test_pred_lasso_cv)
print(f"LassoCV Test MSE: {test_mse_lasso_cv:.4f}")

Number of non-zero coefficients: 15 out of 44
Lasso Test MSE: 0.5347
Best alpha from LassoCV: 0.0067
Number of non-zero coefficients (CV best): 16
LassoCV Test MSE: 0.5305

# Visualize L1 vs L2 constraint regions and why Lasso gives sparsity
import numpy as np
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# L1 ball (diamond)
theta = np.linspace(0, 2*np.pi, 100)
r = 1

# L1 ball vertices
l1_x = [r, 0, -r, 0, r]
l1_y = [0, r, 0, -r, 0]

# L2 ball (circle)
l2_x = r * np.cos(theta)
l2_y = r * np.sin(theta)

# Simulated loss contours (ellipses centered away from origin)
# The OLS solution is at some point (beta1_ols, beta2_ols)
beta_ols = np.array([0.7, 0.3])

for idx, (ax, ball_type) in enumerate(zip(axes, ['Lasso (L¹)', 'Ridge (L²)'])):
    # Draw constraint region
    if idx == 0:  # Lasso - L1 ball
        ax.fill(l1_x, l1_y, alpha=0.3, color='blue', label='L¹ constraint region')
        ax.plot(l1_x, l1_y, 'b-', linewidth=2)
    else:  # Ridge - L2 ball
        ax.fill(l2_x, l2_y, alpha=0.3, color='green', label='L² constraint region')
        ax.plot(l2_x, l2_y, 'g-', linewidth=2)
    
    # Draw loss contours (ellipses)
    # Simplified: concentric ellipses around OLS solution
    for scale in [0.3, 0.5, 0.7, 1.0]:
        ellipse_x = beta_ols[0] + scale * 0.4 * np.cos(theta)
        ellipse_y = beta_ols[1] + scale * 0.2 * np.sin(theta)
        ax.plot(ellipse_x, ellipse_y, 'r--', alpha=0.5, linewidth=1)
    
    # Mark OLS solution
    ax.scatter(*beta_ols, color='red', s=100, zorder=5, label='OLS solution')
    
    # Mark the "first contact" point (approximate)
    if idx == 0:  # Lasso hits corner
        contact = np.array([1.0, 0.0])  # on the axis!
        ax.scatter(*contact, color='purple', s=150, marker='*', zorder=6, label='Lasso solution (sparse!)')
    else:  # Ridge hits smooth part
        contact = np.array([0.85, 0.35])  # not on axis
        ax.scatter(*contact, color='purple', s=150, marker='*', zorder=6, label='Ridge solution')
    
    ax.set_xlim(-1.5, 1.5)
    ax.set_ylim(-1.5, 1.5)
    ax.set_xlabel(r'$\beta_1$')
    ax.set_ylabel(r'$\beta_2$')
    ax.set_title(f'{ball_type} Constraint')
    ax.legend(loc='upper right', fontsize=9)
    ax.set_aspect('equal')
    ax.grid(True, alpha=0.3)
    ax.axhline(0, color='k', linewidth=0.5)
    ax.axvline(0, color='k', linewidth=0.5)

plt.tight_layout()
plt.savefig('../images/lasso_vs_ridge_geometry.png')
plt.show()

from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score

# Compare PCR with varying number of components
n_components_range = range(1, X_train_scaled.shape[1] + 1)
pcr_scores = []

for n_comp in n_components_range:
    pcr = make_pipeline(
        PCA(n_components=n_comp),
        LinearRegression()
    )
    # Negative MSE (sklearn convention: higher is better)
    scores = cross_val_score(pcr, X_train_scaled, y_train, cv=5, scoring='neg_mean_squared_error')
    pcr_scores.append(-scores.mean())

# Also compute variance explained
pca_full = PCA()
pca_full.fit(X_train_scaled)
var_explained = np.cumsum(pca_full.explained_variance_ratio_)

# Plot
fig, ax1 = plt.subplots(figsize=(10, 5))

ax1.plot(n_components_range, pcr_scores, 'b-o', label='CV MSE')
ax1.set_xlabel('Number of Principal Components')
ax1.set_ylabel('Cross-Validated MSE', color='b')
ax1.tick_params(axis='y', labelcolor='b')

ax2 = ax1.twinx()
ax2.plot(n_components_range, var_explained, 'r--s', label='Variance Explained')
ax2.set_ylabel('Cumulative Variance Explained', color='r')
ax2.tick_params(axis='y', labelcolor='r')
ax2.set_ylim(0, 1.05)

plt.title('Principal Component Regression: Choosing k')
fig.legend(loc='center right', bbox_to_anchor=(0.85, 0.5))
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('../images/pcr_components_selection.png')
plt.show()

# Best number of components
best_n_comp = n_components_range[np.argmin(pcr_scores)]
print(f"Best number of components: {best_n_comp}")
print(f"Variance explained: {var_explained[best_n_comp-1]:.2%}")

# Compare with OLS and Ridge
print(f"\nModel Comparison (Test MSE):")
print(f"  OLS (all features):  {test_mse:.4f}")
print(f"  PCR (k={best_n_comp}):       {pcr_scores[best_n_comp-1]:.4f}")
print(f"  Ridge (λ={best_alpha:.2f}):   {test_mse_ridge:.4f}")

Best number of components: 8
Variance explained: 100.00%

Model Comparison (Test MSE):
  OLS (all features):  0.5381
  PCR (k=8):       0.5272
  Ridge (λ=233.57):   0.4791

# Demonstrate how condition number affects gradient descent convergence
from sklearn.preprocessing import StandardScaler

def gradient_descent_linear(X, y, learning_rate=0.01, n_iter=1000, verbose=False):
    """Batch gradient descent for linear regression."""
    n, p = X.shape
    beta = np.zeros(p)
    losses = []
    for i in range(n_iter):
        residual = y - X @ beta
        grad = - (1/n) * X.T @ residual
        beta -= learning_rate * grad
        loss = (1/(2*n)) * np.linalg.norm(residual)**2
        losses.append(loss)
    return beta, losses

# Use a subset for illustration
X_subset = X_train[:1000]
y_subset = y_train[:1000]

# Add intercept
X_subset_aug = np.hstack([np.ones((X_subset.shape[0], 1)), X_subset])

# Compute eigenvalues of X^T X
eigenvalues = np.linalg.eigvalsh(X_subset_aug.T @ X_subset_aug)
lambda_max, lambda_min = eigenvalues.max(), eigenvalues[eigenvalues > 1e-10].min()
cond_num = lambda_max / lambda_min

print(f"Eigenvalue range: [{lambda_min:.2e}, {lambda_max:.2e}]")
print(f"Condition number: {cond_num:.2e}")
print(f"Max stable learning rate: {2/lambda_max:.2e}")

# Try gradient descent with different learning rates on UNSCALED data
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# UNSCALED
learning_rates = [1e-10, 1e-9, 1e-8]
for lr in learning_rates:
    _, losses = gradient_descent_linear(X_subset_aug, y_subset, learning_rate=lr, n_iter=200)
    axes[0].plot(losses, label=f'η = {lr:.0e}')
axes[0].set_xlabel('Iteration')
axes[0].set_ylabel('Loss (MSE)')
axes[0].set_title(f'Unscaled Data (κ = {cond_num:.1e})')
axes[0].legend()
axes[0].grid(True, alpha=0.3)
axes[0].set_yscale('log')

# SCALED
scaler = StandardScaler()
X_subset_scaled = scaler.fit_transform(X_subset)
X_subset_scaled_aug = np.hstack([np.ones((X_subset_scaled.shape[0], 1)), X_subset_scaled])

eigenvalues_scaled = np.linalg.eigvalsh(X_subset_scaled_aug.T @ X_subset_scaled_aug)
lambda_max_s, lambda_min_s = eigenvalues_scaled.max(), eigenvalues_scaled[eigenvalues_scaled > 1e-10].min()
cond_num_scaled = lambda_max_s / lambda_min_s

learning_rates_scaled = [0.001, 0.01, 0.1]
for lr in learning_rates_scaled:
    _, losses = gradient_descent_linear(X_subset_scaled_aug, y_subset, learning_rate=lr, n_iter=200)
    axes[1].plot(losses, label=f'η = {lr}')
axes[1].set_xlabel('Iteration')
axes[1].set_ylabel('Loss (MSE)')
axes[1].set_title(f'Scaled Data (κ = {cond_num_scaled:.1f})')
axes[1].legend()
axes[1].grid(True, alpha=0.3)
axes[1].set_yscale('log')

plt.tight_layout()
plt.savefig('../images/gd_condition_number_effect.png')
plt.show()

print(f"\nScaling reduced condition number from {cond_num:.1e} to {cond_num_scaled:.1f}")
print("This allows much larger learning rates and faster convergence.")

Eigenvalue range: [5.59e-02, 3.07e+09]
Condition number: 5.49e+10
Max stable learning rate: 6.52e-10

Scaling reduced condition number from 5.5e+10 to 42.2
This allows much larger learning rates and faster convergence.

# Implement batch gradient descent for linear regression on a small subset for illustration
def gradient_descent_linear(X, y, learning_rate=0.01, n_iter=1000, verbose=False):
    n, p = X.shape
    beta = np.zeros(p)
    losses = []
    for i in range(n_iter):
        grad = - (1/n) * X.T @ (y - X @ beta)
        beta -= learning_rate * grad
        loss = (1/(2*n)) * np.linalg.norm(y - X @ beta)**2
        losses.append(loss)
        if verbose and i % 200 == 0:
            print(f"Iter {i}: loss = {loss:.6f}")
    return beta, losses

# Use a small subset for speed
X_small = X_train[:1000]
y_small = y_train[:1000]

# Add intercept column
X_small_aug = np.hstack([np.ones((X_small.shape[0], 1)), X_small])

beta_gd, losses = gradient_descent_linear(X_small_aug, y_small, learning_rate=0.01, n_iter=500)

plt.figure(figsize=(8,4))
plt.plot(losses)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Gradient Descent Convergence')
plt.grid(True)
plt.savefig('../images/gradient_descent_convergence_unscaled')
plt.show()

# Compare with closed-form solution on the same subset
beta_closed = np.linalg.lstsq(X_small_aug, y_small, rcond=None)[0]
print(f"Difference between GD and closed-form: {np.linalg.norm(beta_gd - beta_closed):.2e}")

/usr/lib64/python3.14/site-packages/numpy/linalg/_linalg.py:2792: RuntimeWarning: overflow encountered in dot
  sqnorm = x.dot(x)
/tmp/ipykernel_66479/3132444904.py:7: RuntimeWarning: overflow encountered in matmul
  grad = - (1/n) * X.T @ (y - X @ beta)
/tmp/ipykernel_66479/3132444904.py:8: RuntimeWarning: invalid value encountered in subtract
  beta -= learning_rate * grad

Difference between GD and closed-form: nan

from sklearn.preprocessing import StandardScaler

# 1. Prepare Data
# Use a small subset for speed
X_small = X_train[:1000].copy() # Use .copy() to avoid SettingWithCopyWarning
y_small = y_train[:1000].copy()

# 2. SCALE THE FEATURES (Critical for Gradient Descent!)
scaler = StandardScaler()
X_small_scaled = scaler.fit_transform(X_small)

# Add intercept column AFTER scaling
# (We don't scale the intercept column, it stays as 1s)
X_small_aug = np.hstack([np.ones((X_small_scaled.shape[0], 1)), X_small_scaled])

# 3. Run Gradient Descent
def gradient_descent_linear(X, y, learning_rate=0.01, n_iter=1000, verbose=False):
    n, p = X.shape
    beta = np.zeros(p)
    losses = []
    for i in range(n_iter):
        # Predict
        prediction = X @ beta
        # Residual
        residual = y - prediction
        # Gradient
        grad = - (1/n) * X.T @ residual
        # Update
        beta -= learning_rate * grad
        
        # Calculate Loss (MSE)
        loss = (1/(2*n)) * np.linalg.norm(residual)**2
        losses.append(loss)
        
        if verbose and i % 200 == 0:
            print(f"Iter {i}: loss = {loss:.6f}")
    return beta, losses

# With scaled data, learning_rate=0.01 or even 0.1 is usually safe
beta_gd, losses = gradient_descent_linear(X_small_aug, y_small, learning_rate=0.1, n_iter=500, verbose=True)

# Plot convergence
plt.figure(figsize=(8,4))
plt.plot(losses)
plt.xlabel('Iteration')
plt.ylabel('Loss (MSE)')
plt.title('Gradient Descent Convergence (Scaled Data)')
plt.grid(True)
plt.savefig('../images/gradient_descent_convergence_scaled')
plt.show()

Iter 0: loss = 2.758919
Iter 200: loss = 0.234124
Iter 400: loss = 0.230774

from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# SGDRegressor is sensitive to feature scaling, so we use a pipeline
# penalty=None means no regularization (standard Linear Regression)
sgd_reg = make_pipeline(
    StandardScaler(),
    SGDRegressor(penalty=None, learning_rate='constant', eta0=0.01, max_iter=1000, random_state=42)
)

sgd_reg.fit(X_train, y_train)

# Note: SGDRegressor optimizes a different loss function formulation by default,
# so coefficients might differ slightly from closed-form, but the prediction quality is similar.
print(f"Coefficients: {sgd_reg.named_steps['sgdregressor'].coef_}")

Coefficients: [ 3.97676073e+09 -1.14418633e+10 -1.78357850e+10  1.01065426e+11
 -1.80378121e+10 -3.02815983e+09 -5.43520408e+10 -4.51215845e+10]

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

# Single decision tree (max depth 10)
tree = DecisionTreeRegressor(max_depth=10, random_state=RANDOM_STATE)
tree.fit(X_train, y_train)
y_test_pred_tree = tree.predict(X_test)
test_mse_tree = mean_squared_error(y_test, y_test_pred_tree)

# Random forest (100 trees)
rf = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=RANDOM_STATE, n_jobs=-1)
rf.fit(X_train, y_train)
y_test_pred_rf = rf.predict(X_test)
test_mse_rf = mean_squared_error(y_test, y_test_pred_rf)

print(f"Decision Tree Test MSE: {test_mse_tree:.4f}")
print(f"Random Forest Test MSE: {test_mse_rf:.4f}")

# Compare with best linear model
print(f"Ridge (poly) Test MSE: {test_mse_ridge:.4f}")
print(f"LassoCV Test MSE:      {test_mse_lasso_cv:.4f}")

Decision Tree Test MSE: 0.3961
Random Forest Test MSE: 0.2752
Ridge (poly) Test MSE: 0.4791
LassoCV Test MSE:      0.5305

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Create binary target: 1 if house value > median, else 0
# Use the ORIGINAL dataframe to avoid confusion with scaled/transformed versions
y_binary = (df['MedHouseVal'] > df['MedHouseVal'].median()).astype(int).values
X_original = df[feature_names].values  # original features, not overwritten

# Split
X_train_bin, X_test_bin, y_train_bin, y_test_bin = train_test_split(
    X_original, y_binary, test_size=0.2, random_state=RANDOM_STATE
)

# Scale features (important for logistic regression with regularization)
scaler_bin = StandardScaler()
X_train_bin_scaled = scaler_bin.fit_transform(X_train_bin)
X_test_bin_scaled = scaler_bin.transform(X_test_bin)

# Train logistic regression
log_reg = LogisticRegression(max_iter=1000, random_state=RANDOM_STATE)
log_reg.fit(X_train_bin_scaled, y_train_bin)

# Predict
y_pred_bin = log_reg.predict(X_test_bin_scaled)
accuracy = accuracy_score(y_test_bin, y_pred_bin)

print(f"Logistic Regression Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_bin, y_pred_bin))

# Coefficients (on scaled features)
coef_df = pd.DataFrame({
    'Feature': feature_names,
    'Coefficient': log_reg.coef_[0]
}).sort_values('Coefficient', key=abs, ascending=False)
print("\nLogistic Regression Coefficients (scaled features):")
print(coef_df)

Logistic Regression Accuracy: 0.8324

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.84      0.83      2083
           1       0.83      0.83      0.83      2045

    accuracy                           0.83      4128
   macro avg       0.83      0.83      0.83      4128
weighted avg       0.83      0.83      0.83      4128


Logistic Regression Coefficients (scaled features):
      Feature  Coefficient
6    Latitude    -3.532385
7   Longitude    -3.328767
5    AveOccup    -3.094635
0      MedInc     2.512414
3   AveBedrms     0.899888
2    AveRooms    -0.786384
1    HouseAge     0.276838
4  Population     0.062450
              precision    recall  f1-score   support

           0       0.83      0.84      0.83      2083
           1       0.83      0.83      0.83      2045

    accuracy                           0.83      4128
   macro avg       0.83      0.83      0.83      4128
weighted avg       0.83      0.83      0.83      4128


Logistic Regression Coefficients (scaled features):
      Feature  Coefficient
6    Latitude    -3.532385
7   Longitude    -3.328767
5    AveOccup    -3.094635
0      MedInc     2.512414
3   AveBedrms     0.899888
2    AveRooms    -0.786384
1    HouseAge     0.276838
4  Population     0.062450

# Illustrate 5-fold cross-validation
from sklearn.model_selection import KFold

n_points = 20
X_cv = np.arange(n_points).reshape(-1, 1)
colors = plt.cm.tab10(np.linspace(0, 1, 5))

kf = KFold(n_splits=5, shuffle=True, random_state=3)

fig, axes = plt.subplots(5, 1, figsize=(12, 8))

for i, (train_idx, test_idx) in enumerate(kf.split(X_cv)):
    ax = axes[i]
    
    # Plot all points
    for j in range(n_points):
        if j in test_idx:
            ax.scatter(j, 0, s=200, c='red', marker='s', label='Test' if j == test_idx[0] else '')
        else:
            ax.scatter(j, 0, s=200, c='blue', marker='o', label='Train' if j == train_idx[0] else '')
    
    ax.set_xlim(-1, n_points)
    ax.set_ylim(-0.5, 0.5)
    ax.set_yticks([])
    ax.set_ylabel(f'Fold {i+1}', rotation=0, labelpad=30)
    
    if i == 0:
        ax.legend(loc='upper right', ncol=2)
    if i < 4:
        ax.set_xticks([])

axes[-1].set_xlabel('Sample Index')
axes[2].set_title('5-Fold Cross-Validation', pad=20)

plt.tight_layout()
plt.savefig('../images/cross_validation_illustration.png')
plt.show()

from sklearn.model_selection import cross_val_score, KFold

# 5-fold CV on linear regression
lin_reg_cv = LinearRegression()
scores = cross_val_score(lin_reg_cv, X_train, y_train, cv=5, scoring='r2')
print(f"5-fold CV R² scores: {scores}")
print(f"Mean R²: {scores.mean():.4f} (+/- {scores.std()*2:.4f})")

# We can also use a custom cross-validator
kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
scores_shuffled = cross_val_score(lin_reg_cv, X_train, y_train, cv=kf, scoring='r2')
print(f"Shuffled CV R² scores: {scores_shuffled}")
print(f"Mean R² (shuffled): {scores_shuffled.mean():.4f}")

5-fold CV R² scores: [0.60709214 0.59544452 0.58112984 0.63060861 0.61005689]
Mean R²: 0.6049 (+/- 0.0328)
Shuffled CV R² scores: [0.60563739 0.59602593 0.5917264  0.61941109 0.62268184]
Mean R² (shuffled): 0.6071

from sklearn.preprocessing import StandardScaler

# Create scaler
scaler = StandardScaler()

# Fit on training data only
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Compare condition number before and after scaling
X_train_aug = np.hstack([np.ones((X_train.shape[0], 1)), X_train])
X_train_scaled_aug = np.hstack([np.ones((X_train_scaled.shape[0], 1)), X_train_scaled])

print(f"Condition number (original): {cond(X_train_aug):.2e}")
print(f"Condition number (scaled):   {cond(X_train_scaled_aug):.2e}")

# Fit linear regression on scaled data
lin_reg_scaled = LinearRegression()
lin_reg_scaled.fit(X_train_scaled, y_train)
y_test_pred_scaled = lin_reg_scaled.predict(X_test_scaled)
test_mse_scaled = mean_squared_error(y_test, y_test_pred_scaled)
print(f"Linear regression (scaled) Test MSE: {test_mse_scaled:.4f}")
print(f"Linear regression (original) Test MSE: {test_mse:.4f}")

Condition number (original): 2.40e+05
Condition number (scaled):   6.48e+00
Linear regression (scaled) Test MSE: 0.5381
Linear regression (original) Test MSE: 0.5381

# Train Ridge on scaled data (with default alpha)
ridge_scaled = Ridge(alpha=1.0)
ridge_scaled.fit(X_train_scaled, y_train)

# Display coefficients
coef_df = pd.DataFrame({
    'Feature': feature_names,
    'Coefficient': ridge_scaled.coef_
})
print("Ridge coefficients (scaled features):")
print(coef_df.sort_values('Coefficient', key=abs, ascending=False))

# Random forest feature importance
rf.fit(X_train, y_train)  # already fitted earlier, but ensure
importances = rf.feature_importances_
importance_df = pd.DataFrame({
    'Feature': feature_names,
    'Importance': importances
}).sort_values('Importance', ascending=False)

print("\nRandom Forest Feature Importances:")
print(importance_df)

# Plot
plt.figure(figsize=(8,4))
plt.barh(importance_df['Feature'], importance_df['Importance'])
plt.xlabel('Importance')
plt.title('Random Forest Feature Importance')
plt.gca().invert_yaxis()
plt.savefig('../images/RF_feature_importance.png')
plt.show()

Ridge coefficients (scaled features):
      Feature  Coefficient
6    Latitude    -0.896656
7   Longitude    -0.870257
0      MedInc     0.848402
3   AveBedrms     0.332536
2    AveRooms    -0.287161
1    HouseAge     0.125807
5    AveOccup    -0.040522
4  Population    -0.002190

Random Forest Feature Importances:
      Feature  Importance
0      MedInc    0.589486
5    AveOccup    0.137379
6    Latitude    0.078123
7   Longitude    0.077486
1    HouseAge    0.047525
2    AveRooms    0.034377
4  Population    0.018634
3   AveBedrms    0.016990

from sklearn.model_selection import GridSearchCV

# Define parameter grid
param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [5, 10, None],
    'min_samples_split': [2, 5]
}

# Create random forest
rf_tune = RandomForestRegressor(random_state=42, n_jobs=-1)

# Grid search with 3-fold CV (use a subset of training data for speed)
X_train_subset = X_train[:5000]
y_train_subset = y_train[:5000]

grid_search = GridSearchCV(rf_tune, param_grid, cv=3, scoring='neg_mean_squared_error', verbose=1)
grid_search.fit(X_train_subset, y_train_subset)

print("Best parameters:", grid_search.best_params_)
print("Best CV MSE:", -grid_search.best_score_)

# Evaluate on test set
best_rf = grid_search.best_estimator_
y_test_pred_best_rf = best_rf.predict(X_test)
test_mse_best_rf = mean_squared_error(y_test, y_test_pred_best_rf)
print(f"Tuned Random Forest Test MSE: {test_mse_best_rf:.4f}")

Fitting 3 folds for each of 12 candidates, totalling 36 fits
Best parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}
Best CV MSE: 0.3210370034255883
Tuned Random Forest Test MSE: 0.2928

	MedInc	HouseAge	AveRooms	AveBedrms	Population	AveOccup	Latitude	Longitude	MedHouseVal
0	8.3252	41.0	6.984127	1.023810	322.0	2.555556	37.88	-122.23	4.526
1	8.3014	21.0	6.238137	0.971880	2401.0	2.109842	37.86	-122.22	3.585
2	7.2574	52.0	8.288136	1.073446	496.0	2.802260	37.85	-122.24	3.521
3	5.6431	52.0	5.817352	1.073059	558.0	2.547945	37.85	-122.25	3.413
4	3.8462	52.0	6.281853	1.081081	565.0	2.181467	37.85	-122.25	3.422

	MedInc	HouseAge	AveRooms	AveBedrms	Population	AveOccup	Latitude	Longitude	MedHouseVal
count	20640.000000	20640.000000	20640.000000	20640.000000	20640.000000	20640.000000	20640.000000	20640.000000	20640.000000
mean	3.870671	28.639486	5.429000	1.096675	1425.476744	3.070655	35.631861	-119.569704	2.068558
std	1.899822	12.585558	2.474173	0.473911	1132.462122	10.386050	2.135952	2.003532	1.153956
min	0.499900	1.000000	0.846154	0.333333	3.000000	0.692308	32.540000	-124.350000	0.149990
25%	2.563400	18.000000	4.440716	1.006079	787.000000	2.429741	33.930000	-121.800000	1.196000
50%	3.534800	29.000000	5.229129	1.048780	1166.000000	2.818116	34.260000	-118.490000	1.797000
75%	4.743250	37.000000	6.052381	1.099526	1725.000000	3.282261	37.710000	-118.010000	2.647250
max	15.000100	52.000000	141.909091	34.066667	35682.000000	1243.333333	41.950000	-114.310000	5.000010

Method	Linearity	Regularisation	Feature Selection	Scalability
Linear regression	Yes	No	No	Good (closed‑form)
Polynomial regression	In features	No	No	Poor (exploding dimension)
Ridge	Yes	$L^2$	No (shrinks only)	Good
Lasso	Yes	$L^1$	Yes	Good (via coordinate descent)
Logistic regression	Decision boundary linear	Optional	With L1/L2	Good
Gradient descent	Yes (or any differentiable)	Optional	Optional	Excellent (very large data)
Decision trees	No	No (but depth limits)	Implicitly	Moderate
Random forests	No	No (ensemble reduces variance)	Implicitly	Moderate (parallelisable)

Modelling 101: Train/Test Splits & Beyond Linear Regression¶

Introduction¶

A Real Dataset: California Housing¶

Train / Test Split (and Validation)¶

Linear Regression in Practice¶

Polynomial Regression and the Danger of Overfitting¶

Illustration: Underfitting vs Overfitting¶

Ridge Regression ($L^2$ Regularisation)¶

Ridge from the SVD Perspective¶

Lasso Regression ($L^1$ Regularisation)¶

Why Lasso Produces Sparse Solutions: The L¹ Geometry¶

Principal Component Regression (PCR)¶

Gradient Descent: When the Normal Equations Are Not Enough¶

The Linear Algebra of Convergence¶

Decision Trees and Random Forests¶

Logistic Regression for Classification¶

Cross‑Validation: A Deeper Look¶

Feature Scaling¶

Model Interpretation¶

Linear Models (Ridge, Lasso)¶

Decision Trees¶

Random Forests¶

Hyperparameter Tuning with Grid Search¶

Summary and Additional Considerations¶