Regression
A detailed walkthrough of DeepGBoostRegressor covering:
- Basic fit / predict on the Diabetes dataset
- Hyperparameter exploration (n_layers, learning_rate)
- Linear projection (linear_projection=True) for data with linear trends
- Early stopping with a validation set
- Feature importances
In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

from deepgboost import (
    DeepGBoostRegressor,
    EarlyStoppingCallback,
    EvaluationMonitorCallback,
    plot_importance,
)

RNG = 42

X, y = load_diabetes(return_X_y=True)
feature_names = load_diabetes().feature_names

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RNG
)
print(f"Train: {X_train.shape} | Test: {X_test.shape}")
Train: (353, 10) | Test: (89, 10)
1. Basic Fit & Predict
In [2]:
reg = DeepGBoostRegressor(
    n_trees=10,
    n_layers=20,
    max_depth=4,
    learning_rate=0.1,
    random_state=RNG,
)
reg.fit(X_train, y_train)

y_pred = reg.predict(X_test)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"R²: {r2:.4f}")
print(f"RMSE: {rmse:.2f}")
R²: 0.4786
RMSE: 52.56
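Because DeepGBoostRegressor exposes the familiar fit/predict interface, it should also drop into scikit-learn model-selection utilities. A minimal sketch, assuming full estimator-API compatibility (get_params/set_params cloning), which this notebook doesn't verify:

from sklearn.model_selection import cross_val_score

# 5-fold cross-validation on the training split (assumes the estimator
# can be cloned like any scikit-learn estimator).
scores = cross_val_score(
    DeepGBoostRegressor(
        n_trees=10, n_layers=20, max_depth=4, learning_rate=0.1, random_state=RNG
    ),
    X_train,
    y_train,
    cv=5,
    scoring="r2",
)
print(f"CV R²: {scores.mean():.4f} ± {scores.std():.4f}")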
In [3]:
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(y_test, y_pred, alpha=0.6, edgecolors="k", linewidths=0.4)
lims = [min(y_test.min(), y_pred.min()), max(y_test.max(), y_pred.max())]
ax.plot(lims, lims, "r--", linewidth=1)
ax.set_xlabel("Actual")
ax.set_ylabel("Predicted")
ax.set_title(f"Diabetes — Actual vs Predicted (R²={r2:.3f})")
plt.tight_layout()
plt.show()
2. Hyperparameter Exploration
Effect of n_layers
In [4]:
layer_counts = [2, 5, 10, 20, 40]
r2_scores = []
for n in layer_counts:
    m = DeepGBoostRegressor(
        n_trees=10, n_layers=n, max_depth=4, learning_rate=0.1, random_state=RNG
    )
    m.fit(X_train, y_train)
    r2_scores.append(r2_score(y_test, m.predict(X_test)))

plt.figure(figsize=(6, 3))
plt.plot(layer_counts, r2_scores, marker="o")
plt.xlabel("n_layers")
plt.ylabel("R²")
plt.title("R² vs number of layers")
plt.tight_layout()
plt.show()
Effect of learning_rate
In [5]:
lrs = [0.01, 0.05, 0.1, 0.2, 0.5]
r2_lr = []
for lr in lrs:
    m = DeepGBoostRegressor(
        n_trees=10, n_layers=20, max_depth=4, learning_rate=lr, random_state=RNG
    )
    m.fit(X_train, y_train)
    r2_lr.append(r2_score(y_test, m.predict(X_test)))

plt.figure(figsize=(6, 3))
plt.semilogx(lrs, r2_lr, marker="o")
plt.xlabel("learning_rate (log scale)")
plt.ylabel("R²")
plt.title("R² vs learning rate")
plt.tight_layout()
plt.show()
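If the estimator follows the scikit-learn get_params/set_params contract (an assumption this notebook doesn't test), the two one-dimensional sweeps above can be folded into a single grid search:

from sklearn.model_selection import GridSearchCV

# Hedged sketch: joint sweep over the two hyperparameters explored above.
grid = GridSearchCV(
    DeepGBoostRegressor(n_trees=10, max_depth=4, random_state=RNG),
    param_grid={"n_layers": [5, 10, 20], "learning_rate": [0.05, 0.1, 0.2]},
    cv=3,
    scoring="r2",
)
grid.fit(X_train, y_train)
print(grid.best_params_, f"CV R²: {grid.best_score_:.4f}")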
3. Linear Projection
linear_projection=True adds a Ridge regression correction at each layer,
analogous to XGBoost's booster='gblinear'. It helps when the target has a
strong linear component.
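Conceptually, each layer's update gets an extra linear term fitted to whatever residual the trees leave behind. A rough standalone sketch of that idea, not the library's actual implementation:

from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor

# Toy single "layer": trees fit the residual, then a ridge model fits
# what the trees missed, and both shrunken updates are applied.
def boost_layer(X, residual, lr=0.1):
    tree = DecisionTreeRegressor(max_depth=3).fit(X, residual)
    update = lr * tree.predict(X)
    ridge = Ridge(alpha=1.0).fit(X, residual - update)
    return update + lr * ridge.predict(X)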
In [6]:
# Synthetic data with a strong linear trend + small noise
rng = np.random.default_rng(0)
n = 300
X_lin = rng.standard_normal((n, 5))
y_lin = 3 * X_lin[:, 0] - 2 * X_lin[:, 2] + 0.5 * rng.standard_normal(n)
X_lin_tr, X_lin_te, y_lin_tr, y_lin_te = train_test_split(
    X_lin, y_lin, test_size=0.25, random_state=0
)

for use_lin in [False, True]:
    m = DeepGBoostRegressor(
        n_trees=5,
        n_layers=10,
        max_depth=3,
        learning_rate=0.1,
        linear_projection=use_lin,
        random_state=0,
    )
    m.fit(X_lin_tr, y_lin_tr)
    score = r2_score(y_lin_te, m.predict(X_lin_te))
    label = "with linear_projection" if use_lin else "without linear_projection"
    print(f"R² {label}: {score:.4f}")
R² without linear_projection: 0.7734
R² with linear_projection: 0.9452
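For context, a plain linear baseline on the same split shows how much of the signal is purely linear. This sketch (not part of the original walkthrough) uses scikit-learn's Ridge directly:

from sklearn.linear_model import Ridge

# A purely linear baseline for the same synthetic problem.
ridge = Ridge(alpha=1.0).fit(X_lin_tr, y_lin_tr)
print(f"R² plain Ridge baseline: {r2_score(y_lin_te, ridge.predict(X_lin_te)):.4f}")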
4. Early Stopping
In [7]:
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=1
)

es = EarlyStoppingCallback(patience=5, restore_best=True)
reg_es = DeepGBoostRegressor(
    n_trees=10,
    n_layers=100,
    learning_rate=0.1,
    random_state=RNG,
)
reg_es.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], callbacks=[es])

val_losses = list(reg_es.evals_result_.values())[0]["train_loss"]
print(f"Stopped at layer {len(val_losses)} / 100")

plt.figure(figsize=(7, 3))
plt.plot(val_losses)
plt.axvline(
    len(val_losses) - 1 - es.patience,
    color="r",
    linestyle="--",
    label=f"best (patience={es.patience})",
)
plt.xlabel("Layer")
plt.ylabel("Val RMSE")
plt.title("Early stopping on validation RMSE")
plt.legend()
plt.tight_layout()
plt.show()
Stopped at layer 28 / 100
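The dashed line in the plot approximates the best layer as patience steps before the stop; with restore_best=True the final model should correspond to the minimum of the recorded loss curve, which can also be read off directly:

# Locate the best layer from the stored loss history.
best_layer = int(np.argmin(val_losses))
print(f"Best layer: {best_layer} (loss {val_losses[best_layer]:.3f})")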
5. Feature Importances
In [8]:
reg_fi = DeepGBoostRegressor(
    n_trees=10, n_layers=20, max_depth=4, learning_rate=0.1, random_state=RNG
)
reg_fi.fit(X_train, y_train)

fig, ax = plot_importance(
    reg_fi,
    feature_names=list(feature_names),
    title="Diabetes — Feature Importances",
)
plt.tight_layout()
plt.show()

# Also accessible as a numpy array
fi = reg_fi.feature_importances_
print("Sum of importances:", fi.sum().round(6))
Sum of importances: 1.0
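Since feature_importances_ is a plain array aligned with the input columns, ranking the features needs only numpy:

# Rank features by importance, highest first.
for idx in np.argsort(fi)[::-1]:
    print(f"{feature_names[idx]:>6s}: {fi[idx]:.4f}")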