Note
Go to the end to download the full example code.
5. Hyperparameter optimization
Now that we have selected the model and the input features, we will try to improve the prediction performance of our model using hyperparameter optimization.
import os
from typing import Union
import numpy as np
import matplotlib.pyplot as plt
from ai4water import Model
from ai4water.utils.utils import jsonize
from ai4water.utils.utils import TrainTestSplit, dateandtime_now
from ai4water.hyperopt import Categorical, Real, Integer, HyperOpt
from utils import prepare_data, set_rcParams, SAVE, version_info
# Print the name and version of every library reported by version_info().
for lib, ver in version_info().items():
    print(f"{lib} {ver}")
python 3.12.10 (main, May 6 2025, 10:49:23) [GCC 11.4.0]
os posix
ai4water 1.07
lightgbm 4.6.0
catboost 1.2.10
xgboost 3.2.0
easy_mpl 0.21.5
SeqMetrics 2.0.0
numpy 1.26.4
pandas 2.2.3
matplotlib 3.10.8
h5py 3.16.0
sklearn 1.3.1
optuna 4.8.0
skopt 0.10.2
plotly 6.6.0
seaborn 0.13.2
crepes 0.9.0
mapie 0.9.2
shap 0.49.1
scipy 1.17.1
set_rcParams()

# Names of the input columns requested from prepare_data().
inputs = [
    'Solution pH', 'Time (m)', 'Anions', 'Ni (At%)', 'HA (mg/L)',
    'loading (g)', 'Pore size (nm)', 'O (At%)',
    'Light intensity (watt)', 'Mo (At%)', 'Dye concentration (mg/L)',
]

# "k" is the target; the second return value is not needed here.
data, _ = prepare_data(inputs=inputs, outputs="k")

# Every column except the last one is treated as an input feature.
input_features = data.columns.tolist()[:-1]
print(input_features)
['Solution pH', 'Time (m)', 'Anions', 'Ni (At%)', 'HA (mg/L)', 'loading (g)', 'Pore size (nm)', 'O (At%)', 'Light intensity (watt)', 'Mo (At%)', 'Dye concentration (mg/L)']
# The last column of ``data`` is used as the prediction target.
output_features = data.columns.tolist()[-1:]
print(output_features)
['k']
# Randomly partition inputs and target into training and test sets.
# The seed is fixed, presumably to keep the split reproducible.
splitter = TrainTestSplit(seed=313)
TrainX, TestX, TrainY, TestY = splitter.split_by_random(
    data[input_features],
    data[output_features],
)
print(TrainX.shape, TestX.shape, TrainY.shape, TestY.shape)
(1068, 11) (459, 11) (1068, 1) (459, 1)
Evaluation with default parameters
# Baseline: a DecisionTreeRegressor for which no hyperparameters are given.
model = Model(
    model="DecisionTreeRegressor",
    input_features=input_features,
    output_features=output_features,
    verbosity=-1,
)
model.fit(TrainX, TrainY.values)

# evaluate model performance on the test set
default_metrics = model.evaluate(
    TestX,
    TestY,
    metrics=["r2", "r2_score", "rmse"],
)
print(default_metrics)
{'r2': 0.9422912405029966, 'r2_score': 0.9415415360380074, 'rmse': 0.0017336044254300542}
setup
# Bookkeeping that objective_fn updates on every call.
ITER = 0  # number of objective function evaluations so far
VAL_SCORES, SUGGESTIONS = [], []  # minimized scores / suggested hyperparameters

# Settings of the optimization run.
num_iterations = 100  # number of hyperparameter iterations
algorithm = "tpe"
backend = "optuna"

# Location of the results.
SEP = os.sep
PREFIX = f"hpo_{dateandtime_now()}"  # folder name where to save the results
parameters space
# Search space of the DecisionTreeRegressor hyperparameters. The commented-out
# entries are further candidates which are currently not tuned.
param_space = [
    Categorical(["best", "random"], name='splitter'),
    Integer(low=2, high=10, name='min_samples_split'),
    Integer(low=1, high=40, name='max_depth'),
    # Integer(low=2, high=10, name="min_samples_leaf"),
    Real(low=0.0, high=0.005, name="min_weight_fraction_leaf"),
    # Categorical(categories=['sqrt', 'log2'], name="max_features"),
    # Integer(low=2, high=10, name="max_leaf_nodes"),
]

# Initial guess; presumably one value per entry of ``param_space``, in the
# same order. Every value has to lie inside the bounds declared above: the
# previous guess of 0.1 for min_weight_fraction_leaf was outside of
# [0.0, 0.005].
x0 = [
    'best',    # splitter
    10,        # min_samples_split
    5,         # max_depth
    # 5,       # min_samples_leaf
    0.001,     # min_weight_fraction_leaf
    # "sqrt",  # max_features
    # 5,       # max_leaf_nodes
]
objective function
def objective_fn(
        return_model: bool = False,
        **suggestions
) -> Union[float, Model]:
    """
    Build a DecisionTreeRegressor from ``suggestions`` and score it.

    The output of this function will be minimized.

    :param return_model: whether to return the trained model or the validation
        score. This will be set to True, after we have optimized the hyperparameters
    :param suggestions: contains values of hyperparameters at each iteration
    :return: the scalar value which we want to minimize, i.e. one minus the
        cross validation score. If return_model is True then it returns the
        model fitted on the training data instead.
    """
    global ITER

    suggestions = jsonize(suggestions)
    SUGGESTIONS.append(suggestions)

    # build the model
    _model = Model(
        model={"DecisionTreeRegressor": suggestions},
        cross_validator={"KFold": {"n_splits": 10}},
        input_features=input_features,
        output_features=output_features,
        verbosity=-1
    )

    if return_model:
        _model.fit(TrainX.values, TrainY.values,
                   validation_data=(TestX, TestY.values))
        print(_model.evaluate(TestX, TestY,
                              metrics=["r2", "r2_score", "rmse"]))
        return _model

    # get the cross validation score which we will minimize
    val_score_ = _model.cross_val_score(TrainX.values, TrainY.values)[0]

    # since cross val score is r2_score, we need to subtract it from 1. Because
    # we are interested in increasing r2_score, and HyperOpt algorithm always
    # minimizes the objective function
    val_score = 1 - val_score_

    VAL_SCORES.append(val_score)

    # Best score and its position must both ignore NaN entries; a plain
    # argmin would return the position of the first NaN instead of the best
    # iteration.
    if np.all(np.isnan(VAL_SCORES)):
        # nothing valid yet: same values as a NaN minimum at position 0
        best_score, bst_iter = float("nan"), 0
    else:
        bst_iter = int(np.nanargmin(VAL_SCORES))
        best_score = round(float(VAL_SCORES[bst_iter]), 2)

    ITER += 1

    # bst_iter is a 0-based index into VAL_SCORES, whereas ITER counts from 1
    print(f"{ITER} {round(val_score, 2)} {round(val_score_, 2)}. Best was {best_score} at {bst_iter}")

    return val_score
running optimization loop
# Collect the optimizer settings first, then build the optimizer and run it.
hpo_config = dict(
    algorithm=algorithm,
    objective_fn=objective_fn,
    param_space=param_space,
    x0=x0,
    num_iterations=num_iterations,
    # post-processing of results is switched off here; presumably setting
    # this to True enables it (confirm in the HyperOpt documentation)
    process_results=False,
    opt_path=f"results{SEP}{PREFIX}",
    backend=backend,
)
optimizer = HyperOpt(**hpo_config)

res = optimizer.fit()
1 0.32 0.68. Best was 0.32 at 0
2 0.29 0.71. Best was 0.29 at 1
3 0.19 0.81. Best was 0.19 at 2
4 0.23 0.77. Best was 0.19 at 2
5 0.38 0.62. Best was 0.19 at 2
6 0.19 0.81. Best was 0.19 at 2
7 0.39 0.61. Best was 0.19 at 2
8 0.31 0.69. Best was 0.19 at 2
9 0.17 0.83. Best was 0.17 at 8
10 0.4 0.6. Best was 0.17 at 8
11 0.28 0.72. Best was 0.17 at 8
12 0.11 0.89. Best was 0.11 at 11
13 0.17 0.83. Best was 0.11 at 11
14 0.22 0.78. Best was 0.11 at 11
15 0.13 0.87. Best was 0.11 at 11
16 0.2 0.8. Best was 0.11 at 11
17 0.22 0.78. Best was 0.11 at 11
18 0.16 0.84. Best was 0.11 at 11
19 0.24 0.76. Best was 0.11 at 11
20 0.24 0.76. Best was 0.11 at 11
21 0.15 0.85. Best was 0.11 at 11
22 0.13 0.87. Best was 0.11 at 11
23 0.16 0.84. Best was 0.11 at 11
24 0.2 0.8. Best was 0.11 at 11
25 0.63 0.37. Best was 0.11 at 11
26 0.14 0.86. Best was 0.11 at 11
27 0.29 0.71. Best was 0.11 at 11
28 0.12 0.88. Best was 0.11 at 11
29 0.11 0.89. Best was 0.11 at 11
30 0.16 0.84. Best was 0.11 at 11
31 0.17 0.83. Best was 0.11 at 11
32 0.13 0.87. Best was 0.11 at 11
33 0.25 0.75. Best was 0.11 at 11
34 0.13 0.87. Best was 0.11 at 11
35 0.18 0.82. Best was 0.11 at 11
36 0.17 0.83. Best was 0.11 at 11
37 0.19 0.81. Best was 0.11 at 11
38 0.19 0.81. Best was 0.11 at 11
39 0.14 0.86. Best was 0.11 at 11
40 0.3 0.7. Best was 0.11 at 11
41 0.18 0.82. Best was 0.11 at 11
42 0.16 0.84. Best was 0.11 at 11
43 0.14 0.86. Best was 0.11 at 11
44 0.11 0.89. Best was 0.11 at 11
45 0.13 0.87. Best was 0.11 at 11
46 0.2 0.8. Best was 0.11 at 11
47 0.29 0.71. Best was 0.11 at 11
48 0.15 0.85. Best was 0.11 at 11
49 0.12 0.88. Best was 0.11 at 11
50 0.18 0.82. Best was 0.11 at 11
51 0.16 0.84. Best was 0.11 at 11
52 0.12 0.88. Best was 0.11 at 11
53 0.11 0.89. Best was 0.11 at 52
54 0.19 0.81. Best was 0.11 at 52
55 0.17 0.83. Best was 0.11 at 52
56 0.13 0.87. Best was 0.11 at 52
57 0.13 0.87. Best was 0.11 at 52
58 0.2 0.8. Best was 0.11 at 52
59 0.21 0.79. Best was 0.11 at 52
60 0.17 0.83. Best was 0.11 at 52
61 0.17 0.83. Best was 0.11 at 52
62 0.14 0.86. Best was 0.11 at 52
63 0.14 0.86. Best was 0.11 at 52
64 0.17 0.83. Best was 0.11 at 52
65 0.12 0.88. Best was 0.11 at 52
66 0.14 0.86. Best was 0.11 at 52
67 0.17 0.83. Best was 0.11 at 52
68 0.23 0.77. Best was 0.11 at 52
69 0.17 0.83. Best was 0.11 at 52
70 0.14 0.86. Best was 0.11 at 52
71 0.28 0.72. Best was 0.11 at 52
72 0.14 0.86. Best was 0.11 at 52
73 0.15 0.85. Best was 0.11 at 52
74 0.13 0.87. Best was 0.11 at 52
75 0.16 0.84. Best was 0.11 at 52
76 0.15 0.85. Best was 0.11 at 52
77 0.14 0.86. Best was 0.11 at 52
78 0.29 0.71. Best was 0.11 at 52
79 0.16 0.84. Best was 0.11 at 52
80 0.16 0.84. Best was 0.11 at 52
81 0.1 0.9. Best was 0.1 at 80
82 0.19 0.81. Best was 0.1 at 80
83 0.17 0.83. Best was 0.1 at 80
84 0.14 0.86. Best was 0.1 at 80
85 0.14 0.86. Best was 0.1 at 80
86 0.11 0.89. Best was 0.1 at 80
87 0.26 0.74. Best was 0.1 at 80
88 0.3 0.7. Best was 0.1 at 80
89 0.14 0.86. Best was 0.1 at 80
90 0.55 0.45. Best was 0.1 at 80
91 0.15 0.85. Best was 0.1 at 80
92 0.13 0.87. Best was 0.1 at 80
93 0.13 0.87. Best was 0.1 at 80
94 0.12 0.88. Best was 0.1 at 80
95 0.15 0.85. Best was 0.1 at 80
96 0.15 0.85. Best was 0.1 at 80
97 0.11 0.89. Best was 0.1 at 80
98 0.13 0.87. Best was 0.1 at 80
99 0.31 0.69. Best was 0.1 at 80
100 0.17 0.83. Best was 0.1 at 80
postprocessing of results
print the best hyperparameters
# Show the hyperparameter values which the optimizer reports as best.
print(optimizer.best_paras())
{'splitter': 'best', 'min_samples_split': 3, 'max_depth': 32, 'min_weight_fraction_leaf': 0.00027746123140151915}
convergence plot
optimizer.plot_convergence()
# Adjust the layout before saving, so that the file on disk matches the
# figure which is displayed afterwards.
plt.tight_layout()
if SAVE:
    plt.savefig("results/figures/hpo_convergence.png", dpi=600, bbox_inches="tight")
plt.show()

optimizer.plot_importance(with_optuna=True)
# Adjust the layout before saving, so that the file on disk matches the
# figure which is displayed afterwards.
plt.tight_layout()
if SAVE:
    plt.savefig("results/figures/hpo_importance.png", dpi=600, bbox_inches="tight")
plt.show()

optimizer.plot_parallel_coords()
# Adjust the layout before saving, so that the file on disk matches the
# figure which is displayed afterwards.
plt.tight_layout()
if SAVE:
    plt.savefig("results/figures/hpo_parallel_coords.png", dpi=600, bbox_inches="tight")
plt.show()

Evaluation with optimized hyperparameters
# Fit a model with the best hyperparameters found and print its test metrics.
bst_model = objective_fn(return_model=True, **optimizer.best_paras())
{'r2': 0.9420515255866434, 'r2_score': 0.941282018285209, 'rmse': 0.0017374482051161798}
Total running time of the script: (0 minutes 25.526 seconds)