.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/hpo.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_hpo.py>`
        to download the full example code.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_hpo.py:


================================
5. hyperparameter optimization
================================
Now that we have selected the model and the input features, we will
try to improve the prediction performance of our model using hyperparameter
optimization.

.. GENERATED FROM PYTHON SOURCE LINES 9-24

.. code-block:: Python


    import os
    from typing import Union

    import numpy as np

    import matplotlib.pyplot as plt

    from ai4water import Model
    from ai4water.utils.utils import jsonize
    from ai4water.utils.utils import TrainTestSplit, dateandtime_now
    from ai4water.hyperopt import Categorical, Real, Integer, HyperOpt

    from utils import prepare_data, set_rcParams, SAVE, version_info


.. GENERATED FROM PYTHON SOURCE LINES 25-29

.. code-block:: Python


    # list every library together with the version used to build this example
    for name, version in version_info().items():
        print(name, version)


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    python 3.12.10 (main, May  6 2025, 10:49:23) [GCC 11.4.0]
    os posix
    ai4water 1.07
    lightgbm 4.6.0
    catboost 1.2.10
    xgboost 3.2.0
    easy_mpl 0.21.5
    SeqMetrics 2.0.0
    numpy 1.26.4
    pandas 2.2.3
    matplotlib 3.10.8
    h5py 3.16.0
    sklearn 1.3.1
    optuna 4.8.0
    skopt 0.10.2
    plotly 6.6.0
    seaborn 0.13.2
    crepes 0.9.0
    mapie 0.9.2
    shap 0.49.1
    scipy 1.17.1


.. GENERATED FROM PYTHON SOURCE LINES 30-33

.. code-block:: Python


    # apply the plotting defaults defined in the local ``utils`` module
    # (presumably matplotlib rcParams, judging by the name -- confirm in utils.py)
    set_rcParams()


.. GENERATED FROM PYTHON SOURCE LINES 34-43

.. code-block:: Python

    # input features used by the model; the target column is "k"
    inputs = [
        'Solution pH', 'Time (m)', 'Anions', 'Ni (At%)', 'HA (mg/L)',
        'loading (g)', 'Pore size (nm)', 'O (At%)',
        'Light intensity (watt)', 'Mo (At%)', 'Dye concentration (mg/L)',
    ]
    data, _ = prepare_data(inputs=inputs, outputs="k")

    # every column except the last one is treated as an input feature
    input_features = data.columns.tolist()[:-1]

    print(input_features)


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    ['Solution pH', 'Time (m)', 'Anions', 'Ni (At%)', 'HA (mg/L)', 'loading (g)', 'Pore size (nm)', 'O (At%)', 'Light intensity (watt)', 'Mo (At%)', 'Dye concentration (mg/L)']


.. GENERATED FROM PYTHON SOURCE LINES 44-48

.. code-block:: Python


    # the last column of ``data`` is the prediction target; the slice keeps
    # it as a one-element list
    output_features = data.columns.tolist()[-1:]
    print(output_features)


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    ['k']


.. GENERATED FROM PYTHON SOURCE LINES 49-57

.. code-block:: Python


    # seeded random split of the inputs and the target into training and
    # test parts
    TrainX, TestX, TrainY, TestY = TrainTestSplit(seed=313).split_by_random(
        data[input_features],
        data[output_features]
    )

    # sanity check of the resulting sizes
    print(TrainX.shape, TestX.shape, TrainY.shape, TestY.shape)


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    (1068, 11) (459, 11) (1068, 1) (459, 1)


.. GENERATED FROM PYTHON SOURCE LINES 58-60

Evaluation with default parameters
-------------------------------------

.. GENERATED FROM PYTHON SOURCE LINES 62-76

.. code-block:: Python

    # baseline: a DecisionTreeRegressor for which no hyperparameters are given
    model = Model(
        model="DecisionTreeRegressor",
        input_features=input_features,
        output_features=output_features,
        verbosity=-1
    )

    # train on the training split
    model.fit(TrainX, TrainY.values)

    # evaluate model performance on the test split, which was not used
    # for training
    print(model.evaluate(
        TestX, TestY,
        metrics=["r2", "r2_score", "rmse"]))


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    {'r2': 0.9422912405029966, 'r2_score': 0.9415415360380074, 'rmse': 0.0017336044254300542}


.. GENERATED FROM PYTHON SOURCE LINES 77-79

setup
----------

.. GENERATED FROM PYTHON SOURCE LINES 79-89

.. code-block:: Python


    ITER = 0  # incremented by objective_fn after every cross validation run
    VAL_SCORES = []  # objective value (1 - cross validation score) of every iteration
    SUGGESTIONS = []  # hyperparameters received by objective_fn at every call
    num_iterations = 100  # number of hyperparameter iterations
    SEP = os.sep  # path separator of the current operating system
    PREFIX = f"hpo_{dateandtime_now()}"  # folder name where to save the results
    algorithm = "tpe"  # passed to HyperOpt as ``algorithm``
    backend = "optuna"  # passed to HyperOpt as ``backend``


.. GENERATED FROM PYTHON SOURCE LINES 90-92

parameter space
-----------------

.. GENERATED FROM PYTHON SOURCE LINES 92-112

.. code-block:: Python


    # search space of the DecisionTreeRegressor hyperparameters; the
    # commented-out entries are further candidates which are currently not tuned
    param_space = [
        Categorical(["best", "random"], name='splitter'),
        Integer(low=2, high=10, name='min_samples_split'),
        Integer(low=1, high=40, name='max_depth'),
        # Integer(low=2, high=10, name="min_samples_leaf"),
        Real(low=0.0, high=0.005, name="min_weight_fraction_leaf"),
        # Categorical(categories=['sqrt', 'log2'], name="max_features"),
        # Integer(low=2, high=10, name="max_leaf_nodes"),
    ]

    # initial point: one value per active entry of param_space, in the same
    # order. Every value must lie inside the bounds declared above.
    x0 = [
        'best',     # splitter
        10,         # min_samples_split
        5,          # max_depth
        # 5,        # min_samples_leaf
        0.001,      # min_weight_fraction_leaf (was 0.1, outside [0.0, 0.005])
        # "sqrt",   # max_features
        # 5,        # max_leaf_nodes
    ]


.. GENERATED FROM PYTHON SOURCE LINES 113-115

objective function
--------------------

.. GENERATED FROM PYTHON SOURCE LINES 115-167

.. code-block:: Python


    def objective_fn(
            return_model: bool = False,
            **suggestions
    ) -> Union[float, Model]:
        """
        The output of this function will be minimized.

        A DecisionTreeRegressor is built from ``suggestions``. By default the
        function returns ``1 - cross validation score`` computed on the
        training data. When ``return_model`` is True, the model is instead
        fitted on the training data, its test performance is printed and the
        fitted model is returned.

        :param return_model: whether to return the trained model or the validation
            score. This will be set to True, after we have optimized the hyperparameters
        :param suggestions: contains values of hyperparameters at each iteration
        :return: the scalar value which we want to minimize. If return_model is True
            then it returns the trained model
        """
        global ITER

        # jsonize is expected to turn the suggested values into plain python
        # types -- confirm in ai4water.utils.utils
        suggestions = jsonize(suggestions)
        # recorded on every call, including the final return_model=True call
        SUGGESTIONS.append(suggestions)

        # build the model
        _model = Model(
            model={"DecisionTreeRegressor": suggestions},
            cross_validator={"KFold": {"n_splits": 10}},
            input_features=input_features,
            output_features=output_features,
            verbosity=-1
        )

        if return_model:
            _model.fit(TrainX.values, TrainY.values,
                      validation_data=(TestX, TestY.values))
            print(_model.evaluate(TestX, TestY,
                                  metrics=["r2", "r2_score", "rmse"]))
            return _model

        # get the cross validation score which we will minimize
        val_score_ = _model.cross_val_score(TrainX.values, TrainY.values)[0]

        # since cross val score is r2_score, we need to subtract it from 1. Because
        # we are interested in increasing r2_score, and HyperOpt algorithm always
        # minimizes the objective function
        val_score = 1 - val_score_

        VAL_SCORES.append(val_score)

        # Best score and its position must come from the same NaN-aware
        # reduction. Plain np.argmin returns the index of the first NaN, which
        # would disagree with the value reported by np.nanmin.
        scores = np.asarray(VAL_SCORES, dtype=float)
        if np.isnan(scores).all():
            # nothing valid so far; np.nanargmin would raise on all-NaN input
            best_score, bst_iter = float("nan"), 0
        else:
            bst_iter = int(np.nanargmin(scores))
            best_score = round(scores[bst_iter].item(), 2)

        ITER += 1

        # NOTE: bst_iter is a 0-based index into VAL_SCORES whereas ITER is 1-based
        print(f"{ITER} {round(val_score, 2)} {round(val_score_, 2)}. Best was {best_score} at {bst_iter}")

        return val_score


.. GENERATED FROM PYTHON SOURCE LINES 168-170

running optimization loop
--------------------------

.. GENERATED FROM PYTHON SOURCE LINES 170-182

.. code-block:: Python


    # configure the optimizer with the search space, the initial point and
    # the objective function defined above
    optimizer = HyperOpt(
        algorithm=algorithm,
        objective_fn=objective_fn,
        param_space=param_space,
        x0=x0,
        num_iterations=num_iterations,
        process_results=False,  # we can turn it True if we want automatic post-processing of results
        opt_path=f"results{SEP}{PREFIX}",
        backend=backend,
    )


.. GENERATED FROM PYTHON SOURCE LINES 183-185

.. code-block:: Python

    # run the optimization loop; objective_fn prints one progress line per iteration
    res = optimizer.fit()


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    1 0.32 0.68. Best was 0.32 at 0
    2 0.29 0.71. Best was 0.29 at 1
    3 0.19 0.81. Best was 0.19 at 2
    4 0.23 0.77. Best was 0.19 at 2
    5 0.38 0.62. Best was 0.19 at 2
    6 0.19 0.81. Best was 0.19 at 2
    7 0.39 0.61. Best was 0.19 at 2
    8 0.31 0.69. Best was 0.19 at 2
    9 0.17 0.83. Best was 0.17 at 8
    10 0.4 0.6. Best was 0.17 at 8
    11 0.28 0.72. Best was 0.17 at 8
    12 0.11 0.89. Best was 0.11 at 11
    13 0.17 0.83. Best was 0.11 at 11
    14 0.22 0.78. Best was 0.11 at 11
    15 0.13 0.87. Best was 0.11 at 11
    16 0.2 0.8. Best was 0.11 at 11
    17 0.22 0.78. Best was 0.11 at 11
    18 0.16 0.84. Best was 0.11 at 11
    19 0.24 0.76. Best was 0.11 at 11
    20 0.24 0.76. Best was 0.11 at 11
    21 0.15 0.85. Best was 0.11 at 11
    22 0.13 0.87. Best was 0.11 at 11
    23 0.16 0.84. Best was 0.11 at 11
    24 0.2 0.8. Best was 0.11 at 11
    25 0.63 0.37. Best was 0.11 at 11
    26 0.14 0.86. Best was 0.11 at 11
    27 0.29 0.71. Best was 0.11 at 11
    28 0.12 0.88. Best was 0.11 at 11
    29 0.11 0.89. Best was 0.11 at 11
    30 0.16 0.84. Best was 0.11 at 11
    31 0.17 0.83. Best was 0.11 at 11
    32 0.13 0.87. Best was 0.11 at 11
    33 0.25 0.75. Best was 0.11 at 11
    34 0.13 0.87. Best was 0.11 at 11
    35 0.18 0.82. Best was 0.11 at 11
    36 0.17 0.83. Best was 0.11 at 11
    37 0.19 0.81. Best was 0.11 at 11
    38 0.19 0.81. Best was 0.11 at 11
    39 0.14 0.86. Best was 0.11 at 11
    40 0.3 0.7. Best was 0.11 at 11
    41 0.18 0.82. Best was 0.11 at 11
    42 0.16 0.84. Best was 0.11 at 11
    43 0.14 0.86. Best was 0.11 at 11
    44 0.11 0.89. Best was 0.11 at 11
    45 0.13 0.87. Best was 0.11 at 11
    46 0.2 0.8. Best was 0.11 at 11
    47 0.29 0.71. Best was 0.11 at 11
    48 0.15 0.85. Best was 0.11 at 11
    49 0.12 0.88. Best was 0.11 at 11
    50 0.18 0.82. Best was 0.11 at 11
    51 0.16 0.84. Best was 0.11 at 11
    52 0.12 0.88. Best was 0.11 at 11
    53 0.11 0.89. Best was 0.11 at 52
    54 0.19 0.81. Best was 0.11 at 52
    55 0.17 0.83. Best was 0.11 at 52
    56 0.13 0.87. Best was 0.11 at 52
    57 0.13 0.87. Best was 0.11 at 52
    58 0.2 0.8. Best was 0.11 at 52
    59 0.21 0.79. Best was 0.11 at 52
    60 0.17 0.83. Best was 0.11 at 52
    61 0.17 0.83. Best was 0.11 at 52
    62 0.14 0.86. Best was 0.11 at 52
    63 0.14 0.86. Best was 0.11 at 52
    64 0.17 0.83. Best was 0.11 at 52
    65 0.12 0.88. Best was 0.11 at 52
    66 0.14 0.86. Best was 0.11 at 52
    67 0.17 0.83. Best was 0.11 at 52
    68 0.23 0.77. Best was 0.11 at 52
    69 0.17 0.83. Best was 0.11 at 52
    70 0.14 0.86. Best was 0.11 at 52
    71 0.28 0.72. Best was 0.11 at 52
    72 0.14 0.86. Best was 0.11 at 52
    73 0.15 0.85. Best was 0.11 at 52
    74 0.13 0.87. Best was 0.11 at 52
    75 0.16 0.84. Best was 0.11 at 52
    76 0.15 0.85. Best was 0.11 at 52
    77 0.14 0.86. Best was 0.11 at 52
    78 0.29 0.71. Best was 0.11 at 52
    79 0.16 0.84. Best was 0.11 at 52
    80 0.16 0.84. Best was 0.11 at 52
    81 0.1 0.9. Best was 0.1 at 80
    82 0.19 0.81. Best was 0.1 at 80
    83 0.17 0.83. Best was 0.1 at 80
    84 0.14 0.86. Best was 0.1 at 80
    85 0.14 0.86. Best was 0.1 at 80
    86 0.11 0.89. Best was 0.1 at 80
    87 0.26 0.74. Best was 0.1 at 80
    88 0.3 0.7. Best was 0.1 at 80
    89 0.14 0.86. Best was 0.1 at 80
    90 0.55 0.45. Best was 0.1 at 80
    91 0.15 0.85. Best was 0.1 at 80
    92 0.13 0.87. Best was 0.1 at 80
    93 0.13 0.87. Best was 0.1 at 80
    94 0.12 0.88. Best was 0.1 at 80
    95 0.15 0.85. Best was 0.1 at 80
    96 0.15 0.85. Best was 0.1 at 80
    97 0.11 0.89. Best was 0.1 at 80
    98 0.13 0.87. Best was 0.1 at 80
    99 0.31 0.69. Best was 0.1 at 80
    100 0.17 0.83. Best was 0.1 at 80


.. GENERATED FROM PYTHON SOURCE LINES 186-189

postprocessing of results
--------------------------
print the best hyperparameters

.. GENERATED FROM PYTHON SOURCE LINES 189-192

.. code-block:: Python


    # best_paras() gives the best hyperparameters as a name -> value dictionary
    print(optimizer.best_paras())


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    {'splitter': 'best', 'min_samples_split': 3, 'max_depth': 32, 'min_weight_fraction_leaf': 0.00027746123140151915}


.. GENERATED FROM PYTHON SOURCE LINES 193-194

convergence plot

.. GENERATED FROM PYTHON SOURCE LINES 194-201

.. code-block:: Python


    optimizer.plot_convergence()
    # adjust the layout before saving, otherwise the saved file misses
    # the adjustment that the displayed figure gets
    plt.tight_layout()
    if SAVE:
        # savefig fails if the target folder does not exist
        os.makedirs("results/figures", exist_ok=True)
        plt.savefig("results/figures/hpo_convergence.png", dpi=600, bbox_inches="tight")
    plt.show()


.. image-sg:: /auto_examples/images/sphx_glr_hpo_001.png
   :alt: Convergence plot
   :srcset: /auto_examples/images/sphx_glr_hpo_001.png, /auto_examples/images/sphx_glr_hpo_001_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. GENERATED FROM PYTHON SOURCE LINES 202-209

.. code-block:: Python


    optimizer.plot_importance(with_optuna=True)
    # adjust the layout before saving, otherwise the saved file misses
    # the adjustment that the displayed figure gets
    plt.tight_layout()
    if SAVE:
        # savefig fails if the target folder does not exist
        os.makedirs("results/figures", exist_ok=True)
        plt.savefig("results/figures/hpo_importance.png", dpi=600, bbox_inches="tight")
    plt.show()


.. image-sg:: /auto_examples/images/sphx_glr_hpo_002.png
   :alt: hpo
   :srcset: /auto_examples/images/sphx_glr_hpo_002.png, /auto_examples/images/sphx_glr_hpo_002_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. GENERATED FROM PYTHON SOURCE LINES 210-216

.. code-block:: Python

    optimizer.plot_parallel_coords()
    # adjust the layout before saving, otherwise the saved file misses
    # the adjustment that the displayed figure gets
    plt.tight_layout()
    if SAVE:
        # savefig fails if the target folder does not exist
        os.makedirs("results/figures", exist_ok=True)
        plt.savefig("results/figures/hpo_parallel_coords.png", dpi=600, bbox_inches="tight")
    plt.show()


.. image-sg:: /auto_examples/images/sphx_glr_hpo_003.png
   :alt: Hyperparameters
   :srcset: /auto_examples/images/sphx_glr_hpo_003.png, /auto_examples/images/sphx_glr_hpo_003_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. GENERATED FROM PYTHON SOURCE LINES 217-219

Evaluation with optimized hyperparameters
------------------------------------------

.. GENERATED FROM PYTHON SOURCE LINES 219-221

.. code-block:: Python


    # rebuild the model with the best hyperparameters; return_model=True makes
    # objective_fn fit it on the training data, print the test metrics and
    # return the fitted model instead of a score
    bst_model = objective_fn(return_model=True, **optimizer.best_paras())


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    {'r2': 0.9420515255866434, 'r2_score': 0.941282018285209, 'rmse': 0.0017374482051161798}


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** (0 minutes 25.526 seconds)


.. _sphx_glr_download_auto_examples_hpo.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: hpo.ipynb <hpo.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: hpo.py <hpo.py>`

    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: hpo.zip <hpo.zip>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_