.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/brouta_feature_selection.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_brouta_feature_selection.py>`
        to download the full example code.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_brouta_feature_selection.py:


============================
3. Feature Selection
============================
Now we know that which model works best for our problem. Now we will
perform feature selection using the best model.

.. GENERATED FROM PYTHON SOURCE LINES 8-45

.. code-block:: Python


    import numpy as np

    np.NaN = np.nan  # for compatibility with older versions of NumPy
    #np.bool = np.bool_  # for compatibility with older versions of NumPy

    import seaborn as sns

    import matplotlib.pyplot as plt

    from easy_mpl import bar_chart

    #####  Monkey-patch SciPy before importing BorutaShap because binom_test was renamed to binomtest in SciPy 1.7.0, and BorutaShap uses the old name.
    import scipy.stats as stats

    # Add the old name if SciPy only has the new one
    if not hasattr(stats, "binom_test") and hasattr(stats, "binomtest"):
        def binom_test(x, n=None, p=0.5, alternative="two-sided"):
            return stats.binomtest(int(x), n=n, p=p, alternative=alternative).pvalue
        stats.binom_test = binom_test

    #####
    from BorutaShap import BorutaShap

    from sklearn.tree import DecisionTreeRegressor
    from sklearn.feature_selection import RFE
    from sklearn.feature_selection import SelectKBest
    from sklearn.feature_selection import f_regression
    from sklearn.feature_selection import SelectFromModel
    from sklearn.feature_selection import VarianceThreshold
    from sklearn.feature_selection import mutual_info_regression
    from sklearn.feature_selection import SequentialFeatureSelector

    from utils import set_rcParams
    from utils import version_info
    from utils import prepare_data, LABEL_MAP, SAVE


.. GENERATED FROM PYTHON SOURCE LINES 46-49

.. code-block:: Python

    for lib, ver in version_info().items():
        print(lib, ver)


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    python 3.12.10 (main, May  6 2025, 10:49:23) [GCC 11.4.0]
    os posix
    ai4water 1.07
    lightgbm 4.6.0
    catboost 1.2.10
    xgboost 3.2.0
    easy_mpl 0.21.5
    SeqMetrics 2.0.0
    numpy 1.26.4
    pandas 2.2.3
    matplotlib 3.10.8
    h5py 3.16.0
    sklearn 1.3.1
    optuna 4.8.0
    skopt 0.10.2
    plotly 6.6.0
    seaborn 0.13.2
    crepes 0.9.0
    mapie 0.9.2
    shap 0.49.1
    scipy 1.17.1


.. GENERATED FROM PYTHON SOURCE LINES 50-65

.. code-block:: Python


    set_rcParams()

    TOP_K = 10

    df, _ = prepare_data(outputs="k")

    df = df.rename(columns=LABEL_MAP)

    feature_names = df.columns.tolist()[0:-1]

    X, y = df.iloc[:, 0:-1], df.iloc[:, -1].values

    print(X.shape, y.shape)


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    (1527, 34) (1527,)


.. GENERATED FROM PYTHON SOURCE LINES 66-68

Information gain
--------------------

.. GENERATED FROM PYTHON SOURCE LINES 68-82

.. code-block:: Python

    importances = mutual_info_regression(
        X, y)

    bar_chart(
        importances,
        feature_names,
        color="teal",
        sort=True,
        show=False,
    )
    plt.tight_layout()
    plt.show()


.. image-sg:: /auto_examples/images/sphx_glr_brouta_feature_selection_001.png
   :alt: brouta feature selection
   :srcset: /auto_examples/images/sphx_glr_brouta_feature_selection_001.png, /auto_examples/images/sphx_glr_brouta_feature_selection_001_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. GENERATED FROM PYTHON SOURCE LINES 83-85

Chi-squared
------------

.. GENERATED FROM PYTHON SOURCE LINES 85-95

.. code-block:: Python


    chi2_features = SelectKBest(f_regression, k=10)
    X_kbest_features = chi2_features.fit_transform(
        X, y)

    chi2_features = np.array(feature_names)[chi2_features.get_support()]
    print(chi2_features)

    chi2_features = chi2_features[0:TOP_K].tolist()


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    ['Band Gap (eV)' 'O (At%)' 'Al (At%)' 'Ni (At%)' 'Volume (L)'
     'HB donor count' 'Solubility (g/L)' 'M.W. (g/mol)' 'pka2' 'Solution pH']


.. GENERATED FROM PYTHON SOURCE LINES 96-98

Variance Threshold
-------------------

.. GENERATED FROM PYTHON SOURCE LINES 98-118

.. code-block:: Python


    v_threshold = VarianceThreshold(threshold=0)
    v_threshold.fit(X)
    v_threshold.get_support()

    bar_chart(
        v_threshold.variances_,
        feature_names,
        color="teal",
        sort=True,
        show=False
    )
    plt.tight_layout()
    plt.show()

    vt_features = {k:v for k,v in zip(v_threshold.variances_, feature_names, )}
    # sort_by_value
    vt_features =  dict(sorted(vt_features.items(), key=lambda item: item[1], reverse=True))
    vt_features = np.array(list(vt_features.values()))[0:TOP_K].tolist()


.. image-sg:: /auto_examples/images/sphx_glr_brouta_feature_selection_002.png
   :alt: brouta feature selection
   :srcset: /auto_examples/images/sphx_glr_brouta_feature_selection_002.png, /auto_examples/images/sphx_glr_brouta_feature_selection_002_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. GENERATED FROM PYTHON SOURCE LINES 119-122

Forward Feature Selection
---------------------------
Starting with empty/minimal feature set and adding features one by one

.. GENERATED FROM PYTHON SOURCE LINES 122-133

.. code-block:: Python


    rgr = DecisionTreeRegressor()
    sfs_forward = SequentialFeatureSelector(
        rgr, n_features_to_select=TOP_K, direction="forward"
    ).fit(X, y)

    ffs_features = np.array(feature_names)[sfs_forward.get_support()]
    print(ffs_features)

    ffs_features = ffs_features.tolist()


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    ['Al (At%)' 'Ni (At%)' 'Pt (At%)' 'Light Int. (W)' 'Light Dist. (cm)'
     'HB acceptor count' 'pka2' 'Initial Conc. (mg/L)' 'Solution pH'
     'HA (mg/L)']


.. GENERATED FROM PYTHON SOURCE LINES 134-139

Backward feature elimination
-----------------------------
Starting with a full set of features and removing one by one
and everytime measuring the decrease in performance. Finally we rank the
features, according to the decrease in performance they cause.

.. GENERATED FROM PYTHON SOURCE LINES 139-149

.. code-block:: Python


    sfs_forward = SequentialFeatureSelector(
        rgr, n_features_to_select=TOP_K, direction="backward"
    ).fit(X, y)

    bfe_features = np.array(feature_names)[sfs_forward.get_support()]
    print(bfe_features)

    bfe_features = bfe_features.tolist()


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    ['Catalyst' 'Al (At%)' 'Pt (At%)' 'Surface Area (m2/g)' 'Light Dist. (cm)'
     'Rxn Time (min)' 'Dyes' 'log Kow' 'HB donor count' 'Initial Conc. (mg/L)']


.. GENERATED FROM PYTHON SOURCE LINES 150-153

Recursive Feeature Elimination
-------------------------------
It is similar to backward feature elemination.

.. GENERATED FROM PYTHON SOURCE LINES 153-158

.. code-block:: Python


    rfe = RFE(DecisionTreeRegressor(), n_features_to_select=TOP_K,
              step=1)
    rfe.fit(X, y)


.. raw:: html

    <div class="output_subarea output_html rendered_html output_result">
    <style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: "▸";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: "▾";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: "";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: "";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id="sk-container-id-1" class="sk-top-container"><div class="sk-text-repr-fallback"><pre>RFE(estimator=DecisionTreeRegressor(), n_features_to_select=10)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class="sk-container" hidden><div class="sk-item sk-dashed-wrapped"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-1" type="checkbox" ><label for="sk-estimator-id-1" class="sk-toggleable__label sk-toggleable__label-arrow">RFE</label><div class="sk-toggleable__content"><pre>RFE(estimator=DecisionTreeRegressor(), n_features_to_select=10)</pre></div></div></div><div class="sk-parallel"><div class="sk-parallel-item"><div class="sk-item"><div class="sk-label-container"><div class="sk-label sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-2" type="checkbox" ><label for="sk-estimator-id-2" class="sk-toggleable__label sk-toggleable__label-arrow">estimator: DecisionTreeRegressor</label><div class="sk-toggleable__content"><pre>DecisionTreeRegressor()</pre></div></div></div><div class="sk-serial"><div class="sk-item"><div class="sk-estimator sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-3" type="checkbox" ><label for="sk-estimator-id-3" class="sk-toggleable__label sk-toggleable__label-arrow">DecisionTreeRegressor</label><div class="sk-toggleable__content"><pre>DecisionTreeRegressor()</pre></div></div></div></div></div></div></div></div></div></div>
    </div>
    <br />
    <br />

.. GENERATED FROM PYTHON SOURCE LINES 159-164

.. code-block:: Python

    rfe_features = np.array(feature_names)[rfe.get_support()]
    print(rfe_features)

    rfe_features = rfe_features[0:TOP_K].tolist()


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    ['O (At%)' 'Al (At%)' 'Pore Vol. (cm3/g)' 'Cat. Loading (g/L)'
     'Light Int. (W)' 'Rxn Time (min)' 'Initial Conc. (mg/L)' 'Solution pH'
     'HA (mg/L)' 'Anions']


.. GENERATED FROM PYTHON SOURCE LINES 165-167

Tree based method
------------------

.. GENERATED FROM PYTHON SOURCE LINES 167-176

.. code-block:: Python


    rgr = DecisionTreeRegressor().fit(X, y)
    model = SelectFromModel(rgr, prefit=True)

    tb_features = np.array(feature_names)[model.get_support()]
    print(tb_features)

    tb_features = tb_features[0:TOP_K].tolist()


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    ['Al (At%)' 'Ni (At%)' 'Cat. Loading (g/L)' 'Light Int. (W)'
     'Rxn Time (min)' 'Initial Conc. (mg/L)' 'Solution pH' 'Anions']


.. GENERATED FROM PYTHON SOURCE LINES 177-192

Boruta shap
-------------
The purpose of Boruta is to find a subset of features from all the given features,
which are relevant for the given task. It creats a copy of a feature which is called
shadow feature. Then the shadow feature is shuffled. The model is trained with the original
feature plus the shuffled shadow feature. After that the feature importance of the original feature
and shadow feature is calcualted using SHAP. If the SHAP importance of a shadow
feature is more than the orignal feature, then it is rejected. The intuition is that,
if a feature is important, then its shuffled version should not have more importnace
than the original feature. Finally, Boruta shap method groups features, either
as confirmed important, or confirmed rejected or tentative features. Since
Boruta involves training the original model again and again, this can be
extremely costly if the model training is time consuming.
For theory see `this article <https://www.jstatsoft.org/article/view/v036i11>`_ and this
`kaggle notebook <https://www.kaggle.com/code/residentmario/automated-feature-selection-with-boruta/notebook>`_ .

.. GENERATED FROM PYTHON SOURCE LINES 192-223

.. code-block:: Python


    class MyBoruta(BorutaShap):
        def box_plot(self, data, X_rotation, X_size, y_scale, figsize):

            if y_scale=='log':
                minimum = data['value'].min()
                if minimum <= 0:
                    data['value'] += abs(minimum) + 0.01

            order = data.groupby(by=["Methods"])["value"].mean().sort_values(ascending=False).index
            my_palette = self.create_mapping_of_features_to_attribute(
                maps= ['#B17BB2', '#EE9E9D', '#00ABAC',  '#B9E6FB'])
                     # 'yellow',   'red',     'green',    'blue'

            # Use a color palette
            plt.figure(figsize=(10, 7))
            ax = sns.boxplot(x=data["Methods"], y=data["value"],
                            order=order, palette=my_palette)

            if y_scale == 'log':ax.set(yscale="log")
            ax.set_xticklabels(ax.get_xticklabels(), rotation=X_rotation, size=14)
            ax.tick_params(labelsize=14)
            ax.grid(visible=True, ls='--', color='lightgrey')
            ax.set_ylabel('Z-Score', fontsize=14)
            ax.set_xlabel('Features', fontsize=14,)
            plt.tight_layout()

            if SAVE:
                plt.savefig("results/figures/boruta_shap.png", dpi=600, bbox_inches="tight")
            return


.. GENERATED FROM PYTHON SOURCE LINES 224-227

.. code-block:: Python


    model = DecisionTreeRegressor()


.. GENERATED FROM PYTHON SOURCE LINES 228-233

.. code-block:: Python


    Feature_Selector = MyBoruta(model=model,
                                  importance_measure='shap',
                                  classification=False)


.. GENERATED FROM PYTHON SOURCE LINES 234-242

We observed that the number of confirmed important and tentative
features remained same after 50 ``n_trials``. At 50 ``n_trials``
the total potential features were 12. Further increasing
the ``n_trials`` only moved the features from 'tentative' category
to 'confirmed important' until 400. For computational constraints on
readthedocs, we are setting ``n_trials`` to 100.
`z_score` on y-axis is a measure of importance and therefore, boxplots
display the distribution of importance.

.. GENERATED FROM PYTHON SOURCE LINES 242-252

.. code-block:: Python


    Feature_Selector.fit(
        X=X, y=y,
         n_trials=100,
         sample=False,
         train_or_test = 'test',
         normalize=True,
        verbose=True
    )


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

      0%|          | 0/100 [00:00<?, ?it/s]      1%|          | 1/100 [00:00<01:08,  1.45it/s]      2%|▏         | 2/100 [00:01<01:05,  1.50it/s]      3%|▎         | 3/100 [00:01<01:01,  1.57it/s]      4%|▍         | 4/100 [00:02<01:02,  1.53it/s]      5%|▌         | 5/100 [00:03<01:00,  1.57it/s]      6%|▌         | 6/100 [00:03<01:00,  1.56it/s]      7%|▋         | 7/100 [00:04<01:00,  1.53it/s]      8%|▊         | 8/100 [00:05<00:58,  1.57it/s]      9%|▉         | 9/100 [00:05<00:59,  1.52it/s]     10%|█         | 10/100 [00:06<00:58,  1.54it/s]     11%|█         | 11/100 [00:07<00:57,  1.55it/s]     12%|█▏        | 12/100 [00:07<00:55,  1.59it/s]     13%|█▎        | 13/100 [00:08<00:55,  1.57it/s]     14%|█▍        | 14/100 [00:08<00:52,  1.64it/s]     15%|█▌        | 15/100 [00:09<00:51,  1.66it/s]     16%|█▌        | 16/100 [00:10<00:49,  1.71it/s]     17%|█▋        | 17/100 [00:10<00:48,  1.72it/s]     18%|█▊        | 18/100 [00:11<00:47,  1.73it/s]     19%|█▉        | 19/100 [00:11<00:44,  1.81it/s]     20%|██        | 20/100 [00:12<00:44,  1.78it/s]     21%|██        | 21/100 [00:12<00:45,  1.74it/s]     22%|██▏       | 22/100 [00:13<00:44,  1.77it/s]     23%|██▎       | 23/100 [00:13<00:43,  1.77it/s]     24%|██▍       | 24/100 [00:14<00:42,  1.80it/s]     25%|██▌       | 25/100 [00:15<00:41,  1.81it/s]     26%|██▌       | 26/100 [00:15<00:41,  1.78it/s]     27%|██▋       | 27/100 [00:16<00:42,  1.72it/s]     28%|██▊       | 28/100 [00:16<00:40,  1.78it/s]     29%|██▉       | 29/100 [00:17<00:38,  1.85it/s]     30%|███       | 30/100 [00:17<00:37,  1.87it/s]     31%|███       | 31/100 [00:18<00:36,  1.91it/s]     32%|███▏      | 32/100 [00:18<00:35,  1.91it/s]     33%|███▎      | 33/100 [00:19<00:35,  1.91it/s]     34%|███▍      | 34/100 [00:19<00:34,  1.89it/s]     35%|███▌      | 35/100 [00:20<00:33,  1.92it/s]     36%|███▌      | 36/100 [00:20<00:32,  1.99it/s]     37%|███▋      | 37/100 [00:21<00:31,  2.01it/s]     38%|███▊      | 38/100 [00:21<00:31,  1.97it/s]     39%|███▉      | 39/100 [00:22<00:30,  2.00it/s]     40%|████      | 40/100 [00:22<00:31,  1.90it/s]     41%|████      | 41/100 [00:23<00:30,  1.91it/s]     42%|████▏     | 42/100 [00:23<00:30,  1.93it/s]     43%|████▎     | 43/100 [00:24<00:28,  1.97it/s]     44%|████▍     | 44/100 [00:24<00:28,  1.95it/s]     45%|████▌     | 45/100 [00:25<00:27,  1.98it/s]     46%|████▌     | 46/100 [00:25<00:27,  1.94it/s]     47%|████▋     | 47/100 [00:26<00:27,  1.93it/s]     48%|████▊     | 48/100 [00:27<00:26,  1.96it/s]     49%|████▉     | 49/100 [00:27<00:25,  1.98it/s]     50%|█████     | 50/100 [00:28<00:25,  1.98it/s]     51%|█████     | 51/100 [00:28<00:25,  1.94it/s]     52%|█████▏    | 52/100 [00:29<00:24,  1.95it/s]     53%|█████▎    | 53/100 [00:29<00:23,  1.96it/s]     54%|█████▍    | 54/100 [00:30<00:23,  1.95it/s]     55%|█████▌    | 55/100 [00:30<00:22,  1.96it/s]     56%|█████▌    | 56/100 [00:31<00:22,  1.96it/s]     57%|█████▋    | 57/100 [00:31<00:22,  1.91it/s]     58%|█████▊    | 58/100 [00:32<00:21,  1.94it/s]     59%|█████▉    | 59/100 [00:32<00:20,  1.97it/s]     60%|██████    | 60/100 [00:33<00:20,  1.96it/s]     61%|██████    | 61/100 [00:33<00:19,  1.97it/s]     62%|██████▏   | 62/100 [00:34<00:19,  1.90it/s]     63%|██████▎   | 63/100 [00:34<00:19,  1.92it/s]     64%|██████▍   | 64/100 [00:35<00:18,  1.92it/s]     65%|██████▌   | 65/100 [00:35<00:18,  1.88it/s]     66%|██████▌   | 66/100 [00:36<00:18,  1.87it/s]     67%|██████▋   | 67/100 [00:36<00:17,  1.93it/s]     68%|██████▊   | 68/100 [00:37<00:16,  1.94it/s]     69%|██████▉   | 69/100 [00:37<00:16,  1.91it/s]     70%|███████   | 70/100 [00:38<00:15,  1.91it/s]     71%|███████   | 71/100 [00:38<00:14,  1.96it/s]     72%|███████▏  | 72/100 [00:39<00:14,  1.94it/s]     73%|███████▎  | 73/100 [00:39<00:13,  1.93it/s]     74%|███████▍  | 74/100 [00:40<00:13,  1.95it/s]     75%|███████▌  | 75/100 [00:40<00:13,  1.90it/s]     76%|███████▌  | 76/100 [00:41<00:12,  1.88it/s]     77%|███████▋  | 77/100 [00:42<00:12,  1.90it/s]     78%|███████▊  | 78/100 [00:42<00:11,  1.90it/s]     79%|███████▉  | 79/100 [00:43<00:11,  1.90it/s]     80%|████████  | 80/100 [00:43<00:10,  1.89it/s]     81%|████████  | 81/100 [00:44<00:09,  1.92it/s]     82%|████████▏ | 82/100 [00:44<00:09,  1.97it/s]     83%|████████▎ | 83/100 [00:45<00:08,  2.00it/s]     84%|████████▍ | 84/100 [00:45<00:07,  2.08it/s]     85%|████████▌ | 85/100 [00:45<00:07,  2.10it/s]     86%|████████▌ | 86/100 [00:46<00:06,  2.11it/s]     87%|████████▋ | 87/100 [00:46<00:06,  2.07it/s]     88%|████████▊ | 88/100 [00:47<00:05,  2.05it/s]     89%|████████▉ | 89/100 [00:47<00:05,  2.05it/s]     90%|█████████ | 90/100 [00:48<00:04,  2.12it/s]     91%|█████████ | 91/100 [00:48<00:04,  2.15it/s]     92%|█████████▏| 92/100 [00:49<00:03,  2.11it/s]     93%|█████████▎| 93/100 [00:49<00:03,  2.16it/s]     94%|█████████▍| 94/100 [00:50<00:02,  2.13it/s]     95%|█████████▌| 95/100 [00:50<00:02,  2.12it/s]     96%|█████████▌| 96/100 [00:51<00:01,  2.10it/s]     97%|█████████▋| 97/100 [00:51<00:01,  2.08it/s]     98%|█████████▊| 98/100 [00:52<00:00,  2.06it/s]     99%|█████████▉| 99/100 [00:52<00:00,  2.09it/s]    100%|██████████| 100/100 [00:53<00:00,  2.08it/s]    100%|██████████| 100/100 [00:53<00:00,  1.88it/s]
    9 attributes confirmed important: ['HA (mg/L)', 'Anions', 'Rxn Time (min)', 'Light Int. (W)', 'Pore Size (nm)', 'Initial Conc. (mg/L)', 'Solution pH', 'Cat. Loading (g/L)', 'O (At%)']
    23 attributes confirmed unimportant: ['M.W. (g/mol)', 'Dyes', 'Catalyst', 'Synth. Time (min)', 'Pt (At%)', 'Solubility (g/L)', 'Band Gap (eV)', 'C (At%)', 'Volume (L)', 'Pd (At%)', 'HB acceptor count', 'Al (At%)', 'HB donor count', 'Fe (At%)', 'Surface Area (m2/g)', 'log Kow', 'pka2', 'S (At%)', 'Light Dist. (cm)', 'Pore Vol. (cm3/g)', 'Bi (At%)', 'pka1', 'Ag (At%)']
    2 tentative attributes remains: ['Ni (At%)', 'Mo (At%)']


.. GENERATED FROM PYTHON SOURCE LINES 253-256

Boxplot of features. Features with grass green color
are considered as confirmed important. The orange color represents
confirmed rejected/unimportant.

.. GENERATED FROM PYTHON SOURCE LINES 256-258

.. code-block:: Python

    Feature_Selector.plot()


.. image-sg:: /auto_examples/images/sphx_glr_brouta_feature_selection_003.png
   :alt: brouta feature selection
   :srcset: /auto_examples/images/sphx_glr_brouta_feature_selection_003.png, /auto_examples/images/sphx_glr_brouta_feature_selection_003_2_00x.png 2.00x
   :class: sphx-glr-single-img


.. GENERATED FROM PYTHON SOURCE LINES 259-261

.. code-block:: Python

    if SAVE:
        Feature_Selector.results_to_csv('results/Boruta_results.csv')


.. GENERATED FROM PYTHON SOURCE LINES 262-263

get the names selected features

.. GENERATED FROM PYTHON SOURCE LINES 263-267

.. code-block:: Python

    print(Feature_Selector.Subset().columns)

    br_features = Feature_Selector.Subset().columns[0:TOP_K].tolist()


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    Index(['HA (mg/L)', 'Anions', 'Rxn Time (min)', 'Light Int. (W)',
           'Pore Size (nm)', 'Initial Conc. (mg/L)', 'Solution pH',
           'Cat. Loading (g/L)', 'O (At%)'],
          dtype='object')


.. GENERATED FROM PYTHON SOURCE LINES 268-269

printing the common features among all methods

.. GENERATED FROM PYTHON SOURCE LINES 269-279

.. code-block:: Python


    mi_features = {k:v for k,v in zip(feature_names, importances)}
    # sort_by_value
    mi_features =  dict(sorted(mi_features.items(), key=lambda item: item[1], reverse=True))
    mi_features = np.array(list(mi_features.keys()))[0:TOP_K].tolist()

    print(set(
        br_features + mi_features + chi2_features + vt_features +\
        ffs_features + bfe_features + rfe_features
    ))


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    {'Dyes', 'Catalyst', 'Synth. Time (min)', 'Light Int. (W)', 'Pt (At%)', 'Pore Size (nm)', 'Solubility (g/L)', 'Initial Conc. (mg/L)', 'Solution pH', 'Band Gap (eV)', 'Volume (L)', 'HB acceptor count', 'Ni (At%)', 'pka1', 'Al (At%)', 'HB donor count', 'O (At%)', 'Fe (At%)', 'Surface Area (m2/g)', 'log Kow', 'pka2', 'Anions', 'S (At%)', 'Cat. Loading (g/L)', 'Light Dist. (cm)', 'HA (mg/L)', 'Pore Vol. (cm3/g)', 'Rxn Time (min)', 'M.W. (g/mol)'}


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** (1 minutes 4.752 seconds)


.. _sphx_glr_download_auto_examples_brouta_feature_selection.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: brouta_feature_selection.ipynb <brouta_feature_selection.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: brouta_feature_selection.py <brouta_feature_selection.py>`

    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: brouta_feature_selection.zip <brouta_feature_selection.zip>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_