Note

Go to the end to download the full example code.

7. Conformal Analysis

Conformal analysis is a distribution-free uncertainty quantification method. By distribution-free, we mean that the method does not make any assumptions about the underlying distribution of the errors/residuals. Its purpose is to test the robustness of the trained machine learning method. The standard procedure is to divide the data into three sets (training, calibration and test sets). The model is trained on the training data. The calibration set is used to select the heuristic (rule, score, strategy) and then this heuristic is applied using the test set. The final robustness (uncertainty) is calculated on the test set, which is not shown to the model at any stage before this.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from lightgbm import LGBMRegressor

from crepes import ConformalRegressor
#from crepes.fillings import sigma_knn, binning
from crepes.extras import binning

from ai4water.utils.utils import TrainTestSplit

from easy_mpl import plot, bar_chart
from easy_mpl.utils import create_subplots

from sklearn.tree import DecisionTreeRegressor

from mapie.subsample import Subsample
from mapie.metrics import regression_coverage_score
from mapie.quantile_regression import MapieQuantileRegressor
from mapie.regression import MapieRegressor

from utils import SAVE, version_info
from utils import prepare_data, set_rcParams, plot_ci

# Apply the project's matplotlib settings before any figure is created.
set_rcParams()

# Print every (library, version) pair reported by ``version_info``.
for lib_and_ver in version_info().items():
    print(*lib_and_ver)

python 3.12.10 (main, May  6 2025, 10:49:23) [GCC 11.4.0]
os posix
ai4water 1.07
lightgbm 4.6.0
catboost 1.2.10
xgboost 3.2.0
easy_mpl 0.21.5
SeqMetrics 2.0.0
numpy 1.26.4
pandas 2.2.3
matplotlib 3.10.8
h5py 3.16.0
sklearn 1.3.1
optuna 4.8.0
skopt 0.10.2
plotly 6.6.0
seaborn 0.13.2
crepes 0.9.0
mapie 0.9.2
shap 0.49.1
scipy 1.17.1

# Display labels keyed by strategy identifier.
# NOTE(review): not referenced in the visible part of this script — confirm
# whether it is still needed.
LABELS = {
    "jackknife_plus": "Jackknife +",
    "cv_plus": "CV +"
}

Use the inputs selected by Boruta Shap method

# Input feature names, one per line so that changes diff cleanly.
inputs = [
    'Solution pH',
    'Time (m)',
    'Anions',
    'Ni (At%)',
    'HA (mg/L)',
    'loading (g)',
    'Pore size (nm)',
    'O (At%)',
    'Light intensity (watt)',
    'Mo (At%)',
    'Dye concentration (mg/L)',
]
# Only the first value returned by ``prepare_data`` is used here.
data, _ = prepare_data(inputs=inputs, outputs="k")

# Every column except the last is a model input; the last column is the target.
input_features = data.columns.tolist()[0:-1]
len(input_features)

output_features = data.columns.tolist()[-1:]

# Flatten the single target column into a 1-D array.
y = data[output_features].values.reshape(-1,)
print(y.shape)

# Min-max normalize the target to [0, 1]. The extremes are computed once and
# the scaling is a single vectorized expression, instead of a Python loop that
# re-evaluated y.min() and y.max() for every element (accidental O(n^2)).
y_min, y_max = y.min(), y.max()
y = (y - y_min) / (y_max - y_min)
print(y.shape, y.min(), y.max())

(1527,)
(1527,) 0.0 1.0

# Random hold-out split with a fixed seed: everything that is not test data
# stays in TrainX / TrainY.
outer_splitter = TrainTestSplit(seed=313)
TrainX, X_test, TrainY, y_test = outer_splitter.split_by_random(
    data[input_features], y)
print(*(part.shape for part in (TrainX, TrainY, X_test, y_test)))

(1068, 11) (1068,) (459, 11) (459,)

Now split the training data into proper training and calibration sets.

# Second random split (same fixed seed) applied to the training portion only:
# proper training set versus calibration set.
inner_splitter = TrainTestSplit(seed=313)
X_prop_train, X_cal, y_prop_train, y_cal = inner_splitter.split_by_random(
    TrainX, TrainY)
print(*(part.shape for part in (X_prop_train, y_prop_train, X_cal, y_cal)))

(747, 11) (747,) (321, 11) (321,)

First we train a model

# Point predictor, fitted on the proper training set only.
model = DecisionTreeRegressor(random_state=313)
model.fit(X_prop_train, y_prop_train)

# Point predictions for the test set, and residuals (true - predicted) on the
# calibration set. These two ingredients are what the conformal regressors
# below consume to build prediction intervals for the test set.
y_hat_test = model.predict(X_test)
y_hat_cal = model.predict(X_cal)
residuals_cal = y_cal - y_hat_cal

# Lower / upper interval bounds, keyed by conformal-regressor name.
lowers, uppers = {}, {}

Standard conformal regressors

# Conformal regressor for the standard (non-normalized, non-Mondrian) case.
cr_std = ConformalRegressor()

We will use the residuals from the calibration set to fit the conformal regressor.

# Calibrate using only the calibration-set residuals (no sigmas, no bins).
cr_std.fit(residuals=residuals_cal)

ConformalRegressor(fitted=True, normalized=False, mondrian=False)

We may now obtain prediction intervals from the point predictions for the test set; here using a confidence level of 95%.

# Target confidence level for the prediction intervals.
coverage = 0.95
intervals_std = cr_std.predict_int(y_hat=y_hat_test, confidence=coverage)
print(intervals_std.shape)
# Column 0 is stored as the lower bound, column 1 as the upper bound.
lowers["Standard"], uppers["Standard"] = intervals_std[:, 0], intervals_std[:, 1]

(459, 2)

Normalized conformal regressors

# ``DifficultyEstimator`` is used here in place of the ``sigma_knn`` helper
# that the commented-out import at the top of the file refers to.
from crepes.extras import DifficultyEstimator

# Fit the k-NN difficulty estimator on the PROPER TRAINING set, not on the
# calibration set. If it is fitted on the calibration objects and then applied
# to those same objects, every calibration point is its own nearest neighbour
# while test points are not, so calibration and test difficulty estimates are
# no longer comparable. The training targets are used because the residuals of
# the tree on its own training data are not informative.
de_knn = DifficultyEstimator()
de_knn.fit(X=X_prop_train, y=y_prop_train)

# Difficulty estimates (sigmas) for the calibration objects.
sigmas_cal_knn = de_knn.apply(X_cal)

cr_norm_knn = ConformalRegressor()

# Supplying sigmas makes this a normalized conformal regressor.
cr_norm_knn.fit(residuals=residuals_cal, sigmas=sigmas_cal_knn)

ConformalRegressor(fitted=True, normalized=True, mondrian=False)

# Difficulty estimates for the test objects, from the same estimator that
# produced the calibration sigmas.
sigmas_test_knn = de_knn.apply(X_test)

# The confidence level is passed explicitly so that all conformal regressors
# in this script follow the single ``coverage`` setting instead of relying on
# the library default.
intervals_norm_knn = cr_norm_knn.predict_int(
    y_hat=y_hat_test,
    sigmas=sigmas_test_knn,
    confidence=coverage,
)

lowers["Normalized"] = intervals_norm_knn[:, 0]
uppers["Normalized"] = intervals_norm_knn[:, 1]

Mondrian conformal regressors

# Bin the calibration difficulty estimates into 10 categories. The returned
# thresholds are kept so the test objects can be binned the same way.
bins_cal, bin_thresholds = binning(values=sigmas_cal_knn, bins=10)

cr_mond = ConformalRegressor()

# Supplying bins makes this a Mondrian conformal regressor.
cr_mond.fit(residuals=residuals_cal, bins=bins_cal)

ConformalRegressor(fitted=True, normalized=False, mondrian=True)

# Assign test objects to categories using the thresholds obtained from the
# calibration set.
bins_test = binning(values=sigmas_test_knn, bins=bin_thresholds)

# Confidence is passed explicitly so it follows the shared ``coverage`` value.
intervals_mond = cr_mond.predict_int(
    y_hat=y_hat_test, bins=bins_test, confidence=coverage)

lowers["Mondrian"] = intervals_mond[:, 0]
uppers["Mondrian"] = intervals_mond[:, 1]

prediction_intervals = {
    "Std CR":intervals_std,
    "Norm CR knn":intervals_norm_knn,
    "Mond CR":intervals_mond,
}

# Empirical coverage and interval-width statistics, one entry per regressor.
coverages = []
mean_sizes = []
median_sizes = []

y_true = np.asarray(y_test)
for intervals in prediction_intervals.values():
    widths = intervals[:, 1] - intervals[:, 0]
    # Fraction of test targets lying inside [lower, upper] (bounds inclusive),
    # computed with a boolean mask rather than a per-element Python loop.
    inside = (y_true >= intervals[:, 0]) & (y_true <= intervals[:, 1])
    coverages.append(inside.mean())
    mean_sizes.append(widths.mean())
    median_sizes.append(np.median(widths))

pred_int_df = pd.DataFrame({"Coverage":coverages,
                            "Mean size":mean_sizes,
                            "Median size":median_sizes},
                           index=list(prediction_intervals))

# Extra row holding the column-wise average over the three regressors.
pred_int_df.loc["Mean"] = [pred_int_df["Coverage"].mean(),
                           pred_int_df["Mean size"].mean(),
                           pred_int_df["Median size"].mean()]

print(pred_int_df)

             Coverage  Mean size  Median size
Std CR       0.928105   0.202552     0.202552
Norm CR knn  0.930283   0.151007     0.103999
Mond CR      0.971678   0.337533     0.112536
Mean         0.943355   0.230364     0.139696

# Width (upper - lower) of every test interval, per conformal regressor.
interval_sizes = {
    name: intervals[:, 1] - intervals[:, 0]
    for name, intervals in prediction_intervals.items()
}

plt.figure(figsize=(8,8))
plt.ylabel("CDF")
plt.xlabel("Interval sizes")

# Colour palette; also reused by the later CDF plots in this script.
colors = ["b","r","g","y","k","m","c","orange", "teal"]

for idx, (name, sizes) in enumerate(interval_sizes.items()):
    # The standard regressor is drawn dotted, the others solid.
    style = "dotted" if "Std" in name else "solid"
    # Empirical CDF: the k-th smallest width is plotted at height k / n.
    # The original built these heights with a comprehension whose variable
    # shadowed the colour index; separate names avoid that trap.
    n_sizes = len(sizes)
    cdf = np.arange(1, n_sizes + 1) / n_sizes
    plt.plot(np.sort(sizes), cdf,
             linestyle=style, c=colors[idx], label=name)
plt.grid(visible=True, ls='--', color='lightgrey')
plt.legend()
plt.show()

# One panel per conformal regressor stored in ``lowers``, sharing the x-axis.
f, axes = create_subplots(3, sharex="all")

for idx, (strategy, ax) in enumerate(zip(lowers, axes.flat)):
    lower_bound = lowers[strategy].reshape(-1,)
    upper_bound = uppers[strategy].reshape(-1,)

    plot_ci(
        prediction=y_hat_test,
        lower=lower_bound,
        upper=upper_bound,
        title=strategy,
        coverage=0.95,
        num_points=70,
        axes=ax,
        # NOTE(review): the keyword is spelled "legned"; presumably this is
        # the name plot_ci expects — confirm against utils.plot_ci.
        # True only from the sixth panel onwards.
        legned=idx >= 5,
    )

plt.show()

Comparison of Conformal Analysis methods in MAPIE

# Base learner for conformalized quantile regression. ``alpha`` here is the
# LightGBM quantile level of this stand-alone estimator, not a confidence
# level; the interval level is the ``alpha`` given to MapieQuantileRegressor
# below.
rgr_quant = LGBMRegressor(random_state=313,
                    alpha=0.05,
                    objective="quantile")

model_quant = rgr_quant.fit(X_prop_train, y_prop_train)

# Keyword arguments for the MAPIE regressor constructor, keyed by the display
# name of each strategy. cv=-1 selects leave-one-out (jackknife).
STRATEGIES = {
    "Jackknife": dict(method="base", cv=-1),
    "Jackknife +": dict(method="plus", cv=-1),
    "Jackknife minmx": dict(method="minmax", cv=-1),
    "Cv": dict(method="base", cv=10),
    "CV +": dict(method="plus", cv=10),
    "CV minmax": dict(method="minmax", cv=10),
    "Jackknife + ab": dict(method="plus", cv=Subsample(n_resamplings=50)),
    "Jackknife_minmx ab": dict(
        method="minmax", cv=Subsample(n_resamplings=50)
    ),
    # For the quantile regressor the miscoverage level must be given to the
    # constructor; without it the class default is used instead of the 5 %
    # that every other strategy is evaluated at.
    "Quantile": dict(
        cv="split", alpha=0.05
    )
}

# Point predictions and prediction intervals, keyed by strategy name.
y_pred, y_pis = {}, {}
for strategy, params in STRATEGIES.items():
    print(f"running {strategy}")
    if strategy == "Quantile":
        mapie = MapieQuantileRegressor(model_quant, **params)
        # Train on the proper training set only. TrainX contains X_cal, so
        # fitting on TrainX would let the model see the very points that are
        # used for calibration and make the intervals over-optimistic.
        mapie.fit(X_prop_train, y_prop_train, X_calib=X_cal, y_calib=y_cal,
                  random_state=313)
        # alpha is fixed at construction time for this class, so it is not
        # passed again here.
        y_pred[strategy], y_pis[strategy] = mapie.predict(X_test)
    else:
        # Resampling-based strategies build their own folds from the whole
        # training portion.
        mapie = MapieRegressor(model, verbose=1,
                               n_jobs=4, **params)
        mapie.fit(TrainX, TrainY)
        y_pred[strategy], y_pis[strategy] = mapie.predict(X_test, alpha=0.05)

[LightGBM] [Warning] Found whitespace in feature_names, replace with underlines
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
running Jackknife
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  56 tasks      | elapsed:    4.9s
[Parallel(n_jobs=4)]: Done 1068 out of 1068 | elapsed:    8.4s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 1068 out of 1068 | elapsed:    2.1s finished
running Jackknife +
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    1.1s
[Parallel(n_jobs=4)]: Done 1068 out of 1068 | elapsed:    3.7s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 1068 out of 1068 | elapsed:    2.0s finished
running Jackknife minmx
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    1.2s
[Parallel(n_jobs=4)]: Done 1068 out of 1068 | elapsed:    3.8s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done 1068 out of 1068 | elapsed:    2.0s finished
running Cv
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  10 out of  10 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  10 out of  10 | elapsed:    0.0s finished
running CV +
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  10 out of  10 | elapsed:    0.0s finished
running CV minmax
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  10 out of  10 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  10 out of  10 | elapsed:    0.0s finished
running Jackknife + ab
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  50 out of  50 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  50 out of  50 | elapsed:    0.1s finished
running Jackknife_minmx ab
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  50 out of  50 | elapsed:    0.3s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  50 out of  50 | elapsed:    0.1s finished
running Quantile
[LightGBM] [Warning] Found whitespace in feature_names, replace with underlines
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] Found whitespace in feature_names, replace with underlines
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] Found whitespace in feature_names, replace with underlines
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
INFO:root:The predictions are ill-sorted.
INFO:root:The predictions are ill-sorted.

# One panel per MAPIE strategy, sharing the x-axis.
f, axes = create_subplots(len(STRATEGIES), sharex="all", figsize=(9, 7))

for idx, (strategy, ax) in enumerate(zip(STRATEGIES, axes.flat)):
    bounds = y_pis[strategy]

    plot_ci(
        prediction=y_pred[strategy],
        lower=bounds[:, 0].reshape(-1,),
        upper=bounds[:, 1].reshape(-1,),
        title=strategy,
        coverage=0.95,
        num_points=70,
        axes=ax,
        # NOTE(review): the keyword is spelled "legned"; presumably this is
        # the name plot_ci expects — confirm against utils.plot_ci.
        # True only from the ninth panel onwards.
        legned=idx >= 8,
    )

plt.show()

Jackknife, Jackknife +, Jackknife minmx, Cv, CV +, CV minmax, Jackknife + ab, Jackknife_minmx ab, Quantile

# Width of the prediction interval (upper - lower) for every sample,
# stored per strategy and overlaid on a single line plot.
interval_sizes = {}

for strategy in STRATEGIES:
    bounds = y_pis[strategy]
    width = bounds[:, 1].reshape(-1,) - bounds[:, 0].reshape(-1,)
    interval_sizes[strategy] = width
    plot(
        width,
        show=False,  # keep drawing on the same axes until all strategies are added
        label=strategy,
        ax_kws=dict(ylabel="Prediction Interval Width"),
    )
plt.show()

# Empirical CDF of the prediction-interval widths, one curve per strategy.
plt.figure(figsize=(8,8))
plt.ylabel("CDF")
plt.xlabel("Interval sizes")

for i, name in enumerate(interval_sizes):
    # Strategy names are capitalised ("Jackknife ..."), so the match has to be
    # case-insensitive and correctly spelled; the former test for "jacknife"
    # could never succeed and every curve was drawn solid.
    style = "dotted" if "jackknife" in name.lower() else "solid"
    widths = np.sort(interval_sizes[name])
    # the k-th smallest width sits at cumulative probability k / n
    cum_prob = np.arange(1, len(widths) + 1) / len(widths)
    plt.plot(widths, cum_prob, linestyle=style, c=colors[i], label=name)
plt.grid(visible=True, ls='--', color='lightgrey')
plt.legend()
plt.show()

# Same interval-width line plot as before, leaving out the "Quantile" strategy.
for strategy in STRATEGIES:
    if strategy == "Quantile":
        continue
    bounds = y_pis[strategy]
    width = bounds[:, 1].reshape(-1,) - bounds[:, 0].reshape(-1,)
    plot(
        width,
        show=False,  # accumulate all curves on one axes before showing
        label=strategy,
        ax_kws=dict(ylabel="Prediction Interval Width"),
    )
plt.show()

# Empirical CDF of the prediction-interval widths, without "Quantile".
plt.figure(figsize=(8,8))
plt.ylabel("CDF")
plt.xlabel("Interval sizes")

# enumerate over every strategy (including the skipped one) so that each
# strategy keeps the same colour index it has in the other figures
for i, strategy in enumerate(interval_sizes):
    if strategy == "Quantile":
        continue
    # Strategy names are capitalised ("Jackknife ..."), so the match has to be
    # case-insensitive and correctly spelled; the former test for "jacknife"
    # could never succeed and every curve was drawn solid.
    style = "dotted" if "jackknife" in strategy.lower() else "solid"
    widths = np.sort(interval_sizes[strategy])
    # the k-th smallest width sits at cumulative probability k / n
    cum_prob = np.arange(1, len(widths) + 1) / len(widths)
    plt.plot(widths, cum_prob, linestyle=style, c=colors[i], label=strategy)
plt.grid(visible=True, ls='--', color='lightgrey')
plt.legend()
plt.show()

# Per-strategy summary: coverage score against y_test and mean interval width.
rows = []
for strategy in STRATEGIES:
    lower = y_pis[strategy][:, 0, 0]
    upper = y_pis[strategy][:, 1, 0]
    rows.append([
        regression_coverage_score(y_test, lower, upper),
        (upper - lower).mean(),
    ])

coverage = pd.DataFrame(
    rows,
    index=STRATEGIES,
    columns=["Coverage", "Width average"],
).round(2)

print(coverage.head(10))

                    Coverage  Width average
Jackknife               0.95           0.13
Jackknife +             0.95           0.13
Jackknife minmx         0.97           0.15
Cv                      0.96           0.15
CV +                    0.97           0.16
CV minmax               0.97           0.15
Jackknife + ab          0.93           0.11
Jackknife_minmx ab      0.96           0.13
Quantile                0.84           0.19

# Side-by-side bar charts of the two summary columns, sharing the y-axis.
fig, (ax, ax2) = plt.subplots(1, 2, sharey="all")

# (column position in `coverage`, target axes, bar colour, x-axis label)
panels = (
    (0, ax, "#005066", "Coverage"),
    (1, ax2, "#B3331D", "Average Width"),
)
for column, axis, colour, xlabel in panels:
    bar_chart(
        coverage.iloc[:, column],
        ax=axis,
        color=colour,
        ax_kws=dict(xlabel=xlabel),
        show=False,  # defer display until both panels are drawn
    )

plt.tight_layout()
plt.show()

# Empirical CDF of the prediction-interval widths for a subset of strategies;
# the figure is optionally written to disk.
f, ax = plt.subplots(figsize=(8,6))
ax.set_ylabel("CDF")
ax.set_xlabel("Interval sizes")

excluded = {"Quantile", "Jackknife_minmx ab",
            "Jackknife minmx", "Jackknife + ab", "CV minmax"}

# enumerate over every strategy (including skipped ones) so that each
# strategy keeps the same colour index it has in the other figures
for i, strategy in enumerate(interval_sizes):
    if strategy in excluded:
        continue
    # Strategy names are capitalised ("Jackknife ..."), so the match has to be
    # case-insensitive and correctly spelled; the former test for "jacknife"
    # could never succeed and every curve was drawn solid.
    style = "dotted" if "jackknife" in strategy.lower() else "solid"
    widths = np.sort(interval_sizes[strategy])
    # the k-th smallest width sits at cumulative probability k / n
    cum_prob = np.arange(1, len(widths) + 1) / len(widths)
    ax.plot(widths, cum_prob, linestyle=style, c=colors[i], label=strategy)

# keep the automatic lower x-limit but clip the upper one at 0.5
left, _ = ax.get_xlim()
ax.set_xlim([left, 0.5])
ax.grid(visible=True, ls='--', color='lightgrey')
ax.legend()
if SAVE:
    # NOTE(review): this interval-size CDF is saved as "conformal_ci.png" while
    # the plot_ci figure further down is saved as "conformal_interval_size.png";
    # the two names look swapped -- confirm before renaming.
    plt.savefig("results/figures/conformal_ci.png", dpi=600, bbox_inches="tight")
plt.show()

# Confidence-interval panels for four selected strategies; the figure is
# optionally written to disk.
strategies = ["Jackknife", "Jackknife +", "Cv", "CV +"]
f, axes = create_subplots(len(strategies), sharex="all", figsize=(7, 6))

for idx, (strategy, ax) in enumerate(zip(strategies, axes.flat)):
    bounds = y_pis[strategy]
    plot_ci(
        prediction=y_pred[strategy],
        # column 0 holds the lower bound, column 1 the upper bound
        lower=bounds[:, 0].reshape(-1,),
        upper=bounds[:, 1].reshape(-1,),
        title=strategy,
        coverage=0.95,
        num_points=70,
        axes=ax,
        # with only four panels idx never reaches 8, so no legend is requested
        # NOTE(review): keyword is spelled `legned` here; confirm it matches
        # the parameter name in utils.plot_ci
        legned=idx >= 8,
    )

    # y-label only on the panels at positions 0 and 2
    if idx in (0, 2):
        ax.set_ylabel("Normalized k")
    ax.set_xlabel("Samples")

plt.tight_layout()
if SAVE:
    plt.savefig(
        "results/figures/conformal_interval_size.png",
        dpi=600,
        bbox_inches="tight",
    )
plt.show()

Total running time of the script: (0 minutes 29.688 seconds)

Gallery generated by Sphinx-Gallery