Note
Click here to download the full example code or to run this example in your browser via Binder
Extending Auto-Sklearn with Regression Component¶
The following example demonstrates how to create a new regression component for use in auto-sklearn.
from typing import Optional
from pprint import pprint
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import (
UniformFloatHyperparameter,
UniformIntegerHyperparameter,
CategoricalHyperparameter,
)
from ConfigSpace.conditions import EqualsCondition
import sklearn.metrics
from autosklearn.askl_typing import FEAT_TYPE_TYPE
import autosklearn.regression
import autosklearn.pipeline.components.regression
from autosklearn.pipeline.components.base import AutoSklearnRegressionAlgorithm
from autosklearn.pipeline.constants import (
SPARSE,
DENSE,
SIGNED_DATA,
UNSIGNED_DATA,
PREDICTIONS,
)
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
Implement kernel ridge regression component for auto-sklearn¶
class KernelRidgeRegression(AutoSklearnRegressionAlgorithm):
    """Kernel ridge regression exposed as an auto-sklearn regression component.

    The constructor only records the hyperparameters; the underlying
    ``sklearn.kernel_ridge.KernelRidge`` model is created and trained in
    :meth:`fit`.
    """

    def __init__(self, alpha, kernel, gamma, degree, coef0, random_state=None):
        """Store the hyperparameters; no estimator exists until ``fit``."""
        self.alpha, self.kernel, self.gamma = alpha, kernel, gamma
        self.degree, self.coef0 = degree, coef0
        self.random_state = random_state
        self.estimator = None

    def fit(self, X, y):
        """Build a ``KernelRidge`` model from the stored settings and train it.

        Returns ``self`` so that calls can be chained.
        """
        # Normalise the numeric hyperparameters to plain float/int before
        # handing them to scikit-learn; the converted values are written back
        # onto the instance.
        for attr_name, caster in (
            ("alpha", float),
            ("gamma", float),
            ("degree", int),
            ("coef0", float),
        ):
            setattr(self, attr_name, caster(getattr(self, attr_name)))

        import sklearn.kernel_ridge

        settings = {
            "alpha": self.alpha,
            "kernel": self.kernel,
            "gamma": self.gamma,
            "degree": self.degree,
            "coef0": self.coef0,
        }
        # Attach the estimator before training, then train it in place.
        self.estimator = sklearn.kernel_ridge.KernelRidge(**settings)
        self.estimator.fit(X, y)
        return self

    def predict(self, X):
        """Return the fitted estimator's predictions for ``X``.

        Raises ``NotImplementedError`` when ``fit`` has not been called yet.
        """
        if self.estimator is not None:
            return self.estimator.predict(X)
        raise NotImplementedError

    @staticmethod
    def get_properties(dataset_properties=None):
        """Return the static description of this component.

        ``dataset_properties`` is accepted but not used.
        """
        properties = {
            "shortname": "KRR",
            "name": "Kernel Ridge Regression",
            "handles_regression": True,
            "handles_classification": False,
            "handles_multiclass": False,
            "handles_multilabel": False,
            "handles_multioutput": True,
            "is_deterministic": True,
            "input": (SPARSE, DENSE, UNSIGNED_DATA, SIGNED_DATA),
            "output": (PREDICTIONS,),
        }
        return properties

    @staticmethod
    def get_hyperparameter_search_space(
        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
    ):
        """Return the ``ConfigurationSpace`` searched for this component.

        Neither ``feat_type`` nor ``dataset_properties`` is used. ``degree``
        and ``coef0`` are only active when ``kernel`` is ``"polynomial"``.
        """
        alpha_hp = UniformFloatHyperparameter(
            name="alpha", lower=10**-5, upper=1, log=True, default_value=1.0
        )
        # We restrict ourselves to two possible kernels for this example
        kernel_hp = CategoricalHyperparameter(
            name="kernel",
            choices=["polynomial", "rbf"],
            default_value="polynomial",
        )
        gamma_hp = UniformFloatHyperparameter(
            name="gamma", lower=0.00001, upper=1, default_value=0.1, log=True
        )
        degree_hp = UniformIntegerHyperparameter(
            name="degree", lower=2, upper=5, default_value=3
        )
        coef0_hp = UniformFloatHyperparameter(
            name="coef0", lower=1e-2, upper=1e2, log=True, default_value=1
        )

        space = ConfigurationSpace()
        space.add_hyperparameters(
            [alpha_hp, kernel_hp, gamma_hp, degree_hp, coef0_hp]
        )
        # Both polynomial-only hyperparameters hang off the same kernel value.
        space.add_conditions(
            [
                EqualsCondition(child_hp, kernel_hp, "polynomial")
                for child_hp in (degree_hp, coef0_hp)
            ]
        )
        return space
# Register the KRR component with auto-sklearn's regression components.
autosklearn.pipeline.components.regression.add_regressor(KernelRidgeRegression)
# Build the component's search space with default arguments and display it.
cs = KernelRidgeRegression.get_hyperparameter_search_space()
print(cs)
Configuration space object:
Hyperparameters:
alpha, Type: UniformFloat, Range: [1e-05, 1.0], Default: 1.0, on log-scale
coef0, Type: UniformFloat, Range: [0.01, 100.0], Default: 1.0, on log-scale
degree, Type: UniformInteger, Range: [2, 5], Default: 3
gamma, Type: UniformFloat, Range: [1e-05, 1.0], Default: 0.1, on log-scale
kernel, Type: Categorical, Choices: {polynomial, rbf}, Default: polynomial
Conditions:
coef0 | kernel == 'polynomial'
degree | kernel == 'polynomial'
Generate data¶
# Load the diabetes dataset directly as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)
# Hold out a test split. No random_state is passed, so the split presumably
# differs from run to run (confirm against scikit-learn's default).
X_train, X_test, y_train, y_test = train_test_split(X, y)
Fit the model using KRR¶
# Restrict the search to the custom KRR component and fit on the training split.
reg = autosklearn.regression.AutoSklearnRegressor(
    time_left_for_this_task=30,
    per_run_time_limit=10,
    include={"regressor": ["KernelRidgeRegression"]},
    # The two flags below are provided to speed up calculations.
    # Not recommended for a real implementation.
    initial_configurations_via_metalearning=0,
    smac_scenario_args={"runcount_limit": 5},
)
reg.fit(X_train, y_train)
Fitting to the training data: 0%| | 0/30 [00:00<?, ?it/s, The total time budget for this task is 0:00:30]
Fitting to the training data: 3%|3 | 1/30 [00:01<00:29, 1.00s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 7%|6 | 2/30 [00:02<00:28, 1.00s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 10%|# | 3/30 [00:03<00:27, 1.01s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 13%|#3 | 4/30 [00:04<00:26, 1.00s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 17%|#6 | 5/30 [00:05<00:25, 1.00s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 20%|## | 6/30 [00:06<00:24, 1.00s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 100%|##########| 30/30 [00:06<00:00, 4.98it/s, The total time budget for this task is 0:00:30]
AutoSklearnRegressor(ensemble_class=<class 'autosklearn.ensembles.ensemble_selection.EnsembleSelection'>,
include={'regressor': ['KernelRidgeRegression']},
initial_configurations_via_metalearning=0,
per_run_time_limit=10,
smac_scenario_args={'runcount_limit': 5},
time_left_for_this_task=30)
Print prediction score and statistics¶
# Score the fitted ensemble on the held-out split.
y_pred = reg.predict(X_test)
# r2_score takes the ground truth first and the predictions second. The metric
# is not symmetric, so passing them the other way round reports a wrong score.
print("r2 score: ", sklearn.metrics.r2_score(y_test, y_pred))
# Show the models that make up the final ensemble.
pprint(reg.show_models(), indent=4)
r2 score: -1.172132043597819
{ 2: { 'cost': 0.819991223708897,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f2af6cc60d0>,
'ensemble_weight': 0.04,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f2b11091460>,
'model_id': 2,
'rank': 2,
'regressor': <autosklearn.pipeline.components.regression.RegressorChoice object at 0x7f2b11091550>,
'sklearn_regressor': KernelRidge(alpha=1.0, coef0=1.0, gamma=0.1, kernel='polynomial')},
5: { 'cost': 0.5883967966895315,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f2af42aaaf0>,
'ensemble_weight': 0.96,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f2af4c7ed60>,
'model_id': 5,
'rank': 1,
'regressor': <autosklearn.pipeline.components.regression.RegressorChoice object at 0x7f2af4c7ee20>,
'sklearn_regressor': KernelRidge(alpha=0.12092279509584172, coef0=0.8084604056638888, degree=2,
gamma=0.0005846787584894724, kernel='polynomial')}}
Total running time of the script: ( 0 minutes 13.321 seconds)