Note
Click here to download the full example code or to run this example in your browser via Binder
Multi-output Regression
The following example shows how to fit a multi-output regression model with auto-sklearn.
import numpy as numpy
from pprint import pprint
from sklearn.datasets import make_regression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from autosklearn.regression import AutoSklearnRegressor
Data Loading
# Generate a synthetic regression problem with three target columns.
# The generator is seeded as well as the split: seeding only
# train_test_split still yields different data on every run.
X, y = make_regression(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_targets=3,
    random_state=1,
)
# Hold out part of the data for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
Build and fit a regressor
# Configure the AutoML regressor.
# NOTE(review): the two time budgets are presumably given in seconds --
# confirm against the auto-sklearn API documentation.
automl = AutoSklearnRegressor(
    time_left_for_this_task=120,
    per_run_time_limit=30,
    tmp_folder="/tmp/autosklearn_multioutput_regression_example_tmp",
)
# Fit on the training split; y_train holds the targets produced by make_regression.
automl.fit(X_train, y_train, dataset_name="synthetic")
AutoSklearnRegressor(ensemble_class=<class 'autosklearn.ensembles.ensemble_selection.EnsembleSelection'>,
per_run_time_limit=30, time_left_for_this_task=120,
tmp_folder='/tmp/autosklearn_multioutput_regression_example_tmp')
View the models found by auto-sklearn
# Print the leaderboard table: one row per model id with its rank,
# ensemble weight, model type, cost and duration.
print(automl.leaderboard())
rank ensemble_weight type cost duration
model_id
17 1 0.88 gaussian_process 2.686994e-08 11.047750
4 2 0.12 gaussian_process 6.650299e-08 4.449338
Print the final ensemble constructed by auto-sklearn
# Pretty-print the per-model details returned by show_models().
pprint(automl.show_models(), indent=4)
{ 4: { 'cost': 6.650299455568387e-08,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d626bb50>,
'ensemble_weight': 0.12,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d4573a30>,
'model_id': 4,
'rank': 1,
'regressor': <autosklearn.pipeline.components.regression.RegressorChoice object at 0x7f05d4573280>,
'sklearn_regressor': GaussianProcessRegressor(alpha=2.6231667524556984e-13,
kernel=RBF(length_scale=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
n_restarts_optimizer=10, normalize_y=True,
random_state=1)},
17: { 'cost': 2.686994005074439e-08,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3d930a0>,
'ensemble_weight': 0.88,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d624b8b0>,
'model_id': 17,
'rank': 2,
'regressor': <autosklearn.pipeline.components.regression.RegressorChoice object at 0x7f05d624b730>,
'sklearn_regressor': GaussianProcessRegressor(alpha=6.100934970841317e-14,
kernel=RBF(length_scale=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
n_restarts_optimizer=10, normalize_y=True,
random_state=1)}}
Get the Score of the final ensemble
# Predict on the held-out split and report the R2 score of those predictions.
predictions = automl.predict(X_test)
print("R2 score:", r2_score(y_test, predictions))
R2 score: 0.9999999400500433
Get the configuration space
# The configuration space is reduced, e.g. it offers no SVM regressor.
print(automl.get_configuration_space(X_train, y_train))
Configuration space object:
Hyperparameters:
data_preprocessor:__choice__, Type: Categorical, Choices: {feature_type}, Default: feature_type
data_preprocessor:feature_type:numerical_transformer:imputation:strategy, Type: Categorical, Choices: {mean, median, most_frequent}, Default: mean
data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__, Type: Categorical, Choices: {minmax, none, normalize, power_transformer, quantile_transformer, robust_scaler, standardize}, Default: standardize
data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:n_quantiles, Type: UniformInteger, Range: [10, 2000], Default: 1000
data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:output_distribution, Type: Categorical, Choices: {normal, uniform}, Default: normal
data_preprocessor:feature_type:numerical_transformer:rescaling:robust_scaler:q_max, Type: UniformFloat, Range: [0.7, 0.999], Default: 0.75
data_preprocessor:feature_type:numerical_transformer:rescaling:robust_scaler:q_min, Type: UniformFloat, Range: [0.001, 0.3], Default: 0.25
feature_preprocessor:__choice__, Type: Categorical, Choices: {extra_trees_preproc_for_regression, fast_ica, feature_agglomeration, kernel_pca, kitchen_sinks, no_preprocessing, nystroem_sampler, pca, polynomial, random_trees_embedding}, Default: no_preprocessing
feature_preprocessor:extra_trees_preproc_for_regression:bootstrap, Type: Categorical, Choices: {True, False}, Default: False
feature_preprocessor:extra_trees_preproc_for_regression:criterion, Type: Categorical, Choices: {mse, friedman_mse, mae}, Default: mse
feature_preprocessor:extra_trees_preproc_for_regression:max_depth, Type: Constant, Value: None
feature_preprocessor:extra_trees_preproc_for_regression:max_features, Type: UniformFloat, Range: [0.1, 1.0], Default: 1.0
feature_preprocessor:extra_trees_preproc_for_regression:max_leaf_nodes, Type: Constant, Value: None
feature_preprocessor:extra_trees_preproc_for_regression:min_samples_leaf, Type: UniformInteger, Range: [1, 20], Default: 1
feature_preprocessor:extra_trees_preproc_for_regression:min_samples_split, Type: UniformInteger, Range: [2, 20], Default: 2
feature_preprocessor:extra_trees_preproc_for_regression:min_weight_fraction_leaf, Type: Constant, Value: 0.0
feature_preprocessor:extra_trees_preproc_for_regression:n_estimators, Type: Constant, Value: 100
feature_preprocessor:fast_ica:algorithm, Type: Categorical, Choices: {parallel, deflation}, Default: parallel
feature_preprocessor:fast_ica:fun, Type: Categorical, Choices: {logcosh, exp, cube}, Default: logcosh
feature_preprocessor:fast_ica:n_components, Type: UniformInteger, Range: [10, 2000], Default: 100
feature_preprocessor:fast_ica:whiten, Type: Categorical, Choices: {False, True}, Default: False
feature_preprocessor:feature_agglomeration:affinity, Type: Categorical, Choices: {euclidean, manhattan, cosine}, Default: euclidean
feature_preprocessor:feature_agglomeration:linkage, Type: Categorical, Choices: {ward, complete, average}, Default: ward
feature_preprocessor:feature_agglomeration:n_clusters, Type: UniformInteger, Range: [2, 400], Default: 25
feature_preprocessor:feature_agglomeration:pooling_func, Type: Categorical, Choices: {mean, median, max}, Default: mean
feature_preprocessor:kernel_pca:coef0, Type: UniformFloat, Range: [-1.0, 1.0], Default: 0.0
feature_preprocessor:kernel_pca:degree, Type: UniformInteger, Range: [2, 5], Default: 3
feature_preprocessor:kernel_pca:gamma, Type: UniformFloat, Range: [3.0517578125e-05, 8.0], Default: 0.01, on log-scale
feature_preprocessor:kernel_pca:kernel, Type: Categorical, Choices: {poly, rbf, sigmoid, cosine}, Default: rbf
feature_preprocessor:kernel_pca:n_components, Type: UniformInteger, Range: [10, 2000], Default: 100
feature_preprocessor:kitchen_sinks:gamma, Type: UniformFloat, Range: [3.0517578125e-05, 8.0], Default: 1.0, on log-scale
feature_preprocessor:kitchen_sinks:n_components, Type: UniformInteger, Range: [50, 10000], Default: 100, on log-scale
feature_preprocessor:nystroem_sampler:coef0, Type: UniformFloat, Range: [-1.0, 1.0], Default: 0.0
feature_preprocessor:nystroem_sampler:degree, Type: UniformInteger, Range: [2, 5], Default: 3
feature_preprocessor:nystroem_sampler:gamma, Type: UniformFloat, Range: [3.0517578125e-05, 8.0], Default: 0.1, on log-scale
feature_preprocessor:nystroem_sampler:kernel, Type: Categorical, Choices: {poly, rbf, sigmoid, cosine}, Default: rbf
feature_preprocessor:nystroem_sampler:n_components, Type: UniformInteger, Range: [50, 10000], Default: 100, on log-scale
feature_preprocessor:pca:keep_variance, Type: UniformFloat, Range: [0.5, 0.9999], Default: 0.9999
feature_preprocessor:pca:whiten, Type: Categorical, Choices: {False, True}, Default: False
feature_preprocessor:polynomial:degree, Type: UniformInteger, Range: [2, 3], Default: 2
feature_preprocessor:polynomial:include_bias, Type: Categorical, Choices: {True, False}, Default: True
feature_preprocessor:polynomial:interaction_only, Type: Categorical, Choices: {False, True}, Default: False
feature_preprocessor:random_trees_embedding:bootstrap, Type: Categorical, Choices: {True, False}, Default: True
feature_preprocessor:random_trees_embedding:max_depth, Type: UniformInteger, Range: [2, 10], Default: 5
feature_preprocessor:random_trees_embedding:max_leaf_nodes, Type: Constant, Value: None
feature_preprocessor:random_trees_embedding:min_samples_leaf, Type: UniformInteger, Range: [1, 20], Default: 1
feature_preprocessor:random_trees_embedding:min_samples_split, Type: UniformInteger, Range: [2, 20], Default: 2
feature_preprocessor:random_trees_embedding:min_weight_fraction_leaf, Type: Constant, Value: 1.0
feature_preprocessor:random_trees_embedding:n_estimators, Type: UniformInteger, Range: [10, 100], Default: 10
regressor:__choice__, Type: Categorical, Choices: {decision_tree, extra_trees, gaussian_process, k_nearest_neighbors, random_forest}, Default: random_forest
regressor:decision_tree:criterion, Type: Categorical, Choices: {mse, friedman_mse, mae}, Default: mse
regressor:decision_tree:max_depth_factor, Type: UniformFloat, Range: [0.0, 2.0], Default: 0.5
regressor:decision_tree:max_features, Type: Constant, Value: 1.0
regressor:decision_tree:max_leaf_nodes, Type: Constant, Value: None
regressor:decision_tree:min_impurity_decrease, Type: Constant, Value: 0.0
regressor:decision_tree:min_samples_leaf, Type: UniformInteger, Range: [1, 20], Default: 1
regressor:decision_tree:min_samples_split, Type: UniformInteger, Range: [2, 20], Default: 2
regressor:decision_tree:min_weight_fraction_leaf, Type: Constant, Value: 0.0
regressor:extra_trees:bootstrap, Type: Categorical, Choices: {True, False}, Default: False
regressor:extra_trees:criterion, Type: Categorical, Choices: {mse, friedman_mse, mae}, Default: mse
regressor:extra_trees:max_depth, Type: Constant, Value: None
regressor:extra_trees:max_features, Type: UniformFloat, Range: [0.1, 1.0], Default: 1.0
regressor:extra_trees:max_leaf_nodes, Type: Constant, Value: None
regressor:extra_trees:min_impurity_decrease, Type: Constant, Value: 0.0
regressor:extra_trees:min_samples_leaf, Type: UniformInteger, Range: [1, 20], Default: 1
regressor:extra_trees:min_samples_split, Type: UniformInteger, Range: [2, 20], Default: 2
regressor:extra_trees:min_weight_fraction_leaf, Type: Constant, Value: 0.0
regressor:gaussian_process:alpha, Type: UniformFloat, Range: [1e-14, 1.0], Default: 1e-08, on log-scale
regressor:gaussian_process:thetaL, Type: UniformFloat, Range: [1e-10, 0.001], Default: 1e-06, on log-scale
regressor:gaussian_process:thetaU, Type: UniformFloat, Range: [1.0, 100000.0], Default: 100000.0, on log-scale
regressor:k_nearest_neighbors:n_neighbors, Type: UniformInteger, Range: [1, 100], Default: 1, on log-scale
regressor:k_nearest_neighbors:p, Type: Categorical, Choices: {1, 2}, Default: 2
regressor:k_nearest_neighbors:weights, Type: Categorical, Choices: {uniform, distance}, Default: uniform
regressor:random_forest:bootstrap, Type: Categorical, Choices: {True, False}, Default: True
regressor:random_forest:criterion, Type: Categorical, Choices: {mse, friedman_mse, mae}, Default: mse
regressor:random_forest:max_depth, Type: Constant, Value: None
regressor:random_forest:max_features, Type: UniformFloat, Range: [0.1, 1.0], Default: 1.0
regressor:random_forest:max_leaf_nodes, Type: Constant, Value: None
regressor:random_forest:min_impurity_decrease, Type: Constant, Value: 0.0
regressor:random_forest:min_samples_leaf, Type: UniformInteger, Range: [1, 20], Default: 1
regressor:random_forest:min_samples_split, Type: UniformInteger, Range: [2, 20], Default: 2
regressor:random_forest:min_weight_fraction_leaf, Type: Constant, Value: 0.0
Conditions:
data_preprocessor:feature_type:numerical_transformer:imputation:strategy | data_preprocessor:__choice__ == 'feature_type'
data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__ | data_preprocessor:__choice__ == 'feature_type'
data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:n_quantiles | data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__ == 'quantile_transformer'
data_preprocessor:feature_type:numerical_transformer:rescaling:quantile_transformer:output_distribution | data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__ == 'quantile_transformer'
data_preprocessor:feature_type:numerical_transformer:rescaling:robust_scaler:q_max | data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__ == 'robust_scaler'
data_preprocessor:feature_type:numerical_transformer:rescaling:robust_scaler:q_min | data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__ == 'robust_scaler'
feature_preprocessor:extra_trees_preproc_for_regression:bootstrap | feature_preprocessor:__choice__ == 'extra_trees_preproc_for_regression'
feature_preprocessor:extra_trees_preproc_for_regression:criterion | feature_preprocessor:__choice__ == 'extra_trees_preproc_for_regression'
feature_preprocessor:extra_trees_preproc_for_regression:max_depth | feature_preprocessor:__choice__ == 'extra_trees_preproc_for_regression'
feature_preprocessor:extra_trees_preproc_for_regression:max_features | feature_preprocessor:__choice__ == 'extra_trees_preproc_for_regression'
feature_preprocessor:extra_trees_preproc_for_regression:max_leaf_nodes | feature_preprocessor:__choice__ == 'extra_trees_preproc_for_regression'
feature_preprocessor:extra_trees_preproc_for_regression:min_samples_leaf | feature_preprocessor:__choice__ == 'extra_trees_preproc_for_regression'
feature_preprocessor:extra_trees_preproc_for_regression:min_samples_split | feature_preprocessor:__choice__ == 'extra_trees_preproc_for_regression'
feature_preprocessor:extra_trees_preproc_for_regression:min_weight_fraction_leaf | feature_preprocessor:__choice__ == 'extra_trees_preproc_for_regression'
feature_preprocessor:extra_trees_preproc_for_regression:n_estimators | feature_preprocessor:__choice__ == 'extra_trees_preproc_for_regression'
feature_preprocessor:fast_ica:algorithm | feature_preprocessor:__choice__ == 'fast_ica'
feature_preprocessor:fast_ica:fun | feature_preprocessor:__choice__ == 'fast_ica'
feature_preprocessor:fast_ica:n_components | feature_preprocessor:fast_ica:whiten == 'True'
feature_preprocessor:fast_ica:whiten | feature_preprocessor:__choice__ == 'fast_ica'
feature_preprocessor:feature_agglomeration:affinity | feature_preprocessor:__choice__ == 'feature_agglomeration'
feature_preprocessor:feature_agglomeration:linkage | feature_preprocessor:__choice__ == 'feature_agglomeration'
feature_preprocessor:feature_agglomeration:n_clusters | feature_preprocessor:__choice__ == 'feature_agglomeration'
feature_preprocessor:feature_agglomeration:pooling_func | feature_preprocessor:__choice__ == 'feature_agglomeration'
feature_preprocessor:kernel_pca:coef0 | feature_preprocessor:kernel_pca:kernel in {'poly', 'sigmoid'}
feature_preprocessor:kernel_pca:degree | feature_preprocessor:kernel_pca:kernel == 'poly'
feature_preprocessor:kernel_pca:gamma | feature_preprocessor:kernel_pca:kernel in {'poly', 'rbf'}
feature_preprocessor:kernel_pca:kernel | feature_preprocessor:__choice__ == 'kernel_pca'
feature_preprocessor:kernel_pca:n_components | feature_preprocessor:__choice__ == 'kernel_pca'
feature_preprocessor:kitchen_sinks:gamma | feature_preprocessor:__choice__ == 'kitchen_sinks'
feature_preprocessor:kitchen_sinks:n_components | feature_preprocessor:__choice__ == 'kitchen_sinks'
feature_preprocessor:nystroem_sampler:coef0 | feature_preprocessor:nystroem_sampler:kernel in {'poly', 'sigmoid'}
feature_preprocessor:nystroem_sampler:degree | feature_preprocessor:nystroem_sampler:kernel == 'poly'
feature_preprocessor:nystroem_sampler:gamma | feature_preprocessor:nystroem_sampler:kernel in {'poly', 'rbf', 'sigmoid'}
feature_preprocessor:nystroem_sampler:kernel | feature_preprocessor:__choice__ == 'nystroem_sampler'
feature_preprocessor:nystroem_sampler:n_components | feature_preprocessor:__choice__ == 'nystroem_sampler'
feature_preprocessor:pca:keep_variance | feature_preprocessor:__choice__ == 'pca'
feature_preprocessor:pca:whiten | feature_preprocessor:__choice__ == 'pca'
feature_preprocessor:polynomial:degree | feature_preprocessor:__choice__ == 'polynomial'
feature_preprocessor:polynomial:include_bias | feature_preprocessor:__choice__ == 'polynomial'
feature_preprocessor:polynomial:interaction_only | feature_preprocessor:__choice__ == 'polynomial'
feature_preprocessor:random_trees_embedding:bootstrap | feature_preprocessor:__choice__ == 'random_trees_embedding'
feature_preprocessor:random_trees_embedding:max_depth | feature_preprocessor:__choice__ == 'random_trees_embedding'
feature_preprocessor:random_trees_embedding:max_leaf_nodes | feature_preprocessor:__choice__ == 'random_trees_embedding'
feature_preprocessor:random_trees_embedding:min_samples_leaf | feature_preprocessor:__choice__ == 'random_trees_embedding'
feature_preprocessor:random_trees_embedding:min_samples_split | feature_preprocessor:__choice__ == 'random_trees_embedding'
feature_preprocessor:random_trees_embedding:min_weight_fraction_leaf | feature_preprocessor:__choice__ == 'random_trees_embedding'
feature_preprocessor:random_trees_embedding:n_estimators | feature_preprocessor:__choice__ == 'random_trees_embedding'
regressor:decision_tree:criterion | regressor:__choice__ == 'decision_tree'
regressor:decision_tree:max_depth_factor | regressor:__choice__ == 'decision_tree'
regressor:decision_tree:max_features | regressor:__choice__ == 'decision_tree'
regressor:decision_tree:max_leaf_nodes | regressor:__choice__ == 'decision_tree'
regressor:decision_tree:min_impurity_decrease | regressor:__choice__ == 'decision_tree'
regressor:decision_tree:min_samples_leaf | regressor:__choice__ == 'decision_tree'
regressor:decision_tree:min_samples_split | regressor:__choice__ == 'decision_tree'
regressor:decision_tree:min_weight_fraction_leaf | regressor:__choice__ == 'decision_tree'
regressor:extra_trees:bootstrap | regressor:__choice__ == 'extra_trees'
regressor:extra_trees:criterion | regressor:__choice__ == 'extra_trees'
regressor:extra_trees:max_depth | regressor:__choice__ == 'extra_trees'
regressor:extra_trees:max_features | regressor:__choice__ == 'extra_trees'
regressor:extra_trees:max_leaf_nodes | regressor:__choice__ == 'extra_trees'
regressor:extra_trees:min_impurity_decrease | regressor:__choice__ == 'extra_trees'
regressor:extra_trees:min_samples_leaf | regressor:__choice__ == 'extra_trees'
regressor:extra_trees:min_samples_split | regressor:__choice__ == 'extra_trees'
regressor:extra_trees:min_weight_fraction_leaf | regressor:__choice__ == 'extra_trees'
regressor:gaussian_process:alpha | regressor:__choice__ == 'gaussian_process'
regressor:gaussian_process:thetaL | regressor:__choice__ == 'gaussian_process'
regressor:gaussian_process:thetaU | regressor:__choice__ == 'gaussian_process'
regressor:k_nearest_neighbors:n_neighbors | regressor:__choice__ == 'k_nearest_neighbors'
regressor:k_nearest_neighbors:p | regressor:__choice__ == 'k_nearest_neighbors'
regressor:k_nearest_neighbors:weights | regressor:__choice__ == 'k_nearest_neighbors'
regressor:random_forest:bootstrap | regressor:__choice__ == 'random_forest'
regressor:random_forest:criterion | regressor:__choice__ == 'random_forest'
regressor:random_forest:max_depth | regressor:__choice__ == 'random_forest'
regressor:random_forest:max_features | regressor:__choice__ == 'random_forest'
regressor:random_forest:max_leaf_nodes | regressor:__choice__ == 'random_forest'
regressor:random_forest:min_impurity_decrease | regressor:__choice__ == 'random_forest'
regressor:random_forest:min_samples_leaf | regressor:__choice__ == 'random_forest'
regressor:random_forest:min_samples_split | regressor:__choice__ == 'random_forest'
regressor:random_forest:min_weight_fraction_leaf | regressor:__choice__ == 'random_forest'
Forbidden Clauses:
(Forbidden: feature_preprocessor:feature_agglomeration:affinity in {'cosine', 'manhattan'} && Forbidden: feature_preprocessor:feature_agglomeration:linkage == 'ward')
(Forbidden: feature_preprocessor:__choice__ == 'random_trees_embedding' && Forbidden: regressor:__choice__ == 'gaussian_process')
(Forbidden: regressor:__choice__ == 'decision_tree' && Forbidden: feature_preprocessor:__choice__ == 'kitchen_sinks')
(Forbidden: regressor:__choice__ == 'decision_tree' && Forbidden: feature_preprocessor:__choice__ == 'kernel_pca')
(Forbidden: regressor:__choice__ == 'decision_tree' && Forbidden: feature_preprocessor:__choice__ == 'nystroem_sampler')
(Forbidden: regressor:__choice__ == 'extra_trees' && Forbidden: feature_preprocessor:__choice__ == 'kitchen_sinks')
(Forbidden: regressor:__choice__ == 'extra_trees' && Forbidden: feature_preprocessor:__choice__ == 'kernel_pca')
(Forbidden: regressor:__choice__ == 'extra_trees' && Forbidden: feature_preprocessor:__choice__ == 'nystroem_sampler')
(Forbidden: regressor:__choice__ == 'gaussian_process' && Forbidden: feature_preprocessor:__choice__ == 'kitchen_sinks')
(Forbidden: regressor:__choice__ == 'gaussian_process' && Forbidden: feature_preprocessor:__choice__ == 'kernel_pca')
(Forbidden: regressor:__choice__ == 'gaussian_process' && Forbidden: feature_preprocessor:__choice__ == 'nystroem_sampler')
(Forbidden: regressor:__choice__ == 'k_nearest_neighbors' && Forbidden: feature_preprocessor:__choice__ == 'kitchen_sinks')
(Forbidden: regressor:__choice__ == 'k_nearest_neighbors' && Forbidden: feature_preprocessor:__choice__ == 'kernel_pca')
(Forbidden: regressor:__choice__ == 'k_nearest_neighbors' && Forbidden: feature_preprocessor:__choice__ == 'nystroem_sampler')
(Forbidden: regressor:__choice__ == 'random_forest' && Forbidden: feature_preprocessor:__choice__ == 'kitchen_sinks')
(Forbidden: regressor:__choice__ == 'random_forest' && Forbidden: feature_preprocessor:__choice__ == 'kernel_pca')
(Forbidden: regressor:__choice__ == 'random_forest' && Forbidden: feature_preprocessor:__choice__ == 'nystroem_sampler')
Total running time of the script: (1 minute 58.541 seconds)