Note
Click here to download the full example code or to run this example in your browser via Binder
Extending Auto-Sklearn with a Classification Component
The following example demonstrates how to create a new classification component for use in auto-sklearn.
from typing import Optional
from pprint import pprint
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import (
CategoricalHyperparameter,
UniformIntegerHyperparameter,
UniformFloatHyperparameter,
)
import sklearn.metrics
from autosklearn.askl_typing import FEAT_TYPE_TYPE
import autosklearn.classification
import autosklearn.pipeline.components.classification
from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm
from autosklearn.pipeline.constants import (
DENSE,
SIGNED_DATA,
UNSIGNED_DATA,
PREDICTIONS,
)
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
Create MLP classifier component for auto-sklearn¶
class MLPClassifier(AutoSklearnClassificationAlgorithm):
    """Classification component that wraps scikit-learn's multi-layer perceptron.

    The network shape is described by two hyperparameters: every hidden layer
    has ``num_nodes_per_layer`` units and there are ``hidden_layer_depth`` such
    layers. The remaining hyperparameters are forwarded unchanged to the
    scikit-learn estimator built in :meth:`fit`.
    """

    def __init__(
        self,
        hidden_layer_depth,
        num_nodes_per_layer,
        activation,
        alpha,
        solver,
        random_state=None,
    ):
        """Store the hyperparameters; no model is built until :meth:`fit`.

        Parameters
        ----------
        hidden_layer_depth
            Number of hidden layers (converted with ``int`` in ``fit``).
        num_nodes_per_layer
            Units in each hidden layer (converted with ``int`` in ``fit``).
        activation
            Passed as ``activation`` to the scikit-learn estimator.
        alpha
            Passed as ``alpha`` to the estimator (converted with ``float``
            in ``fit``).
        solver
            Passed as ``solver`` to the scikit-learn estimator.
        random_state
            Passed as ``random_state`` to the scikit-learn estimator.
        """
        self.hidden_layer_depth = hidden_layer_depth
        self.num_nodes_per_layer = num_nodes_per_layer
        self.activation = activation
        self.alpha = alpha
        self.solver = solver
        self.random_state = random_state
        # This initialiser does not call the parent one, so the attribute that
        # predict()/predict_proba() compare against None has to be created
        # here; otherwise an unfitted instance raises AttributeError instead
        # of the intended NotImplementedError.
        self.estimator = None

    def fit(self, X, y):
        """Build the scikit-learn MLP from the stored hyperparameters and fit it.

        Returns
        -------
        self
        """
        # Imported lazily and under an alias so it does not shadow this class.
        from sklearn.neural_network import MLPClassifier as SklearnMLPClassifier

        # Normalise the hyperparameters to plain Python numbers before use.
        self.num_nodes_per_layer = int(self.num_nodes_per_layer)
        self.hidden_layer_depth = int(self.hidden_layer_depth)
        self.alpha = float(self.alpha)

        # One entry per hidden layer, all of the same width.
        hidden_layer_sizes = (self.num_nodes_per_layer,) * self.hidden_layer_depth

        self.estimator = SklearnMLPClassifier(
            hidden_layer_sizes=hidden_layer_sizes,
            activation=self.activation,
            alpha=self.alpha,
            solver=self.solver,
            random_state=self.random_state,
        )
        self.estimator.fit(X, y)
        return self

    def predict(self, X):
        """Return the fitted estimator's predictions for ``X``.

        Raises
        ------
        NotImplementedError
            If :meth:`fit` has not been called yet.
        """
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict(X)

    def predict_proba(self, X):
        """Return the fitted estimator's class probabilities for ``X``.

        Raises
        ------
        NotImplementedError
            If :meth:`fit` has not been called yet.
        """
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict_proba(X)

    @staticmethod
    def get_properties(dataset_properties=None):
        """Return the static description of this component.

        ``dataset_properties`` is accepted but not used.
        """
        return {
            "shortname": "MLP Classifier",
            "name": "MLP CLassifier",
            "handles_regression": False,
            "handles_classification": True,
            "handles_multiclass": True,
            "handles_multilabel": False,
            "handles_multioutput": False,
            "is_deterministic": False,
            # Both input and output must be iterables of the constants
            # imported from autosklearn.pipeline.constants.
            "input": [DENSE, SIGNED_DATA, UNSIGNED_DATA],
            "output": [PREDICTIONS],
        }

    @staticmethod
    def get_hyperparameter_search_space(
        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
    ):
        """Return the ``ConfigurationSpace`` of tunable hyperparameters.

        Neither ``feat_type`` nor ``dataset_properties`` is used; the space is
        always the same five hyperparameters.
        """
        cs = ConfigurationSpace()

        # Network shape: 1-3 hidden layers of 16-216 units each.
        hidden_layer_depth = UniformIntegerHyperparameter(
            name="hidden_layer_depth", lower=1, upper=3, default_value=1
        )
        num_nodes_per_layer = UniformIntegerHyperparameter(
            name="num_nodes_per_layer", lower=16, upper=216, default_value=32
        )
        activation = CategoricalHyperparameter(
            name="activation",
            choices=["identity", "logistic", "tanh", "relu"],
            default_value="relu",
        )
        alpha = UniformFloatHyperparameter(
            name="alpha", lower=0.0001, upper=1.0, default_value=0.0001
        )
        solver = CategoricalHyperparameter(
            name="solver", choices=["lbfgs", "sgd", "adam"], default_value="adam"
        )

        cs.add_hyperparameters(
            [
                hidden_layer_depth,
                num_nodes_per_layer,
                activation,
                alpha,
                solver,
            ]
        )
        return cs
# Add MLP classifier component to auto-sklearn.
autosklearn.pipeline.components.classification.add_classifier(MLPClassifier)
# Build the component's hyperparameter search space with default arguments
# and print it so the registered ranges and defaults can be inspected.
cs = MLPClassifier.get_hyperparameter_search_space()
print(cs)
Configuration space object:
Hyperparameters:
activation, Type: Categorical, Choices: {identity, logistic, tanh, relu}, Default: relu
alpha, Type: UniformFloat, Range: [0.0001, 1.0], Default: 0.0001
hidden_layer_depth, Type: UniformInteger, Range: [1, 3], Default: 1
num_nodes_per_layer, Type: UniformInteger, Range: [16, 216], Default: 32
solver, Type: Categorical, Choices: {lbfgs, sgd, adam}, Default: adam
Data Loading¶
# Load the breast-cancer dataset as a (features, target) pair.
X, y = load_breast_cancer(return_X_y=True)
# Split into train and test parts using train_test_split's defaults; no
# random_state is passed, so the split is not fixed by this script.
X_train, X_test, y_train, y_test = train_test_split(X, y)
Fit MLP classifier to the data¶
# Configure auto-sklearn so that the only classifier it may choose is the
# custom component, addressed by its class name.
clf = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=30,
    per_run_time_limit=10,
    include={"classifier": ["MLPClassifier"]},
    # The two flags below are provided to speed up calculations.
    # Not recommended for a real implementation.
    initial_configurations_via_metalearning=0,
    smac_scenario_args={"runcount_limit": 5},
)
# Run the search on the training split.
clf.fit(X_train, y_train)
Fitting to the training data: 0%| | 0/30 [00:00<?, ?it/s, The total time budget for this task is 0:00:30]
Fitting to the training data: 3%|3 | 1/30 [00:01<00:29, 1.00s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 7%|6 | 2/30 [00:02<00:28, 1.00s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 10%|# | 3/30 [00:03<00:27, 1.00s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 13%|#3 | 4/30 [00:04<00:26, 1.00s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 17%|#6 | 5/30 [00:05<00:25, 1.00s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 20%|## | 6/30 [00:06<00:24, 1.01s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 23%|##3 | 7/30 [00:07<00:23, 1.01s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 27%|##6 | 8/30 [00:08<00:22, 1.00s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 30%|### | 9/30 [00:09<00:21, 1.00s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 33%|###3 | 10/30 [00:10<00:20, 1.00s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 37%|###6 | 11/30 [00:11<00:19, 1.00s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 40%|#### | 12/30 [00:12<00:18, 1.00s/it, The total time budget for this task is 0:00:30]
Fitting to the training data: 100%|##########| 30/30 [00:12<00:00, 2.49it/s, The total time budget for this task is 0:00:30]
AutoSklearnClassifier(ensemble_class=<class 'autosklearn.ensembles.ensemble_selection.EnsembleSelection'>,
include={'classifier': ['MLPClassifier']},
initial_configurations_via_metalearning=0,
per_run_time_limit=10,
smac_scenario_args={'runcount_limit': 5},
time_left_for_this_task=30)
Print test accuracy and statistics¶
# Predict on the held-out split with the fitted classifier.
y_pred = clf.predict(X_test)
# accuracy_score expects the ground truth first and the predictions second.
# Accuracy itself is symmetric, but the documented order keeps this line
# correct if the metric is later swapped for an asymmetric one.
print("accuracy: ", sklearn.metrics.accuracy_score(y_test, y_pred))
# Pretty-print whatever show_models() reports for the fitted classifier.
pprint(clf.show_models(), indent=4)
accuracy: 0.958041958041958
{ 2: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f2afd3c2520>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f2af4c7f0a0>,
'ensemble_weight': 0.32,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f2afd3c2f70>,
'model_id': 2,
'rank': 2,
'sklearn_classifier': MLPClassifier(hidden_layer_sizes=(32,), random_state=1)},
3: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f2af50ed6d0>,
'cost': 0.021276595744680882,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f2af518bfa0>,
'ensemble_weight': 0.34,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f2af50ed7f0>,
'model_id': 3,
'rank': 1,
'sklearn_classifier': MLPClassifier(activation='identity', alpha=0.2945332422116951,
hidden_layer_sizes=(54,), random_state=1, solver='lbfgs')},
5: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f2af72353d0>,
'cost': 0.36879432624113473,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f2af518b790>,
'ensemble_weight': 0.34,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f2af7235a60>,
'model_id': 5,
'rank': 3,
'sklearn_classifier': MLPClassifier(alpha=0.311408649459819, hidden_layer_sizes=(123, 123, 123),
random_state=1, solver='sgd')}}
Total running time of the script: ( 0 minutes 17.672 seconds)