Note
Click here to download the full example code or to run this example in your browser via Binder
Classification
The following example shows how to fit a simple classification model with auto-sklearn.
from pprint import pprint

import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection

import autosklearn.classification
Data Loading
# Load the breast-cancer dataset as an (X, y) pair and hold out part of it for
# the final evaluation. random_state is pinned so the split is the same on
# every run. sklearn.model_selection is imported explicitly at the top of the
# file rather than relying on another package importing it as a side effect.
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)
Build and fit a classifier
# Search settings are collected in one place so they are easy to tweak:
# an overall budget and a per-run limit (presumably both in seconds - confirm
# against the auto-sklearn API docs), plus the folder used for temporary files.
automl_settings = {
    "time_left_for_this_task": 120,
    "per_run_time_limit": 30,
    "tmp_folder": "/tmp/autosklearn_classification_example_tmp",
}
automl = autosklearn.classification.AutoSklearnClassifier(**automl_settings)
# Kept as the final bare expression so the gallery still renders the repr of
# the value returned by fit().
automl.fit(X_train, y_train, dataset_name="breast_cancer")
AutoSklearnClassifier(ensemble_class=<class 'autosklearn.ensembles.ensemble_selection.EnsembleSelection'>,
per_run_time_limit=30, time_left_for_this_task=120,
tmp_folder='/tmp/autosklearn_classification_example_tmp')
View the models found by auto-sklearn
# Tabular summary of the fitted models: rank, ensemble weight, model type,
# cost and duration.
leaderboard = automl.leaderboard()
print(leaderboard)
rank ensemble_weight type cost duration
model_id
7 1 0.16 extra_trees 0.014184 1.569340
27 2 0.04 extra_trees 0.014184 2.449368
16 4 0.04 gradient_boosting 0.021277 1.235045
21 5 0.06 extra_trees 0.021277 1.586606
30 3 0.04 extra_trees 0.021277 12.410941
2 6 0.02 random_forest 0.028369 1.892178
3 7 0.08 mlp 0.028369 1.077336
6 8 0.02 mlp 0.028369 1.222855
11 9 0.02 random_forest 0.028369 2.290498
14 11 0.02 mlp 0.028369 2.054393
22 10 0.06 gradient_boosting 0.028369 1.379215
5 16 0.02 random_forest 0.035461 2.209646
8 15 0.02 random_forest 0.035461 2.130122
12 14 0.02 gradient_boosting 0.035461 1.431612
18 13 0.02 random_forest 0.035461 2.392527
31 12 0.08 random_forest 0.035461 1.798755
9 17 0.04 extra_trees 0.042553 1.930847
28 19 0.08 bernoulli_nb 0.070922 1.001414
33 18 0.02 decision_tree 0.070922 8.978891
34 20 0.02 k_nearest_neighbors 0.070922 0.897243
20 22 0.02 passive_aggressive 0.078014 0.737455
32 21 0.02 gradient_boosting 0.078014 1.069623
29 23 0.08 mlp 0.134752 2.241808
Print the final ensemble constructed by auto-sklearn
# show_models() returns a mapping keyed by model id; pretty-print it with a
# 4-space indent so the nested per-model entries stay readable.
ensemble_members = automl.show_models()
pprint(ensemble_members, indent=4)
{ 2: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d62ef550>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d6436250>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d62ef490>,
'model_id': 2,
'rank': 1,
'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=512, n_jobs=1,
random_state=1, warm_start=True)},
3: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d61a4640>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d684ebb0>,
'ensemble_weight': 0.08,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d61a4be0>,
'model_id': 3,
'rank': 2,
'sklearn_classifier': MLPClassifier(activation='tanh', alpha=0.0001363185819149026, beta_1=0.999,
beta_2=0.9, early_stopping=True,
hidden_layer_sizes=(115, 115, 115),
learning_rate_init=0.00018009776276177523, max_iter=32,
n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},
5: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d48108b0>,
'cost': 0.03546099290780147,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d6403880>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d5f64c10>,
'model_id': 5,
'rank': 3,
'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=3, min_samples_leaf=2,
n_estimators=512, n_jobs=1, random_state=1,
warm_start=True)},
6: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d4824e50>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d62e5640>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d4824c10>,
'model_id': 6,
'rank': 4,
'sklearn_classifier': MLPClassifier(alpha=0.0017940473175767063, beta_1=0.999, beta_2=0.9,
early_stopping=True, hidden_layer_sizes=(101, 101),
learning_rate_init=0.0004684917334431039, max_iter=32,
n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},
7: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d45bd160>,
'cost': 0.014184397163120588,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d605d910>,
'ensemble_weight': 0.16,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d47c2ee0>,
'model_id': 7,
'rank': 5,
'sklearn_classifier': ExtraTreesClassifier(max_features=34, min_samples_leaf=3, min_samples_split=11,
n_estimators=512, n_jobs=1, random_state=1,
warm_start=True)},
8: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d5ff72b0>,
'cost': 0.03546099290780147,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d5f8d070>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d45e0fa0>,
'model_id': 8,
'rank': 6,
'sklearn_classifier': RandomForestClassifier(max_features=2, min_samples_leaf=2, n_estimators=512,
n_jobs=1, random_state=1, warm_start=True)},
9: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d41d5e80>,
'cost': 0.04255319148936165,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d47ec880>,
'ensemble_weight': 0.04,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d614d880>,
'model_id': 9,
'rank': 7,
'sklearn_classifier': ExtraTreesClassifier(max_features=9, min_samples_split=10, n_estimators=512,
n_jobs=1, random_state=1, warm_start=True)},
11: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d416d2e0>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d457fd00>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d4160e80>,
'model_id': 11,
'rank': 8,
'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=23, min_samples_leaf=7,
n_estimators=512, n_jobs=1, random_state=1,
warm_start=True)},
12: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3f33070>,
'cost': 0.03546099290780147,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d4620bb0>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3f6f370>,
'model_id': 12,
'rank': 9,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=0.005326508887463406,
learning_rate=0.060800813211425456, max_iter=512,
max_leaf_nodes=6, min_samples_leaf=5,
n_iter_no_change=5, random_state=1,
validation_fraction=None, warm_start=True)},
14: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d2670820>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d426de50>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d26b6340>,
'model_id': 14,
'rank': 10,
'sklearn_classifier': MLPClassifier(activation='tanh', alpha=2.5550223982458062e-06, beta_1=0.999,
beta_2=0.9, hidden_layer_sizes=(54, 54, 54),
learning_rate_init=0.00027271287919467994, max_iter=256,
n_iter_no_change=32, random_state=1, validation_fraction=0.0,
verbose=0, warm_start=True)},
16: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d261cac0>,
'cost': 0.021276595744680882,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d40ffe50>,
'ensemble_weight': 0.04,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d261c940>,
'model_id': 16,
'rank': 11,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=3.387912939529945e-10,
learning_rate=0.30755227194768237, max_iter=128,
max_leaf_nodes=60, min_samples_leaf=39,
n_iter_no_change=18, random_state=1,
validation_fraction=None, warm_start=True)},
18: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d40245b0>,
'cost': 0.03546099290780147,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d268c400>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d247c2b0>,
'model_id': 18,
'rank': 12,
'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=3, n_estimators=512,
n_jobs=1, random_state=1, warm_start=True)},
20: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d62b5220>,
'cost': 0.07801418439716312,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d26b6f70>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d62b9760>,
'model_id': 20,
'rank': 13,
'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=32, random_state=1,
tol=0.0002600768160857831, warm_start=True)},
21: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23da280>,
'cost': 0.021276595744680882,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d255b880>,
'ensemble_weight': 0.06,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d24356d0>,
'model_id': 21,
'rank': 14,
'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=4, min_samples_leaf=2,
min_samples_split=15, n_estimators=512, n_jobs=1,
random_state=1, warm_start=True)},
22: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1eb1b80>,
'cost': 0.028368794326241176,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d24823d0>,
'ensemble_weight': 0.06,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1f865b0>,
'model_id': 22,
'rank': 15,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=8.057778875694463e-05,
learning_rate=0.09179220974965213, max_iter=256,
max_leaf_nodes=200, n_iter_no_change=18,
random_state=1,
validation_fraction=0.14295295806077554,
warm_start=True)},
27: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1cb3c10>,
'cost': 0.014184397163120588,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d614d250>,
'ensemble_weight': 0.04,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1cb3a90>,
'model_id': 27,
'rank': 16,
'sklearn_classifier': ExtraTreesClassifier(max_features=134, min_samples_leaf=12, min_samples_split=4,
n_estimators=512, n_jobs=1, random_state=1,
warm_start=True)},
28: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1b88e50>,
'cost': 0.07092198581560283,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d2095340>,
'ensemble_weight': 0.08,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1b88430>,
'model_id': 28,
'rank': 17,
'sklearn_classifier': BernoulliNB(alpha=0.011056975175744176)},
29: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1908fa0>,
'cost': 0.13475177304964536,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1fa2670>,
'ensemble_weight': 0.08,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1908c40>,
'model_id': 29,
'rank': 18,
'sklearn_classifier': MLPClassifier(alpha=0.0007119897774330087, beta_1=0.999, beta_2=0.9,
hidden_layer_sizes=(51, 51, 51),
learning_rate_init=0.00028079049815589414, max_iter=256,
n_iter_no_change=32, random_state=1, validation_fraction=0.0,
verbose=0, warm_start=True)},
30: { 'balancing': Balancing(random_state=1, strategy='weighting'),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d187da00>,
'cost': 0.021276595744680882,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1cbbd00>,
'ensemble_weight': 0.04,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d187d910>,
'model_id': 30,
'rank': 19,
'sklearn_classifier': ExtraTreesClassifier(max_features=5390, min_samples_leaf=7, n_estimators=512,
n_jobs=1, random_state=1, warm_start=True)},
31: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d17d6bb0>,
'cost': 0.03546099290780147,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1ab23d0>,
'ensemble_weight': 0.08,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d17d6760>,
'model_id': 31,
'rank': 20,
'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=2, min_samples_leaf=10,
min_samples_split=7, n_estimators=512, n_jobs=1,
random_state=1, warm_start=True)},
32: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d15ad460>,
'cost': 0.07801418439716312,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d192dd60>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d158dbe0>,
'model_id': 32,
'rank': 21,
'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
l2_regularization=6.118619248990061e-06,
learning_rate=0.9217672331809242, max_iter=512,
max_leaf_nodes=223, min_samples_leaf=122,
n_iter_no_change=6, random_state=1,
validation_fraction=None, warm_start=True)},
33: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1021a60>,
'cost': 0.07092198581560283,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d189ed00>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d10219a0>,
'model_id': 33,
'rank': 22,
'sklearn_classifier': DecisionTreeClassifier(criterion='entropy', max_depth=30, min_samples_leaf=9,
min_samples_split=14, random_state=1)},
34: { 'balancing': Balancing(random_state=1),
'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d0f03bb0>,
'cost': 0.07092198581560283,
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d165c1c0>,
'ensemble_weight': 0.02,
'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d0fdbcd0>,
'model_id': 34,
'rank': 23,
'sklearn_classifier': KNeighborsClassifier(p=1)}}
Get the score of the final ensemble
# Evaluate the fitted ensemble on the held-out split and report its accuracy.
predictions = automl.predict(X_test)
accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score:", accuracy)
Accuracy score: 0.958041958041958
Total running time of the script: ( 2 minutes 1.815 seconds)