Note

Click here to download the full example code or to run this example in your browser via Binder

Classification¶

The following example shows how to fit a simple classification model with auto-sklearn.

from pprint import pprint

import sklearn.datasets
import sklearn.metrics

import autosklearn.classification

Data Loading¶

X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)

Build and fit a classifier¶

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=120,
    per_run_time_limit=30,
    tmp_folder="/tmp/autosklearn_classification_example_tmp",
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

AutoSklearnClassifier(ensemble_class=<class 'autosklearn.ensembles.ensemble_selection.EnsembleSelection'>,
                      per_run_time_limit=30, time_left_for_this_task=120,
                      tmp_folder='/tmp/autosklearn_classification_example_tmp')

View the models found by auto-sklearn¶

print(automl.leaderboard())

          rank  ensemble_weight                 type      cost   duration
model_id
          1             0.16          extra_trees  0.014184   1.569340
         2             0.04          extra_trees  0.014184   2.449368
         4             0.04    gradient_boosting  0.021277   1.235045
         5             0.06          extra_trees  0.021277   1.586606
         3             0.04          extra_trees  0.021277  12.410941
          6             0.02        random_forest  0.028369   1.892178
          7             0.08                  mlp  0.028369   1.077336
          8             0.02                  mlp  0.028369   1.222855
         9             0.02        random_forest  0.028369   2.290498
        11             0.02                  mlp  0.028369   2.054393
        10             0.06    gradient_boosting  0.028369   1.379215
         16             0.02        random_forest  0.035461   2.209646
         15             0.02        random_forest  0.035461   2.130122
        14             0.02    gradient_boosting  0.035461   1.431612
        13             0.02        random_forest  0.035461   2.392527
        12             0.08        random_forest  0.035461   1.798755
         17             0.04          extra_trees  0.042553   1.930847
        19             0.08         bernoulli_nb  0.070922   1.001414
        18             0.02        decision_tree  0.070922   8.978891
        20             0.02  k_nearest_neighbors  0.070922   0.897243
        22             0.02   passive_aggressive  0.078014   0.737455
        21             0.02    gradient_boosting  0.078014   1.069623
        23             0.08                  mlp  0.134752   2.241808

Print the final ensemble constructed by auto-sklearn¶

pprint(automl.show_models(), indent=4)

{   2: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d62ef550>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d6436250>,
           'ensemble_weight': 0.02,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d62ef490>,
           'model_id': 2,
           'rank': 1,
           'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=512, n_jobs=1,
                       random_state=1, warm_start=True)},
    3: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d61a4640>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d684ebb0>,
           'ensemble_weight': 0.08,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d61a4be0>,
           'model_id': 3,
           'rank': 2,
           'sklearn_classifier': MLPClassifier(activation='tanh', alpha=0.0001363185819149026, beta_1=0.999,
              beta_2=0.9, early_stopping=True,
              hidden_layer_sizes=(115, 115, 115),
              learning_rate_init=0.00018009776276177523, max_iter=32,
              n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},
    5: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d48108b0>,
           'cost': 0.03546099290780147,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d6403880>,
           'ensemble_weight': 0.02,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d5f64c10>,
           'model_id': 5,
           'rank': 3,
           'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=3, min_samples_leaf=2,
                       n_estimators=512, n_jobs=1, random_state=1,
                       warm_start=True)},
    6: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d4824e50>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d62e5640>,
           'ensemble_weight': 0.02,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d4824c10>,
           'model_id': 6,
           'rank': 4,
           'sklearn_classifier': MLPClassifier(alpha=0.0017940473175767063, beta_1=0.999, beta_2=0.9,
              early_stopping=True, hidden_layer_sizes=(101, 101),
              learning_rate_init=0.0004684917334431039, max_iter=32,
              n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},
    7: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d45bd160>,
           'cost': 0.014184397163120588,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d605d910>,
           'ensemble_weight': 0.16,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d47c2ee0>,
           'model_id': 7,
           'rank': 5,
           'sklearn_classifier': ExtraTreesClassifier(max_features=34, min_samples_leaf=3, min_samples_split=11,
                     n_estimators=512, n_jobs=1, random_state=1,
                     warm_start=True)},
    8: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d5ff72b0>,
           'cost': 0.03546099290780147,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d5f8d070>,
           'ensemble_weight': 0.02,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d45e0fa0>,
           'model_id': 8,
           'rank': 6,
           'sklearn_classifier': RandomForestClassifier(max_features=2, min_samples_leaf=2, n_estimators=512,
                       n_jobs=1, random_state=1, warm_start=True)},
    9: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d41d5e80>,
           'cost': 0.04255319148936165,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d47ec880>,
           'ensemble_weight': 0.04,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d614d880>,
           'model_id': 9,
           'rank': 7,
           'sklearn_classifier': ExtraTreesClassifier(max_features=9, min_samples_split=10, n_estimators=512,
                     n_jobs=1, random_state=1, warm_start=True)},
    11: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d416d2e0>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d457fd00>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d4160e80>,
            'model_id': 11,
            'rank': 8,
            'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=23, min_samples_leaf=7,
                       n_estimators=512, n_jobs=1, random_state=1,
                       warm_start=True)},
    12: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3f33070>,
            'cost': 0.03546099290780147,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d4620bb0>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3f6f370>,
            'model_id': 12,
            'rank': 9,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=0.005326508887463406,
                               learning_rate=0.060800813211425456, max_iter=512,
                               max_leaf_nodes=6, min_samples_leaf=5,
                               n_iter_no_change=5, random_state=1,
                               validation_fraction=None, warm_start=True)},
    14: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d2670820>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d426de50>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d26b6340>,
            'model_id': 14,
            'rank': 10,
            'sklearn_classifier': MLPClassifier(activation='tanh', alpha=2.5550223982458062e-06, beta_1=0.999,
              beta_2=0.9, hidden_layer_sizes=(54, 54, 54),
              learning_rate_init=0.00027271287919467994, max_iter=256,
              n_iter_no_change=32, random_state=1, validation_fraction=0.0,
              verbose=0, warm_start=True)},
    16: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d261cac0>,
            'cost': 0.021276595744680882,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d40ffe50>,
            'ensemble_weight': 0.04,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d261c940>,
            'model_id': 16,
            'rank': 11,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.387912939529945e-10,
                               learning_rate=0.30755227194768237, max_iter=128,
                               max_leaf_nodes=60, min_samples_leaf=39,
                               n_iter_no_change=18, random_state=1,
                               validation_fraction=None, warm_start=True)},
    18: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d40245b0>,
            'cost': 0.03546099290780147,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d268c400>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d247c2b0>,
            'model_id': 18,
            'rank': 12,
            'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=3, n_estimators=512,
                       n_jobs=1, random_state=1, warm_start=True)},
    20: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d62b5220>,
            'cost': 0.07801418439716312,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d26b6f70>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d62b9760>,
            'model_id': 20,
            'rank': 13,
            'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=32, random_state=1,
                            tol=0.0002600768160857831, warm_start=True)},
    21: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23da280>,
            'cost': 0.021276595744680882,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d255b880>,
            'ensemble_weight': 0.06,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d24356d0>,
            'model_id': 21,
            'rank': 14,
            'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=4, min_samples_leaf=2,
                     min_samples_split=15, n_estimators=512, n_jobs=1,
                     random_state=1, warm_start=True)},
    22: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1eb1b80>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d24823d0>,
            'ensemble_weight': 0.06,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1f865b0>,
            'model_id': 22,
            'rank': 15,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=8.057778875694463e-05,
                               learning_rate=0.09179220974965213, max_iter=256,
                               max_leaf_nodes=200, n_iter_no_change=18,
                               random_state=1,
                               validation_fraction=0.14295295806077554,
                               warm_start=True)},
    27: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1cb3c10>,
            'cost': 0.014184397163120588,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d614d250>,
            'ensemble_weight': 0.04,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1cb3a90>,
            'model_id': 27,
            'rank': 16,
            'sklearn_classifier': ExtraTreesClassifier(max_features=134, min_samples_leaf=12, min_samples_split=4,
                     n_estimators=512, n_jobs=1, random_state=1,
                     warm_start=True)},
    28: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1b88e50>,
            'cost': 0.07092198581560283,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d2095340>,
            'ensemble_weight': 0.08,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1b88430>,
            'model_id': 28,
            'rank': 17,
            'sklearn_classifier': BernoulliNB(alpha=0.011056975175744176)},
    29: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1908fa0>,
            'cost': 0.13475177304964536,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1fa2670>,
            'ensemble_weight': 0.08,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1908c40>,
            'model_id': 29,
            'rank': 18,
            'sklearn_classifier': MLPClassifier(alpha=0.0007119897774330087, beta_1=0.999, beta_2=0.9,
              hidden_layer_sizes=(51, 51, 51),
              learning_rate_init=0.00028079049815589414, max_iter=256,
              n_iter_no_change=32, random_state=1, validation_fraction=0.0,
              verbose=0, warm_start=True)},
    30: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d187da00>,
            'cost': 0.021276595744680882,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1cbbd00>,
            'ensemble_weight': 0.04,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d187d910>,
            'model_id': 30,
            'rank': 19,
            'sklearn_classifier': ExtraTreesClassifier(max_features=5390, min_samples_leaf=7, n_estimators=512,
                     n_jobs=1, random_state=1, warm_start=True)},
    31: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d17d6bb0>,
            'cost': 0.03546099290780147,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1ab23d0>,
            'ensemble_weight': 0.08,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d17d6760>,
            'model_id': 31,
            'rank': 20,
            'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=2, min_samples_leaf=10,
                       min_samples_split=7, n_estimators=512, n_jobs=1,
                       random_state=1, warm_start=True)},
    32: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d15ad460>,
            'cost': 0.07801418439716312,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d192dd60>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d158dbe0>,
            'model_id': 32,
            'rank': 21,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=6.118619248990061e-06,
                               learning_rate=0.9217672331809242, max_iter=512,
                               max_leaf_nodes=223, min_samples_leaf=122,
                               n_iter_no_change=6, random_state=1,
                               validation_fraction=None, warm_start=True)},
    33: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1021a60>,
            'cost': 0.07092198581560283,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d189ed00>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d10219a0>,
            'model_id': 33,
            'rank': 22,
            'sklearn_classifier': DecisionTreeClassifier(criterion='entropy', max_depth=30, min_samples_leaf=9,
                       min_samples_split=14, random_state=1)},
    34: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d0f03bb0>,
            'cost': 0.07092198581560283,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d165c1c0>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d0fdbcd0>,
            'model_id': 34,
            'rank': 23,
            'sklearn_classifier': KNeighborsClassifier(p=1)}}

Get the Score of the final ensemble¶

predictions = automl.predict(X_test)
print("Accuracy score:", sklearn.metrics.accuracy_score(y_test, predictions))

Accuracy score: 0.958041958041958

Total running time of the script: ( 2 minutes 1.815 seconds)

Gallery generated by Sphinx-Gallery