Note

Click here to download the full example code or to run this example in your browser via Binder

Successive Halving¶

This advanced example illustrates how to interact with the SMAC callback and get relevant information from the run, like the number of iterations. Particularly, it exemplifies how to select the intensification strategy to use in smac, in this case: SuccessiveHalving.

This results in an adaptation of the BOHB algorithm. It uses Successive Halving instead of Hyperband, and could be abbreviated as BOSH. To get the BOHB algorithm, simply import Hyperband and use it as the intensification strategy.

from pprint import pprint

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

import autosklearn.classification

Define a callback that instantiates SuccessiveHalving¶

def get_smac_object_callback(budget_type):
    def get_smac_object(
        scenario_dict,
        seed,
        ta,
        ta_kwargs,
        metalearning_configurations,
        n_jobs,
        dask_client,
        multi_objective_algorithm,  # This argument will be ignored as SH does not yet support multi-objective optimization
        multi_objective_kwargs,
    ):
        from smac.facade.smac_ac_facade import SMAC4AC
        from smac.intensification.successive_halving import SuccessiveHalving
        from smac.runhistory.runhistory2epm import RunHistory2EPM4LogCost
        from smac.scenario.scenario import Scenario

        if n_jobs > 1 or (dask_client and len(dask_client.nthreads()) > 1):
            raise ValueError(
                "Please make sure to guard the code invoking Auto-sklearn by "
                "`if __name__ == '__main__'` and remove this exception."
            )

        scenario = Scenario(scenario_dict)
        if len(metalearning_configurations) > 0:
            default_config = scenario.cs.get_default_configuration()
            initial_configurations = [default_config] + metalearning_configurations
        else:
            initial_configurations = None
        rh2EPM = RunHistory2EPM4LogCost

        ta_kwargs["budget_type"] = budget_type

        return SMAC4AC(
            scenario=scenario,
            rng=seed,
            runhistory2epm=rh2EPM,
            tae_runner=ta,
            tae_runner_kwargs=ta_kwargs,
            initial_configurations=initial_configurations,
            run_id=seed,
            intensifier=SuccessiveHalving,
            intensifier_kwargs={
                "initial_budget": 10.0,
                "max_budget": 100,
                "eta": 2,
                "min_chall": 1,
            },
            n_jobs=n_jobs,
            dask_client=dask_client,
        )

    return get_smac_object

Data Loading¶

X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1, shuffle=True
)

Build and fit a classifier¶

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=40,
    per_run_time_limit=10,
    tmp_folder="/tmp/autosklearn_sh_example_tmp",
    disable_evaluator_output=False,
    # 'holdout' with 'train_size'=0.67 is the default argument setting
    # for AutoSklearnClassifier. It is explicitly specified in this example
    # for demonstrational purpose.
    resampling_strategy="holdout",
    resampling_strategy_arguments={"train_size": 0.67},
    include={
        "classifier": [
            "extra_trees",
            "gradient_boosting",
            "random_forest",
            "sgd",
            "passive_aggressive",
        ],
        "feature_preprocessor": ["no_preprocessing"],
    },
    get_smac_object_callback=get_smac_object_callback("iterations"),
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

pprint(automl.show_models(), indent=4)
predictions = automl.predict(X_test)
# Print statistics about the auto-sklearn run such as number of
# iterations, number of models failed with a time out.
print(automl.sprint_statistics())
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))

/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/smac/intensification/parallel_scheduling.py:153: UserWarning: SuccessiveHalving is executed with 1 workers only. Consider to use pynisher to use all available workers.
  warnings.warn(
{   2: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d22a6610>,
           'cost': 0.021276595744680882,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d18a4040>,
           'ensemble_weight': 0.06,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d0b408e0>,
           'model_id': 2,
           'rank': 1,
           'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
    4: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d61aa5b0>,
           'cost': 0.07801418439716312,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d624bfd0>,
           'ensemble_weight': 0.02,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d61aae50>,
           'model_id': 4,
           'rank': 2,
           'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
                            tol=0.0002600768160857831, warm_start=True)},
    5: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d61d3e80>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d62e1dc0>,
           'ensemble_weight': 0.06,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d61d3310>,
           'model_id': 5,
           'rank': 3,
           'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=False, l2_regularization=1e-10,
                               learning_rate=0.16262682406125173, max_iter=64,
                               max_leaf_nodes=66, n_iter_no_change=0,
                               random_state=1, validation_fraction=None,
                               warm_start=True)},
    6: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05e9b362e0>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c62cd0>,
           'ensemble_weight': 0.02,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05e9b36b80>,
           'model_id': 6,
           'rank': 4,
           'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.609412172481434e-10,
                               learning_rate=0.05972079854295879, max_iter=64,
                               max_leaf_nodes=4, min_samples_leaf=2,
                               n_iter_no_change=14, random_state=1,
                               validation_fraction=None, warm_start=True)},
    7: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1aa6e80>,
           'cost': 0.03546099290780147,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0d67910>,
           'ensemble_weight': 0.06,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1aa6190>,
           'model_id': 7,
           'rank': 5,
           'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
              max_iter=128, penalty='l1', random_state=1,
              tol=1.3716748930467322e-05, warm_start=True)},
    8: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c382b0>,
           'cost': 0.021276595744680882,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d248c790>,
           'ensemble_weight': 0.08,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c38ac0>,
           'model_id': 8,
           'rank': 9,
           'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
                       min_samples_split=4, n_estimators=512, n_jobs=1,
                       random_state=1, warm_start=True)},
    9: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23022b0>,
           'cost': 0.014184397163120588,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d442c940>,
           'ensemble_weight': 0.02,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d23020d0>,
           'model_id': 9,
           'rank': 10,
           'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
                     n_estimators=64, n_jobs=1, random_state=1,
                     warm_start=True)},
    10: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1e27100>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d24bb8e0>,
            'ensemble_weight': 0.06,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1e27eb0>,
            'model_id': 10,
            'rank': 11,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=5.027708640006448e-08,
                               learning_rate=0.09750328007832798, max_iter=64,
                               max_leaf_nodes=1234, min_samples_leaf=25,
                               n_iter_no_change=1, random_state=1,
                               validation_fraction=0.08300813783286698,
                               warm_start=True)},
    11: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d45af8b0>,
            'cost': 0.014184397163120588,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1af2d90>,
            'ensemble_weight': 0.06,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d45af970>,
            'model_id': 11,
            'rank': 14,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=False,
                               l2_regularization=1.0945814167023392e-10,
                               learning_rate=0.11042628136263043, max_iter=256,
                               max_leaf_nodes=30, min_samples_leaf=22,
                               n_iter_no_change=0, random_state=1,
                               validation_fraction=None, warm_start=True)},
    12: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d45e0370>,
            'cost': 0.04255319148936165,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3ef7b50>,
            'ensemble_weight': 0.06,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3e85070>,
            'model_id': 12,
            'rank': 15,
            'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=1, min_samples_leaf=6,
                       min_samples_split=13, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
    13: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d45bdc10>,
            'cost': 0.03546099290780147,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0e5b550>,
            'ensemble_weight': 0.04,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d45bddf0>,
            'model_id': 13,
            'rank': 16,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=2.506856350040198e-06,
                               learning_rate=0.04634380160611007, max_iter=64,
                               max_leaf_nodes=11, min_samples_leaf=41,
                               n_iter_no_change=17, random_state=1,
                               validation_fraction=None, warm_start=True)},
    14: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05e9b26eb0>,
            'cost': 0.03546099290780147,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ce652e20>,
            'ensemble_weight': 0.04,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05e9b260a0>,
            'model_id': 14,
            'rank': 17,
            'sklearn_classifier': ExtraTreesClassifier(bootstrap=True, max_features=3, min_samples_leaf=2,
                     min_samples_split=3, n_estimators=64, n_jobs=1,
                     random_state=1, warm_start=True)},
    16: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d0f45400>,
            'cost': 0.049645390070921946,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d457f820>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d0f45f70>,
            'model_id': 16,
            'rank': 18,
            'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=12,
                       min_samples_leaf=15, min_samples_split=6,
                       n_estimators=64, n_jobs=1, random_state=1,
                       warm_start=True)},
    17: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb2a2d90>,
            'cost': 0.099290780141844,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d11a2e20>,
            'ensemble_weight': 0.08,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cb2a21f0>,
            'model_id': 17,
            'rank': 19,
            'sklearn_classifier': SGDClassifier(alpha=9.410144741041167e-05, average=True,
              eta0=0.0018055343233337954, learning_rate='constant', loss='log',
              max_iter=128, penalty='l1', random_state=1,
              tol=0.05082904256838701, warm_start=True)}}
auto-sklearn results:
  Dataset name: breast_cancer
  Metric: accuracy
  Best validation score: 0.985816
  Number of target algorithm runs: 21
  Number of successful target algorithm runs: 21
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 0
  Number of target algorithms that exceeded the memory limit: 0

Accuracy score 0.9440559440559441

We can also use cross-validation with successive halving¶

X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1, shuffle=True
)

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=40,
    per_run_time_limit=10,
    tmp_folder="/tmp/autosklearn_sh_example_tmp_01",
    disable_evaluator_output=False,
    resampling_strategy="cv",
    include={
        "classifier": [
            "extra_trees",
            "gradient_boosting",
            "random_forest",
            "sgd",
            "passive_aggressive",
        ],
        "feature_preprocessor": ["no_preprocessing"],
    },
    get_smac_object_callback=get_smac_object_callback("iterations"),
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Print the final ensemble constructed by auto-sklearn.
pprint(automl.show_models(), indent=4)
automl.refit(X_train, y_train)
predictions = automl.predict(X_test)
# Print statistics about the auto-sklearn run such as number of
# iterations, number of models failed with a time out.
print(automl.sprint_statistics())
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))

/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/smac/intensification/parallel_scheduling.py:153: UserWarning: SuccessiveHalving is executed with 1 workers only. Consider to use pynisher to use all available workers.
  warnings.warn(
{   2: {   'cost': 0.046948356807511755,
           'ensemble_weight': 0.1,
           'estimators': [   {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d2165340>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d61cd4f0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d21655b0>,
                                 'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1ef6250>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d60015e0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1ef6910>,
                                 'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3d35a60>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d6169c40>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3d351f0>,
                                 'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ce624a00>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0f45430>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ce624dc0>,
                                 'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c0e400>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d17c88e0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c0e7c0>,
                                 'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)}],
           'model_id': 2,
           'rank': 1,
           'voting_model': VotingClassifier(estimators=None, voting='soft')},
    4: {   'cost': 0.08215962441314555,
           'ensemble_weight': 0.22,
           'estimators': [   {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb2c8490>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3f33b50>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cb2c8520>,
                                 'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
                            tol=0.0002600768160857831, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23027c0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d45bd760>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d23023d0>,
                                 'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
                            tol=0.0002600768160857831, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cc79ac70>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d6140a30>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cc79abb0>,
                                 'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
                            tol=0.0002600768160857831, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d45d05e0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1aa6cd0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d45d0ca0>,
                                 'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
                            tol=0.0002600768160857831, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3b2c1f0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d45eea30>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3b2ca60>,
                                 'sklearn_classifier': PassiveAggressiveClassifier(C=0.14268277711454813, max_iter=128, random_state=1,
                            tol=0.0002600768160857831, warm_start=True)}],
           'model_id': 4,
           'rank': 2,
           'voting_model': VotingClassifier(estimators=None, voting='soft')},
    6: {   'cost': 0.04694835680751174,
           'ensemble_weight': 0.06,
           'estimators': [   {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d25bf9d0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d61e5220>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d25bf610>,
                                 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.609412172481434e-10,
                               learning_rate=0.05972079854295879, max_iter=64,
                               max_leaf_nodes=4, min_samples_leaf=2,
                               n_iter_no_change=14, random_state=1,
                               validation_fraction=None, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d248c970>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0fd7520>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d0ccba90>,
                                 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.609412172481434e-10,
                               learning_rate=0.05972079854295879, max_iter=64,
                               max_leaf_nodes=4, min_samples_leaf=2,
                               n_iter_no_change=14, random_state=1,
                               validation_fraction=None, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23b72e0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d18a4f40>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d23b7880>,
                                 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.609412172481434e-10,
                               learning_rate=0.05972079854295879, max_iter=64,
                               max_leaf_nodes=4, min_samples_leaf=2,
                               n_iter_no_change=14, random_state=1,
                               validation_fraction=None, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3e68c40>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3dcb3a0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3e68760>,
                                 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.609412172481434e-10,
                               learning_rate=0.05972079854295879, max_iter=64,
                               max_leaf_nodes=4, min_samples_leaf=2,
                               n_iter_no_change=14, random_state=1,
                               validation_fraction=None, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3aeeac0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d5fe5e80>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3aee730>,
                                 'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.609412172481434e-10,
                               learning_rate=0.05972079854295879, max_iter=64,
                               max_leaf_nodes=4, min_samples_leaf=2,
                               n_iter_no_change=14, random_state=1,
                               validation_fraction=None, warm_start=True)}],
           'model_id': 6,
           'rank': 3,
           'voting_model': VotingClassifier(estimators=None, voting='soft')},
    7: {   'cost': 0.035211267605633784,
           'ensemble_weight': 0.12,
           'estimators': [   {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ce624880>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0caab80>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ce624100>,
                                 'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
              max_iter=128, penalty='l1', random_state=1,
              tol=1.3716748930467322e-05, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c62c40>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3c54520>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c62460>,
                                 'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
              max_iter=128, penalty='l1', random_state=1,
              tol=1.3716748930467322e-05, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ce64b1c0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c15850>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ce64b5b0>,
                                 'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
              max_iter=128, penalty='l1', random_state=1,
              tol=1.3716748930467322e-05, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d624b8e0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3f40520>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d624b280>,
                                 'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
              max_iter=128, penalty='l1', random_state=1,
              tol=1.3716748930467322e-05, warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d2143880>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca561370>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d21437f0>,
                                 'sklearn_classifier': SGDClassifier(alpha=0.0002346515712987664, average=True, eta0=0.01, loss='log',
              max_iter=128, penalty='l1', random_state=1,
              tol=1.3716748930467322e-05, warm_start=True)}],
           'model_id': 7,
           'rank': 4,
           'voting_model': VotingClassifier(estimators=None, voting='soft')},
    8: {   'cost': 0.039906103286385,
           'ensemble_weight': 0.24,
           'estimators': [   {   'balancing': Balancing(random_state=1, strategy='weighting'),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d24bbeb0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05e9b261c0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d248c3a0>,
                                 'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
                       min_samples_split=4, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1, strategy='weighting'),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3f6fc10>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0f2e0a0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3f6f4c0>,
                                 'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
                       min_samples_split=4, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1, strategy='weighting'),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1efea90>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d448de20>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1efebe0>,
                                 'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
                       min_samples_split=4, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1, strategy='weighting'),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d245a040>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1a8ddf0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d245a7c0>,
                                 'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
                       min_samples_split=4, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
                             {   'balancing': Balancing(random_state=1, strategy='weighting'),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c36df0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca916040>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c36220>,
                                 'sklearn_classifier': RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=4,
                       min_samples_split=4, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)}],
           'model_id': 8,
           'rank': 5,
           'voting_model': VotingClassifier(estimators=None, voting='soft')},
    9: {   'cost': 0.030516431924882622,
           'ensemble_weight': 0.2,
           'estimators': [   {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d61aaca0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1417310>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d61aafa0>,
                                 'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
                     n_estimators=128, n_jobs=1, random_state=1,
                     warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3b4ad30>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3e6cb50>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3b4aa30>,
                                 'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
                     n_estimators=128, n_jobs=1, random_state=1,
                     warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1c40580>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d5fa2ca0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1c404c0>,
                                 'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
                     n_estimators=128, n_jobs=1, random_state=1,
                     warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb1911f0>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca701940>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cb191130>,
                                 'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
                     n_estimators=128, n_jobs=1, random_state=1,
                     warm_start=True)},
                             {   'balancing': Balancing(random_state=1),
                                 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ca81ce20>,
                                 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ca9405e0>,
                                 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ca81cd60>,
                                 'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=8, min_samples_split=3,
                     n_estimators=128, n_jobs=1, random_state=1,
                     warm_start=True)}],
           'model_id': 9,
           'rank': 7,
           'voting_model': VotingClassifier(estimators=None, voting='soft')}}
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
  mode = stats.mode(array)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
  mode = stats.mode(array)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
  mode = stats.mode(array)
auto-sklearn results:
  Dataset name: breast_cancer
  Metric: accuracy
  Best validation score: 0.971831
  Number of target algorithm runs: 10
  Number of successful target algorithm runs: 9
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 1
  Number of target algorithms that exceeded the memory limit: 0

Accuracy score 0.958041958041958

Use an iterative fit cross-validation with successive halving¶

X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1, shuffle=True
)

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=40,
    per_run_time_limit=10,
    tmp_folder="/tmp/autosklearn_sh_example_tmp_cv_02",
    disable_evaluator_output=False,
    resampling_strategy="cv-iterative-fit",
    include={
        "classifier": [
            "extra_trees",
            "gradient_boosting",
            "random_forest",
            "sgd",
            "passive_aggressive",
        ],
        "feature_preprocessor": ["no_preprocessing"],
    },
    get_smac_object_callback=get_smac_object_callback("iterations"),
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Print the final ensemble constructed by auto-sklearn.
pprint(automl.show_models(), indent=4)
automl.refit(X_train, y_train)
predictions = automl.predict(X_test)
# Print statistics about the auto-sklearn run such as number of
# iterations, number of models failed with a time out.
print(automl.sprint_statistics())
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))

/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/smac/intensification/parallel_scheduling.py:153: UserWarning: SuccessiveHalving is executed with 1 workers only. Consider to use pynisher to use all available workers.
  warnings.warn(
{   2: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1219fa0>,
           'cost': 0.046948356807511755,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c62850>,
           'ensemble_weight': 0.32,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1219430>,
           'model_id': 2,
           'rank': 1,
           'sklearn_classifier': None},
    3: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d438e580>,
           'cost': 0.05164319248826292,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d45d0e20>,
           'ensemble_weight': 0.1,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d438eaf0>,
           'model_id': 3,
           'rank': 2,
           'sklearn_classifier': None},
    4: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3e59880>,
           'cost': 0.11267605633802817,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d448ae80>,
           'ensemble_weight': 0.04,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3dcbd30>,
           'model_id': 4,
           'rank': 3,
           'sklearn_classifier': None},
    5: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d5fe5f40>,
           'cost': 0.035211267605633804,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c62970>,
           'ensemble_weight': 0.08,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d5fe5670>,
           'model_id': 5,
           'rank': 4,
           'sklearn_classifier': None},
    6: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ca2e0ca0>,
           'cost': 0.04694835680751174,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0fd78b0>,
           'ensemble_weight': 0.1,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ca2e0a90>,
           'model_id': 6,
           'rank': 5,
           'sklearn_classifier': None},
    7: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d61aa5b0>,
           'cost': 0.03286384976525822,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1ad4490>,
           'ensemble_weight': 0.36,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d61aa7f0>,
           'model_id': 7,
           'rank': 6,
           'sklearn_classifier': None}}
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
  mode = stats.mode(array)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
  mode = stats.mode(array)
/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/sklearn/impute/_base.py:49: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.
  mode = stats.mode(array)
auto-sklearn results:
  Dataset name: breast_cancer
  Metric: accuracy
  Best validation score: 0.967136
  Number of target algorithm runs: 7
  Number of successful target algorithm runs: 6
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 1
  Number of target algorithms that exceeded the memory limit: 0

Accuracy score 0.972027972027972

Next, we see the use of subsampling as a budget in Auto-sklearn¶

X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1, shuffle=True
)

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=40,
    per_run_time_limit=10,
    tmp_folder="/tmp/autosklearn_sh_example_tmp_03",
    disable_evaluator_output=False,
    # 'holdout' with 'train_size'=0.67 is the default argument setting
    # for AutoSklearnClassifier. It is explicitly specified in this example
    # for demonstrational purpose.
    resampling_strategy="holdout",
    resampling_strategy_arguments={"train_size": 0.67},
    get_smac_object_callback=get_smac_object_callback("subsample"),
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Print the final ensemble constructed by auto-sklearn.
pprint(automl.show_models(), indent=4)
predictions = automl.predict(X_test)
# Print statistics about the auto-sklearn run such as number of
# iterations, number of models failed with a time out.
print(automl.sprint_statistics())
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))

/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/smac/intensification/parallel_scheduling.py:153: UserWarning: SuccessiveHalving is executed with 1 workers only. Consider to use pynisher to use all available workers.
  warnings.warn(
{   2: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05e9b26370>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d4240ee0>,
           'ensemble_weight': 0.12,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d6043d90>,
           'model_id': 2,
           'rank': 4,
           'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=512, n_jobs=1,
                       random_state=1, warm_start=True)},
    3: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3d35100>,
           'cost': 0.021276595744680882,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d62ef100>,
           'ensemble_weight': 0.14,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3d352e0>,
           'model_id': 3,
           'rank': 5,
           'sklearn_classifier': MLPClassifier(activation='tanh', alpha=0.0001363185819149026, beta_1=0.999,
              beta_2=0.9, early_stopping=True,
              hidden_layer_sizes=(115, 115, 115),
              learning_rate_init=0.00018009776276177523, max_iter=32,
              n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},
    4: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb155af0>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cc052400>,
           'ensemble_weight': 0.12,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05ce624ee0>,
           'model_id': 4,
           'rank': 6,
           'sklearn_classifier': MLPClassifier(activation='tanh', alpha=0.00021148999718383549, beta_1=0.999,
              beta_2=0.9, hidden_layer_sizes=(113, 113, 113),
              learning_rate_init=0.0007452270241186694, max_iter=32,
              n_iter_no_change=32, random_state=1, validation_fraction=0.0,
              verbose=0, warm_start=True)},
    5: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23b7b20>,
           'cost': 0.03546099290780147,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d11a22e0>,
           'ensemble_weight': 0.1,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d0ccb550>,
           'model_id': 5,
           'rank': 7,
           'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=3, min_samples_leaf=2,
                       n_estimators=512, n_jobs=1, random_state=1,
                       warm_start=True)},
    6: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05cb2a21c0>,
           'cost': 0.021276595744680882,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d2302df0>,
           'ensemble_weight': 0.02,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cc69a460>,
           'model_id': 6,
           'rank': 8,
           'sklearn_classifier': MLPClassifier(alpha=0.0017940473175767063, beta_1=0.999, beta_2=0.9,
              early_stopping=True, hidden_layer_sizes=(101, 101),
              learning_rate_init=0.0004684917334431039, max_iter=32,
              n_iter_no_change=32, random_state=1, verbose=0, warm_start=True)},
    7: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1e35a00>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d62234f0>,
           'ensemble_weight': 0.14,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d5ff7580>,
           'model_id': 7,
           'rank': 9,
           'sklearn_classifier': ExtraTreesClassifier(max_features=34, min_samples_leaf=3, min_samples_split=11,
                     n_estimators=512, n_jobs=1, random_state=1,
                     warm_start=True)},
    8: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d237eee0>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d13ffd00>,
           'ensemble_weight': 0.06,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05cb27b550>,
           'model_id': 8,
           'rank': 10,
           'sklearn_classifier': RandomForestClassifier(max_features=3, min_samples_leaf=2, n_estimators=512,
                       n_jobs=1, random_state=1, warm_start=True)},
    9: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d2413eb0>,
           'cost': 0.07801418439716312,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1c007f0>,
           'ensemble_weight': 0.04,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d618dd00>,
           'model_id': 9,
           'rank': 11,
           'sklearn_classifier': ExtraTreesClassifier(max_features=6, min_samples_split=10, n_estimators=512,
                     n_jobs=1, random_state=1, warm_start=True)},
    10: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d6140b20>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3e089a0>,
            'ensemble_weight': 0.04,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d1e7a130>,
            'model_id': 10,
            'rank': 12,
            'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=4, min_samples_split=6,
                       n_estimators=512, n_jobs=1, random_state=1,
                       warm_start=True)}}
auto-sklearn results:
  Dataset name: breast_cancer
  Metric: accuracy
  Best validation score: 0.978723
  Number of target algorithm runs: 12
  Number of successful target algorithm runs: 12
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 0
  Number of target algorithms that exceeded the memory limit: 0

Accuracy score 0.9440559440559441

Mixed budget approach¶

Finally, there’s a mixed budget type which uses iterations where possible and subsamples otherwise

X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1, shuffle=True
)

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=40,
    per_run_time_limit=10,
    tmp_folder="/tmp/autosklearn_sh_example_tmp_04",
    disable_evaluator_output=False,
    # 'holdout' with 'train_size'=0.67 is the default argument setting
    # for AutoSklearnClassifier. It is explicitly specified in this example
    # for demonstrational purpose.
    resampling_strategy="holdout",
    resampling_strategy_arguments={"train_size": 0.67},
    include={
        "classifier": ["extra_trees", "gradient_boosting", "random_forest", "sgd"]
    },
    get_smac_object_callback=get_smac_object_callback("mixed"),
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

# Print the final ensemble constructed by auto-sklearn.
pprint(automl.show_models(), indent=4)
predictions = automl.predict(X_test)
# Print statistics about the auto-sklearn run such as number of
# iterations, number of models failed with a time out.
print(automl.sprint_statistics())
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))

/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/smac/intensification/parallel_scheduling.py:153: UserWarning: SuccessiveHalving is executed with 1 workers only. Consider to use pynisher to use all available workers.
  warnings.warn(
{   2: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d6873430>,
           'cost': 0.021276595744680882,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cc27dcd0>,
           'ensemble_weight': 0.06,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d6873f70>,
           'model_id': 2,
           'rank': 1,
           'sklearn_classifier': RandomForestClassifier(max_features=5, n_estimators=64, n_jobs=1,
                       random_state=1, warm_start=True)},
    4: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d23b7190>,
           'cost': 0.014184397163120588,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d19ad490>,
           'ensemble_weight': 0.1,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d23b7400>,
           'model_id': 4,
           'rank': 5,
           'sklearn_classifier': ExtraTreesClassifier(max_features=34, min_samples_leaf=3, min_samples_split=11,
                     n_estimators=512, n_jobs=1, random_state=1,
                     warm_start=True)},
    6: {   'balancing': Balancing(random_state=1, strategy='weighting'),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ce6245e0>,
           'cost': 0.04255319148936165,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cb1d66d0>,
           'ensemble_weight': 0.12,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d13ffb50>,
           'model_id': 6,
           'rank': 6,
           'sklearn_classifier': ExtraTreesClassifier(max_features=9, min_samples_split=10, n_estimators=64,
                     n_jobs=1, random_state=1, warm_start=True)},
    9: {   'balancing': Balancing(random_state=1),
           'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3e44730>,
           'cost': 0.028368794326241176,
           'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d61cd820>,
           'ensemble_weight': 0.06,
           'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3e44790>,
           'model_id': 9,
           'rank': 7,
           'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=0.005326508887463406,
                               learning_rate=0.060800813211425456, max_iter=64,
                               max_leaf_nodes=6, min_samples_leaf=5,
                               n_iter_no_change=5, random_state=1,
                               validation_fraction=None, warm_start=True)},
    11: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3ad40a0>,
            'cost': 0.021276595744680882,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05f240fd30>,
            'ensemble_weight': 0.26,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3ad4cd0>,
            'model_id': 11,
            'rank': 8,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=3.387912939529945e-10,
                               learning_rate=0.30755227194768237, max_iter=64,
                               max_leaf_nodes=60, min_samples_leaf=39,
                               n_iter_no_change=18, random_state=1,
                               validation_fraction=None, warm_start=True)},
    14: {   'balancing': Balancing(random_state=1, strategy='weighting'),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05e9b265b0>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d0ccb940>,
            'ensemble_weight': 0.06,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05e9b26f70>,
            'model_id': 14,
            'rank': 9,
            'sklearn_classifier': ExtraTreesClassifier(criterion='entropy', max_features=448, min_samples_leaf=2,
                     min_samples_split=20, n_estimators=64, n_jobs=1,
                     random_state=1, warm_start=True)},
    16: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3f33c40>,
            'cost': 0.028368794326241176,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d3d350a0>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d6403e20>,
            'model_id': 16,
            'rank': 10,
            'sklearn_classifier': HistGradientBoostingClassifier(early_stopping=True,
                               l2_regularization=8.057778875694463e-05,
                               learning_rate=0.09179220974965213, max_iter=64,
                               max_leaf_nodes=200, n_iter_no_change=18,
                               random_state=1,
                               validation_fraction=0.14295295806077554,
                               warm_start=True)},
    17: {   'balancing': Balancing(random_state=1),
            'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3ff3280>,
            'cost': 0.07801418439716312,
            'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d1ef67c0>,
            'ensemble_weight': 0.02,
            'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d62ef3a0>,
            'model_id': 17,
            'rank': 11,
            'sklearn_classifier': RandomForestClassifier(criterion='entropy', max_features=16, n_estimators=64,
                       n_jobs=1, random_state=1, warm_start=True)}}
auto-sklearn results:
  Dataset name: breast_cancer
  Metric: accuracy
  Best validation score: 0.985816
  Number of target algorithm runs: 19
  Number of successful target algorithm runs: 19
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 0
  Number of target algorithms that exceeded the memory limit: 0

Accuracy score 0.951048951048951

Total running time of the script: ( 3 minutes 10.654 seconds)

Gallery generated by Sphinx-Gallery