Metrics

Auto-sklearn supports a number of built-in metrics, which are listed in the metrics section of the API documentation. It is also possible to define your own metric and use it to fit and evaluate your model. The following examples show how to use built-in and self-defined metrics for a classification problem.

import numpy as np

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

import autosklearn.classification
import autosklearn.metrics
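
The built-in metrics can also be inspected programmatically. A minimal sketch, assuming the CLASSIFICATION_METRICS and REGRESSION_METRICS dictionaries exposed by autosklearn.metrics:

# Sketch: list the names of the built-in metrics (assumes the
# CLASSIFICATION_METRICS / REGRESSION_METRICS dictionaries provided by
# autosklearn.metrics).
print("Available classification metrics:")
print(sorted(autosklearn.metrics.CLASSIFICATION_METRICS.keys()))
print("Available regression metrics:")
print(sorted(autosklearn.metrics.REGRESSION_METRICS.keys()))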

Custom Metrics

def accuracy(solution, prediction):
    # custom function defining accuracy
    return np.mean(solution == prediction)


def error(solution, prediction):
    # custom function defining error
    return np.mean(solution != prediction)


def accuracy_wk(solution, prediction, extra_argument):
    # custom function defining accuracy and accepting an additional argument
    assert extra_argument is None
    return np.mean(solution == prediction)


def error_wk(solution, prediction, extra_argument):
    # custom function defining error and accepting an additional argument
    assert extra_argument is None
    return np.mean(solution != prediction)
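
These custom metric functions operate directly on NumPy arrays, so they can be sanity-checked on a small hand-made example before handing them to auto-sklearn. The arrays below are made up purely for illustration:

# Quick sanity check of the custom metric functions on made-up labels
y_true_demo = np.array([0, 1, 1, 0])
y_pred_demo = np.array([0, 1, 0, 0])
print(accuracy(y_true_demo, y_pred_demo))  # 0.75
print(error(y_true_demo, y_pred_demo))     # 0.25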

Data Loading

X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = \
    sklearn.model_selection.train_test_split(X, y, random_state=1)
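
A quick, purely illustrative look at the resulting split sizes and class balance:

# Illustrative check of the train/test split
print(X_train.shape, X_test.shape)
print(np.bincount(y_train), np.bincount(y_test))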

First example: Use predefined accuracy metric

print("#"*80)
print("Use predefined accuracy metric")
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    per_run_time_limit=30,
    seed=1,
    metric=autosklearn.metrics.accuracy,
)
cls.fit(X_train, y_train)

predictions = cls.predict(X_test)
score = sklearn.metrics.accuracy_score(y_test, predictions)
metric_name = cls.automl_._metric.name
print(f"Accuracy score {score:.3f} using {metric_name}")

Out:

################################################################################
Use predefined accuracy metric
Accuracy score 0.944 using accuracy
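
Any other built-in metric can be passed in the same way via the metric argument. A short sketch using the built-in ROC AUC scorer instead (assuming autosklearn.metrics.roc_auc, as listed in the API documentation); it is configured exactly like the accuracy example above:

# Sketch: use a different built-in metric, e.g. ROC AUC
# (assumes autosklearn.metrics.roc_auc from the API documentation).
cls_auc = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    per_run_time_limit=30,
    seed=1,
    metric=autosklearn.metrics.roc_auc,
)
# cls_auc.fit(X_train, y_train)  # fit exactly as in the example above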

Second example: Use own accuracy metric

print("#"*80)
print("Use self defined accuracy metric")
accuracy_scorer = autosklearn.metrics.make_scorer(
    name="accu",
    score_func=accuracy,
    optimum=1,
    greater_is_better=True,
    needs_proba=False,
    needs_threshold=False,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    per_run_time_limit=30,
    seed=1,
    metric=accuracy_scorer,
)
cls.fit(X_train, y_train)

predictions = cls.predict(X_test)
score = accuracy_scorer(y_test, predictions)
metric_name = cls.automl_._metric.name
print(f"Accuracy score {score:.3f} using {metric_name:s}")

Out:

################################################################################
Use self defined accuracy metric
Accuracy score 0.937 using accu
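
Because the scorer returned by make_scorer is callable on true and predicted labels (as used above), it can be cross-checked against scikit-learn's own implementation. A small illustrative check:

# Illustrative cross-check: the custom "accu" scorer should agree with
# sklearn.metrics.accuracy_score on the same predictions.
assert np.isclose(
    accuracy_scorer(y_test, predictions),
    sklearn.metrics.accuracy_score(y_test, predictions),
)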

Third example: Use own error metric

print("#"*80)
print("Use self defined error metric")
error_rate = autosklearn.metrics.make_scorer(
    name="error",
    score_func=error,
    optimum=0,
    greater_is_better=False,
    needs_proba=False,
    needs_threshold=False,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    per_run_time_limit=30,
    seed=1,
    metric=error_rate,
)
cls.fit(X_train, y_train)

predictions = cls.predict(X_test)
score = error_rate(y_test, predictions)
metric_name = cls.automl_._metric.name
print(f"Error score {score:.3f} using {metric_name:s}")

Out:

################################################################################
Use self defined error metric
[WARNING] [2021-11-09 20:04:18,731:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:04:19,737:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:04:23,184:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:04:24,140:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:04:25,232:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:04:29,419:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:04:31,278:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:04:35,411:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:04:40,045:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:04:45,068:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:04:50,071:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
Error score -0.063 using error
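
The printed value is negative because the scorer was created with greater_is_better=False: when called directly it returns the sign-flipped metric value, as the output above shows. The raw error rate can be recovered by negation, as in this small illustrative snippet:

# Scorers built with greater_is_better=False return sign-flipped values when
# called directly, so negate to recover the raw error rate (illustrative).
raw_error = -error_rate(y_test, predictions)
print(f"Raw error rate: {raw_error:.3f}")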

Fourth example: Use own accuracy metric with additional argument

print("#"*80)
print("Use self defined accuracy with additional argument")
accuracy_scorer = autosklearn.metrics.make_scorer(
    name="accu_add",
    score_func=accuracy_wk,
    optimum=1,
    greater_is_better=True,
    needs_proba=False,
    needs_threshold=False,
    extra_argument=None,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    per_run_time_limit=30,
    seed=1,
    metric=accuracy_scorer
)
cls.fit(X_train, y_train)

predictions = cls.predict(X_test)
score = accuracy_scorer(y_test, predictions)
metric_name = cls.automl_._metric.name
print(f"Accuracy score {score:.3f} using {metric_name:s}")

Out:

################################################################################
Use self defined accuracy with additional argument
Accuracy score 0.937 using accu_add
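
The additional keyword arguments passed to make_scorer are simply forwarded to the score function, so they can carry real configuration rather than just None. A hypothetical sketch with a per-class weight vector; the weighted_accuracy function and its class_weight argument are made up for illustration:

# Hypothetical sketch: forward a real configuration value to the score function.
def weighted_accuracy(solution, prediction, class_weight):
    # weight each sample by the weight of its true class
    weights = np.asarray(class_weight)[solution.astype(int)]
    return np.average(solution == prediction, weights=weights)


weighted_accuracy_scorer = autosklearn.metrics.make_scorer(
    name="weighted_accu",
    score_func=weighted_accuracy,
    optimum=1,
    greater_is_better=True,
    needs_proba=False,
    needs_threshold=False,
    class_weight=[0.4, 0.6],  # made-up weights for the two classes
)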

Fifth example: Use own error metric with additional argument

print("#"*80)
print("Use self defined error with additional argument")
error_rate = autosklearn.metrics.make_scorer(
    name="error_add",
    score_func=error_wk,
    optimum=0,
    greater_is_better=False,
    needs_proba=False,
    needs_threshold=False,
    extra_argument=None,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    per_run_time_limit=30,
    seed=1,
    metric=error_rate,
)
cls.fit(X_train, y_train)

predictions = cls.predict(X_test)
score = error_rate(y_test, predictions)
metric_name = cls.automl_._metric.name
print(f"Error score {score:.3f} using {metric_name:s}")

Out:

################################################################################
Use self defined error with additional argument
[WARNING] [2021-11-09 20:06:23,413:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:06:27,269:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:06:34,117:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:06:35,067:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:06:38,388:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:06:39,226:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:06:40,191:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:06:45,538:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:06:49,773:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:06:54,348:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:06:56,606:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:07:01,814:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:07:06,326:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
[WARNING] [2021-11-09 20:07:07,428:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
Error score 0.615 using error_add
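
Finally, the metric that was actually optimised can be inspected after fitting. A small sketch, assuming the sprint_statistics() helper of the fitted estimator:

# Sketch: summarise the search, including the optimised metric
# (assumes the sprint_statistics() helper on the fitted estimator).
print(cls.sprint_statistics())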

Total running time of the script: ( 5 minutes 19.475 seconds)
