Metrics
Auto-sklearn supports a number of built-in metrics, which are listed in the metrics section of the API documentation. It is also possible to define your own metric and use it to fit and evaluate your model. The following examples show how to use built-in and self-defined metrics for a classification problem.
import numpy as np
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
import autosklearn.classification
import autosklearn.metrics
Custom Metrics
def accuracy(solution, prediction):
    # custom function defining accuracy
    return np.mean(solution == prediction)


def error(solution, prediction):
    # custom function defining error
    return np.mean(solution != prediction)


def accuracy_wk(solution, prediction, extra_argument):
    # custom function defining accuracy and accepting an additional argument
    assert extra_argument is None
    return np.mean(solution == prediction)


def error_wk(solution, prediction, extra_argument):
    # custom function defining error and accepting an additional argument
    assert extra_argument is None
    return np.mean(solution != prediction)


def metric_which_needs_x(solution, prediction, X_data, consider_col, val_threshold):
    # custom function defining accuracy on the subset of rows whose value in
    # column consider_col exceeds val_threshold
    assert X_data is not None
    rel_idx = X_data[:, consider_col] > val_threshold
    return np.mean(solution[rel_idx] == prediction[rel_idx])
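As a quick sanity check, these plain functions can be called directly on small NumPy arrays before wrapping them for Auto-sklearn (the toy values below are chosen purely for illustration):
y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 1, 0, 0])
print(accuracy(y_true, y_pred))  # 0.75
print(error(y_true, y_pred))     # 0.25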
Data Loading
X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)
Print a list of available metrics
print("Available CLASSIFICATION metrics autosklearn.metrics.*:")
print("\t*" + "\n\t*".join(autosklearn.metrics.CLASSIFICATION_METRICS))
print("Available REGRESSION autosklearn.metrics.*:")
print("\t*" + "\n\t*".join(autosklearn.metrics.REGRESSION_METRICS))
Available CLASSIFICATION metrics autosklearn.metrics.*:
*accuracy
*balanced_accuracy
*roc_auc
*average_precision
*log_loss
*precision_macro
*precision_micro
*precision_samples
*precision_weighted
*recall_macro
*recall_micro
*recall_samples
*recall_weighted
*f1_macro
*f1_micro
*f1_samples
*f1_weighted
Available REGRESSION metrics autosklearn.metrics.*:
*mean_absolute_error
*mean_squared_error
*root_mean_squared_error
*mean_squared_log_error
*median_absolute_error
*r2
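Each of the names above is a key of the corresponding mapping; assuming CLASSIFICATION_METRICS maps metric names to ready-made Scorer objects (consistent with the iteration in the print statements above), a built-in scorer can also be looked up by name instead of accessed as an attribute:
# Assumption: CLASSIFICATION_METRICS is a name-to-Scorer mapping
balanced_accuracy_scorer = autosklearn.metrics.CLASSIFICATION_METRICS["balanced_accuracy"]
print(balanced_accuracy_scorer.name)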
First example: Use predefined accuracy metric
print("#" * 80)
print("Use predefined accuracy metric")
scorer = autosklearn.metrics.accuracy
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    seed=1,
    metric=scorer,
)
cls.fit(X_train, y_train)
predictions = cls.predict(X_test)
score = scorer(y_test, predictions)
print(f"Accuracy score {score:.3f} using {scorer.name}")
################################################################################
Use predefined accuracy metric
Accuracy score 0.951 using accuracy
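Because the predefined scorer computes plain accuracy, its value can be cross-checked against scikit-learn's accuracy_score using the imports already made above:
# Should print the same value as the scorer above
print(sklearn.metrics.accuracy_score(y_test, predictions))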
Second example: Use own accuracy metric
print("#" * 80)
print("Use self defined accuracy metric")
accuracy_scorer = autosklearn.metrics.make_scorer(
name="accu",
score_func=accuracy,
optimum=1,
greater_is_better=True,
needs_proba=False,
needs_threshold=False,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    seed=1,
    metric=accuracy_scorer,
)
cls.fit(X_train, y_train)
predictions = cls.predict(X_test)
score = accuracy_scorer(y_test, predictions)
print(f"Accuracy score {score:.3f} using {accuracy_scorer.name:s}")
################################################################################
Use self-defined accuracy metric
Accuracy score 0.958 using accu
Third example: Use own error metric
print("#" * 80)
print("Use self defined error metric")
error_rate = autosklearn.metrics.make_scorer(
name="error",
score_func=error,
optimum=0,
greater_is_better=False,
needs_proba=False,
needs_threshold=False,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    seed=1,
    metric=error_rate,
)
cls.fit(X_train, y_train)
predictions = cls.predict(X_test)
score = error_rate(y_test, predictions)
print(f"Error score {score:.3f} using {error_rate.name:s}")
################################################################################
Use self-defined error metric
Error score -0.042 using error
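Reusing the toy arrays from the sanity check above, the sign convention can be verified directly, assuming (as the negative output above indicates) that the scorer multiplies the raw score by -1 when greater_is_better=False:
print(error(y_true, y_pred))       # raw error rate: 0.25
print(error_rate(y_true, y_pred))  # scorer output: -0.25 (negated)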
Fourth example: Use own accuracy metric with additional argument
print("#" * 80)
print("Use self defined accuracy with additional argument")
accuracy_scorer = autosklearn.metrics.make_scorer(
name="accu_add",
score_func=accuracy_wk,
optimum=1,
greater_is_better=True,
needs_proba=False,
needs_threshold=False,
extra_argument=None,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60, per_run_time_limit=30, seed=1, metric=accuracy_scorer
)
cls.fit(X_train, y_train)
predictions = cls.predict(X_test)
score = accuracy_scorer(y_test, predictions)
print(f"Accuracy score {score:.3f} using {accuracy_scorer.name:s}")
################################################################################
Use self-defined accuracy with additional argument
Accuracy score 0.958 using accu_add
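Extra keyword arguments passed to make_scorer (here extra_argument=None) are stored in the scorer and forwarded to the score function on every call, so the scorer should agree with a direct call to the underlying function:
# Both calls evaluate accuracy_wk with extra_argument=None
print(accuracy_scorer(y_test, predictions))
print(accuracy_wk(y_test, predictions, extra_argument=None))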
Fifth example: Use own error metric with additional argument
Note that this scorer is created with greater_is_better=True even though lower error values are better: the optimizer then treats larger metric values as better and effectively maximizes the error, which explains the high error score and the SMAC warnings about non-positive costs in the output below. For a genuine error metric, pass greater_is_better=False as in the third example.
print("#" * 80)
print("Use self-defined error with additional argument")
error_rate = autosklearn.metrics.make_scorer(
    name="error_add",
    score_func=error_wk,
    optimum=0,
    greater_is_better=True,
    needs_proba=False,
    needs_threshold=False,
    extra_argument=None,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    seed=1,
    metric=error_rate,
)
cls.fit(X_train, y_train)
predictions = cls.predict(X_test)
score = error_rate(y_test, predictions)
print(f"Error score {score:.3f} using {error_rate.name:s}")
################################################################################
Use self-defined error with additional argument
[WARNING] [2022-09-20 09:06:56,340:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
(the warning above is repeated several times during the run)
Error score 0.615 using error_add
Sixth example: Use a metric with an additional argument which also needs X data
Finally, Auto-sklearn also supports metrics that require the training data (passed as X_data) to compute a value. This can be useful if one only cares about the score on a subset of the data.
accuracy_scorer = autosklearn.metrics.make_scorer(
    name="accu_X",
    score_func=metric_which_needs_x,
    optimum=1,
    greater_is_better=True,
    needs_proba=False,
    needs_X=True,
    needs_threshold=False,
    consider_col=1,
    val_threshold=18.8,
)
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    seed=1,
    metric=accuracy_scorer,
)
cls.fit(X_train, y_train)
predictions = cls.predict(X_test)
score = metric_which_needs_x(
    y_test,
    predictions,
    X_data=X_test,
    consider_col=1,
    val_threshold=18.8,
)
print(f"Accuracy score {score:.3f} using {accuracy_scorer.name:s}")
[WARNING] [2022-09-20 09:08:26,830:smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost] Got cost of smaller/equal to 0. Replace by 0.000010 since we use log cost.
(the warning above is repeated several times during the run)
Accuracy score 0.919 using accu_X
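Since accu_X only scores rows whose column 1 value exceeds 18.8, it is worth checking what fraction of the test set the metric actually evaluates:
# Fraction of test rows that satisfy the metric's filter
print(np.mean(X_test[:, 1] > 18.8))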
Total running time of the script: (5 minutes 47.306 seconds)