Skip to content

Surrogate Model

The surrogate module defines the basic classes for surrogate models.

It provides methods for training and evaluating a model that approximates the relationship between input hyperparameters and performance.

DataBasedSurrogateModel

Bases: ModelBasedSurrogateModel

A surrogate model trained on a dataset of configurations and their performance.

Source code in src/hypershap/surrogate_model.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
class DataBasedSurrogateModel(ModelBasedSurrogateModel):
    """A surrogate model trained on a dataset of configurations and their performance.

    The regressor is fitted once, during construction, and is then handed to the
    ``ModelBasedSurrogateModel`` initializer together with the configuration space.
    """

    def __init__(
        self,
        config_space: ConfigurationSpace,
        data: list[tuple[Configuration, float]],
        base_model: BaseEstimator | None = None,
        seed: int | None = 0,
    ) -> None:
        """Fit a regressor on observed (configuration, performance) pairs.

        Args:
            config_space: The configuration space.
            data: The data to be used for fitting the surrogate model.  Each element
                  is a tuple of (Configuration, float); the configuration is encoded
                  through its ``get_array()`` method.
            base_model: The base model to be used for fitting the surrogate model.
                        ``fit`` is called on this very object (no copy is made).
                        If None, a RandomForestRegressor is used.
            seed: The random state passed to the default RandomForestRegressor.
                  It is only used when ``base_model`` is None. Defaults to 0.

        Raises:
            ValueError: If ``data`` is empty.

        """
        # Fail early with a clear message instead of letting the regressor
        # choke on an empty, wrongly shaped training array.
        if len(data) == 0:
            msg = "Cannot fit a surrogate model on an empty dataset."
            raise ValueError(msg)

        train_x = np.array([obs[0].get_array() for obs in data])
        train_y = np.array([obs[1] for obs in data])

        if base_model is None:
            base_model = RandomForestRegressor(random_state=seed)

        # The cast only informs the type checker; it returns the same object.
        regressor = cast("SklearnRegressorProtocol", base_model)
        regressor.fit(train_x, train_y)

        super().__init__(config_space, base_model)

__init__(config_space, data, base_model=None, seed=0)

Initialize the DataBasedSurrogateModel with data and an optional base model.

Parameters:

Name Type Description Default
config_space ConfigurationSpace

The configuration space.

required
data list[tuple[Configuration, float]]

The data to be used for fitting the surrogate model. Each element is a tuple of (Configuration, float).

required
base_model BaseEstimator | None

The base model to be used for fitting the surrogate model. If None, a RandomForestRegressor is used.

None
seed int | None

The random seed for pseudo-randomization of the surrogate model. Defaults to 0.

0
Source code in src/hypershap/surrogate_model.py
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
def __init__(
    self,
    config_space: ConfigurationSpace,
    data: list[tuple[Configuration, float]],
    base_model: BaseEstimator | None = None,
    seed: int | None = 0,
) -> None:
    """Fit a regressor on observed (configuration, performance) pairs.

    Args:
        config_space: The configuration space.
        data: The data to be used for fitting the surrogate model.  Each element
              is a tuple of (Configuration, float); the configuration is encoded
              through its ``get_array()`` method.
        base_model: The base model to be used for fitting the surrogate model.
                    ``fit`` is called on this very object (no copy is made).
                    If None, a RandomForestRegressor is used.
        seed: The random state passed to the default RandomForestRegressor.
              It is only used when ``base_model`` is None. Defaults to 0.

    Raises:
        ValueError: If ``data`` is empty.

    """
    # Fail early with a clear message instead of letting the regressor
    # choke on an empty, wrongly shaped training array.
    if len(data) == 0:
        msg = "Cannot fit a surrogate model on an empty dataset."
        raise ValueError(msg)

    train_x = np.array([obs[0].get_array() for obs in data])
    train_y = np.array([obs[1] for obs in data])

    if base_model is None:
        base_model = RandomForestRegressor(random_state=seed)

    # The cast only informs the type checker; it returns the same object.
    regressor = cast("SklearnRegressorProtocol", base_model)
    regressor.fit(train_x, train_y)

    super().__init__(config_space, base_model)

ModelBasedSurrogateModel

Bases: SurrogateModel

A surrogate model based on a pre-trained machine learning model.

Source code in src/hypershap/surrogate_model.py
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
class ModelBasedSurrogateModel(SurrogateModel):
    """A surrogate model that delegates predictions to a wrapped regressor.

    The wrapped model is only queried through ``predict``; this class never
    calls ``fit`` on it.
    """

    def __init__(self, config_space: ConfigurationSpace, base_model: BaseEstimator) -> None:
        """Store the configuration space and the regressor used for predictions.

        Args:
            config_space: The configuration space.
            base_model: The base machine learning model, kept as ``self.base_model``.

        """
        super().__init__(config_space)
        self.base_model = base_model

    def evaluate(self, config_array: np.ndarray) -> float | list[float]:
        """Predict the performance of one configuration or a batch of configurations.

        Args:
            config_array: A numpy array representing the configuration(s). A 1-D
                array is treated as a single configuration and reshaped into a
                one-row matrix before prediction.

        Returns:
            A Python float when the model yields exactly one prediction (this
            includes a 2-D input with a single row), otherwise the predictions
            converted to a Python list.

        """
        batch = config_array.reshape(1, -1) if config_array.ndim == 1 else config_array

        regressor = cast("SklearnRegressorProtocol", self.base_model)
        predicted = regressor.predict(batch)

        # Anything but a lone prediction is handed back as a plain list.
        if predicted.shape != (1,):
            return predicted.tolist()

        return float(predicted[0])

__init__(config_space, base_model)

Initialize the ModelBasedSurrogateModel with a configuration space and a base model.

Parameters:

Name Type Description Default
config_space ConfigurationSpace

The configuration space.

required
base_model BaseEstimator

The base machine learning model.

required
Source code in src/hypershap/surrogate_model.py
144
145
146
147
148
149
150
151
152
153
def __init__(self, config_space: ConfigurationSpace, base_model: BaseEstimator) -> None:
    """Initialize the ModelBasedSurrogateModel with a configuration space and a base model.

    Args:
        config_space: The configuration space; forwarded to the parent initializer.
        base_model: The base machine learning model. It is stored as
            ``self.base_model`` as-is; no fitting or copying happens here.

    """
    super().__init__(config_space)
    self.base_model = base_model

evaluate(config_array)

Evaluate a configuration (or batch of configurations).

Parameters:

Name Type Description Default
config_array ndarray

A numpy array representing the configuration(s).

required

Returns:

Type Description
float | list[float]

The predicted performance(s).

Source code in src/hypershap/surrogate_model.py
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
def evaluate(self, config_array: np.ndarray) -> float | list[float]:
    """Predict the performance of one configuration or a batch of configurations.

    Args:
        config_array: A numpy array representing the configuration(s). A 1-D
            array is treated as a single configuration and reshaped into a
            one-row matrix before prediction.

    Returns:
        A Python float when the model yields exactly one prediction (this
        includes a 2-D input with a single row), otherwise the predictions
        converted to a Python list.

    """
    batch = config_array.reshape(1, -1) if config_array.ndim == 1 else config_array

    regressor = cast("SklearnRegressorProtocol", self.base_model)
    predicted = regressor.predict(batch)

    # Anything but a lone prediction is handed back as a plain list.
    if predicted.shape != (1,):
        return predicted.tolist()

    return float(predicted[0])

SklearnRegressorProtocol

Bases: Protocol

Defines the interface for scikit-learn-like regression models.

This protocol specifies the required methods for a class to be considered a compatible scikit-learn regression model. It mandates the presence of fit, predict, and score methods, mirroring the structure of many scikit-learn estimators.

Attributes:

Name Type Description
fit callable

A method that fits the model to the provided data. It should accept training data (X) and target variables (y) as arguments, and any optional fit parameters. It should return the fitted model instance itself (allowing for chaining).

predict callable

A method that generates predictions for a given input dataset. It accepts input data (X) and returns predictions as a NumPy array.

score callable

A method that evaluates the model's performance on a given dataset. It accepts input data (X) and corresponding target variables (y) and returns a scalar performance score (e.g., R-squared, MSE).

Source code in src/hypershap/surrogate_model.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
class SklearnRegressorProtocol(Protocol):
    """Defines the interface for scikit-learn-like regression models.

    This protocol specifies the required methods for a class to be considered
    a compatible scikit-learn regression model.  It mandates the presence of `fit`,
    `predict`, and `score` methods, mirroring the structure of many
    scikit-learn estimators.

    Attributes:
        fit (callable): A method that fits the model to the provided data.
                         It should accept training data (X) and target variables (y)
                         as arguments, and any optional fit parameters. It should return
                         the fitted model instance itself (allowing for chaining).
        predict (callable): A method that generates predictions for a given input dataset.
                           It accepts input data (X) and returns predictions as a NumPy array.
        score (callable): A method that evaluates the model's performance on a given dataset.
                          It accepts input data (X) and corresponding target variables (y)
                          and returns a scalar performance score.

    """

    # The return annotation matches the documented contract (the fitted model
    # itself); it is quoted because the class name is not bound yet while the
    # class body is being executed.
    def fit(self, X: Any, y: Any, **fit_params: Any) -> "SklearnRegressorProtocol":
        """Fit the regression model to the provided training data.

        Args:
            X (np.ndarray): The training data features.
            y (np.ndarray): The training data target variables.
            **fit_params (Any): Optional keyword arguments passed to the fit method.

        Returns:
            SklearnRegressorProtocol: The fitted regression model instance itself,
            allowing for method chaining.

        """
        ...

    def predict(self, X: Any) -> np.ndarray:
        """Generate predictions for a given input dataset.

        Args:
            X (np.ndarray): The input data features for prediction.

        Returns:
            np.ndarray: The predicted target values as a NumPy array.

        """
        ...

    def score(self, X: Any, y: Any) -> float:
        """Evaluate the model's performance on a given dataset.

        Args:
            X (np.ndarray): The input data features.
            y (np.ndarray): The corresponding target variables.

        Returns:
            float: A scalar performance score for the model on the dataset.

        """
        ...

fit(X, y, **fit_params)

Fit the regression model to the provided training data.

Parameters:

Name Type Description Default
X ndarray

The training data features.

required
y ndarray

The training data target variables.

required
**fit_params Any

Optional keyword arguments passed to the fit method.

{}

Returns:

Type Description
None

'SklearnRegressorProtocol': The fitted regression model instance itself, allowing for method chaining.

Source code in src/hypershap/surrogate_model.py
42
43
44
45
46
47
48
49
50
51
52
53
54
# The return annotation matches the documented contract (the fitted model
# itself); it is quoted so it does not need the protocol class to be bound
# at definition time.
def fit(self, X: Any, y: Any, **fit_params: Any) -> "SklearnRegressorProtocol":
    """Fit the regression model to the provided training data.

    Args:
        X (np.ndarray): The training data features.
        y (np.ndarray): The training data target variables.
        **fit_params (Any): Optional keyword arguments passed to the fit method.

    Returns:
        SklearnRegressorProtocol: The fitted regression model instance itself,
        allowing for method chaining.

    """
    ...

predict(X)

Generate predictions for a given input dataset.

Parameters:

Name Type Description Default
X ndarray

The input data features for prediction.

required

Returns:

Type Description
ndarray

The predicted target values as a NumPy array.

Source code in src/hypershap/surrogate_model.py
56
57
58
59
60
61
62
63
64
65
66
def predict(self, X: Any) -> np.ndarray:
    """Generate predictions for a given input dataset.

    This is an interface stub: the body is intentionally empty and concrete
    regressors supply the behaviour.

    Args:
        X (np.ndarray): The input data features for prediction.

    Returns:
        np.ndarray: The predicted target values as a NumPy array.

    """
    ...

score(X, y)

Evaluate the model's performance on a given dataset.

Parameters:

Name Type Description Default
X ndarray

The input data features.

required
y ndarray

The corresponding target variables.

required

Returns:

Name Type Description
float float

A scalar performance score, representing the model's accuracy on the dataset.

Source code in src/hypershap/surrogate_model.py
68
69
70
71
72
73
74
75
76
77
78
79
def score(self, X: Any, y: Any) -> float:
    """Evaluate the model's performance on a given dataset.

    This is an interface stub: the body is intentionally empty and concrete
    regressors supply the behaviour.

    Args:
        X (np.ndarray): The input data features.
        y (np.ndarray): The corresponding target variables.

    Returns:
        float: A scalar performance score for the model on the dataset.

    """
    ...

SurrogateModel

Bases: ABC

An abstract class for defining the interface of surrogate models.

This class defines the basic methods that all surrogate models should implement, allowing for a consistent interface for evaluating different models.

Source code in src/hypershap/surrogate_model.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
class SurrogateModel(ABC):
    """An abstract class for defining the interface of surrogate models.

    This class defines the basic methods that all surrogate models should implement,
    allowing for a consistent interface for evaluating different models.  Subclasses
    provide ``evaluate``; the configuration-based helpers are built on top of it.
    """

    def __init__(self, config_space: ConfigurationSpace) -> None:
        """Initialize the SurrogateModel with a configuration space.

        Args:
            config_space: The configuration space for the surrogate model.

        """
        self.config_space = config_space

    def evaluate_config(self, config: Configuration) -> float:
        """Evaluate a single configuration using the surrogate model.

        Args:
            config: The configuration to evaluate.

        Returns:
            The predicted performance for the given configuration.

        Raises:
            TypeError: If ``evaluate`` does not return a float.

        """
        res = self.evaluate(np.array(config.get_array()))
        if not isinstance(res, float):  # pragma: no cover
            raise TypeError  # pragma: no cover
        return res

    def evaluate_config_batch(self, config_batch: list[Configuration]) -> list[float]:
        """Evaluate a batch of configurations using the surrogate model.

        Args:
            config_batch: A list of configurations to evaluate.

        Returns:
            A list of predicted performances for the given configurations, in
            the order returned by ``evaluate``.  An empty batch yields an empty list.

        Raises:
            TypeError: If ``evaluate`` returns neither a list nor, for a
                one-element batch, a float.

        """
        # Nothing to predict; avoid handing a degenerate array to the model.
        if len(config_batch) == 0:
            return []

        res = self.evaluate(np.array([config.get_array() for config in config_batch]))

        # An implementation may collapse a lone prediction to a bare float
        # (ModelBasedSurrogateModel.evaluate does); re-wrap it so a batch of
        # one still satisfies the list contract instead of raising.
        if isinstance(res, float) and len(config_batch) == 1:
            return [res]

        if not isinstance(res, list):  # pragma: no cover
            raise TypeError  # pragma: no cover
        return res

    @abstractmethod
    def evaluate(self, config_array: np.ndarray) -> float | list[float]:
        """Evaluate a configuration (or batch of configurations) represented as a numpy array.

        Args:
            config_array: A numpy array representing the configuration(s).

        Returns:
            The predicted performance(s).

        """

__init__(config_space)

Initialize the SurrogateModel with a configuration space.

Parameters:

Name Type Description Default
config_space ConfigurationSpace

The configuration space for the surrogate model.

required
Source code in src/hypershap/surrogate_model.py
89
90
91
92
93
94
95
96
def __init__(self, config_space: ConfigurationSpace) -> None:
    """Initialize the SurrogateModel with a configuration space.

    Args:
        config_space: The configuration space for the surrogate model. It is
            stored unchanged as ``self.config_space``.

    """
    self.config_space = config_space

evaluate(config_array) abstractmethod

Evaluate a configuration (or batch of configurations) represented as a numpy array.

Parameters:

Name Type Description Default
config_array ndarray

A numpy array representing the configuration(s).

required

Returns:

Type Description
float | list[float]

The predicted performance(s).

Source code in src/hypershap/surrogate_model.py
128
129
130
131
132
133
134
135
136
137
138
@abstractmethod
def evaluate(self, config_array: np.ndarray) -> float | list[float]:
    """Evaluate a configuration (or batch of configurations) represented as a numpy array.

    This method is abstract and has no default behaviour; concrete surrogate
    models must override it.

    Args:
        config_array: A numpy array representing the configuration(s).

    Returns:
        The predicted performance(s): a float or a list of floats, per the
        return annotation.

    """

evaluate_config(config)

Evaluate a single configuration using the surrogate model.

Parameters:

Name Type Description Default
config Configuration

The configuration to evaluate.

required

Returns:

Type Description
float

The predicted performance for the given configuration.

Source code in src/hypershap/surrogate_model.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def evaluate_config(self, config: Configuration) -> float:
    """Predict the performance of one configuration.

    The configuration is encoded through its ``get_array()`` method and passed
    to ``evaluate``.

    Args:
        config: The configuration to evaluate.

    Returns:
        The predicted performance for the given configuration.

    Raises:
        TypeError: If ``evaluate`` does not return a float.

    """
    encoded = np.array(config.get_array())
    prediction = self.evaluate(encoded)
    if isinstance(prediction, float):
        return prediction
    raise TypeError  # pragma: no cover

evaluate_config_batch(config_batch)

Evaluate a batch of configurations using the surrogate model.

Parameters:

Name Type Description Default
config_batch list[Configuration]

A list of configurations to evaluate.

required

Returns:

Type Description
list[float]

A list of predicted performances for the given configurations.

Source code in src/hypershap/surrogate_model.py
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def evaluate_config_batch(self, config_batch: list[Configuration]) -> list[float]:
    """Evaluate a batch of configurations using the surrogate model.

    Args:
        config_batch: A list of configurations to evaluate.

    Returns:
        A list of predicted performances for the given configurations, in
        the order returned by ``evaluate``.  An empty batch yields an empty list.

    Raises:
        TypeError: If ``evaluate`` returns neither a list nor, for a
            one-element batch, a float.

    """
    # Nothing to predict; avoid handing a degenerate array to the model.
    if len(config_batch) == 0:
        return []

    res = self.evaluate(np.array([config.get_array() for config in config_batch]))

    # An implementation may collapse a lone prediction to a bare float
    # (ModelBasedSurrogateModel.evaluate does); re-wrap it so a batch of
    # one still satisfies the list contract instead of raising.
    if isinstance(res, float) and len(config_batch) == 1:
        return [res]

    if not isinstance(res, list):  # pragma: no cover
        raise TypeError  # pragma: no cover
    return res