neps.optimizers.bayesian_optimization.models.deepGP #

DeepGP #

DeepGP(
    pipeline_space: SearchSpace,
    neural_network_args: dict | None = None,
    logger=None,
    surrogate_model_fit_args: dict | None = None,
    checkpointing: bool = False,
    root_directory: Path | str | None = None,
    checkpoint_file: Path | str = "surrogate_checkpoint.pth",
    refine_epochs: int = 50,
    **kwargs
)

Gaussian process with a deep kernel: the GP kernel operates on features produced by a neural network rather than on the raw inputs.

Source code in neps/optimizers/bayesian_optimization/models/deepGP.py
def __init__(
    self,
    pipeline_space: SearchSpace,
    neural_network_args: dict | None = None,
    logger=None,
    surrogate_model_fit_args: dict | None = None,
    # IMPORTANT: Checkpointing does not use file locking,
    # IMPORTANT: hence, it is not suitable for multiprocessing settings
    checkpointing: bool = False,
    root_directory: Path | str | None = None,
    checkpoint_file: Path | str = "surrogate_checkpoint.pth",
    refine_epochs: int = 50,
    **kwargs,
):
    self.surrogate_model_fit_args = (
        surrogate_model_fit_args if surrogate_model_fit_args is not None else {}
    )

    self.checkpointing = checkpointing
    self.refine_epochs = refine_epochs
    if checkpointing:
        assert (
            root_directory is not None
        ), "root_directory must be provided when checkpointing is enabled"
        self.root_dir = Path(os.getcwd(), root_directory)
        self.checkpoint_path = Path(os.getcwd(), root_directory, checkpoint_file)

    super().__init__()
    self.__preprocess_search_space(pipeline_space)
    # set the categories array for the encoder
    self.categories_array = np.array(self.categories)

    if neural_network_args is None:
        neural_network_args = {}
    self.nn_args = neural_network_args

    self.device = (
        torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    )
    # self.device = torch.device("cpu")

    # Save the NN args, necessary for preprocessing
    self.cnn_kernel_size = neural_network_args.get("cnn_kernel_size", 3)
    self.model, self.likelihood, self.mll = self.__initialize_gp_model(
        neural_network_args.get("n_layers", 2)
    )

    # build the neural network
    self.nn = NeuralFeatureExtractor(self.input_size, **neural_network_args)

    self.logger = logger or logging.getLogger("neps")
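
A minimal construction sketch. The space object below is assumed to be a NePS SearchSpace built elsewhere (the optimizer normally supplies pipeline_space itself), so this is illustration rather than a drop-in recipe:

from pathlib import Path

# `space` is a NePS SearchSpace obtained from the optimizer / user code
# (construction omitted here; see the NePS search space documentation).
surrogate = DeepGP(
    pipeline_space=space,
    neural_network_args={"n_layers": 3, "cnn_kernel_size": 3},
    checkpointing=True,              # no file locking: single-process use only
    root_directory=Path("results"),  # required when checkpointing is enabled
    checkpoint_file="surrogate_checkpoint.pth",
    refine_epochs=50,
)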

__initialize_gp_model #

__initialize_gp_model(
    train_size: int,
) -> tuple[
    GPRegressionModel,
    GaussianLikelihood,
    ExactMarginalLogLikelihood,
]

Called when the surrogate is first initialized or restarted.

PARAMETERS

train_size (int)
    The size of the current training set.

RETURNS

tuple[GPRegressionModel, GaussianLikelihood, ExactMarginalLogLikelihood]
    model, likelihood, mll: the GP model, the likelihood, and the marginal likelihood.

Source code in neps/optimizers/bayesian_optimization/models/deepGP.py
def __initialize_gp_model(
    self,
    train_size: int,
) -> tuple[
    GPRegressionModel,
    gpytorch.likelihoods.GaussianLikelihood,
    gpytorch.mlls.ExactMarginalLogLikelihood,
]:
    """
    Called when the surrogate is first initialized or restarted.

    Args:
        train_size: The size of the current training set.

    Returns:
        model, likelihood, mll - The GP model, the likelihood and
            the marginal likelihood.
    """
    train_x = torch.ones(train_size, train_size).to(self.device)
    train_y = torch.ones(train_size).to(self.device)

    likelihood = gpytorch.likelihoods.GaussianLikelihood().to(self.device)
    model = GPRegressionModel(
        train_x=train_x, train_y=train_y, likelihood=likelihood
    ).to(self.device)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model).to(self.device)
    return model, likelihood, mll
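
The tensors of ones are shape-only placeholders: gpytorch's ExactGP keeps a reference to its training data, and the real observations replace the placeholders when the surrogate is fitted. A standalone sketch of the same pattern (set_train_data is standard gpytorch API; the fitting code in deepGP.py may update the data differently):

import torch
import gpytorch

likelihood = gpytorch.likelihoods.GaussianLikelihood()
# Placeholder data, as in __initialize_gp_model.
model = GPRegressionModel(
    train_x=torch.ones(2, 2), train_y=torch.ones(2), likelihood=likelihood
)
# Later, real training data replaces the placeholders:
model.set_train_data(
    inputs=torch.randn(10, 2), targets=torch.randn(10), strict=False
)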

get_state #

get_state() -> dict[str, dict]

Get the current state of the surrogate.

RETURNS

current_state (dict[str, dict])
    A dictionary that represents the current state of the surrogate model.

Source code in neps/optimizers/bayesian_optimization/models/deepGP.py
def get_state(self) -> dict[str, dict]:
    """
    Get the current state of the surrogate.

    Returns:
        current_state: A dictionary that represents
            the current state of the surrogate model.
    """
    current_state = {
        "gp_state_dict": deepcopy(self.model.state_dict()),
        "nn_state_dict": deepcopy(self.nn.state_dict()),
        "likelihood_state_dict": deepcopy(self.likelihood.state_dict()),
    }

    return current_state
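
A short usage sketch; surrogate is assumed to be an already constructed DeepGP:

state = surrogate.get_state()
assert set(state) == {"gp_state_dict", "nn_state_dict", "likelihood_state_dict"}

# The entries are deep copies, so subsequent training of the live
# modules does not mutate this snapshot.
snapshot = state["nn_state_dict"]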

load_checkpoint #

load_checkpoint(state: dict | None = None)

Load the state from a previous checkpoint.

Source code in neps/optimizers/bayesian_optimization/models/deepGP.py
def load_checkpoint(self, state: dict | None = None):
    """
    Load the state from a previous checkpoint.
    """
    if state is None:
        checkpoint = torch.load(self.checkpoint_path)
    else:
        checkpoint = state
    self.model.load_state_dict(checkpoint["gp_state_dict"])
    self.nn.load_state_dict(checkpoint["nn_state_dict"])
    self.likelihood.load_state_dict(checkpoint["likelihood_state_dict"])

    self.model.to(self.device)
    self.likelihood.to(self.device)
    self.nn.to(self.device)

save_checkpoint #

save_checkpoint(state: dict | None = None)

Save the given state or the current state in a checkpoint file.

PARAMETERS

state (dict | None, default: None)
    The state to save; if None, the current state is saved. The file is
    written to the checkpoint_path set in the constructor.

Source code in neps/optimizers/bayesian_optimization/models/deepGP.py
def save_checkpoint(self, state: dict | None = None):
    """
    Save the given state or the current state to the
    checkpoint file at self.checkpoint_path.

    Args:
        state: The state to save; if None, the current
            state is saved.
    """

    if state is None:
        torch.save(
            self.get_state(),
            self.checkpoint_path,
        )
    else:
        torch.save(
            state,
            self.checkpoint_path,
        )
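
Together with load_checkpoint, this gives a simple persistence round trip. A sketch, assuming the surrogate was constructed with checkpointing=True so that checkpoint_path is set:

# Persist the current GP, NN, and likelihood weights to disk ...
surrogate.save_checkpoint()

# ... and restore them later, e.g. after the model was re-initialized.
surrogate.load_checkpoint()

# Alternatively, pass an in-memory state and skip the file entirely:
surrogate.load_checkpoint(state=surrogate.get_state())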

GPRegressionModel #

GPRegressionModel(
    train_x: Tensor,
    train_y: Tensor,
    likelihood: GaussianLikelihood,
)

Bases: ExactGP

A simple GP model.

PARAMETERS

train_x (Tensor)
    The initial train examples for the GP.

train_y (Tensor)
    The initial train labels for the GP.

likelihood (GaussianLikelihood)
    The likelihood to be used.

Source code in neps/optimizers/bayesian_optimization/models/deepGP.py
def __init__(
    self,
    train_x: torch.Tensor,
    train_y: torch.Tensor,
    likelihood: gpytorch.likelihoods.GaussianLikelihood,
):
    """
    Constructor of the GPRegressionModel.

    Args:
        train_x: The initial train examples for the GP.
        train_y: The initial train labels for the GP.
        likelihood: The likelihood to be used.
    """
    super().__init__(train_x, train_y, likelihood)

    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
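
Only the constructor is excerpted above. ExactGP subclasses also define a forward method that combines the mean and the kernel; the standard gpytorch pattern looks like the sketch below (the actual method in deepGP.py may differ in detail):

def forward(self, x):
    # Evaluate the constant mean and the scaled RBF kernel on the
    # (feature-extracted) inputs and return the induced Gaussian.
    mean_x = self.mean_module(x)
    covar_x = self.covar_module(x)
    return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)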

NeuralFeatureExtractor #

NeuralFeatureExtractor(input_size: int, **kwargs)

Bases: Module

Neural network feature extractor used in the DeepGP.

Source code in neps/optimizers/bayesian_optimization/models/deepGP.py
def __init__(self, input_size: int, **kwargs):
    super().__init__()

    # Set number of hyperparameters
    self.input_size = input_size

    self.n_layers = kwargs.get("n_layers", 2)
    self.activation = nn.LeakyReLU()

    layer1_units = kwargs.get("layer1_units", 128)
    self.fc1 = nn.Linear(input_size, layer1_units)
    self.bn1 = nn.BatchNorm1d(layer1_units)

    previous_layer_units = layer1_units
    for i in range(2, self.n_layers):
        next_layer_units = kwargs.get(f"layer{i}_units", 256)
        setattr(
            self,
            f"fc{i}",
            nn.Linear(previous_layer_units, next_layer_units),
        )
        setattr(
            self,
            f"bn{i}",
            nn.BatchNorm1d(next_layer_units),
        )
        previous_layer_units = next_layer_units

    setattr(
        self,
        f"fc{self.n_layers}",
        nn.Linear(
            previous_layer_units + kwargs.get("cnn_nr_channels", 4),
            # accounting for the learning curve features
            kwargs.get(f"layer{self.n_layers}_units", 256),
        ),
    )
    self.cnn = nn.Sequential(
        nn.Conv1d(
            in_channels=1,
            kernel_size=(kwargs.get("cnn_kernel_size", 3),),
            out_channels=4,
        ),
        nn.AdaptiveMaxPool1d(1),
    )
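
A construction sketch showing how the per-layer kwargs map onto attribute names (fc1/bn1, fc2/bn2, ..., with the final linear layer taking the four CNN channels as extra inputs); the argument values here are illustrative:

extractor = NeuralFeatureExtractor(
    input_size=5,          # number of hyperparameters
    n_layers=3,
    layer1_units=128,
    layer2_units=256,
    cnn_kernel_size=3,
)
# Creates fc1 (5 -> 128) and fc2 (128 -> 256), each with a matching
# BatchNorm1d, then fc3 (256 + 4 CNN channels -> 256), plus the Conv1d
# head that summarizes the learning curve into 4 channels.
print(extractor)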