Source code for smac.model.gaussian_process.kernels.matern_kernel

from __future__ import annotations

import math

import numpy as np
import scipy.optimize
import scipy.spatial.distance
import scipy.special
import sklearn.gaussian_process.kernels as kernels

from smac.model.gaussian_process.kernels.base_kernels import AbstractKernel
from smac.model.gaussian_process.priors.abstract_prior import AbstractPrior

__copyright__ = "Copyright 2022, automl.org"
__license__ = "3-clause BSD"


[docs]class MaternKernel(AbstractKernel, kernels.Matern): """Matern kernel implementation.""" def __init__( self, length_scale: float | tuple[float, ...] | np.ndarray = 1.0, length_scale_bounds: tuple[float, float] | list[tuple[float, float]] | np.ndarray = (1e-5, 1e5), nu: float = 1.5, operate_on: np.ndarray | None = None, has_conditions: bool = False, prior: AbstractPrior | None = None, ) -> None: super().__init__( operate_on=operate_on, has_conditions=has_conditions, prior=prior, length_scale=length_scale, length_scale_bounds=length_scale_bounds, nu=nu, ) def _call( self, X: np.ndarray, Y: np.ndarray | None = None, eval_gradient: bool = False, active: np.ndarray | None = None, ) -> np.ndarray | tuple[np.ndarray, np.ndarray]: X = np.atleast_2d(X) length_scale = kernels._check_length_scale(X, self.length_scale) if Y is None: dists = scipy.spatial.distance.pdist(X / length_scale, metric="euclidean") else: if eval_gradient: raise ValueError("Gradient can only be evaluated when Y is None.") dists = scipy.spatial.distance.cdist(X / length_scale, Y / length_scale, metric="euclidean") if self.nu == 0.5: K = np.exp(-dists) elif self.nu == 1.5: K = dists * math.sqrt(3) K = (1.0 + K) * np.exp(-K) elif self.nu == 2.5: K = dists * math.sqrt(5) K = (1.0 + K + K**2 / 3.0) * np.exp(-K) else: # general case; expensive to evaluate K = dists K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan tmp = math.sqrt(2 * self.nu) * K K.fill((2 ** (1.0 - self.nu)) / scipy.special.gamma(self.nu)) K *= tmp**self.nu K *= scipy.special.kv(self.nu, tmp) if Y is None: # convert from upper-triangular matrix to square matrix K = scipy.spatial.distance.squareform(K) np.fill_diagonal(K, 1) if active is not None: K = K * active if eval_gradient: if self.hyperparameter_length_scale.fixed: # Hyperparameter l kept fixed K_gradient = np.empty((X.shape[0], X.shape[0], 0)) return K, K_gradient # We need to recompute the pairwise dimension-wise distances if self.anisotropic: D = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 / (length_scale**2) else: D = scipy.spatial.distance.squareform(dists**2)[:, :, np.newaxis] if self.nu == 0.5: K_gradient = K[..., np.newaxis] * D / np.sqrt(D.sum(2))[:, :, np.newaxis] K_gradient[~np.isfinite(K_gradient)] = 0 elif self.nu == 1.5: K_gradient = 3 * D * np.exp(-np.sqrt(3 * D.sum(-1)))[..., np.newaxis] elif self.nu == 2.5: tmp = np.sqrt(5 * D.sum(-1))[..., np.newaxis] K_gradient = 5.0 / 3.0 * D * (tmp + 1) * np.exp(-tmp) else: # original sklearn code would approximate gradient numerically, but this would violate our assumption # that the kernel hyperparameters are not changed within __call__ raise ValueError(self.nu) if not self.anisotropic: return K, K_gradient[:, :].sum(-1)[:, :, np.newaxis] else: return K, K_gradient else: return K