Source code for autooed.mobo.surrogate_model.gp

'''
Gaussian process surrogate model.
'''

import numpy as np
import math
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel
from sklearn.gaussian_process.kernels import Matern as MaternKernel, _check_length_scale, _approx_fprime
from sklearn.utils.optimize import _check_optimize_result
from scipy.optimize import minimize
from scipy.linalg import solve_triangular
from scipy.spatial.distance import pdist, cdist, squareform
from scipy.special import kv, gamma

from autooed.mobo.surrogate_model.base import SurrogateModel
from autooed.utils.operand import safe_divide


class Matern(MaternKernel):
    '''
    Customized version of the Matern kernel that uses safe division in the gradient
    computation to avoid numerical errors at zero pairwise distance.
    '''
    def __call__(self, X, Y=None, eval_gradient=False):
        X = np.atleast_2d(X)
        length_scale = _check_length_scale(X, self.length_scale)
        if Y is None:
            dists = pdist(X / length_scale, metric='euclidean')
        else:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated when Y is None.")
            dists = cdist(X / length_scale, Y / length_scale,
                            metric='euclidean')

        if self.nu == 0.5:
            K = np.exp(-dists)
        elif self.nu == 1.5:
            K = dists * math.sqrt(3)
            K = (1. + K) * np.exp(-K)
        elif self.nu == 2.5:
            K = dists * math.sqrt(5)
            K = (1. + K + K ** 2 / 3.0) * np.exp(-K)
        else:  # general case; expensive to evaluate
            K = dists
            K[K == 0.0] += np.finfo(float).eps  # strict zeros result in nan
            tmp = (math.sqrt(2 * self.nu) * K)
            K.fill((2 ** (1. - self.nu)) / gamma(self.nu))
            K *= tmp ** self.nu
            K *= kv(self.nu, tmp)

        if Y is None:
            # convert from upper-triangular matrix to square matrix
            K = squareform(K)
            np.fill_diagonal(K, 1)

        if eval_gradient:
            if self.hyperparameter_length_scale.fixed:
                # Hyperparameter l kept fixed
                K_gradient = np.empty((X.shape[0], X.shape[0], 0))
                return K, K_gradient

            # We need to recompute the pairwise dimension-wise distances
            if self.anisotropic:
                D = (X[:, np.newaxis, :] - X[np.newaxis, :, :])**2 \
                    / (length_scale ** 2)
            else:
                D = squareform(dists**2)[:, :, np.newaxis]

            if self.nu == 0.5:
                K_gradient = safe_divide(K[..., np.newaxis] * D,
                    np.sqrt(D.sum(2))[:, :, np.newaxis])
                K_gradient[~np.isfinite(K_gradient)] = 0
            elif self.nu == 1.5:
                K_gradient = \
                    3 * D * np.exp(-np.sqrt(3 * D.sum(-1)))[..., np.newaxis]
            elif self.nu == 2.5:
                tmp = np.sqrt(5 * D.sum(-1))[..., np.newaxis]
                K_gradient = 5.0 / 3.0 * D * (tmp + 1) * np.exp(-tmp)
            else:
                # approximate gradient numerically
                def f(theta):  # helper function
                    return self.clone_with_theta(theta)(X, Y)
                return K, _approx_fprime(self.theta, f, 1e-10)

            if not self.anisotropic:
                return K, K_gradient[:, :].sum(-1)[:, :, np.newaxis]
            else:
                return K, K_gradient
        else:
            return K
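

# A minimal sanity-check sketch for the customized kernel: for nu=1.5 the kernel
# values should coincide with scikit-learn's stock Matern, since the customization
# (safe_divide in the nu=0.5 gradient branch) only affects gradient evaluation.
# `_demo_matern_kernel` is a hypothetical helper, not used elsewhere.
def _demo_matern_kernel():
    rng = np.random.default_rng(0)
    X = rng.random((5, 3))
    K_custom = Matern(length_scale=np.ones(3), nu=1.5)(X)  # customized kernel above
    K_stock = MaternKernel(length_scale=np.ones(3), nu=1.5)(X)  # stock sklearn kernel
    assert np.allclose(K_custom, K_stock)
    return K_custom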


def constrained_optimization(obj_func, initial_theta, bounds):
    '''
    Customized version of the constrained hyperparameter optimization (L-BFGS-B) that
    skips scikit-learn's convergence check to avoid convergence warnings caused by
    occasional numerical instability.
    '''
    opt_res = minimize(obj_func, initial_theta, method="L-BFGS-B", jac=True, bounds=bounds)
    # NOTE: Temporarily disable the checking below because of the numerical instability sometimes.
    # _check_optimize_result("lbfgs", opt_res)
    return opt_res.x, opt_res.fun
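

# A minimal sketch of how this optimizer plugs into scikit-learn: any callable with
# the signature (obj_func, initial_theta, bounds) -> (theta_opt, func_min) can be
# passed as the `optimizer` argument of GaussianProcessRegressor, which is how the
# GaussianProcess class below uses it. `_demo_constrained_optimization` is a
# hypothetical helper fitted on made-up data.
def _demo_constrained_optimization():
    rng = np.random.default_rng(0)
    X = rng.random((20, 2))
    y = np.sin(X).sum(axis=1)
    kernel = ConstantKernel(1.0) * Matern(length_scale=np.ones(2), nu=2.5)
    gp = GaussianProcessRegressor(kernel=kernel, optimizer=constrained_optimization)
    gp.fit(X, y)  # hyperparameters are tuned through constrained_optimization
    return gp.kernel_.theta  # optimized log-hyperparameters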


class GaussianProcess(SurrogateModel):
    '''
    Gaussian process.
    '''
    def __init__(self, problem, nu=1, **kwargs):
        '''
        Initialize a Gaussian process.

        Parameters
        ----------
        problem: autooed.problem.Problem
            The optimization problem.
        nu: int
            The parameter nu controlling the type of the Matern kernel.
            Choices are 1, 3, 5 (Matern kernel with smoothness nu / 2) and -1 (RBF kernel).
        '''
        super().__init__(problem)

        self.nu = nu
        self.gps = []

        for _ in range(self.n_obj):
            if nu > 0:
                main_kernel = Matern(length_scale=np.ones(self.n_var), length_scale_bounds=(np.sqrt(1e-3), np.sqrt(1e3)), nu=0.5 * nu)
            else:
                main_kernel = RBF(length_scale=np.ones(self.n_var), length_scale_bounds=(np.sqrt(1e-3), np.sqrt(1e3)))

            kernel = ConstantKernel(constant_value=1.0, constant_value_bounds=(np.sqrt(1e-3), np.sqrt(1e3))) * \
                main_kernel + \
                ConstantKernel(constant_value=1e-2, constant_value_bounds=(np.exp(-6), np.exp(0)))

            gp = GaussianProcessRegressor(kernel=kernel, optimizer=constrained_optimization)
            self.gps.append(gp)

    def _fit(self, X, Y):
        for i, gp in enumerate(self.gps):
            gp.fit(X, Y[:, i])

    def _evaluate(self, X, std, gradient, hessian):
        F, dF, hF = [], [], []  # mean
        S, dS, hS = [], [], []  # std

        for gp in self.gps:

            # mean
            K = gp.kernel_(X, gp.X_train_)  # K: shape (N, N_train)
            y_mean = K.dot(gp.alpha_)

            F.append(y_mean)  # y_mean: shape (N,)

            if std:
                if gp._K_inv is None:
                    L_inv = solve_triangular(gp.L_.T, np.eye(gp.L_.shape[0]))
                    gp._K_inv = L_inv.dot(L_inv.T)

                y_var = gp.kernel_.diag(X)
                y_var -= np.einsum("ij,ij->i", np.dot(K, gp._K_inv), K)

                y_var_negative = y_var < 0
                if np.any(y_var_negative):
                    y_var[y_var_negative] = 0.0

                y_std = np.sqrt(y_var)

                S.append(y_std)  # y_std: shape (N,)

            if not (gradient or hessian): continue

            ell = np.exp(gp.kernel_.theta[1:-1])  # ell: shape (n_var,)
            sf2 = np.exp(gp.kernel_.theta[0])  # sf2: shape (1,)
            d = np.expand_dims(cdist(X / ell, gp.X_train_ / ell), 2)  # d: shape (N, N_train, 1)
            X_, X_train_ = np.expand_dims(X, 1), np.expand_dims(gp.X_train_, 0)
            dd_N = X_ - X_train_  # numerator
            dd_D = d * ell ** 2  # denominator
            dd = safe_divide(dd_N, dd_D)  # dd: shape (N, N_train, n_var)

            if self.nu == 1:
                dK = -sf2 * np.exp(-d) * dd
            elif self.nu == 3:
                dK = -3 * sf2 * np.exp(-np.sqrt(3) * d) * d * dd
            elif self.nu == 5:
                dK = -5. / 3 * sf2 * np.exp(-np.sqrt(5) * d) * (1 + np.sqrt(5) * d) * d * dd
            else:  # RBF
                dK = -sf2 * np.exp(-0.5 * d ** 2) * d * dd

            dK_T = dK.transpose(0, 2, 1)  # dK: shape (N, N_train, n_var), dK_T: shape (N, n_var, N_train)

            if gradient:
                dy_mean = dK_T @ gp.alpha_  # gp.alpha_: shape (N_train,)
                dF.append(dy_mean)  # dy_mean: shape (N, n_var)

                # TODO: check
                if std:
                    K = np.expand_dims(K, 1)  # K: shape (N, 1, N_train)
                    K_Ki = K @ gp._K_inv  # gp._K_inv: shape (N_train, N_train), K_Ki: shape (N, 1, N_train)
                    dK_Ki = dK_T @ gp._K_inv  # dK_Ki: shape (N, n_var, N_train)

                    dy_var = -np.sum(dK_Ki * K + K_Ki * dK_T, axis=2)  # dy_var: shape (N, n_var)
                    dy_std = 0.5 * safe_divide(dy_var, y_std)  # dy_std: shape (N, n_var)
                    dS.append(dy_std)

            if hessian:
                d = np.expand_dims(d, 3)  # d: shape (N, N_train, 1, 1)
                dd = np.expand_dims(dd, 2)  # dd: shape (N, N_train, 1, n_var)
                hd_N = d * np.expand_dims(np.eye(len(ell)), (0, 1)) - np.expand_dims(X_ - X_train_, 3) * dd  # numerator
                hd_D = d ** 2 * np.expand_dims(ell ** 2, (0, 1, 3))  # denominator
                hd = safe_divide(hd_N, hd_D)  # hd: shape (N, N_train, n_var, n_var)

                if self.nu == 1:
                    hK = -sf2 * np.exp(-d) * (hd - dd ** 2)
                elif self.nu == 3:
                    hK = -3 * sf2 * np.exp(-np.sqrt(3) * d) * (d * hd + (1 - np.sqrt(3) * d) * dd ** 2)
                elif self.nu == 5:
                    hK = -5. / 3 * sf2 * np.exp(-np.sqrt(5) * d) * (-5 * d ** 2 * dd ** 2 + (1 + np.sqrt(5) * d) * (dd ** 2 + d * hd))
                else:  # RBF
                    hK = -sf2 * np.exp(-0.5 * d ** 2) * ((1 - d ** 2) * dd ** 2 + d * hd)

                hK_T = hK.transpose(0, 2, 3, 1)  # hK: shape (N, N_train, n_var, n_var), hK_T: shape (N, n_var, n_var, N_train)

                hy_mean = hK_T @ gp.alpha_  # hy_mean: shape (N, n_var, n_var)
                hF.append(hy_mean)

                # TODO: check
                if std:
                    K = np.expand_dims(K, 2)  # K: shape (N, 1, 1, N_train)
                    dK = np.expand_dims(dK_T, 2)  # dK: shape (N, n_var, 1, N_train)
                    dK_Ki = np.expand_dims(dK_Ki, 2)  # dK_Ki: shape (N, n_var, 1, N_train)
                    hK_Ki = hK_T @ gp._K_inv  # hK_Ki: shape (N, n_var, n_var, N_train)

                    hy_var = -np.sum(hK_Ki * K + 2 * dK_Ki * dK + K_Ki * hK_T, axis=3)  # hy_var: shape (N, n_var, n_var)
                    hy_std = 0.5 * safe_divide(hy_var * y_std - dy_var * dy_std, y_var)  # hy_std: shape (N, n_var, n_var)
                    hS.append(hy_std)

        F = np.stack(F, axis=1)
        dF = np.stack(dF, axis=1) if gradient else None
        hF = np.stack(hF, axis=1) if hessian else None

        S = np.stack(S, axis=1) if std else None
        dS = np.stack(dS, axis=1) if std and gradient else None
        hS = np.stack(hS, axis=1) if std and hessian else None

        out = {'F': F, 'dF': dF, 'hF': hF, 'S': S, 'dS': dS, 'hS': hS}
        return out
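

# A minimal usage sketch, assuming `problem` is a valid autooed.problem.Problem
# (not constructed here) and that the SurrogateModel base class derives
# self.n_var / self.n_obj from it. `_demo_gaussian_process` is a hypothetical
# helper that calls the private _fit/_evaluate directly; real code would normally
# go through the SurrogateModel base interface instead.
def _demo_gaussian_process(problem):
    surrogate = GaussianProcess(problem, nu=5)
    rng = np.random.default_rng(0)
    X = rng.random((10, surrogate.n_var))
    Y = rng.random((10, surrogate.n_obj))
    surrogate._fit(X, Y)
    out = surrogate._evaluate(X, std=True, gradient=True, hessian=False)
    # F: (10, n_obj) means, S: (10, n_obj) stds, dF: (10, n_obj, n_var) mean gradients
    return out['F'], out['S'], out['dF']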