Source code for autooed.mobo.surrogate_model.bnn

'''
Bayesian neural network surrogate model.
'''

import numpy as np
import torch

from autooed.mobo.surrogate_model.nn import NeuralNetwork, jacobian, hessian


class BayesianRegression:
    """
    Bayesian regression model

    w ~ N(w|0, alpha^(-1)I)
    y = X @ w
    t ~ N(t|X @ w, beta^(-1))
    """
    def __init__(self, alpha:float=1., beta:float=1.):
        self.alpha = alpha
        self.beta = beta
        self.w_mean = None
        self.w_precision = None

    def _is_prior_defined(self) -> bool:
        return self.w_mean is not None and self.w_precision is not None

    def _get_prior(self, ndim:int) -> tuple:
        if self._is_prior_defined():
            return self.w_mean, self.w_precision
        else:
            return np.zeros(ndim), self.alpha * np.eye(ndim)

    def fit(self, X:np.ndarray, t:np.ndarray):
        """
        bayesian update of parameters given training dataset

        Parameters
        ----------
        X : (N, n_features) np.ndarray
            training data independent variable
        t : (N,) np.ndarray
            training data dependent variable
        """
        mean_prev, precision_prev = self._get_prior(np.size(X, 1))

        w_precision = precision_prev + self.beta * X.T @ X
        w_mean = np.linalg.solve(
            w_precision,
            precision_prev @ mean_prev + self.beta * X.T @ t
        )
        self.w_mean = w_mean
        self.w_precision = w_precision
        self.w_cov = np.linalg.inv(self.w_precision)

    def predict(self, X:np.ndarray, return_std:bool=False, sample_size:int=None):
        """
        return mean (and standard deviation) of predictive distribution

        Parameters
        ----------
        X : (N, n_features) np.ndarray
            independent variable
        return_std : bool, optional
            flag to return standard deviation (the default is False)
        sample_size : int, optional
            number of samples to draw from the predictive distribution
            (the default is None, no sampling from the distribution)

        Returns
        -------
        y : (N,) np.ndarray
            mean of the predictive distribution
        y_std : (N,) np.ndarray
            standard deviation of the predictive distribution
        y_sample : (N, sample_size) np.ndarray
            samples from the predictive distribution
        """
        if sample_size is not None:
            w_sample = np.random.multivariate_normal(
                self.w_mean, self.w_cov, size=sample_size
            )
            y_sample = X @ w_sample.T
            return y_sample
        y = X @ self.w_mean
        if return_std:
            y_var = 1 / self.beta + np.sum(X @ self.w_cov * X, axis=1)
            y_std = np.sqrt(y_var)
            return y, y_std
        return y

    def export_params(self):
        return self.w_mean, self.w_cov, self.beta


[docs]class BayesianNeuralNetwork(NeuralNetwork):
    '''
    Deep Networks for Global Optimization [1]: Bayesian Linear Regression with basis function extracted from a neural network
    
    [1] J. Snoek, O. Rippel, K. Swersky, R. Kiros, N. Satish, 
        N. Sundaram, M.~M.~A. Patwary, Prabhat, R.~P. Adams
        Scalable Bayesian Optimization Using Deep Neural Networks
        Proc. of ICML'15
    '''
[docs]    def __init__(self, problem, hidden_size=50, hidden_layers=3, activation='tanh', lr=1e-3, weight_decay=1e-4, n_epoch=100, **kwargs):
        '''
        Initialize a Bayesian neural network as surrogate model.

        Parameters
        ----------
        problem: autooed.problem.Problem
            The optimization problem.
        hidden_size: int
            Size of the hidden layer of the neural network.
        hidden_layers: int
            Number of hidden layers of the neural network.
        activation: str
            Type of activation function.
        lr: float
            Learning rate.
        weight_decay: float
            Weight decay.
        n_epoch: int
            Number of training epochs.
        '''
        super().__init__(problem, hidden_size, hidden_layers, activation, lr, weight_decay, n_epoch)

        self.regressor = [BayesianRegression()] * self.n_obj

[docs]    def _fit(self, X, Y):
        super()._fit(X, Y)
        
        for i in range(self.n_obj):
            phi = self.net[i].basis_func(torch.FloatTensor(X)).data.numpy()
            self.regressor[i].fit(phi, Y[:, i])
    
[docs]    def _evaluate(self, X, std, gradient, hessian):
        F, dF, hF = [], [], [] # mean
        S, dS, hS = [], [], [] # std

        X = torch.FloatTensor(X)
        X.requires_grad = True

        for i in range(self.n_obj):

            phi = self.net[i].basis_func(X)

            w_mean = torch.FloatTensor(self.regressor[i].w_mean)
            w_cov = torch.FloatTensor(self.regressor[i].w_cov)
            beta = self.regressor[i].beta

            y_mean = torch.matmul(phi, w_mean)
            F.append(y_mean.detach().numpy())

            if std:
                y_var = 1 / beta + torch.sum(torch.matmul(phi, w_cov) * phi, axis=1)
                y_std = torch.sqrt(y_var)
                S.append(y_std.detach().numpy())

            if not (gradient or hessian): continue

            if gradient:
                dy_mean = jacobian(y_mean, X)
                dF.append(dy_mean.numpy())

                if std:
                    dy_std = jacobian(y_std, X)
                    dS.append(dy_std.numpy())

            if hessian:
                hy_mean = hessian(y_mean, X)
                hF.append(hy_mean.numpy())

                if std:
                    hy_std = hessian(y_std, X)
                    hS.append(hy_std.numpy())
        
        F = np.stack(F, axis=1)
        dF = np.stack(dF, axis=1) if gradient else None
        hF = np.stack(hF, axis=1) if hessian else None
        
        S = np.stack(S, axis=1) if std else None
        dS = np.stack(dS, axis=1) if std and gradient else None
        hS = np.stack(hS, axis=1) if std and hessian else None
        
        out = {'F': F, 'dF': dF, 'hF': hF, 'S': S, 'dS': dS, 'hS': hS}
        return out