Source code for autooed.mobo.surrogate_model.nn

'''
Neural network surrogate model.
'''

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

from autooed.mobo.surrogate_model.base import SurrogateModel


class MLP(nn.Module):
    '''
    Multi-layer perceptron.
    '''
    def __init__(self, n_in, n_out, hidden_sizes, activation):
        '''
        Initialize an MLP neural network.

        Parameters
        ----------
        n_in: int
            Input dimension.
        n_out: int
            Output dimension.
        hidden_sizes: list
            List of sizes of hidden layers.
        activation: str
            Type of activation function; 'relu' and 'tanh' are supported.
        '''
        super().__init__()
        
        self.fc = nn.ModuleList()
        last_size = n_in
        for size in hidden_sizes:
            self.fc.append(nn.Linear(last_size, size))
            last_size = size
        self.fc.append(nn.Linear(last_size, n_out))

        ac_map = {
            'relu': torch.relu,
            'tanh': torch.tanh,
        }
        assert activation in ac_map, f"activation type {activation} is not supported"
        self.ac = ac_map[activation]

    def forward(self, x):
        for fc in self.fc[:-1]:
            x = self.ac(fc(x))
        x = self.fc[-1](x)
        return x

    def basis_func(self, x):
        '''
        Return the activations of the last hidden layer, i.e. the learned basis functions.
        '''
        for fc in self.fc[:-1]:
            x = self.ac(fc(x))
        return x
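
# Illustrative sketch (not part of the original module, kept as a comment so it
# does not run on import): constructing an MLP and running a forward pass.
# The sizes below are arbitrary example values.
#
#   net = MLP(n_in=4, n_out=1, hidden_sizes=(50, 50, 50), activation='tanh')
#   x = torch.rand(8, 4)          # batch of 8 designs with 4 variables each
#   y = net(x)                    # predicted values, shape (8, 1)
#   phi = net.basis_func(x)       # last hidden-layer features, shape (8, 50)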


def jacobian(outputs, inputs, create_graph=False):
    '''
    Compute the jacobian of `outputs` with respect to `inputs`.
    NOTE: `outputs` and `inputs` are batched data, i.e. there is no correlation between individuals in a batch.

    Parameters
    ----------
    outputs: torch.tensor
        Outputs of neural networks.
    inputs: torch.tensor
        Inputs of neural networks.
    create_graph: bool, default=False
        Whether to create the computation graph.

    Returns
    -------
    torch.tensor
        Jacobian of outputs w.r.t. inputs.
    '''
    batch_size, output_shape, input_shape = outputs.shape[0], outputs.shape[1:], inputs.shape[1:]
    jacs = []
    for i in range(batch_size):
        for output in outputs[i].view(-1):
            jac = torch.autograd.grad(output, inputs, grad_outputs=None, allow_unused=True, retain_graph=True, create_graph=create_graph)[0][i]
            jacs.append(jac)
    return torch.stack(jacs).reshape((batch_size,) + output_shape + input_shape)
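
# Illustrative sketch (not part of the original module, kept as a comment so it
# does not run on import): batched jacobian of a vector-valued network output,
# with arbitrary example sizes.
#
#   x = torch.rand(8, 4, requires_grad=True)                      # 8 samples, 4 variables
#   net = MLP(n_in=4, n_out=2, hidden_sizes=(16,), activation='tanh')
#   y = net(x)                                                    # shape (8, 2)
#   dy = jacobian(y, x)                                           # shape (8, 2, 4)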


def hessian(outputs, inputs):
    '''
    Compute the hessian of `outputs` with respect to `inputs`.

    Parameters
    ----------
    outputs: torch.tensor
        Outputs of neural networks.
    inputs: torch.tensor
        Inputs of neural networks.

    Returns
    -------
    torch.tensor
        Hessian of outputs w.r.t. inputs.
    '''
    grad_inputs = jacobian(outputs, inputs, create_graph=True)
    return jacobian(grad_inputs, inputs)
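
# Illustrative sketch (not part of the original module, kept as a comment so it
# does not run on import): batched hessian of a scalar prediction, reusing the
# `net` and `x` from the jacobian sketch above (tanh keeps second derivatives
# well-defined).
#
#   y = net(x)[:, 0]                                              # shape (8,)
#   d2y = hessian(y, x)                                           # shape (8, 4, 4)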


class NeuralNetwork(SurrogateModel):
    '''
    Simple neural network.
    '''
    def __init__(self, problem, hidden_size=50, hidden_layers=3, activation='tanh', lr=1e-3, weight_decay=1e-4, n_epoch=100, **kwargs):
        '''
        Initialize a neural network as surrogate model.

        Parameters
        ----------
        problem: autooed.problem.Problem
            The optimization problem.
        hidden_size: int
            Size of each hidden layer of the neural network.
        hidden_layers: int
            Number of hidden layers of the neural network.
        activation: str
            Type of activation function.
        lr: float
            Learning rate.
        weight_decay: float
            Weight decay.
        n_epoch: int
            Number of training epochs.
        '''
        super().__init__(problem)

        # one independent network and optimizer per objective
        self.net = [MLP(n_in=self.n_var, n_out=1, hidden_sizes=(hidden_size,) * hidden_layers, activation=activation) for _ in range(self.n_obj)]
        self.criterion = nn.MSELoss()
        self.optimizer = [optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay) for net in self.net]
        self.n_epoch = n_epoch

    def _fit(self, X, Y):
        X, Y = torch.FloatTensor(X), torch.FloatTensor(Y)
        # fit each objective's network independently with MSE loss
        for i in range(self.n_obj):
            for _ in range(self.n_epoch):
                Y_pred = self.net[i](X)[:, 0]
                loss = self.criterion(Y_pred, Y[:, i])
                self.optimizer[i].zero_grad()
                loss.backward()
                self.optimizer[i].step()

    def _evaluate(self, X, std, calc_gradient, calc_hessian):
        # NOTE: the derivative flags are named `calc_gradient` and `calc_hessian`
        # so that they do not shadow the module-level `jacobian`/`hessian` helpers.
        F, dF, hF = [], [], []
        n_sample = X.shape[0] if len(X.shape) > 1 else 1
        X = torch.FloatTensor(X)
        X.requires_grad = True  # enable autograd with respect to the input variables

        F = [self.net[i](X)[:, 0] for i in range(self.n_obj)]
        if calc_gradient:
            dF = [jacobian(f, X).numpy() for f in F]
        if calc_hessian:
            hF = [hessian(f, X).numpy() for f in F]
        F = [f.detach().numpy() for f in F]

        F = np.stack(F, axis=1)
        dF = np.stack(dF, axis=1) if calc_gradient else None
        hF = np.stack(hF, axis=1) if calc_hessian else None

        # the plain network is deterministic, so the predicted uncertainty is zero
        S = np.zeros((n_sample, self.n_obj)) if std else None
        dS = np.zeros((n_sample, self.n_obj, self.n_var)) if std and calc_gradient else None
        hS = np.zeros((n_sample, self.n_obj, self.n_var, self.n_var)) if std and calc_hessian else None

        out = {'F': F, 'dF': dF, 'hF': hF, 'S': S, 'dS': dS, 'hS': hS}
        return out
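
# Illustrative sketch (not part of the original module, kept as a comment so it
# does not run on import): typical lifecycle of the surrogate. `problem` is
# assumed to be an autooed.problem.Problem with `n_var` design variables and
# `n_obj` objectives, and X/Y are assumed NumPy arrays of shapes (n, n_var) and
# (n, n_obj). In practice the public fit/evaluate wrappers of SurrogateModel are
# used; the protected methods are called here only to show the output shapes.
#
#   model = NeuralNetwork(problem, hidden_size=50, hidden_layers=3, activation='tanh')
#   model._fit(X, Y)                 # one network trained per objective
#   out = model._evaluate(X_new, std=True, calc_gradient=True, calc_hessian=False)
#   out['F']    # predicted objectives, shape (n_new, n_obj)
#   out['dF']   # gradients, shape (n_new, n_obj, n_var)
#   out['S']    # predicted std, all zeros for this deterministic surrogate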