# Source code for algorithms.AGen.critic.model

import torch.nn as nn
import torch


'''
MLP building block: a stack of Linear -> activation -> dropout layers.
'''


class Block(nn.Module):
    """Stack of ``Linear -> activation -> dropout`` layers.

    One triple is appended for every entry of ``hidden_layer_dims``; with an
    empty list the block degenerates to the identity mapping.
    """

    def __init__(self, input_size, hidden_layer_dims, activation_fn, drop_out_fn):
        """
        :param input_size: dimensionality of the block's input features
        :param hidden_layer_dims: list of hidden sizes, one ``nn.Linear`` per entry
        :param activation_fn: activation module applied after each linear layer
        :param drop_out_fn: dropout module applied after each activation
        """
        super(Block, self).__init__()
        layers = []
        # Renamed from `input`, which shadowed the builtin of the same name.
        in_features = input_size
        for hidden_size in hidden_layer_dims:
            layers.append(nn.Linear(in_features, hidden_size))
            # NOTE: the very same activation/dropout module instances are reused
            # after every linear layer; this is safe because they are stateless.
            layers.append(activation_fn)
            layers.append(drop_out_fn)
            in_features = hidden_size
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the transformed tensor."""
        return self.block(x)
''' Reward function approximator (neural network) '''
class ObservationActionMLP(nn.Module):
    """Reward-function approximator mapping (observation, action) pairs to scores.

    Observations and actions are first passed through separate ``Block`` branches,
    their features are concatenated, fed through a joint hidden trunk, and finally
    projected to ``output_dim`` scores by a linear layer.
    """

    def __init__(
            self,
            hidden_layer_dims,
            obs_size,
            act_size,
            output_dim=1,
            obs_hidden_layer_dims=None,
            act_hidden_layer_dims=None,
            activation_fn=None,
            dropout_keep_prob=1.,
            l2_reg=0.,
            return_features=False):
        """
        :param hidden_layer_dims: hidden sizes of the joint trunk (must be non-empty,
            since the final score layer reads ``hidden_layer_dims[-1]``)
        :param obs_size: observation feature size
        :param act_size: action feature size
        :param output_dim: output dimension (default 1: a scalar reward)
        :param obs_hidden_layer_dims: hidden sizes of the observation branch
            (default: no branch layers, observations pass through unchanged)
        :param act_hidden_layer_dims: hidden sizes of the action branch
            (default: no branch layers)
        :param activation_fn: activation module (default: ``nn.ReLU(inplace=False)``)
        :param dropout_keep_prob: keep probability; dropout p is ``1 - keep_prob``
        :param l2_reg: L2 regularization coefficient (stored only; presumably applied
            by the training loop — not used inside this module)
        :param return_features: stored flag; not consulted by ``forward`` here
        """
        super(ObservationActionMLP, self).__init__()
        # None sentinels replace the original mutable/module default arguments
        # (`list()` and `nn.ReLU()` in the signature are evaluated once at
        # definition time and shared across every instance built with defaults).
        obs_hidden_layer_dims = [] if obs_hidden_layer_dims is None else obs_hidden_layer_dims
        act_hidden_layer_dims = [] if act_hidden_layer_dims is None else act_hidden_layer_dims
        activation_fn = nn.ReLU(inplace=False) if activation_fn is None else activation_fn

        self.output_dim = output_dim
        self.obs_hidden_layer_dims = obs_hidden_layer_dims
        self.act_hidden_layer_dims = act_hidden_layer_dims
        self.hidden_layer_dims = hidden_layer_dims
        self.activation_fn = activation_fn
        self.dropout_keep_prob = dropout_keep_prob
        self.return_features = return_features
        self.dropout = nn.Dropout(p=1 - dropout_keep_prob)
        self.l2_reg = l2_reg

        # Separate branches for observations and actions.
        self.obs_block = Block(obs_size, obs_hidden_layer_dims,
                               activation_fn, self.dropout)
        self.act_block = Block(act_size, act_hidden_layer_dims,
                               activation_fn, self.dropout)
        # A branch with no hidden layers is the identity, so its feature size
        # is the raw input size; otherwise it is the last hidden size.
        feature_size = (
            (obs_hidden_layer_dims[-1] if obs_hidden_layer_dims else obs_size)
            + (act_hidden_layer_dims[-1] if act_hidden_layer_dims else act_size)
        )
        self.hidden_block = Block(feature_size, hidden_layer_dims,
                                  activation_fn, self.dropout)
        self.score_layer = nn.Linear(hidden_layer_dims[-1], output_dim)

    def forward(self, obs, act):
        """
        :param obs: batch of observations, shape ``(batch, obs_size)``
        :param act: batch of actions, shape ``(batch, act_size)``
        :return: scores for the batched pairs, shape ``(batch, output_dim)``
        """
        # Move inputs to the device the model's parameters live on. The original
        # unconditionally called ``.cuda()`` whenever CUDA was available, which
        # crashed when the model itself was on the CPU. ``torch.as_tensor`` also
        # avoids the copy-construct warning when the inputs are already tensors.
        device = self.score_layer.weight.device
        obs = torch.as_tensor(obs, dtype=torch.float32, device=device)
        act = torch.as_tensor(act, dtype=torch.float32, device=device)
        obs_feature = self.obs_block(obs)
        act_feature = self.act_block(act)
        feature = torch.cat((obs_feature, act_feature), dim=1)
        feature = self.hidden_block(feature)
        return self.score_layer(feature)