# Source code for algorithms.AGen.critic.model
import torch.nn as nn
import torch
'''
Feed-forward building block: a stack of Linear -> activation -> dropout layers.
'''
class Block(nn.Module):
    """Stack of ``Linear -> activation -> dropout`` layers.

    If ``hidden_layer_dims`` is empty, the block degenerates to the identity
    mapping (an empty ``nn.Sequential``).
    """

    def __init__(self, input_size, hidden_layer_dims, activation_fn, drop_out_fn):
        """
        :param input_size: dimensionality of the input features
        :param hidden_layer_dims: iterable of hidden layer sizes, one per layer
        :param activation_fn: activation module inserted after each Linear layer
        :param drop_out_fn: dropout module inserted after each activation
        """
        super(Block, self).__init__()
        layers = []
        # Renamed from ``input`` to avoid shadowing the builtin.
        in_dim = input_size
        for hidden_size in hidden_layer_dims:
            layers.append(nn.Linear(in_dim, hidden_size))
            # NOTE: the very same activation/dropout module instances are
            # reused after every Linear layer (original behavior); this is
            # safe because ReLU/Dropout are stateless.
            layers.append(activation_fn)
            layers.append(drop_out_fn)
            in_dim = hidden_size
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the transformed tensor."""
        # Call the module (not ``.forward``) so hooks are honored.
        return self.block(x)
'''
Reward-function approximator (neural network) over observation-action pairs.
'''
class ObservationActionMLP(nn.Module):
    """MLP scoring observation-action pairs.

    Observations and actions are first passed through separate ``Block``
    encoders, concatenated, pushed through a joint hidden ``Block``, and
    finally projected to ``output_dim`` scores by a linear layer.
    """

    def __init__(
            self,
            hidden_layer_dims,
            obs_size,
            act_size,
            output_dim=1,
            obs_hidden_layer_dims=None,
            act_hidden_layer_dims=None,
            activation_fn=None,
            dropout_keep_prob=1.,
            l2_reg=0.,
            return_features=False):
        """
        :param hidden_layer_dims: sizes of the joint hidden layers (non-empty)
        :param obs_size: observation feature size
        :param act_size: action feature size
        :param output_dim: output dimension (default 1, a scalar score)
        :param obs_hidden_layer_dims: observation-encoder layer sizes
            (``None``/empty means the raw observation is used directly)
        :param act_hidden_layer_dims: action-encoder layer sizes
            (``None``/empty means the raw action is used directly)
        :param activation_fn: activation module; defaults to a fresh
            ``nn.ReLU(inplace=False)`` per instance
        :param dropout_keep_prob: keep probability; dropout rate is ``1 - p``
        :param l2_reg: L2 regularization coefficient (stored only; presumably
            applied by the training loop — not used in this class)
        :param return_features: if True, caller expects features to be exposed
            (stored only; not used by ``forward`` here)
        """
        super(ObservationActionMLP, self).__init__()
        # Avoid mutable/shared default arguments: the original ``list()`` and
        # ``nn.ReLU()`` defaults were evaluated once at function definition
        # and shared by every instance of the class.
        obs_hidden_layer_dims = [] if obs_hidden_layer_dims is None else obs_hidden_layer_dims
        act_hidden_layer_dims = [] if act_hidden_layer_dims is None else act_hidden_layer_dims
        if activation_fn is None:
            activation_fn = nn.ReLU(inplace=False)
        self.output_dim = output_dim
        self.obs_hidden_layer_dims = obs_hidden_layer_dims
        self.act_hidden_layer_dims = act_hidden_layer_dims
        self.hidden_layer_dims = hidden_layer_dims
        self.activation_fn = activation_fn
        self.dropout_keep_prob = dropout_keep_prob
        self.return_features = return_features
        # PyTorch dropout takes the drop probability, hence ``1 - keep_prob``.
        self.dropout = nn.Dropout(p=1 - dropout_keep_prob)
        self.l2_reg = l2_reg
        # Separate encoder blocks for observations and actions.
        self.obs_block = Block(obs_size, obs_hidden_layer_dims, activation_fn, self.dropout)
        self.act_block = Block(act_size, act_hidden_layer_dims, activation_fn, self.dropout)
        # Concatenated feature size: last encoder layer if present, else raw size.
        feature_size = (obs_hidden_layer_dims[-1] if obs_hidden_layer_dims else obs_size) \
            + (act_hidden_layer_dims[-1] if act_hidden_layer_dims else act_size)
        self.hidden_block = Block(feature_size, hidden_layer_dims, activation_fn, self.dropout)
        self.score_layer = nn.Linear(hidden_layer_dims[-1], output_dim)

    def forward(self, obs, act):
        """
        :param obs: batch of observations, shape ``(batch, obs_size)``
            (tensor or anything ``torch.as_tensor`` accepts)
        :param act: batch of actions, shape ``(batch, act_size)``
        :return: scores of shape ``(batch, output_dim)``
        """
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        # ``as_tensor`` avoids the re-wrap warning and redundant copy that
        # ``torch.tensor(existing_tensor)`` incurs.
        obs = torch.as_tensor(obs, dtype=torch.float32, device=device)
        act = torch.as_tensor(act, dtype=torch.float32, device=device)
        obs_feature = self.obs_block(obs)
        act_feature = self.act_block(act)
        feature = torch.cat((obs_feature, act_feature), dim=1)
        feature = self.hidden_block(feature)
        return self.score_layer(feature)