Source code for algorithms.distribution.diagonal_gaussian
import numpy as np
import torch

from algorithms.distribution.base import Distribution


class DiagonalGaussian(Distribution):
    def __init__(self, dim):
        self._dim = dim

    @property
    def dim(self):
        '''
        :return: dimension of the distribution
        '''
        return self._dim
    def kl(self, old_dist_info, new_dist_info):
        '''
        Compute the KL divergence of two multivariate Gaussian distributions
        with diagonal covariance matrices.

        :param old_dist_info: info dict of the old distribution
        :param new_dist_info: info dict of the new distribution
        :return: KL divergence between the two distributions
        '''
        old_means = old_dist_info["mean"]
        old_log_stds = old_dist_info["log_std"]
        new_means = new_dist_info["mean"]
        new_log_stds = new_dist_info["log_std"]
        old_std = np.exp(old_log_stds)
        new_std = np.exp(new_log_stds)
        # means: (N, A)
        # stds:  (N, A)
        # per-dimension formula:
        #   {(\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2} / (2 \sigma_2^2)
        #   + ln(\sigma_2 / \sigma_1)
        numerator = np.square(old_means - new_means) + \
            np.square(old_std) - np.square(new_std)
        denominator = 2 * np.square(new_std) + 1e-8
        return np.sum(
            numerator / denominator + new_log_stds - old_log_stds, axis=-1)
    # A numerically lossier variant would recompute the log ratio as
    # np.log(new_std) - np.log(old_std) instead of reusing the log stds
    # directly.
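    # Reference: for one dimension, the standard diagonal-Gaussian KL is
    #   KL(N(m1, s1^2) || N(m2, s2^2))
    #     = ln(s2 / s1) + (s1^2 + (m1 - m2)^2) / (2 s2^2) - 1/2,
    # which matches the expression above: the -1/2 term is absorbed by the
    # -s2^2 in the numerator, since s2^2 / (2 s2^2) = 1/2.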
    def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
        '''
        Compute the KL divergence of two multivariate Gaussian distributions
        with diagonal covariance matrices, as a differentiable torch
        expression.

        :param old_dist_info_vars: info dict of the old distribution (tensors)
        :param new_dist_info_vars: info dict of the new distribution (tensors)
        :return: KL divergence between the two distributions
        '''
        old_means = old_dist_info_vars["mean"]
        old_log_stds = old_dist_info_vars["log_std"]
        new_means = new_dist_info_vars["mean"]
        new_log_stds = new_dist_info_vars["log_std"]
        old_std = torch.exp(old_log_stds)
        new_std = torch.exp(new_log_stds)
        # same per-dimension formula as kl(), expressed with torch ops so
        # that gradients can flow through the result
        numerator = torch.square(old_means - new_means) + \
            torch.square(old_std) - torch.square(new_std)
        denominator = 2 * torch.square(new_std) + 1e-8
        return torch.sum(
            numerator / denominator + new_log_stds - old_log_stds, dim=-1)
    def likelihood_ratio_sym(self, x_var, old_dist_info_vars,
                             new_dist_info_vars):
        '''
        :param x_var: x
        :param old_dist_info_vars: info dict of the old distribution (tensors)
        :param new_dist_info_vars: info dict of the new distribution (tensors)
        :return: likelihood ratio p_new(x) / p_old(x)
        '''
        logli_new = self.log_likelihood_sym(x_var, new_dist_info_vars)
        logli_old = self.log_likelihood_sym(x_var, old_dist_info_vars)
        # exponentiate in torch so gradients flow through the ratio
        return torch.exp(logli_new - logli_old)
    def log_likelihood_sym(self, x_var, dist_info_vars):
        '''
        :param x_var: x
        :param dist_info_vars: info dict of the distribution (tensors)
        :return: log likelihood of x under the given distribution
        '''
        means = dist_info_vars["mean"]
        log_stds = dist_info_vars["log_std"]
        zs = (x_var - means) / torch.exp(log_stds)
        return - torch.sum(log_stds, dim=-1) - \
            0.5 * torch.sum(torch.square(zs), dim=-1) - \
            0.5 * self.dim * np.log(2 * np.pi)
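    # Reference: the diagonal-Gaussian log density being computed is
    #   log N(x; mu, sigma) = -sum_i log sigma_i - 0.5 * sum_i z_i^2
    #                         - 0.5 * d * log(2 * pi),
    # with z_i = (x_i - mu_i) / sigma_i and d the dimension.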
    def sample(self, dist_info):
        '''
        :param dist_info: info dict of the distribution
        :return: a sample drawn from the given distribution
        '''
        means = dist_info["mean"]
        log_stds = dist_info["log_std"]
        rnd = np.random.normal(size=means.shape)
        return rnd * np.exp(log_stds) + means
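    # Note: sample() draws via the reparameterization x = mean + std * eps,
    # eps ~ N(0, I), matching the density evaluated by log_likelihood() below.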
    def log_likelihood(self, xs, dist_info):
        '''
        :param xs: x
        :param dist_info: info dict of the distribution
        :return: log likelihood of x under the given distribution
        '''
        means = dist_info["mean"]
        log_stds = dist_info["log_std"]
        zs = (xs - means) / np.exp(log_stds)
        return - np.sum(log_stds, axis=-1) - \
            0.5 * np.sum(np.square(zs), axis=-1) - \
            0.5 * self.dim * np.log(2 * np.pi)
    def entropy(self, dist_info):
        '''
        :param dist_info: info dict of the distribution
        :return: entropy of the distribution
        '''
        log_stds = dist_info["log_std"]
        # per-dimension entropy of a Gaussian: log_std + 0.5 * log(2 * pi * e)
        return np.sum(log_stds + np.log(np.sqrt(2 * np.pi * np.e)), axis=-1)
    @property
    def dist_info_specs(self):
        '''
        :return: shape specification of the entries in a distribution
            info dict
        '''
        return [("mean", (self.dim,)), ("log_std", (self.dim,))]