Source code for algorithms.distribution.diagonal_gaussian

import numpy as np
import torch

from algorithms.distribution.base import Distribution


class DiagonalGaussian(Distribution):
    def __init__(self, dim):
        self._dim = dim

    @property
    def dim(self):
        '''
        :return: dimension of the distribution
        '''
        return self._dim

    def kl(self, old_dist_info, new_dist_info):
        '''
        Compute the KL divergence of two multivariate Gaussian
        distributions with diagonal covariance matrices.

        :param old_dist_info: old distribution
        :param new_dist_info: new distribution
        :return: KL divergence of the two distributions
        '''
        old_means = old_dist_info["mean"]
        old_log_stds = old_dist_info["log_std"]
        new_means = new_dist_info["mean"]
        new_log_stds = new_dist_info["log_std"]
        old_std = np.exp(old_log_stds)
        new_std = np.exp(new_log_stds)
        # means: (N*A)
        # std: (N*A)
        # Per-dimension formula:
        # {(\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2} / (2 \sigma_2^2)
        #     + \ln(\sigma_2 / \sigma_1)
        numerator = np.square(old_means - new_means) + \
            np.square(old_std) - np.square(new_std)
        denominator = 2 * np.square(new_std) + 1e-8
        return np.sum(
            numerator / denominator + new_log_stds - old_log_stds, axis=-1)
        # Numerically lossier variant kept from the original Theano code:
        # return TT.sum(
        #     numerator / denominator + TT.log(new_std) - TT.log(old_std),
        #     axis=-1)

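    # Worked example of the formula above (hypothetical values, shown as a
    # sketch rather than part of the original module): for old = N(0, 1)
    # and new = N(1, 1) in each of two dimensions, the per-dimension term
    # is ((0 - 1)^2 + 1 - 1) / 2 + 0 - 0 = 0.5, so the sum is 1.0:
    #
    #   >>> d = DiagonalGaussian(dim=2)
    #   >>> old = {"mean": np.zeros(2), "log_std": np.zeros(2)}
    #   >>> new = {"mean": np.ones(2), "log_std": np.zeros(2)}
    #   >>> d.kl(old, new)   # ~1.0, up to the 1e-8 stabilizer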
    def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
        '''
        Compute the KL divergence of two multivariate Gaussian
        distributions with diagonal covariance matrices, as a torch
        expression that remains differentiable.

        :param old_dist_info_vars: old distribution
        :param new_dist_info_vars: new distribution
        :return: KL divergence of the two distributions
        '''
        old_means = old_dist_info_vars["mean"]
        old_log_stds = old_dist_info_vars["log_std"]
        new_means = new_dist_info_vars["mean"]
        new_log_stds = new_dist_info_vars["log_std"]
        old_std = torch.exp(old_log_stds)
        new_std = torch.exp(new_log_stds)
        # Same per-dimension formula as in kl(), written with torch ops:
        # routing the computation through numpy and re-wrapping it with
        # torch.tensor() would detach the result from the autograd graph.
        numerator = (old_means - new_means) ** 2 + \
            old_std ** 2 - new_std ** 2
        denominator = 2 * new_std ** 2 + 1e-8
        return torch.sum(
            numerator / denominator + new_log_stds - old_log_stds, dim=-1)

    def likelihood_ratio_sym(self, x_var, old_dist_info_vars,
                             new_dist_info_vars):
        '''
        :param x_var: x
        :param old_dist_info_vars: old distribution
        :param new_dist_info_vars: new distribution
        :return: likelihood ratio of x under the two distributions
        '''
        logli_new = self.log_likelihood_sym(x_var, new_dist_info_vars)
        logli_old = self.log_likelihood_sym(x_var, old_dist_info_vars)
        # torch.exp keeps the ratio differentiable; wrapping a numpy
        # result in torch.tensor() would drop the gradient history.
        return torch.exp(logli_new - logli_old)

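    # Note (illustrative, not from the original source): the ratio
    # exp(log p_new(x) - log p_old(x)) equals p_new(x) / p_old(x), so it
    # is exactly 1 for every x when the two distributions coincide.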
    def log_likelihood_sym(self, x_var, dist_info_vars):
        '''
        :param x_var: x
        :param dist_info_vars: distribution
        :return: log likelihood of x under the given distribution
        '''
        means = dist_info_vars["mean"]
        log_stds = dist_info_vars["log_std"]
        # Standardize x, then evaluate the diagonal-Gaussian log density
        # with torch ops so gradients can flow through means and log_stds.
        zs = (x_var - means) / torch.exp(log_stds)
        return - torch.sum(log_stds, dim=-1) - \
            0.5 * torch.sum(zs ** 2, dim=-1) - \
            0.5 * self.dim * np.log(2 * np.pi)

    def sample(self, dist_info):
        '''
        :param dist_info: distribution
        :return: sample drawn from the given distribution
        '''
        means = dist_info["mean"]
        log_stds = dist_info["log_std"]
        # Reparameterized draw: x = mean + std * eps with eps ~ N(0, I).
        rnd = np.random.normal(size=means.shape)
        return rnd * np.exp(log_stds) + means

    def log_likelihood(self, xs, dist_info):
        '''
        :param xs: x
        :param dist_info: distribution
        :return: log likelihood of x under the given distribution
        '''
        means = dist_info["mean"]
        log_stds = dist_info["log_std"]
        zs = (xs - means) / np.exp(log_stds)
        return - np.sum(log_stds, axis=-1) - \
            0.5 * np.sum(np.square(zs), axis=-1) - \
            0.5 * self.dim * np.log(2 * np.pi)

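    # Sanity check for the density formula (hypothetical values): a
    # standard normal evaluated at its mean has z = 0, so the expression
    # reduces to -0.5 * dim * log(2 * pi):
    #
    #   >>> d = DiagonalGaussian(dim=1)
    #   >>> info = {"mean": np.zeros(1), "log_std": np.zeros(1)}
    #   >>> d.log_likelihood(np.zeros(1), info)
    #   -0.918938...   # == -0.5 * log(2 * pi)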
    def entropy(self, dist_info):
        '''
        :param dist_info: distribution
        :return: entropy of the distribution
        '''
        log_stds = dist_info["log_std"]
        return np.sum(log_stds + np.log(np.sqrt(2 * np.pi * np.e)), axis=-1)

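    # The closed form above is H = sum_i (log sigma_i + 0.5 * log(2*pi*e));
    # e.g. a unit-variance dimension (log_std = 0) contributes
    # 0.5 * log(2 * pi * e) ~= 1.4189 nats.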
    @property
    def dist_info_specs(self):
        '''
        :return: distribution dimension information
        '''
        return [("mean", (self.dim,)), ("log_std", (self.dim,))]
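
# A minimal usage sketch (not part of the original module; the dimensions
# and values below are illustrative assumptions):

if __name__ == "__main__":
    dist = DiagonalGaussian(dim=3)
    info = {"mean": np.zeros((5, 3)), "log_std": np.zeros((5, 3))}

    xs = dist.sample(info)                  # (5, 3) batch of draws
    print(dist.log_likelihood(xs, info))    # per-sample log density, (5,)
    print(dist.entropy(info))               # 3 * 0.5 * log(2*pi*e) per row
    print(dist.kl(info, info))              # ~0 for identical Gaussians

    # The *_sym variants take torch tensors and stay differentiable.
    t_info = {"mean": torch.zeros(5, 3, requires_grad=True),
              "log_std": torch.zeros(5, 3)}
    t_xs = torch.as_tensor(xs, dtype=torch.float32)
    ll = dist.log_likelihood_sym(t_xs, t_info)
    ll.sum().backward()                     # gradients flow into the mean
    print(t_info["mean"].grad.shape)        # torch.Size([5, 3])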