Source code for algorithms.distribution.diagonal_gaussian
import numpy as np
import torch

from algorithms.distribution.base import Distribution


class DiagonalGaussian(Distribution):
    def __init__(self, dim):
        self._dim = dim

    @property
    def dim(self):
        '''
        :return: dimension of the distribution
        '''
        return self._dim
    def kl(self, old_dist_info, new_dist_info):
        '''
        Compute the KL divergence of two multivariate Gaussian distributions
        with diagonal covariance matrices.

        :param old_dist_info: info dict of the old distribution
        :param new_dist_info: info dict of the new distribution
        :return: KL divergence between the two distributions
        '''
        old_means = old_dist_info["mean"]
        old_log_stds = old_dist_info["log_std"]
        new_means = new_dist_info["mean"]
        new_log_stds = new_dist_info["log_std"]
        old_std = np.exp(old_log_stds)
        new_std = np.exp(new_log_stds)
        # means: (N, A)
        # stds:  (N, A)
        # per-dimension formula:
        #   {(\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2} / (2 \sigma_2^2)
        #   + ln(\sigma_2 / \sigma_1)
        numerator = np.square(old_means - new_means) + \
            np.square(old_std) - np.square(new_std)
        denominator = 2 * np.square(new_std) + 1e-8
        return np.sum(
            numerator / denominator + new_log_stds - old_log_stds, axis=-1)
    # A numerically lossier variant would recompute the log ratio as
    # np.log(new_std) - np.log(old_std) instead of reusing the log stds
    # directly.
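    # Reference: for one dimension, the standard diagonal-Gaussian KL is
    #   KL(N(m1, s1^2) || N(m2, s2^2))
    #     = ln(s2 / s1) + (s1^2 + (m1 - m2)^2) / (2 s2^2) - 1/2,
    # which matches the expression above: the -1/2 term is absorbed by the
    # -s2^2 in the numerator, since s2^2 / (2 s2^2) = 1/2.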
    def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
        '''
        Compute the KL divergence of two multivariate Gaussian distributions
        with diagonal covariance matrices, as a differentiable torch
        expression.

        :param old_dist_info_vars: info dict of the old distribution (tensors)
        :param new_dist_info_vars: info dict of the new distribution (tensors)
        :return: KL divergence between the two distributions
        '''
        old_means = old_dist_info_vars["mean"]
        old_log_stds = old_dist_info_vars["log_std"]
        new_means = new_dist_info_vars["mean"]
        new_log_stds = new_dist_info_vars["log_std"]
        old_std = torch.exp(old_log_stds)
        new_std = torch.exp(new_log_stds)
        # same per-dimension formula as kl(), expressed with torch ops so
        # that gradients can flow through the result
        numerator = torch.square(old_means - new_means) + \
            torch.square(old_std) - torch.square(new_std)
        denominator = 2 * torch.square(new_std) + 1e-8
        return torch.sum(
            numerator / denominator + new_log_stds - old_log_stds, dim=-1)
    def likelihood_ratio_sym(self, x_var, old_dist_info_vars,
                             new_dist_info_vars):
        '''
        :param x_var: x
        :param old_dist_info_vars: info dict of the old distribution (tensors)
        :param new_dist_info_vars: info dict of the new distribution (tensors)
        :return: likelihood ratio p_new(x) / p_old(x)
        '''
        logli_new = self.log_likelihood_sym(x_var, new_dist_info_vars)
        logli_old = self.log_likelihood_sym(x_var, old_dist_info_vars)
        # exponentiate in torch so gradients flow through the ratio
        return torch.exp(logli_new - logli_old)
    def log_likelihood_sym(self, x_var, dist_info_vars):
        '''
        :param x_var: x
        :param dist_info_vars: info dict of the distribution (tensors)
        :return: log likelihood of x under the given distribution
        '''
        means = dist_info_vars["mean"]
        log_stds = dist_info_vars["log_std"]
        zs = (x_var - means) / torch.exp(log_stds)
        return - torch.sum(log_stds, dim=-1) - \
            0.5 * torch.sum(torch.square(zs), dim=-1) - \
            0.5 * self.dim * np.log(2 * np.pi)
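    # Reference: the diagonal-Gaussian log density being computed is
    #   log N(x; mu, sigma) = -sum_i log sigma_i - 0.5 * sum_i z_i^2
    #                         - 0.5 * d * log(2 * pi),
    # with z_i = (x_i - mu_i) / sigma_i and d the dimension.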
    def sample(self, dist_info):
        '''
        :param dist_info: info dict of the distribution
        :return: a sample drawn from the given distribution
        '''
        means = dist_info["mean"]
        log_stds = dist_info["log_std"]
        rnd = np.random.normal(size=means.shape)
        return rnd * np.exp(log_stds) + means
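    # Note: sample() draws via the reparameterization x = mean + std * eps,
    # eps ~ N(0, I), matching the density evaluated by log_likelihood() below.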
    def log_likelihood(self, xs, dist_info):
        '''
        :param xs: x
        :param dist_info: info dict of the distribution
        :return: log likelihood of x under the given distribution
        '''
        means = dist_info["mean"]
        log_stds = dist_info["log_std"]
        zs = (xs - means) / np.exp(log_stds)
        return - np.sum(log_stds, axis=-1) - \
            0.5 * np.sum(np.square(zs), axis=-1) - \
            0.5 * self.dim * np.log(2 * np.pi)
    def entropy(self, dist_info):
        '''
        :param dist_info: info dict of the distribution
        :return: entropy of the distribution
        '''
        log_stds = dist_info["log_std"]
        # per-dimension entropy of a Gaussian: log_std + 0.5 * log(2 * pi * e)
        return np.sum(log_stds + np.log(np.sqrt(2 * np.pi * np.e)), axis=-1)
    @property
    def dist_info_specs(self):
        '''
        :return: shape specification of the entries in a distribution
            info dict
        '''
        return [("mean", (self.dim,)), ("log_std", (self.dim,))]