# -*- coding: utf-8 -*-
""" Copyright 2020 Marco Arrigoni
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import time
import json
import numpy as np
from scipy.linalg import cho_factor, cho_solve
from scipy.special import erf
import matplotlib.pyplot as plt
[docs]def read_cmaes_status(json_status):
""" Read and parse a CMAES status
Parameters
----------
json_status : string
the path to the json file describing the CMAES status
Returns
-------
data : dict
the dictionary with the retrieved data
"""
with open(json_status, 'r') as fp:
data = json.load(fp)
for key, value in data['cmaes'].items():
if key in ['mean', 'covariance', 'p_sigma', 'p_c']:
data['cmaes'][key] = np.array(value)
return data
[docs]def calculate_KL_divergence_gaussians(mean_1, mean_2,
covariance_1, covariance_2):
""" Calculate the Kullback-Leibler divergence between two Gaussians:
KL(G1 || G2) = E_1[ln G1 - ln G2]
Parameters
----------
mean_1, mean_2 : 1D np.ndarray
the mean vectors of the distributions
covariance_1, covariance_2 : 2D np.ndarray
the covariance matrices of the distributions
Returns
-------
divergence : float
the KL divergence
"""
if not (len(mean_1) == len(mean_2) or
covariance_1.shape == covariance_2.shape):
raise ValueError('Shapes must match')
n = len(mean_1)
L1, low1 = cho_factor(covariance_1, lower=True)
L2, low2 = cho_factor(covariance_2, lower=True)
log1 = 2*np.sum(np.log(np.diag(L1)))
log2 = 2*np.sum(np.log(np.diag(L2)))
mean_diff = mean_2 - mean_1
term1 = cho_solve((L2, low2), covariance_1)
term2 = cho_solve((L2, low2), mean_diff)
log_term = log2 - log1
divergence = (log_term - n +
np.trace(term1) + np.dot(mean_diff, term2))
return 0.5*divergence
[docs]def calculate_B_distance_gaussians(mean_1, mean_2, covariance_1, covariance_2):
""" Calculates the Bhattacharyya distance between two normal
distributions.
Parameters
----------
mean_1, mean_2 : 1D np.ndarray
the mean vectors of the distributions
covariance_1, covariance_2 : 2D np.ndarray
the covariance matrices of the distributions
Returns
-------
distance : float
the Bhattacharyya distance
"""
if not (len(mean_1) == len(mean_2) or
covariance_1.shape == covariance_2.shape):
raise ValueError('Shapes must match')
av_covariance = 0.5*(covariance_1 + covariance_2)
L1, low1 = cho_factor(covariance_1, lower=True)
L2, low2 = cho_factor(covariance_2, lower=True)
Lav, lowav = cho_factor(av_covariance, lower=True)
log1 = 2*np.sum(np.log(np.diag(L1)))
log2 = 2*np.sum(np.log(np.diag(L2)))
log_av = 2*np.sum(np.log(np.diag(Lav)))
mean_diff = mean_1 - mean_2
term1 = cho_solve((Lav, lowav), mean_diff)
term1 = np.dot(mean_diff, term1)
distance = term1/8 + 0.5*(log_av - 0.5*(log1 + log2))
return distance
[docs]def calculate_B_coefficient_gaussians(mean_1, mean_2,
covariance_1, covariance_2):
""" Calculates the Bhattacharyya coefficient between two normal
distributions
Parameters
----------
mean_1, mean_2 : 1D np.ndarray
the mean vectors of the distributions
covariance_1, covariance_2 : 2D np.ndarray
the covariance matrices of the distributions
Returns
-------
b_coeff : float
the Bhattacharyya coefficient
"""
distance = calculate_B_distance_gaussians(mean_1, mean_2,
covariance_1, covariance_2)
b_coeff = np.exp(-distance)
return b_coeff
[docs]def calculate_H_distance_gaussians(mean_1, mean_2, covariance_1, covariance_2):
""" Calculate the Hellinger distance between two gaussians.
Parameters
----------
mean_1, mean_2 : 1D np.ndarray
the mean vectors of the distributions
covariance_1, covariance_2 : 2D np.ndarray
the covariance matrices of the distributions
Returns
-------
distance : float
the Hellinger distance
"""
b_coeff = calculate_B_coefficient_gaussians(mean_1, mean_2,
covariance_1, covariance_2)
return np.sqrt(1 - b_coeff)
[docs]def integral_multivariate_standard_normal_rectangular_region(region):
""" Compute the probability that a normal standard vector assumes values
in a rectangular region
Parameters
----------
region : tuple of 2-ple
region = ((a_1, b_1), (a_2, b_2), ... , (a_k, b_k))
the number of tuples gives the dimension of the random vector.
Each 2-ple contains the initial and final integration limits on
the considered direction
Returns
-------
probability : float
the corresponding probability
"""
k = len(region)
probability = 1
for reg in region:
value = erf(reg[1]/np.sqrt(2)) - erf(reg[0]/np.sqrt(2))
value /= 2
print(value)
probability *= value
return probability