Source code for easyvvuq.comparison.validate

"""Validation by comparing QoI distributions.
"""
import numpy as np
import scipy.stats as st
from . import BaseComparisonElement


__copyright__ = """

    Copyright 2018 Robin A. Richardson, David W. Wright

    This file is part of EasyVVUQ

    EasyVVUQ is free software: you can redistribute it and/or modify
    it under the terms of the Lesser GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    EasyVVUQ is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    Lesser GNU General Public License for more details.

    You should have received a copy of the Lesser GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""
__author__ = 'Jalal Lakhlili'
__license__ = "LGPL"


class ValidateSimilarity(BaseComparisonElement):
    """Base comparison element for distance-based validation.

    The metric itself is supplied by overriding :meth:`dist`;
    :meth:`compare` applies that metric to the two inputs.
    """

    def __init__(self):
        pass

    def dist(self, p, q):
        """Return the distance between `p` and `q`.

        This base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    def compare(self, dataframe1, dataframe2):
        """Compare two discrete distributions, or two collections of them.

        Parameters
        ----------
        dataframe1 : NumPy array or list
        dataframe2 : NumPy array or list

        Returns
        -------
        When `dataframe1` is two-dimensional, a list holding one
        ``self.dist`` value per pair of entries taken at the same position
        in `dataframe1` and `dataframe2`. Otherwise the single value
        returned by ``self.dist`` for the two inputs as a whole.

        Raises
        ------
        RuntimeError
            If ``len(dataframe1)`` differs from ``len(dataframe2)``.
        """
        if len(dataframe1) != len(dataframe2):
            raise RuntimeError("Input dataframe sizes are not equal")

        is_collection = len(np.shape(dataframe1)) == 2
        if not is_collection:
            # Anything that is not two-dimensional is handed to the metric
            # as one distribution.
            return self.dist(np.array(dataframe1), np.array(dataframe2))

        # Entries are fetched by position so element access is the same for
        # any indexable input; np.array() gives dist() its own array object.
        return [
            self.dist(np.array(dataframe1[row]), np.array(dataframe2[row]))
            for row in range(len(dataframe1))
        ]
class ValidateSimilarityHellinger(ValidateSimilarity):
    """Similarity validation using the Hellinger distance as the metric."""

    def element_name(self):
        """Return the name identifying this comparison element."""
        return "validate_similarity_hellinger"

    def element_version(self):
        """Return the version string of this comparison element."""
        return "0.1"

    def dist(self, p, q):
        """Compute the Hellinger distance between two discrete
        probability distributions (PDF).

        The Hellinger distance gives an output in the range [0, 1], with
        values closer to 0 meaning the PDFs are more similar. Both inputs
        are normalised to sum to 1 before the distance is evaluated; the
        caller's arrays are left unmodified.

        Parameters
        ----------
        p : NumPy array
        q : NumPy array

        Returns
        -------
        Hellinger distance between distributions p and q.

        https://en.wikipedia.org/wiki/Hellinger_distance
        """
        # Take float copies: in-place division on an integer array (e.g.
        # histogram counts) raises a casting error, and dividing the
        # caller's array in place would silently alter their data.
        p = np.array(p, dtype=float)
        q = np.array(q, dtype=float)
        p /= p.sum()
        q /= q.sum()

        # Bhattacharyya coefficient of the two normalised distributions.
        coefficient = np.sqrt(p * q).sum()

        # Rounding can push the coefficient marginally above 1 for
        # (near-)identical inputs; clamp so the square root yields 0
        # rather than NaN.
        return np.sqrt(np.maximum(0.0, 1.0 - coefficient))
class ValidateSimilarityJensenShannon(ValidateSimilarity):
    """Similarity validation using the Jensen-Shannon distance as the metric."""

    def element_name(self):
        """Return the name identifying this comparison element."""
        return "validate_similarity_jensen_shannon"

    def element_version(self):
        """Return the version string of this comparison element."""
        return "0.1"

    def dist(self, p, q):
        """Compute the Jensen-Shannon distance between two discrete
        probability distributions (PDF).

        It is based on the Kullback–Leibler divergence and gives an output
        metric in the range [0, 1], with values closer to 0 meaning the
        PDFs are more similar. Both inputs are normalised to sum to 1
        before the distance is evaluated; the caller's arrays are left
        unmodified.

        Parameters
        ----------
        p : NumPy array
        q : NumPy array

        Returns
        -------
        Jensen-Shannon distance between distributions p and q, i.e. the
        square root of the Jensen-Shannon divergence expressed in base 2.

        https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence
        https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
        """
        # Take float copies: in-place division on an integer array (e.g.
        # histogram counts) raises a casting error, and dividing the
        # caller's array in place would silently alter their data.
        p = np.array(p, dtype=float)
        q = np.array(q, dtype=float)
        p /= p.sum()
        q /= q.sum()

        # Mixture distribution against which both inputs are compared.
        m = 0.5 * (p + q)

        # st.entropy(pk, qk) evaluates the Kullback-Leibler divergence
        # using the natural logarithm.
        div = 0.5 * (st.entropy(p, m) + st.entropy(q, m))

        # Dividing by ln(2) rescales the divergence to base 2. The clamp
        # is a defensive guard so that a marginally negative value caused
        # by rounding cannot turn the square root into NaN.
        return np.sqrt(np.maximum(0.0, div / np.log(2)))
class ValidateSimilarityWasserstein(ValidateSimilarity):
    """Similarity validation using the Wasserstein distance as the metric."""

    def element_name(self):
        """Return the name identifying this comparison element."""
        name = "validate_similarity_wasserstein"
        return name

    def element_version(self):
        """Return the version string of this comparison element."""
        version = "0.1"
        return version

    def dist(self, p, q):
        """Compute the Wasserstein distance between two discrete
        cumulative distributions (CDF).

        The Wasserstein distance has an unrestricted range with a lower
        limit of 0. A smaller distance indicates a stronger similarity
        between the CDFs.

        The computation is delegated to ``scipy.stats.wasserstein_distance``
        with `p` and `q` as its first two positional arguments.

        NOTE(review): SciPy documents those two arguments as values observed
        in the (empirical) distributions -- confirm that callers pass data
        in the form this metric is meant to receive.

        Parameters
        ----------
        p : NumPy array
        q : NumPy array

        Returns
        -------
        Wasserstein distance between distributions p and q.

        https://en.wikipedia.org/wiki/Wasserstein_metric
        """
        distance = st.wasserstein_distance(p, q)
        return distance