Source code for easyvvuq.analysis.ssc_analysis

"""
ANALYSIS CLASS FOR THE SSC SAMPLER
"""

import numpy as np
import pickle
import copy
from easyvvuq import OutputType
from .base import BaseAnalysisElement
from .results import AnalysisResults
# import logging
# from scipy.special import comb
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon

__author__ = "Wouter Edeling"
__copyright__ = """

    Copyright 2018 Robin A. Richardson, David W. Wright

    This file is part of EasyVVUQ

    EasyVVUQ is free software: you can redistribute it and/or modify
    it under the terms of the Lesser GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    EasyVVUQ is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    Lesser GNU General Public License for more details.

    You should have received a copy of the Lesser GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""
__license__ = "LGPL"


class SSCAnalysisResults(AnalysisResults):
    """Analysis results produced by :class:`SSCAnalysis`."""

    def supported_stats(self):
        """Types of statistics supported by the describe method.

        Returns
        -------
        list of str
        """
        return ['mean', 'var', 'std']

    def _describe(self, qoi, statistic):
        # Only the moments stored by SSCAnalysis.analyse are available;
        # anything else is unsupported.
        if statistic not in self.supported_stats():
            raise NotImplementedError
        return self.raw_data['statistical_moments'][qoi][statistic]

    def surrogate(self):
        """Return a SSC surrogate model.

        Returns
        -------
        A function that takes a dictionary of parameter - value pairs
        and returns a dictionary with the results (same output as
        decoder).
        """
        def surrogate_fn(inputs):
            def swap(x):
                # Unwrap single-element arrays to a scalar; keep vectors
                # as plain lists.
                return list(x) if x.size > 1 else x[0]

            values = np.squeeze(
                np.array([inputs[key] for key in self.inputs])).T
            return {qoi: swap(self.surrogate_(qoi, values))
                    for qoi in self.qois}

        return surrogate_fn
class SSCAnalysis(BaseAnalysisElement):
    """
    SSC analysis class.
    """

    def __init__(self, sampler=None, qoi_cols=None):
        """
        Parameters
        ----------
        sampler : SSCSampler
            Sampler used to initiate the SSC analysis.
        qoi_cols : list or None
            Column names for quantities of interest (for which analysis is
            performed).
        """
        if sampler is None:
            msg = 'SSC analysis requires a paired sampler to be passed'
            raise RuntimeError(msg)
        if qoi_cols is None:
            raise RuntimeError("Analysis element requires a list of "
                               "quantities of interest (qoi)")
        self.qoi_cols = qoi_cols
        self.output_type = OutputType.SUMMARY
        self.sampler = sampler

    def element_name(self):
        """Name for this element for logging purposes"""
        return "SSC_Analysis"

    def element_version(self):
        """Version of this element for logging purposes"""
        return "0.1"

    def save_state(self, filename):
        """
        Saves the complete state of the analysis object to a pickle file,
        except the sampler object (self.sampler).

        Parameters
        ----------
        filename : string
            name to the file to write the state to
        """
        print("Saving analysis state to %s" % filename)
        # make a copy of the state, and do not store the sampler as well
        state = copy.copy(self.__dict__)
        del state['sampler']
        with open(filename, 'wb') as fp:
            pickle.dump(state, fp)

    def load_state(self, filename):
        """
        Loads the complete state of the analysis object from a
        pickle file, stored using save_state.

        Parameters
        ----------
        filename : string
            name of the file to load
        """
        print("Loading analysis state from %s" % filename)
        # NOTE: pickle.load must only be used on trusted files
        with open(filename, 'rb') as fp:
            state = pickle.load(fp)
        for key in state.keys():
            self.__dict__[key] = state[key]

    def analyse(self, data_frame=None, compute_moments=True, n_mc=20000):
        """
        Perform SSC analysis on input `data_frame`.

        Parameters
        ----------
        data_frame : pandas.DataFrame
            Input data for analysis.
        compute_moments : bool, optional.
            Compute the first 2 moments. Default is True.
        n_mc : int, optional
            The number of Monte Carlo samples used to estimate the
            moments from the surrogate. Default is 20000.

        Returns
        -------
        results : dict
            A dictionary containing the statistical moments.
        """
        if data_frame is None:
            raise RuntimeError("Analysis element needs a data frame to "
                               "analyse")
        elif isinstance(data_frame, pd.DataFrame) and data_frame.empty:
            raise RuntimeError(
                "No data in data frame passed to analyse element")

        self.load_samples(data_frame)

        # Compute descriptive statistics for each quantity of interest
        results = {'statistical_moments': {}}
        if compute_moments:
            for qoi_k in self.qoi_cols:
                mean_k, var_k = self.get_moments(qoi_k, n_mc=n_mc)
                std_k = var_k ** 0.5
                # compute statistical moments
                results['statistical_moments'][qoi_k] = {'mean': mean_k,
                                                         'var': var_k,
                                                         'std': std_k}

        results = SSCAnalysisResults(
            raw_data=results, samples=data_frame, qois=self.qoi_cols,
            inputs=list(self.sampler.vary.get_keys()))
        results.surrogate_ = self.surrogate
        return results

    def load_samples(self, data_frame):
        """
        Extract output values for each quantity of interest from Dataframe.

        Parameters
        ----------
        data_frame : EasyVVUQ (pandas) data frame
            The code samples from the EasyVVUQ data frame.

        Returns
        -------
        None.
        """
        print('Loading samples...')
        qoi_cols = self.qoi_cols
        samples = {k: [] for k in qoi_cols}
        for k in qoi_cols:
            # one flattened sample vector per run, ordered by run_id
            for run_id in data_frame[('run_id', 0)].unique():
                values = data_frame.loc[
                    data_frame[('run_id', 0)] == run_id][k].values
                samples[k].append(values.flatten())
            samples[k] = np.array(samples[k])
        self.samples = samples
        print('done')

    def get_moments(self, qoi, n_mc):
        """
        Compute the mean and variance through Monte Carlo sampling of the SSC
        surrogate. Independent random inputs samples are drawn though the
        SSC sampler object.

        Parameters
        ----------
        qoi : string
            The name of the QoI.
        n_mc : int
            The number of Monte Carlo samples.

        Returns
        -------
        mean : array
            The mean of qoi.
        var : array
            The variance of qoi.
        """
        print('Computing mean and variance...')
        Xi = self.sampler.sample_inputs(n_mc)
        rvs = [self.surrogate(qoi, xi) for xi in Xi]
        mean = np.mean(rvs)
        var = np.var(rvs)
        print('done.')
        return np.array([mean]), np.array([var])

    def update_surrogate(self, qoi, data_frame, max_LEC_jobs=4, n_mc_LEC=5,
                         max_ENO_jobs=4):
        """
        Update the SSC surrogate given new data. Given an EasyVVUQ dataframe,
        check the LEC condition, and compute the ENO interpolation stencils.

        Parameters
        ----------
        qoi : string
            The name of the QoI on the basis of which the sampling plan
            is refined.
        data_frame : EasyVVUQ (pandas) data frame
            The code samples from the EasyVVUQ data frame.
        max_LEC_jobs : int, optional
            The number of LEC checks to perform in parallel.
            The default is 4.
        n_mc_LEC : int, optional
            The number of surrogate evaluations used in the LEC check.
            The default is 5.
        max_ENO_jobs : int, optional
            The number of ENO stencils to compute in parallel.
            The default is 4.

        Returns
        -------
        None. Stores the polynomials orders, interpolation stencils and
        the simplex probabilities in analysis.p_j, analysis.S_j and
        analysis.prob_j respectively.
        """
        # the number of code evaluations
        n_s = self.sampler.n_samples
        # the number of simplex elements
        n_e = self.sampler.n_elements
        # load the EasyVVUQ data frame
        self.load_samples(data_frame)
        # code outputs
        v = self.samples[qoi]
        # find the max polynomial order and set the p_j = pmax
        pmax = self.sampler.find_pmax(n_s)
        if pmax > self.sampler.pmax_cutoff:
            pmax = self.sampler.pmax_cutoff
            print('Max. polynomial order set by hand to = ' + str(pmax))
        else:
            print('Max. polynomial order allowed by n_s = ' + str(pmax))
        # polynomial order per simplex elememt
        p_j = (np.ones(n_e) * pmax).astype('int')
        # compute nearest neighbour stencils
        S_j = self.sampler.compute_stencil_j()
        # check the LEC condition of all stencil
        res_LEC = self.sampler.check_LEC(p_j, v, S_j, n_mc=n_mc_LEC,
                                         max_jobs=max_LEC_jobs)
        # updated polynomial order, stencil and el_idx are the element indices
        # per interpolation stencil
        p_j = res_LEC['p_j']
        S_j = res_LEC['S_j']
        el_idx = res_LEC['el_idx']
        # convert the nearest-neighbour stencils to ENO stencils
        S_j, p_j, el_idx = self.sampler.compute_ENO_stencil(
            p_j, S_j, el_idx, max_jobs=max_ENO_jobs)
        # store polynomial orders and stencils
        self.p_j = p_j
        self.S_j = S_j
        print("Updated polynomials orders = %s" % p_j)
        # compute the simplex probabilities
        self.prob_j = self.sampler.compute_probability()

    def adapt_locally(self, n_new_samples=1):
        """
        Locally refine the sampling plan based on the SSC geometric
        refinement measure.

        Parameters
        ----------
        n_new_samples : int, optional
            The number of new code evaluations to perform. The default is 1.

        Returns
        -------
        None. Updates the Delaunay triangulation of the SSC sampler with
        the new points. A new ensemble must be executed next.
        """
        if n_new_samples > self.sampler.n_elements:
            n_new_samples = self.sampler.n_elements

        # compute the refinement measures
        eps_bar_j, vol_j = self.sampler.compute_eps_bar_j(self.p_j,
                                                          self.prob_j)
        # rank elements according to eps_bar_j
        refine_idx = np.flipud(np.argsort(eps_bar_j))
        # find the elements at the hypercube boundaries
        bound_simplices = self.sampler.find_boundary_simplices()
        # store which edges are refined, to prevent refining the same edge
        # twice during the same iteration
        refined_edges = []
        # the refinement locations
        xi_k_jref = np.zeros([n_new_samples, self.sampler.n_dimensions])
        i = 0
        j = 0
        # determine n_new_samples locations
        while i < n_new_samples:
            if refine_idx[j] not in bound_simplices:
                # compute the subvertices of the element chosen for refinement
                sub_vertices = self.sampler.compute_sub_simplex_vertices(
                    refine_idx[j])
                # draw a random sample from the sub simplex
                xi_k_jref[i, :] = self.sampler.sample_simplex(1, sub_vertices)
                # if interior is refined: no danger of refining the same
                # edge twice
                already_refined = False
            else:
                print('refining edge')
                xi_k_jref[i, :], refined_edges, already_refined = \
                    self.sampler.sample_simplex_edge(refine_idx[j],
                                                     refined_edges)
            if not already_refined:
                i += 1
                j += 1
            else:
                print('Edge already refined, selecting another sample.')
                j += 1

        self.sampler.update_Delaunay(xi_k_jref)

    def surrogate(self, qoi, xi):
        """
        Evaluate the SSC surrogate at xi.

        Parameters
        ----------
        qoi : string
            Name of the QoI.
        xi : array, shape (n_xi,)
            The location in the input space at which to evaluate the
            surrogate.

        Returns
        -------
        array
            The surrogate output at xi
        """
        if not isinstance(xi, np.ndarray):
            xi = np.array([xi])
        surr = self.sampler.surrogate(xi, self.S_j, self.p_j,
                                      self.samples[qoi])
        return np.array([surr])

    def get_sample_array(self, qoi):
        """
        Parameters
        ----------
        qoi : str
            name of quantity of interest

        Returns
        -------
        array of all samples of qoi
        """
        return self.samples[qoi]

    def plot_grid(self):
        """
        Plot the 1D or 2D sampling plan and color code the simplices
        according to their polynomial order.

        Returns
        -------
        None.
        """
        assert self.sampler.n_xi == 1 or self.sampler.n_xi == 2, \
            "Only works for 1D and 2D problems"
        tri = self.sampler.tri
        # NOTE: only 9 distinct colors; p_j > 9 would raise IndexError
        colors = ["#8dd3c7", "#ffffb3", "#bebada", "#fb8072", "#80b1d3",
                  "#fdb462", "#b3de69", "#fccde5", "#d9d9d9"]
        if self.sampler.n_xi == 2:
            # plot the delaunay grid and color it according to the local p_j
            fig = plt.figure()
            ax = fig.add_subplot(
                111, xlabel=r'$\xi_1$', ylabel=r'$\xi_2$',
                xlim=[self.sampler.corners[0][0],
                      self.sampler.corners[0][1]],
                ylim=[self.sampler.corners[1][0],
                      self.sampler.corners[1][1]])
            for p in range(np.max(self.p_j)):
                idx = (self.p_j == p + 1).nonzero()[0]
                # only label the first simplex of each order, so the legend
                # shows one entry per polynomial order
                first = True
                for i in idx:
                    vertices = tri.points[tri.simplices[i]]
                    if first:
                        pg = Polygon(vertices, facecolor=colors[p],
                                     edgecolor='k',
                                     label=r'$p_j = %d$' % (p + 1))
                        first = False
                    else:
                        pg = Polygon(vertices, facecolor=colors[p],
                                     edgecolor='k')
                    ax.add_patch(pg)
            leg = plt.legend(loc=0)
            leg.set_draggable(True)
        elif self.sampler.n_xi == 1:
            fig = plt.figure()
            ax = fig.add_subplot(
                111, xlabel='cell center',
                ylabel=r'polynomial order %s' % '$p_j$')
            ax.plot(self.sampler.compute_xi_center_j(), self.p_j, 'o')
            ax.vlines(self.sampler.compute_xi_center_j(),
                      np.zeros(self.sampler.n_elements), self.p_j,
                      linestyles='dashed')
            ax.vlines(self.sampler.tri.points, -0.05 * np.max(self.p_j),
                      0.05 * np.max(self.p_j))
        plt.tight_layout()
        plt.show()