# Source code for easyvvuq.analysis.mcmc

"""Analysis element for the Markov Chain Monte Carlo (MCMC) method.
For more details on the method see the `easyvvuq.sampling.MCMCSampler` class.
The analysis part of Markov Chain Monte Carlo consists of approximating the distribution
from the results obtained by evaluating the samples.
"""
import pandas as pd
from .base import BaseAnalysisElement
from .results import AnalysisResults


class MCMCAnalysisResults(AnalysisResults):
    """The analysis results class for MCMC.

    You will not need to instantiate this class manually.

    Parameters
    ----------
    chains: dict
        A dictionary with pandas DataFrame that correspond to an MCMC chain
        each. A chain consists of points that MCMC has visited. From this a
        distribution of the input variables can be constructed by means of a
        simple histogram.
    """

    def __init__(self, chains):
        self.chains = chains

    def plot_hist(self, input_parameter, chain=None, skip=0, merge=True):
        """Will plot a histogram for a given input parameter.

        Parameters
        ----------
        input_parameter: str
            An input parameter name to draw the histogram for.
        chain: int, optional
            Key of the chain to be plotted. Only used when `merge` is False.
        skip: int
            How many steps to skip at the start of each chain (for getting
            rid of burn-in).
        merge: bool
            If set to True will use all chains to construct the histogram.
        """
        import matplotlib.pyplot as plt

        # Chain DataFrames are indexed by (name, 0) column labels.
        column = (input_parameter, 0)
        if merge:
            # Concatenate the post-burn-in part of every chain. The previous
            # implementation called Series.append and discarded the result,
            # so only the first chain ever reached the histogram.
            samples = pd.concat(
                [frame[column].iloc[skip:] for frame in self.chains.values()],
                ignore_index=True,
            )
            plt.hist(samples, 20)
        else:
            plt.hist(self.chains[chain][column].iloc[skip:], 20)

    def plot_chains(self, input_parameter, chain=None):
        """Will plot the chains with the input parameter value in the y axis.

        Parameters
        ----------
        input_parameter: str
            Input parameter name.
        chain: int, optional
            The chain number of the chain to plot. If None, every chain is
            plotted.
        """
        import matplotlib.pyplot as plt

        column = (input_parameter, 0)
        # Either every chain or just the requested one.
        selected = list(self.chains) if chain is None else [chain]
        for chain_key in selected:
            plt.plot(self.chains[chain_key][column])
class MCMCAnalysis(BaseAnalysisElement):
    """The analysis part of the MCMC method in EasyVVUQ

    Parameters
    ----------
    sampler: MCMCSampler
        An instance of MCMCSampler used to generate MCMC samples.
    """

    def __init__(self, sampler):
        self.sampler = sampler

    def analyse(self, df):
        """Performs some pre-processing on the chains in order to be able to
        construct the histograms or other methods of distribution estimation.

        For every chain the rows are averaged per iteration. The input values
        recorded at an iteration are then repeated once for each step up to
        the next recorded iteration. The last recorded iteration has no
        successor and therefore contributes no rows.

        Parameters
        ----------
        df: DataFrame
            DataFrame with the results obtained by evaluating the samples
            generated by the MCMC sampler.

        Returns
        -------
        MCMCAnalysisResults
            Results object holding one DataFrame per chain id. A chain with
            fewer than two recorded iterations yields an empty DataFrame.
        """
        chain_column = ('chain_id', 0)
        inputs = self.sampler.inputs
        chains = {}
        for chain_id in df[chain_column].unique():
            chain_values = df[df[chain_column] == chain_id]
            values = chain_values.groupby(('iteration', 0)).apply(lambda x: x.mean())
            indexes = values.index.values
            rows = []
            for a, b in zip(indexes[:-1], indexes[1:]):
                # The chain stays at the state recorded at `a` until `b`.
                rows += [values.loc[a][inputs].to_dict()] * (b - a)
            if rows:
                columns = {key: [row[key] for row in rows] for key in rows[0]}
            elif len(indexes):
                # Only one recorded iteration: keep the column labels but no
                # rows, instead of failing with IndexError on rows[0].
                columns = {key: [] for key in values.loc[indexes[0]][inputs].to_dict()}
            else:
                columns = {}
            chains[chain_id] = pd.DataFrame(columns)
        return MCMCAnalysisResults(chains)