Source code for easyvvuq.analysis.basic_stats
"""Provides analysis element for basic statistical analysis.
The analysis is based on the `pandas.DataFrame.describe()` function.
"""
from easyvvuq import OutputType
from .base import BaseAnalysisElement
# Copyright and licence notice for this module, kept as a module-level string.
__copyright__ = """
Copyright 2018 Robin A. Richardson, David W. Wright
This file is part of EasyVVUQ
EasyVVUQ is free software: you can redistribute it and/or modify
it under the terms of the Lesser GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
EasyVVUQ is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Lesser GNU General Public License for more details.
You should have received a copy of the Lesser GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
# Short licence identifier for this module.
__license__ = "LGPL"
class BasicStats(BaseAnalysisElement):
    """Analysis element producing `pandas` descriptive statistics.

    The statistics are whatever `pandas.DataFrame.describe` reports
    (count, mean, std, min, max and the 25%, 50% & 75% percentiles),
    optionally computed per group and restricted to selected columns.
    """

    def __init__(self, groupby=None, qoi_cols=None):
        """Configure grouping and the columns to summarise.

        Parameters
        ----------
        groupby : list or None
            Columns used to group the data in the `analyse` method before
            the statistics are calculated. No grouping is done when falsy.
        qoi_cols : list or None
            Columns of quantities of interest (those for which statistics
            are reported). `None` is stored as an empty list, which
            `analyse` treats as "no column selection".
        """
        self.groupby = groupby
        self.qoi_cols = [] if qoi_cols is None else qoi_cols
        self.output_type = OutputType.SUMMARY

    def element_name(self):
        """Return the name identifying this element for logging purposes."""
        return "basic_stats"

    def element_version(self):
        """Return the version string of this element for logging purposes."""
        return "0.1"

    def analyse(self, data_frame=None):
        """Perform the basic stats analysis on the input `data_frame`.

        The analysis relies on `pandas.DataFrame.describe` and yields
        values for: count, mean, std, min, max and 25%, 50% & 75%
        percentiles for each value in the analysis.

        When `self.groupby` is set the frame is grouped by it first; when
        `self.qoi_cols` is set only those columns are kept in the result.

        Parameters
        ----------
        data_frame : :obj:`pandas.DataFrame`
            Summary data produced through collation of simulation output.

        Returns
        -------
        :obj:`pandas.DataFrame`
            Basic statistics for the selected columns and groupings of data.

        Raises
        ------
        RuntimeError
            If `data_frame` is `None` or contains no data.
        """
        columns = self.qoi_cols

        # Guard clauses: refuse to run without usable input.
        if data_frame is None:
            raise RuntimeError("Analysis element needs a data frame to analyse")
        if data_frame.empty:
            raise RuntimeError("No data in data frame passed to analyse element")

        if self.groupby:
            # Grouped case: describe every column per group, then narrow the
            # summary down to the requested quantities of interest.
            summary = data_frame.groupby(self.groupby).describe()
            return summary[columns] if columns else summary

        # Ungrouped case: narrow the input first, then describe it.
        subject = data_frame[columns] if columns else data_frame
        return subject.describe()