Source code for data_morph.data.stats
"""Utility functions for calculating summary statistics."""
from collections import namedtuple
import pandas as pd
# Lightweight immutable record bundling the five statistics; the field order
# below is also the positional order used when constructing an instance.
SummaryStatistics = namedtuple(
    typename='SummaryStatistics',
    field_names=(
        'x_mean',
        'y_mean',
        'x_stdev',
        'y_stdev',
        'correlation',
    ),
)
# namedtuple() generates a generic docstring; replace it with a descriptive one.
SummaryStatistics.__doc__ = (
    'Named tuple containing the summary statistics for plotting/analysis.'
)
[docs]
def get_values(df: pd.DataFrame) -> SummaryStatistics:
    """
    Calculate the summary statistics for the given set of points.

    Parameters
    ----------
    df : pandas.DataFrame
        A dataset with columns x and y.

    Returns
    -------
    SummaryStatistics
        Named tuple consisting of mean and standard deviations of x and y,
        along with the Pearson correlation coefficient between the two.

    Notes
    -----
    Only the ``x`` and ``y`` columns are read, so any additional columns
    present in ``df`` (numeric or not) have no effect on the result.
    """
    x, y = df.x, df.y
    return SummaryStatistics(
        x_mean=x.mean(),
        y_mean=y.mean(),
        x_stdev=x.std(),
        y_stdev=y.std(),
        # Correlate the two columns directly instead of building the full
        # DataFrame.corr() matrix: that matrix spans every column in the
        # frame and can fail when a non-numeric column is present.
        correlation=x.corr(y),
    )