# Source code for data_morph.data.stats
"""Utility functions for calculating summary statistics."""
from __future__ import annotations
from typing import TYPE_CHECKING, NamedTuple
if TYPE_CHECKING:
    from collections.abc import Generator
    import pandas as pd
[docs]
class SummaryStatistics(NamedTuple):
    """Named tuple containing the summary statistics for plotting/analysis."""
    x_mean: float
    y_mean: float
    x_stdev: float
    y_stdev: float
    correlation: float
    x_median: float | None
    y_median: float | None
    def __iter__(self) -> Generator[float, None, None]:
        for statistic in self._fields:
            if (value := getattr(self, statistic)) is not None:
                yield value 
[docs]
def get_summary_statistics(data: pd.DataFrame, with_median: bool) -> SummaryStatistics:
    """
    Calculate the summary statistics for the given set of points.

    Parameters
    ----------
    data : pandas.DataFrame
        A dataset with columns ``x`` and ``y``. Any other columns are ignored.
    with_median : bool
        Whether to include the median of ``x`` and ``y``.

    Returns
    -------
    SummaryStatistics
        Named tuple consisting of mean and standard deviations of ``x`` and ``y``,
        along with the Pearson correlation coefficient between the two, and optionally,
        the median of ``x`` and ``y`` (``None`` for both when ``with_median`` is false).
    """
    x_values, y_values = data['x'], data['y']

    # Only correlate the two columns of interest: calling ``corr()`` on the whole
    # frame does pairwise work for every column and fails on non-numeric extras.
    correlation = data[['x', 'y']].corr().x.y

    return SummaryStatistics(
        x_mean=x_values.mean(),
        y_mean=y_values.mean(),
        x_stdev=x_values.std(),
        y_stdev=y_values.std(),
        correlation=correlation,
        x_median=x_values.median() if with_median else None,
        y_median=y_values.median() if with_median else None,
    )