Source code for data_morph.data.stats

"""Utility functions for calculating summary statistics."""

from __future__ import annotations

from typing import TYPE_CHECKING, NamedTuple

if TYPE_CHECKING:
    from collections.abc import Generator

    import pandas as pd


[docs] class SummaryStatistics(NamedTuple): """Named tuple containing the summary statistics for plotting/analysis.""" x_mean: float y_mean: float x_stdev: float y_stdev: float correlation: float x_median: float | None y_median: float | None def __iter__(self) -> Generator[float, None, None]: for statistic in self._fields: if (value := getattr(self, statistic)) is not None: yield value
[docs] def get_summary_statistics(data: pd.DataFrame, with_median: bool) -> SummaryStatistics: """ Calculate the summary statistics for the given set of points. Parameters ---------- data : pandas.DataFrame A dataset with columns ``x`` and ``y``. with_median : bool Whether to include the median of ``x`` and ``y``. Returns ------- SummaryStatistics Named tuple consisting of mean and standard deviations of ``x`` and ``y``, along with the Pearson correlation coefficient between the two, and optionally, the median of ``x`` and ``y``. """ return SummaryStatistics( x_mean=data.x.mean(), y_mean=data.y.mean(), x_stdev=data.x.std(), y_stdev=data.y.std(), correlation=data.corr().x.y, x_median=data.x.median() if with_median else None, y_median=data.y.median() if with_median else None, )