Source code for pysumreg.sum_registers

"""
Statistics of list of (x, y) pairs from calculator-style summation registers.

* mean, standard deviation, max, min of x & y
* linear regression & correlation coefficient
* peak analysis: centroid and width of x weighted by y

.. autosummary::
   ~SummationRegisters

References:

* "Engineering Statistics with a Programmable Calculator", William Volk,
  1982, McGraw-Hill Companies, New York, ISBN 10: 007067552,
  ISBN 13: 9780070675520.
* https://goodcalculators.com/linear-regression-calculator/
* https://www.statssolver.com/simple-regression.html
"""

import math


[docs]class SummationRegisters: """ Summation registers in the style of a pocket calculator. Operations .. autosummary:: ~clear ~add ~subtract ~to_dict Statistical Parameters .. autosummary:: ~mean_x ~mean_y ~stddev_x ~stddev_y Linear Parameters .. autosummary:: ~slope ~intercept ~correlation ~linear_y Peak Parameters .. autosummary:: ~centroid ~sigma """ _property_methods = """ mean_x mean_y stddev_x stddev_y slope intercept correlation centroid sigma """.split() _registers = "n X Y XX XY XXY YY".split() _extrema_names = "min_x max_x min_y max_y x_at_max_y x_at_min_y".split() def __init__(self) -> None: self.clear()
[docs] def clear(self): """Clear the :math:`\sum{}` summation registers.""" self.n = 0 self.X = 0 self.Y = 0 self.XX = 0 self.XY = 0 self.XXY = 0 self.YY = 0 self.max_x = None self.max_y = None self.min_x = None self.min_y = None self.x_at_max_y = None self.x_at_min_y = None
def _assess_extrema(self, x, y): """Assess min & max of x & y.""" self.min_x = min(x, self.min_x or x) self.max_x = max(x, self.max_x or x) self.min_y = min(y, self.min_y or y) self.max_y = max(y, self.max_y or y) if y == self.min_y: self.x_at_min_y = x if y == self.max_y: self.x_at_max_y = x
[docs] def add(self, x, y): """:math:`\sum{+}`: Add :math:`(x, y)` ordered pair to the registers.""" self.n += 1 self.X += x self.Y += y self.XX += x * x self.XY += x * y self.XXY += x * x * y self.YY += y * y self._assess_extrema(x, y)
[docs] def subtract(self, x, y): """:math:`\sum{-}`: Subtract :math:`(x, y)` ordered pair from the registers.""" self.n -= 1 self.X -= x self.Y -= y self.XX -= x * x self.XY -= x * y self.XXY -= x * x * y self.YY -= y * y self._assess_extrema(x, y)
[docs] def to_dict(self, use_registers=False): """ Return all statistics as dictionary. Returns ``None`` for any unavailable or undefined values. PARAMETERS ``use_registers`` bool: Include values of the summation registers. (default: ``False``) """ def param(k): try: v = getattr(self, k) except Exception: v = None return v d = {k: param(k) for k in self._property_methods} d.update({k: param(k) for k in self._extrema_names}) if use_registers: d.update({k: param(k) for k in self._registers}) return d
def __repr__(self): dd = self.to_dict(use_registers=True) vv = ", ".join([f"{k}={v}" for k, v in sorted(dd.items())]) return f"{self.__class__.__name__}({vv})" @property def mean_x(self): r""" Average of :math:`x` values. .. math:: \bar{x} = {\sum{x} \over n} """ return self.X / self.n @property def mean_y(self): r""" Average of :math:`y` values. .. math:: \bar{y} = {\sum{y} \over n} """ return self.Y / self.n @property def stddev_x(self): r""" Standard deviation of :math:`x` values. .. math:: \sigma_x^2 = {{\sum{x^2} - \bar{x}\sum{x}} \over {n-1}} """ numerator = max(0, self.XX - self.mean_x * self.X) denominator = self.n - 1 return math.sqrt(numerator / denominator) @property def stddev_y(self): r""" Standard deviation of :math:`y` values. .. math:: \sigma_y^2 = {{\sum{y^2} - \bar{y}\sum{y}} \over {n-1}} """ numerator = max(0, self.YY - self.mean_y * self.Y) denominator = self.n - 1 return math.sqrt(numerator / denominator) @property def slope(self): r""" First order term (:math:`b_1`) in linear fit of :math:`(x,y)`. .. math:: y = b_0 + b_1 x .. math:: b_1 = {{n\sum{xy} - \sum{x}\sum{y}} \over {n\sum{x^2} - \sum{x}\sum{x}}} See: :meth:`intercept`, :meth:`linear_y` """ return ( # fmt: off (self.n*self.XY - self.X*self.Y) / (self.n*self.XX - self.X*self.X) # fmt: on ) @property def intercept(self): r""" Zero order term (:math:`b_0`) in linear fit of :math:`(x,y)`. .. math:: y = b_0 + b_1 x .. math:: b_0 = {{\sum{y} - b_1\sum{x}} \over n} See: :meth:`slope`, :meth:`linear_y` """ return ( # fmt: off (self.Y - self.slope*self.X) / self.n # fmt: on )
[docs] def linear_y(self, x): r""" Compute :math:`\hat{y}` given :math:`x` using slope and intercept. .. math:: \hat{y} = b_0 + b_1 x See: :meth:`intercept`, :meth:`slope` """ return self.intercept + self.slope * x
@property def correlation(self): r""" Regression correlation coefficient (:math:`r`) of :math:`(x, y)`. .. math:: r = {{n\sum{xy} - \sum{x}\sum{y}} \over \sqrt{(n\sum{x^2}-\sum{x}\sum{x}) (n\sum{y^2}-\sum{y}\sum{y})}} """ return ( # fmt: off (self.n*self.XY - self.X*self.Y) / math.sqrt( (self.n*self.XX - self.X*self.X) * (self.n*self.YY - self.Y*self.Y) ) # fmt: on ) @property def centroid(self): r""" Centroid (:math:`x_c`) of :math:`(x, y)`. .. math:: x_c = { \sum{x y} \over \sum{y} } Regardless of the input :math:`(x, y)` signal modality (single peak, multiple peaks, no peaks at all), it is possible to estimate the center (centroid, :math:`x_c`) and width (:math:`2\sigma`) of the :math:`x` values assuming the :math:`y` values are their associated weights. The center should fall between the minimum and maximum given :math:`x` values. See: :meth:`sigma` """ return self.XY / self.Y @property def sigma(self): r""" The y-weighted variance (:math:`\sigma_c^2`) of :math:`(x, y)`. .. math:: \sigma_c^2 = {\sum{y{(x-x_c)}^2} \over \sum{y}} Regardless of the input :math:`(x, y)` signal modality (single peak, multiple peaks, no peaks at all), it is possible to estimate the center (centroid, :math:`x_c`) and width (:math:`2\sigma_c`) of the :math:`x` values assuming the :math:`y` values are their associated weights. The sigma should be less than the span of the :math:`x` values. See: :meth:`centroid` """ return math.sqrt((self.XXY - self.centroid * self.XY) / self.Y)