Source code for simplestatistics.statistics.z_scores

"""
Implements z_scores() function to standardize a variable to have mean
of 0 and standard deviation of 1.
"""

from .mean import mean
from .decimalize import decimalize
from .standard_deviation import standard_deviation

def z_scores(data, sample=True):
    """
    Standardizing a variable or set of data is transforming the data such
    that it has a mean of 0 and standard deviation of 1.

    Each converted value equals how many standard deviations the value is
    above (positive) or below (negative) the mean. These converted values
    are known as "z scores".

    Equation:
        .. math::
            z_i = \\frac{X_i - \\bar{X}}{s_X}

    In English:
        - Subtract the mean from the value.
        - Divide the result by the standard deviation.

    Args:
        data: A list of numerical objects.
        sample: A boolean value. If True, calculates z scores for sample.
            If False, calculates z scores for population.

    Returns:
        A list of float objects, one per input value and in the same order.
        Returns None when z scores are not defined: ``data`` is a single
        number, ``data`` is a list or tuple with fewer than two values, or
        the standard deviation of ``data`` is zero.

    Examples:
        >>> z_scores([-2, -1, 0, 1, 2])
        [-1.2649110640673518, -0.6324555320336759, 0.0, 0.6324555320336759, 1.2649110640673518]
        >>> z_scores([-2, -1, 0, 1, 2], False)
        [-1.414213562373095, -0.7071067811865475, 0.0, 0.7071067811865475, 1.414213562373095]
        >>> z_scores([1, 2])
        [-0.7071067811865475, 0.7071067811865475]
        >>> z_scores([1, 2], False)
        [-1.0, 1.0]
        >>> z_scores([90]) # a z score for one value is not defined
        >>> z_scores(4) # a z score for one value is not defined
    """
    # A bare number is a single observation: there is nothing to
    # standardize against.
    if isinstance(data, (int, float)):
        return None

    # Fewer than two observations have no spread, so z scores are undefined.
    if isinstance(data, (list, tuple)) and len(data) < 2:
        return None

    mean_of_data = decimalize(mean(data))
    sd_of_data = decimalize(standard_deviation(data, sample))

    # With no spread (all values equal) the division below would fail and
    # the z score is undefined, so report it like the other degenerate cases.
    if sd_of_data == 0:
        return None

    # Value minus mean, so values above the mean get positive scores.
    # Each value goes through decimalize() so it is the same numeric type as
    # the mean before subtracting (presumably Decimal -- confirm against
    # decimalize's implementation).
    return [
        float((decimalize(value) - mean_of_data) / sd_of_data)
        for value in data
    ]