Source code for simplestatistics.statistics.z_scores
"""
Implements z_scores() function to standardize a variable to have mean
of 0 and standard deviation of 1.
"""
from .mean import mean
from .decimalize import decimalize
from .standard_deviation import standard_deviation
def z_scores(data, sample=True):
    """
    Standardize a set of data so that it has a mean of 0 and a standard
    deviation of 1.

    Each converted value equals how many standard deviations the original
    value is above (positive) or below (negative) the mean.
    These converted values are known as "z scores".

    Equation:
        .. math::
            z_i = \\frac{X_i - \\bar{X}}{s_X}

    In English:
        - Subtract the mean from the value.
        - Divide the result by the standard deviation.

    Args:
        data: A list of numerical objects.
        sample: A boolean value. If True, calculates z scores for
            sample. If False, calculates z scores for population.

    Returns:
        A list of float objects, one per element of ``data`` and in the
        same order. Returns None when ``data`` is a single number or holds
        fewer than two values, because a z score for one value is not
        defined.

    Examples:
        >>> z_scores([-2, -1, 0, 1, 2])
        [-1.2649110640673518, -0.6324555320336759, 0.0, 0.6324555320336759, 1.2649110640673518]
        >>> z_scores([-2, -1, 0, 1, 2], False)
        [-1.414213562373095, -0.7071067811865475, 0.0, 0.7071067811865475, 1.414213562373095]
        >>> z_scores([1, 2])
        [-0.7071067811865475, 0.7071067811865475]
        >>> z_scores([1, 2], False)
        [-1.0, 1.0]
        >>> z_scores([90]) # a z score for one value is not defined
        >>> z_scores(4) # a z score for one value is not defined
    """
    # Imported locally so the block does not depend on a new file-level import.
    from numbers import Number

    # A bare number (int, float, Decimal, ...) is a single observation,
    # so there is nothing to standardize against.
    if isinstance(data, Number):
        return None

    # Any collection with fewer than two values has no defined z scores.
    if len(data) < 2:
        return None

    mean_of_data = decimalize(mean(data))
    sd_of_data = decimalize(standard_deviation(data, sample))

    # NOTE(review): if every value is identical the standard deviation is
    # presumably 0 and the division below fails -- confirm desired handling.
    # NOTE(review): assumes decimalize() returns a value that supports
    # arithmetic with each element of data -- confirm for float inputs.

    # z_i = (X_i - mean) / sd: values above the mean yield positive scores.
    return [float((value - mean_of_data) / sd_of_data) for value in data]