Source code for simplestatistics.statistics.perceptron

"""
Implements perceptron().
"""

[docs]class perceptron():
"""
A `perceptron model`_.

The implementation of the perceptron model is closely modeled after
the implementation in `simple-statistics <https://github.com/simple-statistics/\
simple-statistics>`_, the javascript analogue of simplestatistics. You can
find the javascript implementation of the perceptron model
`here <https://github.com/simple-statistics/simple-statistics/blob/master/src/perceptron.js>`_.

.. _perceptron model: https://en.wikipedia.org/wiki/Perceptron

Examples:
>>> mod = perceptron()
>>> for ii in range(10):
...     mod.train([0,1], 0)
...     mod.train([1,0], 0)
...     mod.train([1,1], 1)
...     mod.train([0,0], 0)
>>> mod.predict([1, 0])
0
>>> mod.predict([1, 1])
1

You cannot predict an item with a model that hasn't been trained yet.

>>> mod2 = perceptron()
>>> mod2.predict([0, 0])
Traceback (most recent call last):
...
RuntimeError: The model has not been trained yet.

When training, labels need to be 0 or 1.

>>> mod3 = perceptron()
>>> mod3.train([1, 1], 4)
Traceback (most recent call last):
...
ValueError: Labels need to be either 0 or 1.

Once trained on an item, the rest of the training items need to have
features of the same length.

>>> mod4 = perceptron()
>>> mod4.train([1, 0], 1)
>>> mod4.train([1, 1], 1)
>>> mod4.train([1, 1, 0], 1) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
ValueError: The length of features is different ... to use new feature lengths.
"""

def __init__(self):
# The weights are the coefficiencts of the model
# and get updated during training  when the model's
# prediction is different from the label/correct category
self.weights = []

# The bias, or intercept
self.bias = 0

def predict(self, features):
    """
    Classifies an item using the weights and bias currently held by the
    model instance.

    Args:
        features: A list of the numerical features of the item to
            classify. Its length has to equal the number of weights held
            by the model.

    Returns:
        1 if the weighted sum of the features plus the bias is strictly
        greater than zero, otherwise 0.

    Raises:
        RuntimeError: If the model holds no weights yet, i.e. it has not
            been trained.
        ValueError: If ``features`` does not have the same length as the
            model's weights.
    """
    # An empty weight list means there is nothing to score against.
    if len(self.weights) == 0:
        raise RuntimeError('The model has not been trained yet.')

    # Reject mismatched lengths up front: a shorter list would silently
    # ignore the trailing weights, while a longer one would fail with an
    # unhelpful IndexError part-way through the sum.
    if len(features) != len(self.weights):
        raise ValueError('The length of features is different than the '
                         'length of the features the model was trained on.')

    # The score is the sum of each feature multiplied by its weight,
    # accumulated left to right, with the bias added last.
    score = 0
    for feature, weight in zip(features, self.weights):
        score += feature * weight

    score += self.bias

    # Only a strictly positive score maps to the positive class.
    if score > 0:
        return 1

    return 0

[docs]    def train(self, features, label):
"""
The method to train an instance of the perceptron model on an item.

Args:
features: A list of numerical features in the form [feature_1,
feature_2, ...]. The length of the list needs to be the same for each
item given to the same model/instance.
label: An integer of value 0 or 1 to denote category of the item.

Returns:
null
"""

# we will require labels to either be 0 or 1
if label not in [0, 1]:
raise ValueError('Labels need to be either 0 or 1.')

# if this is the first set of features the model trains on
# set the weights to the features, and the bias to 1
if len(self.weights) == 0:
self.weights = features
self.bias = 1

elif len(self.weights) != len(features):
raise ValueError('The length of features is different than previous '
'features. Reinitialize your model if you want to use '
'new feature lengths.')

# Make a prediction with current weights
prediction = self.predict(features)

if prediction != label: