# Source code for simplestatistics.statistics.perceptron

"""
Implements perceptron().
"""

class perceptron():
    """
    A `perceptron model`_.

    The implementation of the perceptron model is closely modeled after
    the implementation in
    `simple-statistics <https://github.com/simple-statistics/simple-statistics>`_,
    the javascript analogue of ``simplestatistics``. You can
    find the javascript implementation of the perceptron model
    `here <https://github.com/simple-statistics/simple-statistics/blob/master/src/perceptron.js>`_.

    .. _`perceptron model`: https://en.wikipedia.org/wiki/Perceptron

    Attributes:
        weights: List of per-feature coefficients. Empty until the first
            call to :meth:`train`.
        bias: The intercept added to the weighted sum of the features.

    Examples:
        >>> mod = perceptron()
        >>> for ii in range(10):
        ...     mod.train([0,1], 0)
        ...     mod.train([1,0], 0)
        ...     mod.train([1,1], 1)
        ...     mod.train([0,0], 0)
        >>> mod.predict([1, 0])
        0
        >>> mod.predict([1, 1])
        1

        You cannot predict an item with a model that hasn't been trained yet.

        >>> mod2 = perceptron()
        >>> mod2.predict([0, 0])
        Traceback (most recent call last):
            ...
        RuntimeError: The model has not been trained yet.

        When training, labels need to be 0 or 1.

        >>> mod3 = perceptron()
        >>> mod3.train([1, 1], 4)
        Traceback (most recent call last):
            ...
        ValueError: Labels need to be either 0 or 1.

        Once trained on an item, the rest of the training items need to have
        features of the same length.

        >>> mod4 = perceptron()
        >>> mod4.train([1, 0], 1)
        >>> mod4.train([1, 1], 1)
        >>> mod4.train([1, 1, 0], 1) # doctest: +ELLIPSIS
        Traceback (most recent call last):
            ...
        ValueError: The length of features is different ... to use new feature lengths.

        Training never modifies the list of features handed to it.

        >>> item = [0, 1]
        >>> mod5 = perceptron()
        >>> mod5.train(item, 0)
        >>> item
        [0, 1]
    """

    def __init__(self):
        # The weights are the coefficients of the model; they are updated
        # during training whenever the model's prediction differs from
        # the label (the correct category).
        self.weights = []

        # The bias, or intercept
        self.bias = 0

    def predict(self, features):
        """
        Classifies an item based on the learning the instance of the model has
        done on previous items.

        Args:
            features: A list of the features of the item to classify. The
                length of the list has to be the same as that of the lists
                of features the model trained on. The length is not
                validated here: a longer list raises ``IndexError`` and a
                shorter list is scored against the leading weights only.

        Returns:
            0 or 1 denoting the predicted category/classification.

        Raises:
            RuntimeError: If the model has no weights yet, i.e. it has not
                been trained.
        """
        # An empty weight list means train() has never stored any weights.
        if not self.weights:
            raise RuntimeError('The model has not been trained yet.')

        # Weighted sum of the features, accumulated in feature order ...
        score = 0
        for ii, value in enumerate(features):
            score += value * self.weights[ii]

        # ... plus the intercept.
        score += self.bias

        # Only a strictly positive score maps to category 1.
        if score > 0:
            return 1

        return 0

    def train(self, features, label):
        """
        The method to train an instance of the perceptron model on an item.

        Args:
            features: A sequence of numerical features in the form
                [feature_1, feature_2, ...]. The length needs to be the same
                for each item given to the same model/instance. The sequence
                is copied on first use and is never modified.
            label: An integer of value 0 or 1 to denote category of the item.

        Returns:
            None

        Raises:
            ValueError: If ``label`` is not 0 or 1, or if ``features`` has a
                different length than the features of earlier items.
        """

        # we will require labels to either be 0 or 1
        if label not in [0, 1]:
            raise ValueError('Labels need to be either 0 or 1.')

        if not self.weights:
            # First item the model trains on: seed the weights with the
            # features and the bias with 1. Take a *copy* so that the
            # in-place weight updates below never write into the caller's
            # list (and so that tuples are accepted as features).
            self.weights = list(features)
            self.bias = 1

        elif len(self.weights) != len(features):
            raise ValueError('The length of features is different than previous '
                             'features. Reinitialize your model if you want to use '
                             'new feature lengths.')

        # Make a prediction with current weights
        prediction = self.predict(features)

        # Nothing to learn from an item that is already classified correctly.
        if prediction == label:
            return

        # +1 when the model under-shot (predicted 0, label 1),
        # -1 when it over-shot (predicted 1, label 0).
        gradient = label - prediction

        # Nudge every weight, and the bias, toward the correct label.
        for ii, value in enumerate(features):
            self.weights[ii] += gradient * value

        self.bias += gradient