ottertune/server/analysis/lasso.py

#
# OtterTune - lasso.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Jul 8, 2016
@author: dvanaken
'''
import numpy as np
from sklearn.linear_model import lasso_path

from .base import ModelBase


class LassoPath(ModelBase):
"""Lasso:
Computes the Lasso path using Sklearn's lasso_path method.
See also
--------
http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.lasso_path.html
Attributes
----------
feature_labels_ : array, [n_features]
Labels for each of the features in X.
alphas_ : array, [n_alphas]
The alphas along the path where models are computed. (These are
the decreasing values of the penalty along the path).
coefs_ : array, [n_outputs, n_features, n_alphas]
Coefficients along the path.
rankings_ : array, [n_features]
The average ranking of each feature across all target values.
"""
    def __init__(self):
        self.feature_labels_ = None
        self.alphas_ = None
        self.coefs_ = None
        self.rankings_ = None

    def _reset(self):
        """Resets all attributes (erases the model)."""
        self.feature_labels_ = None
        self.alphas_ = None
        self.coefs_ = None
        self.rankings_ = None
    def fit(self, X, y, feature_labels, estimator_params=None):
        """Computes the Lasso path using Sklearn's lasso_path method.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data (the independent variables).
        y : array-like, shape (n_samples, n_outputs)
            Training data (the output/target values).
        feature_labels : array-like, shape (n_features)
            Labels for each of the features in X.
        estimator_params : dict, optional
            The parameters to pass to Sklearn's Lasso estimator.

        Returns
        -------
        self
        """
        self._reset()
        if estimator_params is None:
            estimator_params = {}
        self.feature_labels_ = feature_labels

        alphas, coefs, _ = lasso_path(X, y, **estimator_params)
        self.alphas_ = alphas.copy()
        self.coefs_ = coefs.copy()
        # Rank the features in X by order of importance. This ranking is based
        # on how early a given feature enters the regression (the earlier a
        # feature enters the regression, the MORE important it is).
        feature_rankings = [[] for _ in range(X.shape[1])]
        for target_coef_paths in self.coefs_:
            for i, feature_path in enumerate(target_coef_paths):
                # The entrance step is the (1-based) index of the first alpha
                # along the path at which this feature's coefficient is nonzero.
                entrance_step = 1
                for val_at_step in feature_path:
                    if val_at_step == 0:
                        entrance_step += 1
                    else:
                        break
                feature_rankings[i].append(entrance_step)
        self.rankings_ = np.array([np.mean(ranks) for ranks in feature_rankings])
        return self
    def get_ranked_features(self):
        if self.rankings_ is None:
            raise Exception("No lasso path has been fit yet!")

        rank_idxs = np.argsort(self.rankings_)
        return [self.feature_labels_[i] for i in rank_idxs]
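

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module). A minimal, hypothetical
# demonstration of LassoPath on synthetic data; the shapes, knob labels, and
# coefficients below are assumptions for illustration only. Because of the
# relative import above, run it as a module (e.g. `python -m analysis.lasso`).
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    n_samples, n_features = 100, 5

    # Synthetic workload: only the first two features drive the two target
    # metrics, so they should enter the Lasso path earliest and rank first.
    X_demo = rng.rand(n_samples, n_features)
    y_demo = np.column_stack([
        3.0 * X_demo[:, 0] + 0.1 * rng.randn(n_samples),
        2.0 * X_demo[:, 1] + 0.1 * rng.randn(n_samples),
    ])
    labels = ['knob_{}'.format(i) for i in range(n_features)]

    model = LassoPath()
    model.fit(X_demo, y_demo, labels)

    # Features are listed most-important first (smallest average entrance
    # step across both targets).
    print(model.get_ranked_features())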