ottertune/server/analysis/factor_analysis.py

112 lines
3.3 KiB
Python
Raw Normal View History

2019-08-23 08:47:19 -07:00
#
# OtterTune - factor_analysis.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Jul 4, 2016
@author: dvanaken
'''
import numpy as np
from sklearn.decomposition import FactorAnalysis as SklearnFactorAnalysis
from .base import ModelBase
class FactorAnalysis(ModelBase):
    """FactorAnalysis (FA):

    Fits an Sklearn FactorAnalysis model to X.


    See also
    --------
    http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.FactorAnalysis.html


    Attributes
    ----------
    model_ : sklearn.decomposition.FactorAnalysis
             The fitted FA model

    components_ : array, [n_components, n_features]
                  Components (i.e., factors) with maximum variance;
                  all-zero components of the fitted model are dropped

    feature_labels_ : array, [n_features]
                      Labels for each of the features in X

    total_variance_ : float
                      The total amount of variance explained by the components

    pvars_ : array, [n_components]
             The percentage of the variance explained by each component

    pvars_noise_ : array, [n_components]
                   The percentage of the variance explained by each component also
                   accounting for noise
    """

    def __init__(self):
        self.model_ = None
        self.components_ = None
        self.feature_labels_ = None
        self.total_variance_ = None
        self.pvars_ = None
        self.pvars_noise_ = None

    def _reset(self):
        """Resets all attributes (erases the model)"""
        self.model_ = None
        self.components_ = None
        self.feature_labels_ = None
        self.total_variance_ = None
        self.pvars_ = None
        self.pvars_noise_ = None

    def fit(self, X, feature_labels=None, n_components=None, estimator_params=None):
        """Fits an Sklearn FA model to X.

        Any previously fitted state is erased before the new fit.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data.

        feature_labels : array-like, shape (n_features), optional
                         Labels for each of the features in X. When omitted,
                         labels of the form "feature_<i>" are generated.

        n_components : int, optional
                       Number of components passed to Sklearn's FA estimator.
                       When omitted, the estimator's own default is used.

        estimator_params : dict, optional
                           The parameters to pass to Sklearn's FA estimators.
                           These are applied after the estimator is built, so
                           an 'n_components' entry here overrides the
                           `n_components` argument.

        Returns
        -------
        self

        Raises
        ------
        TypeError
            If `estimator_params` is given but is not a dict.
        """
        self._reset()

        # Validate with an explicit exception rather than `assert`, which is
        # stripped when Python runs with -O.
        if estimator_params is not None and not isinstance(estimator_params, dict):
            raise TypeError("estimator_params must be a dict, got {}".format(
                type(estimator_params).__name__))

        if feature_labels is None:
            # np.shape() also accepts plain nested sequences, which have no
            # `.shape` attribute of their own.
            n_features = np.shape(X)[1]
            feature_labels = ["feature_{}".format(i) for i in range(n_features)]
        self.feature_labels_ = feature_labels

        # n_components=None is the estimator's default, so no branching is needed.
        self.model_ = SklearnFactorAnalysis(n_components=n_components)
        if estimator_params is not None:
            # Update Sklearn estimator params
            self.model_.set_params(**estimator_params)
        self.model_.fit(X)

        # Remove zero-valued components (n_components x n_features)
        components_mask = np.any(self.model_.components_ != 0.0, axis=1)
        self.components_ = self.model_.components_[components_mask]

        # Compute the % variance explained (with/without noise)
        c2 = np.sum(self.components_ ** 2, axis=1)
        self.total_variance_ = np.sum(c2)
        self.pvars_ = 100 * c2 / self.total_variance_
        self.pvars_noise_ = 100 * c2 / (self.total_variance_ +
                                        np.sum(self.model_.noise_variance_))
        return self