use gpflow in workload mapping

This commit is contained in:
bohanjason 2020-01-22 07:38:18 -05:00 committed by Dana Van Aken
parent 25d1950e67
commit 6f0fcfd952
4 changed files with 72 additions and 78 deletions

View File

@ -16,13 +16,6 @@ from analysis.util import get_analysis_logger
LOG = get_analysis_logger(__name__) LOG = get_analysis_logger(__name__)
class GPRResult():
    """Plain container for the output of a GPR prediction.

    Both attributes are stored exactly as passed in and default to None.
    ``gpflow_predict`` fills them with the evaluated predictive mean and
    standard deviation respectively.
    """

    def __init__(self, ypreds=None, sigmas=None):
        self.ypreds = ypreds
        self.sigmas = sigmas

    def __repr__(self):
        # Debug aid only; does not affect how callers read the attributes.
        return "{}(ypreds={!r}, sigmas={!r})".format(
            type(self).__name__, self.ypreds, self.sigmas)
class GPRGDResult(): class GPRGDResult():
def __init__(self, ypreds=None, sigmas=None, minl=None, minl_conf=None): def __init__(self, ypreds=None, sigmas=None, minl=None, minl_conf=None):
@ -32,20 +25,6 @@ class GPRGDResult():
self.minl_conf = minl_conf self.minl_conf = minl_conf
def gpflow_predict(model, Xin):
    """Evaluate a GPflow-style model's predictive mean and std at ``Xin``.

    Args:
        model: object exposing ``_build_predict``, ``likelihood`` and
            ``enquire_session`` (as used below).
        Xin: query points handed straight to ``model._build_predict``.

    Returns:
        GPRResult whose ``ypreds`` is the evaluated predictive mean and
        whose ``sigmas`` is the square root of the predictive variance.
    """
    # Only the first two of the five values returned by _build_predict are used.
    fmean, fvar, _, _, _ = model._build_predict(Xin)  # pylint: disable=protected-access
    y_mean_var = model.likelihood.predict_mean_and_var(fmean, fvar)
    y_mean = y_mean_var[0]
    y_var = y_mean_var[1]
    y_std = tf.sqrt(y_var)
    session = model.enquire_session(session=None)
    with session.as_default():
        # Fetch both tensors in one run so the prediction graph is evaluated
        # once rather than once per output.
        y_mean_value, y_std_value = session.run([y_mean, y_std])
    return GPRResult(y_mean_value, y_std_value)
def tf_optimize(model, Xnew_arr, learning_rate=0.01, maxiter=100, ucb_beta=3., def tf_optimize(model, Xnew_arr, learning_rate=0.01, maxiter=100, ucb_beta=3.,
active_dims=None, bounds=None, debug=True): active_dims=None, bounds=None, debug=True):
Xnew_arr = check_array(Xnew_arr, copy=False, warn_on_dtype=True, dtype=FLOAT_DTYPES) Xnew_arr = check_array(Xnew_arr, copy=False, warn_on_dtype=True, dtype=FLOAT_DTYPES)

View File

@ -0,0 +1,34 @@
#
# OtterTune - analysis/optimize.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
# Author: Dana Van Aken
import tensorflow as tf
from sklearn.utils import assert_all_finite, check_array
from sklearn.utils.validation import FLOAT_DTYPES
class GPRResult():
    """Plain container for the output of a GPR prediction.

    Both attributes are stored exactly as passed in and default to None.
    ``gpflow_predict`` fills them with the evaluated predictive mean and
    standard deviation respectively.
    """

    def __init__(self, ypreds=None, sigmas=None):
        self.ypreds = ypreds
        self.sigmas = sigmas

    def __repr__(self):
        # Debug aid only; does not affect how callers read the attributes.
        return "{}(ypreds={!r}, sigmas={!r})".format(
            type(self).__name__, self.ypreds, self.sigmas)
def gpflow_predict(model, Xin):
    """Evaluate a GPflow-style model's predictive mean and std at ``Xin``.

    Args:
        model: object exposing ``_build_predict``, ``likelihood`` and
            ``enquire_session`` (as used below).
        Xin: array-like of query points; validated and converted to a float
            dtype by ``check_array`` before being passed to the model.

    Returns:
        GPRResult whose ``ypreds`` is the evaluated predictive mean and
        whose ``sigmas`` is the square root of the predictive variance.

    Raises:
        ValueError: raised by the scikit-learn validators when ``Xin`` is not
            an acceptable array, or when the evaluated mean/std contains NaN
            or infinity.
    """
    # NOTE(review): `warn_on_dtype` was deprecated in scikit-learn 0.21 and
    # removed in 0.23 -- confirm the pinned version before upgrading.
    Xin = check_array(Xin, copy=False, warn_on_dtype=True, dtype=FLOAT_DTYPES)
    # Only the first two of the five values returned by _build_predict are used.
    fmean, fvar, _, _, _ = model._build_predict(Xin)  # pylint: disable=protected-access
    y_mean_var = model.likelihood.predict_mean_and_var(fmean, fvar)
    y_mean = y_mean_var[0]
    y_var = y_mean_var[1]
    y_std = tf.sqrt(y_var)
    session = model.enquire_session(session=None)
    with session.as_default():
        # Fetch both tensors in one run so the prediction graph is evaluated
        # once rather than once per output.
        y_mean_value, y_std_value = session.run([y_mean, y_std])
    # Fail loudly here instead of letting NaN/inf predictions reach callers.
    assert_all_finite(y_mean_value)
    assert_all_finite(y_std_value)
    return GPRResult(y_mean_value, y_std_value)

View File

@ -14,7 +14,7 @@ from analysis.gp_tf import GPR
from analysis.gp_tf import GPRGD from analysis.gp_tf import GPRGD
from analysis.gpr import gpr_models from analysis.gpr import gpr_models
from analysis.gpr.optimize import tf_optimize from analysis.gpr.optimize import tf_optimize
from analysis.gpr.optimize import gpflow_predict from analysis.gpr.predict import gpflow_predict
# test numpy version GPR # test numpy version GPR
class TestGPRNP(unittest.TestCase): class TestGPRNP(unittest.TestCase):
@ -31,12 +31,12 @@ class TestGPRNP(unittest.TestCase):
cls.model.fit(X_train, y_train, ridge=1.0) cls.model.fit(X_train, y_train, ridge=1.0)
cls.gpr_result = cls.model.predict(X_test) cls.gpr_result = cls.model.predict(X_test)
def test_gprnp_ypreds(self): def test_ypreds(self):
ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds] ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds]
expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014] expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014]
self.assertEqual(ypreds_round, expected_ypreds) self.assertEqual(ypreds_round, expected_ypreds)
def test_gprnp_sigmas(self): def test_sigmas(self):
sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas] sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas]
expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142] expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142]
self.assertEqual(sigmas_round, expected_sigmas) self.assertEqual(sigmas_round, expected_sigmas)
@ -57,23 +57,23 @@ class TestGPRTF(unittest.TestCase):
cls.model.fit(X_train, y_train) cls.model.fit(X_train, y_train)
cls.gpr_result = cls.model.predict(X_test) cls.gpr_result = cls.model.predict(X_test)
def test_gprnp_ypreds(self): def test_ypreds(self):
ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds] ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds]
expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014] expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014]
self.assertEqual(ypreds_round, expected_ypreds) self.assertEqual(ypreds_round, expected_ypreds)
def test_gprnp_sigmas(self): def test_sigmas(self):
sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas] sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas]
expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142] expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142]
self.assertEqual(sigmas_round, expected_sigmas) self.assertEqual(sigmas_round, expected_sigmas)
# test GPFlow version GPR # test GPFlow version GPR
class TestGPRGPF(unittest.TestCase): class TestGPRGPFlow(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
super(TestGPRGPF, cls).setUpClass() super(TestGPRGPFlow, cls).setUpClass()
boston = datasets.load_boston() boston = datasets.load_boston()
data = boston['data'] data = boston['data']
X_train = data[0:500] X_train = data[0:500]
@ -88,51 +88,23 @@ class TestGPRGPF(unittest.TestCase):
**model_kwargs) **model_kwargs)
cls.gpr_result = gpflow_predict(cls.m.model, X_test) cls.gpr_result = gpflow_predict(cls.m.model, X_test)
def test_gprnp_ypreds(self): def test_ypreds(self):
ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds] ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds]
expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014] expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014]
self.assertEqual(ypreds_round, expected_ypreds) self.assertEqual(ypreds_round, expected_ypreds)
def test_gprnp_sigmas(self): def test_sigmas(self):
sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas] sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas]
expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142] expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142]
self.assertEqual(sigmas_round, expected_sigmas) self.assertEqual(sigmas_round, expected_sigmas)
# test Tensorflow GPRGD model # test GPFlow version Gradient Descent
class TestGPRGD(unittest.TestCase): class TestGDGPFlow(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
super(TestGPRGD, cls).setUpClass() super(TestGDGPFlow, cls).setUpClass()
boston = datasets.load_boston()
data = boston['data']
X_train = data[0:500]
X_test = data[500:]
y_train = boston['target'][0:500].reshape(500, 1)
Xmin = np.min(X_train, 0)
Xmax = np.max(X_train, 0)
cls.model = GPRGD(length_scale=1.0, magnitude=1.0, max_iter=1, learning_rate=0, ridge=1.0)
cls.model.fit(X_train, y_train, Xmin, Xmax)
cls.gpr_result = cls.model.predict(X_test)
def test_gprnp_ypreds(self):
ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds]
expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014]
self.assertEqual(ypreds_round, expected_ypreds)
def test_gprnp_sigmas(self):
sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas]
expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142]
self.assertEqual(sigmas_round, expected_sigmas)
# test Gradient Descent in GPFlow model
class TestGPFGD(unittest.TestCase):
@classmethod
def setUpClass(cls):
super(TestGPFGD, cls).setUpClass()
boston = datasets.load_boston() boston = datasets.load_boston()
data = boston['data'] data = boston['data']
X_train = data[0:500] X_train = data[0:500]
@ -158,23 +130,23 @@ class TestGPFGD(unittest.TestCase):
cls.m = gpr_models.create_model('BasicGP', X=X_train, y=y_train, **model_kwargs) cls.m = gpr_models.create_model('BasicGP', X=X_train, y=y_train, **model_kwargs)
cls.gpr_result = tf_optimize(cls.m.model, X_test, **opt_kwargs) cls.gpr_result = tf_optimize(cls.m.model, X_test, **opt_kwargs)
def test_gprnp_ypreds(self): def test_ypreds(self):
ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds] ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds]
expected_ypreds = [0.5272] expected_ypreds = [0.5272]
self.assertEqual(ypreds_round, expected_ypreds) self.assertEqual(ypreds_round, expected_ypreds)
def test_gprnp_sigmas(self): def test_sigmas(self):
sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas] sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas]
expected_sigmas = [1.4153] expected_sigmas = [1.4153]
self.assertEqual(sigmas_round, expected_sigmas) self.assertEqual(sigmas_round, expected_sigmas)
# test Gradient Descent in Tensorflow GPRGD model # test Tensorflow version Gradient Descent
class TestGPRGDGD(unittest.TestCase): class TestGDTF(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
super(TestGPRGDGD, cls).setUpClass() super(TestGDTF, cls).setUpClass()
boston = datasets.load_boston() boston = datasets.load_boston()
data = boston['data'] data = boston['data']
X_train = data[0:500] X_train = data[0:500]
@ -191,12 +163,12 @@ class TestGPRGDGD(unittest.TestCase):
cls.model.fit(X_train, y_train, Xmin, Xmax) cls.model.fit(X_train, y_train, Xmin, Xmax)
cls.gpr_result = cls.model.predict(X_test) cls.gpr_result = cls.model.predict(X_test)
def test_gprnp_ypreds(self): def test_ypreds(self):
ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds] ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds]
expected_ypreds = [0.5272] expected_ypreds = [0.5272]
self.assertEqual(ypreds_round, expected_ypreds) self.assertEqual(ypreds_round, expected_ypreds)
def test_gprnp_sigmas(self): def test_sigmas(self):
sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas] sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas]
expected_sigmas = [1.4153] expected_sigmas = [1.4153]
self.assertEqual(sigmas_round, expected_sigmas) self.assertEqual(sigmas_round, expected_sigmas)

View File

@ -22,7 +22,8 @@ from analysis.gp_tf import GPRGD
from analysis.nn_tf import NeuralNet from analysis.nn_tf import NeuralNet
from analysis.gpr import gpr_models from analysis.gpr import gpr_models
from analysis.gpr import ucb from analysis.gpr import ucb
from analysis.gpr.optimize import tf_optimize, gpflow_predict from analysis.gpr.optimize import tf_optimize
from analysis.gpr.predict import gpflow_predict
from analysis.preprocessing import Bin, DummyEncoder from analysis.preprocessing import Bin, DummyEncoder
from analysis.constraints import ParamConstraintHelper from analysis.constraints import ParamConstraintHelper
from website.models import PipelineData, PipelineRun, Result, Workload, SessionKnob, MetricCatalog from website.models import PipelineData, PipelineRun, Result, Workload, SessionKnob, MetricCatalog
@ -932,6 +933,7 @@ def map_workload(map_workload_input):
# and then predict the performance of each metric for each of # and then predict the performance of each metric for each of
# the knob configurations attempted so far by the target. # the knob configurations attempted so far by the target.
y_col = y_col.reshape(-1, 1) y_col = y_col.reshape(-1, 1)
if params['GPR_USE_GPFLOW']:
model_kwargs = {'lengthscales': params['GPR_LENGTH_SCALE'], model_kwargs = {'lengthscales': params['GPR_LENGTH_SCALE'],
'variance': params['GPR_MAGNITUDE'], 'variance': params['GPR_MAGNITUDE'],
'noise_variance': params['GPR_RIDGE']} 'noise_variance': params['GPR_RIDGE']}
@ -941,6 +943,13 @@ def map_workload(map_workload_input):
m = gpr_models.create_model(params['GPR_MODEL_NAME'], X=X_scaled, y=y_col, m = gpr_models.create_model(params['GPR_MODEL_NAME'], X=X_scaled, y=y_col,
**model_kwargs) **model_kwargs)
gpr_result = gpflow_predict(m.model, X_target) gpr_result = gpflow_predict(m.model, X_target)
else:
model = GPRNP(length_scale=params['GPR_LENGTH_SCALE'],
magnitude=params['GPR_MAGNITUDE'],
max_train_size=params['GPR_MAX_TRAIN_SIZE'],
batch_size=params['GPR_BATCH_SIZE'])
model.fit(X_scaled, y_col, ridge=params['GPR_RIDGE'])
gpr_result = model.predict(X_target)
predictions[:, j] = gpr_result.ypreds.ravel() predictions[:, j] = gpr_result.ypreds.ravel()
# Bin each of the predicted metric columns by deciles and then # Bin each of the predicted metric columns by deciles and then
# compute the score (i.e., distance) between the target workload # compute the score (i.e., distance) between the target workload