use GPFlow in workload mapping

This commit is contained in:
BohanZhang 2020-01-20 03:09:01 -05:00 committed by Dana Van Aken
parent 389174302f
commit 25d1950e67
4 changed files with 73 additions and 10 deletions

View File

@ -111,7 +111,13 @@ class BasicGP(BaseModel):
] ]
def _build_kernel(self, kernel_kwargs, **kwargs): def _build_kernel(self, kernel_kwargs, **kwargs):
k = gpflow.kernels.Matern12(lengthscales=2, **kernel_kwargs[0]) ls = 2
var = 1
if kwargs.get('lengthscales') is not None:
ls = kwargs['lengthscales']
if kwargs.get('variance') is not None:
var = kwargs['variance']
k = gpflow.kernels.Matern12(variance=var, lengthscales=ls, **kernel_kwargs[0])
if kwargs.pop('optimize_hyperparameters'): if kwargs.pop('optimize_hyperparameters'):
k.lengthscales.transform = gpflow.transforms.Logistic( k.lengthscales.transform = gpflow.transforms.Logistic(
*self._LENGTHSCALE_BOUNDS) *self._LENGTHSCALE_BOUNDS)

View File

@ -16,6 +16,13 @@ from analysis.util import get_analysis_logger
LOG = get_analysis_logger(__name__) LOG = get_analysis_logger(__name__)
class GPRResult():
    """Plain container for the output of a GPR prediction.

    Both attributes are stored exactly as passed in; no validation or
    conversion is performed.
    """

    def __init__(self, ypreds=None, sigmas=None):
        """Store the prediction outputs.

        Args:
            ypreds: Predicted values (``gpflow_predict`` passes the evaluated
                predictive mean). Defaults to None.
            sigmas: Uncertainty values matching ``ypreds`` (``gpflow_predict``
                passes the evaluated predictive standard deviation).
                Defaults to None.
        """
        self.ypreds = ypreds
        self.sigmas = sigmas
class GPRGDResult(): class GPRGDResult():
def __init__(self, ypreds=None, sigmas=None, minl=None, minl_conf=None): def __init__(self, ypreds=None, sigmas=None, minl=None, minl_conf=None):
@ -25,6 +32,20 @@ class GPRGDResult():
self.minl_conf = minl_conf self.minl_conf = minl_conf
def gpflow_predict(model, Xin):
    """Evaluate a GPflow model's predictive mean and standard deviation.

    Args:
        model: Model object exposing ``_build_predict``, ``likelihood`` and
            ``enquire_session`` (presumably a GPflow 1.x model -- confirm
            against the caller).
        Xin: Inputs to predict at; passed straight to ``model._build_predict``.

    Returns:
        GPRResult: ``ypreds`` holds the evaluated predictive mean and
        ``sigmas`` the evaluated predictive standard deviation (square root
        of the predictive variance).
    """
    # Only the first two of the five values returned by _build_predict are
    # used here; the remaining three are ignored.
    fmean, fvar, _, _, _ = model._build_predict(Xin)  # pylint: disable=protected-access
    y_mean_var = model.likelihood.predict_mean_and_var(fmean, fvar)
    y_mean = y_mean_var[0]
    y_var = y_mean_var[1]
    y_std = tf.sqrt(y_var)

    session = model.enquire_session(session=None)
    with session.as_default():
        # Fetch both tensors in a single run so the shared upstream graph
        # (the prediction itself) is evaluated once instead of twice.
        y_mean_value, y_std_value = session.run([y_mean, y_std])
    return GPRResult(y_mean_value, y_std_value)
def tf_optimize(model, Xnew_arr, learning_rate=0.01, maxiter=100, ucb_beta=3., def tf_optimize(model, Xnew_arr, learning_rate=0.01, maxiter=100, ucb_beta=3.,
active_dims=None, bounds=None, debug=True): active_dims=None, bounds=None, debug=True):
Xnew_arr = check_array(Xnew_arr, copy=False, warn_on_dtype=True, dtype=FLOAT_DTYPES) Xnew_arr = check_array(Xnew_arr, copy=False, warn_on_dtype=True, dtype=FLOAT_DTYPES)

View File

@ -14,6 +14,7 @@ from analysis.gp_tf import GPR
from analysis.gp_tf import GPRGD from analysis.gp_tf import GPRGD
from analysis.gpr import gpr_models from analysis.gpr import gpr_models
from analysis.gpr.optimize import tf_optimize from analysis.gpr.optimize import tf_optimize
from analysis.gpr.optimize import gpflow_predict
# test numpy version GPR # test numpy version GPR
class TestGPRNP(unittest.TestCase): class TestGPRNP(unittest.TestCase):
@ -67,6 +68,37 @@ class TestGPRTF(unittest.TestCase):
self.assertEqual(sigmas_round, expected_sigmas) self.assertEqual(sigmas_round, expected_sigmas)
# test GPFlow version GPR
class TestGPRGPF(unittest.TestCase):
    """Checks ``gpflow_predict`` on a 'BasicGP' model built from the Boston data.

    The model is built once in ``setUpClass`` and its predictions on the
    held-out rows are compared against hard-coded expected values.
    """

    @classmethod
    def setUpClass(cls):
        """Build the model on the first rows and predict on the remainder."""
        super(TestGPRGPF, cls).setUpClass()
        # Number of leading rows used for training; the rest are test rows.
        n_train = 500
        # NOTE(review): load_boston was removed in scikit-learn 1.2 -- confirm
        # the pinned scikit-learn version still provides it.
        boston = datasets.load_boston()
        data = boston['data']
        X_train = data[0:n_train]
        X_test = data[n_train:]
        # Column vector of targets, one row per training sample.
        y_train = boston['target'][0:n_train].reshape(-1, 1)
        model_kwargs = {'lengthscales': 1, 'variance': 1, 'noise_variance': 1}
        # Start from a clean graph/session so earlier tests cannot leak state
        # into this model.
        tf.reset_default_graph()
        graph = tf.get_default_graph()
        gpflow.reset_default_session(graph=graph)
        cls.m = gpr_models.create_model('BasicGP', X=X_train, y=y_train,
                                        **model_kwargs)
        cls.gpr_result = gpflow_predict(cls.m.model, X_test)

    # NOTE(review): the "gprnp" prefix in the test names below looks copied
    # from the NumPy GPR test class; names are kept so existing test IDs
    # stay stable.
    def test_gprnp_ypreds(self):
        """Predicted means, rounded to 4 decimals, match the expected values."""
        ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds]
        expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014]
        self.assertEqual(ypreds_round, expected_ypreds)

    def test_gprnp_sigmas(self):
        """Predicted sigmas, rounded to 4 decimals, match the expected values."""
        sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas]
        expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142]
        self.assertEqual(sigmas_round, expected_sigmas)
# test Tensorflow GPRGD model # test Tensorflow GPRGD model
class TestGPRGD(unittest.TestCase): class TestGPRGD(unittest.TestCase):
@ -96,11 +128,11 @@ class TestGPRGD(unittest.TestCase):
# test Gradient Descent in GPFlow model # test Gradient Descent in GPFlow model
class TestGPRGP(unittest.TestCase): class TestGPFGD(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
super(TestGPRGP, cls).setUpClass() super(TestGPFGD, cls).setUpClass()
boston = datasets.load_boston() boston = datasets.load_boston()
data = boston['data'] data = boston['data']
X_train = data[0:500] X_train = data[0:500]

View File

@ -22,7 +22,7 @@ from analysis.gp_tf import GPRGD
from analysis.nn_tf import NeuralNet from analysis.nn_tf import NeuralNet
from analysis.gpr import gpr_models from analysis.gpr import gpr_models
from analysis.gpr import ucb from analysis.gpr import ucb
from analysis.gpr.optimize import tf_optimize from analysis.gpr.optimize import tf_optimize, gpflow_predict
from analysis.preprocessing import Bin, DummyEncoder from analysis.preprocessing import Bin, DummyEncoder
from analysis.constraints import ParamConstraintHelper from analysis.constraints import ParamConstraintHelper
from website.models import PipelineData, PipelineRun, Result, Workload, SessionKnob, MetricCatalog from website.models import PipelineData, PipelineRun, Result, Workload, SessionKnob, MetricCatalog
@ -932,12 +932,16 @@ def map_workload(map_workload_input):
# and then predict the performance of each metric for each of # and then predict the performance of each metric for each of
# the knob configurations attempted so far by the target. # the knob configurations attempted so far by the target.
y_col = y_col.reshape(-1, 1) y_col = y_col.reshape(-1, 1)
model = GPRNP(length_scale=params['GPR_LENGTH_SCALE'], model_kwargs = {'lengthscales': params['GPR_LENGTH_SCALE'],
magnitude=params['GPR_MAGNITUDE'], 'variance': params['GPR_MAGNITUDE'],
max_train_size=params['GPR_MAX_TRAIN_SIZE'], 'noise_variance': params['GPR_RIDGE']}
batch_size=params['GPR_BATCH_SIZE']) tf.reset_default_graph()
model.fit(X_scaled, y_col, ridge=params['GPR_RIDGE']) graph = tf.get_default_graph()
predictions[:, j] = model.predict(X_target).ypreds.ravel() gpflow.reset_default_session(graph=graph)
m = gpr_models.create_model(params['GPR_MODEL_NAME'], X=X_scaled, y=y_col,
**model_kwargs)
gpr_result = gpflow_predict(m.model, X_target)
predictions[:, j] = gpr_result.ypreds.ravel()
# Bin each of the predicted metric columns by deciles and then # Bin each of the predicted metric columns by deciles and then
# compute the score (i.e., distance) between the target workload # compute the score (i.e., distance) between the target workload
# and each of the known workloads # and each of the known workloads