use GPFlow in workload mapping

This commit is contained in:
BohanZhang 2020-01-20 03:09:01 -05:00 committed by Dana Van Aken
parent 389174302f
commit 25d1950e67
4 changed files with 73 additions and 10 deletions

View File

@ -111,7 +111,13 @@ class BasicGP(BaseModel):
]
def _build_kernel(self, kernel_kwargs, **kwargs):
k = gpflow.kernels.Matern12(lengthscales=2, **kernel_kwargs[0])
ls = 2
var = 1
if kwargs.get('lengthscales') is not None:
ls = kwargs['lengthscales']
if kwargs.get('variance') is not None:
var = kwargs['variance']
k = gpflow.kernels.Matern12(variance=var, lengthscales=ls, **kernel_kwargs[0])
if kwargs.pop('optimize_hyperparameters'):
k.lengthscales.transform = gpflow.transforms.Logistic(
*self._LENGTHSCALE_BOUNDS)

View File

@ -16,6 +16,13 @@ from analysis.util import get_analysis_logger
LOG = get_analysis_logger(__name__)
class GPRResult:
    """Container for the output of a GP regression prediction.

    Attributes are stored exactly as passed in; no copying or validation
    is performed.
    """

    def __init__(self, ypreds=None, sigmas=None):
        """Store the prediction outputs.

        Args:
            ypreds: predicted values (defaults to None).
            sigmas: uncertainty values paired with `ypreds` (defaults to None).
        """
        self.ypreds = ypreds
        self.sigmas = sigmas

    def __repr__(self):
        # Readable form for logs and failed test assertions.
        return '{}(ypreds={!r}, sigmas={!r})'.format(
            type(self).__name__, self.ypreds, self.sigmas)
class GPRGDResult():
def __init__(self, ypreds=None, sigmas=None, minl=None, minl_conf=None):
@ -25,6 +32,20 @@ class GPRGDResult():
self.minl_conf = minl_conf
def gpflow_predict(model, Xin):
    """Evaluate a model's predictive mean and standard deviation at `Xin`.

    Args:
        model: object exposing `_build_predict`, `likelihood` and
            `enquire_session` (looks like a GPflow 1.x-style model --
            confirm against the caller).
        Xin: inputs to predict at; passed unchanged to
            `model._build_predict`.

    Returns:
        GPRResult whose `ypreds` is the evaluated predictive mean and whose
        `sigmas` is the evaluated square root of the predictive variance.
    """
    # Only the first two of the five returned values are needed here.
    fmean, fvar, _, _, _ = model._build_predict(Xin)  # pylint: disable=protected-access
    y_mean_var = model.likelihood.predict_mean_and_var(fmean, fvar)
    y_mean = y_mean_var[0]
    y_var = y_mean_var[1]
    y_std = tf.sqrt(y_var)
    session = model.enquire_session(session=None)
    with session.as_default():
        # Fetch both tensors in a single run so the part of the graph they
        # share is evaluated once instead of once per fetch.
        y_mean_value, y_std_value = session.run([y_mean, y_std])
    return GPRResult(y_mean_value, y_std_value)
def tf_optimize(model, Xnew_arr, learning_rate=0.01, maxiter=100, ucb_beta=3.,
active_dims=None, bounds=None, debug=True):
Xnew_arr = check_array(Xnew_arr, copy=False, warn_on_dtype=True, dtype=FLOAT_DTYPES)

View File

@ -14,6 +14,7 @@ from analysis.gp_tf import GPR
from analysis.gp_tf import GPRGD
from analysis.gpr import gpr_models
from analysis.gpr.optimize import tf_optimize
from analysis.gpr.optimize import gpflow_predict
# test numpy version GPR
class TestGPRNP(unittest.TestCase):
@ -67,6 +68,37 @@ class TestGPRTF(unittest.TestCase):
self.assertEqual(sigmas_round, expected_sigmas)
# test GPFlow version GPR
class TestGPRGPF(unittest.TestCase):
    """Checks `gpflow_predict` output for a 'BasicGP' model on the Boston data."""

    @classmethod
    def setUpClass(cls):
        """Build the model from the first 500 rows and predict on the rest."""
        super(TestGPRGPF, cls).setUpClass()
        dataset = datasets.load_boston()
        features = dataset['data']
        train_X, test_X = features[0:500], features[500:]
        train_y = dataset['target'][0:500].reshape(500, 1)
        # Reset the default TF graph and hand it to GPflow's default session
        # before the model is created.
        tf.reset_default_graph()
        gpflow.reset_default_session(graph=tf.get_default_graph())
        cls.m = gpr_models.create_model('BasicGP', X=train_X, y=train_y,
                                        lengthscales=1, variance=1,
                                        noise_variance=1)
        cls.gpr_result = gpflow_predict(cls.m.model, test_X)

    def test_gprnp_ypreds(self):
        """Predicted means, rounded to 4 decimals, equal the recorded values."""
        expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014]
        ypreds_round = [round(row[0], 4) for row in self.gpr_result.ypreds]
        self.assertEqual(ypreds_round, expected_ypreds)

    def test_gprnp_sigmas(self):
        """Predicted sigmas, rounded to 4 decimals, equal the recorded values."""
        expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142]
        sigmas_round = [round(row[0], 4) for row in self.gpr_result.sigmas]
        self.assertEqual(sigmas_round, expected_sigmas)
# test Tensorflow GPRGD model
class TestGPRGD(unittest.TestCase):
@ -96,11 +128,11 @@ class TestGPRGD(unittest.TestCase):
# test Gradient Descent in GPFlow model
class TestGPRGP(unittest.TestCase):
class TestGPFGD(unittest.TestCase):
@classmethod
def setUpClass(cls):
super(TestGPRGP, cls).setUpClass()
super(TestGPFGD, cls).setUpClass()
boston = datasets.load_boston()
data = boston['data']
X_train = data[0:500]

View File

@ -22,7 +22,7 @@ from analysis.gp_tf import GPRGD
from analysis.nn_tf import NeuralNet
from analysis.gpr import gpr_models
from analysis.gpr import ucb
from analysis.gpr.optimize import tf_optimize
from analysis.gpr.optimize import tf_optimize, gpflow_predict
from analysis.preprocessing import Bin, DummyEncoder
from analysis.constraints import ParamConstraintHelper
from website.models import PipelineData, PipelineRun, Result, Workload, SessionKnob, MetricCatalog
@ -932,12 +932,16 @@ def map_workload(map_workload_input):
# and then predict the performance of each metric for each of
# the knob configurations attempted so far by the target.
y_col = y_col.reshape(-1, 1)
model = GPRNP(length_scale=params['GPR_LENGTH_SCALE'],
magnitude=params['GPR_MAGNITUDE'],
max_train_size=params['GPR_MAX_TRAIN_SIZE'],
batch_size=params['GPR_BATCH_SIZE'])
model.fit(X_scaled, y_col, ridge=params['GPR_RIDGE'])
predictions[:, j] = model.predict(X_target).ypreds.ravel()
model_kwargs = {'lengthscales': params['GPR_LENGTH_SCALE'],
'variance': params['GPR_MAGNITUDE'],
'noise_variance': params['GPR_RIDGE']}
tf.reset_default_graph()
graph = tf.get_default_graph()
gpflow.reset_default_session(graph=graph)
m = gpr_models.create_model(params['GPR_MODEL_NAME'], X=X_scaled, y=y_col,
**model_kwargs)
gpr_result = gpflow_predict(m.model, X_target)
predictions[:, j] = gpr_result.ypreds.ravel()
# Bin each of the predicted metric columns by deciles and then
# compute the score (i.e., distance) between the target workload
# and each of the known workloads