diff --git a/server/analysis/gpr/optimize.py b/server/analysis/gpr/optimize.py index f897e3a..ca82cae 100644 --- a/server/analysis/gpr/optimize.py +++ b/server/analysis/gpr/optimize.py @@ -16,13 +16,6 @@ from analysis.util import get_analysis_logger LOG = get_analysis_logger(__name__) -class GPRResult(): - - def __init__(self, ypreds=None, sigmas=None): - self.ypreds = ypreds - self.sigmas = sigmas - - class GPRGDResult(): def __init__(self, ypreds=None, sigmas=None, minl=None, minl_conf=None): @@ -32,20 +25,6 @@ class GPRGDResult(): self.minl_conf = minl_conf -def gpflow_predict(model, Xin): - fmean, fvar, _, _, _ = model._build_predict(Xin) # pylint: disable=protected-access - y_mean_var = model.likelihood.predict_mean_and_var(fmean, fvar) - y_mean = y_mean_var[0] - y_var = y_mean_var[1] - y_std = tf.sqrt(y_var) - - session = model.enquire_session(session=None) - with session.as_default(): - y_mean_value = session.run(y_mean) - y_std_value = session.run(y_std) - return GPRResult(y_mean_value, y_std_value) - - def tf_optimize(model, Xnew_arr, learning_rate=0.01, maxiter=100, ucb_beta=3., active_dims=None, bounds=None, debug=True): Xnew_arr = check_array(Xnew_arr, copy=False, warn_on_dtype=True, dtype=FLOAT_DTYPES) diff --git a/server/analysis/gpr/predict.py b/server/analysis/gpr/predict.py new file mode 100644 index 0000000..d922c7b --- /dev/null +++ b/server/analysis/gpr/predict.py @@ -0,0 +1,34 @@ +# +# OtterTune - analysis/gpr/predict.py +# +# Copyright (c) 2017-18, Carnegie Mellon University Database Group +# +# Author: Dana Van Aken + +import tensorflow as tf +from sklearn.utils import assert_all_finite, check_array +from sklearn.utils.validation import FLOAT_DTYPES + + +class GPRResult(): + + def __init__(self, ypreds=None, sigmas=None): + self.ypreds = ypreds + self.sigmas = sigmas + + +def gpflow_predict(model, Xin): + Xin = check_array(Xin, copy=False, warn_on_dtype=True, dtype=FLOAT_DTYPES) + fmean, fvar, _, _, _ = model._build_predict(Xin) # pylint: 
disable=protected-access + y_mean_var = model.likelihood.predict_mean_and_var(fmean, fvar) + y_mean = y_mean_var[0] + y_var = y_mean_var[1] + y_std = tf.sqrt(y_var) + + session = model.enquire_session(session=None) + with session.as_default(): + y_mean_value = session.run(y_mean) + y_std_value = session.run(y_std) + assert_all_finite(y_mean_value) + assert_all_finite(y_std_value) + return GPRResult(y_mean_value, y_std_value) diff --git a/server/analysis/tests/test_gpr.py b/server/analysis/tests/test_gpr.py index 310c46f..7c6d798 100644 --- a/server/analysis/tests/test_gpr.py +++ b/server/analysis/tests/test_gpr.py @@ -14,7 +14,7 @@ from analysis.gp_tf import GPR from analysis.gp_tf import GPRGD from analysis.gpr import gpr_models from analysis.gpr.optimize import tf_optimize -from analysis.gpr.optimize import gpflow_predict +from analysis.gpr.predict import gpflow_predict # test numpy version GPR class TestGPRNP(unittest.TestCase): @@ -31,12 +31,12 @@ class TestGPRNP(unittest.TestCase): cls.model.fit(X_train, y_train, ridge=1.0) cls.gpr_result = cls.model.predict(X_test) - def test_gprnp_ypreds(self): + def test_ypreds(self): ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds] expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014] self.assertEqual(ypreds_round, expected_ypreds) - def test_gprnp_sigmas(self): + def test_sigmas(self): sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas] expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142] self.assertEqual(sigmas_round, expected_sigmas) @@ -57,23 +57,23 @@ class TestGPRTF(unittest.TestCase): cls.model.fit(X_train, y_train) cls.gpr_result = cls.model.predict(X_test) - def test_gprnp_ypreds(self): + def test_ypreds(self): ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds] expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014] self.assertEqual(ypreds_round, expected_ypreds) - def test_gprnp_sigmas(self): + def test_sigmas(self): sigmas_round = 
[round(x[0], 4) for x in self.gpr_result.sigmas] expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142] self.assertEqual(sigmas_round, expected_sigmas) # test GPFlow version GPR -class TestGPRGPF(unittest.TestCase): +class TestGPRGPFlow(unittest.TestCase): @classmethod def setUpClass(cls): - super(TestGPRGPF, cls).setUpClass() + super(TestGPRGPFlow, cls).setUpClass() boston = datasets.load_boston() data = boston['data'] X_train = data[0:500] @@ -88,51 +88,23 @@ class TestGPRGPF(unittest.TestCase): **model_kwargs) cls.gpr_result = gpflow_predict(cls.m.model, X_test) - def test_gprnp_ypreds(self): + def test_ypreds(self): ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds] expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014] self.assertEqual(ypreds_round, expected_ypreds) - def test_gprnp_sigmas(self): + def test_sigmas(self): sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas] expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142] self.assertEqual(sigmas_round, expected_sigmas) -# test Tensorflow GPRGD model -class TestGPRGD(unittest.TestCase): +# test GPFlow version Gradient Descent +class TestGDGPFlow(unittest.TestCase): @classmethod def setUpClass(cls): - super(TestGPRGD, cls).setUpClass() - boston = datasets.load_boston() - data = boston['data'] - X_train = data[0:500] - X_test = data[500:] - y_train = boston['target'][0:500].reshape(500, 1) - Xmin = np.min(X_train, 0) - Xmax = np.max(X_train, 0) - cls.model = GPRGD(length_scale=1.0, magnitude=1.0, max_iter=1, learning_rate=0, ridge=1.0) - cls.model.fit(X_train, y_train, Xmin, Xmax) - cls.gpr_result = cls.model.predict(X_test) - - def test_gprnp_ypreds(self): - ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds] - expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014] - self.assertEqual(ypreds_round, expected_ypreds) - - def test_gprnp_sigmas(self): - sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas] - 
expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142] - self.assertEqual(sigmas_round, expected_sigmas) - - -# test Gradient Descent in GPFlow model -class TestGPFGD(unittest.TestCase): - - @classmethod - def setUpClass(cls): - super(TestGPFGD, cls).setUpClass() + super(TestGDGPFlow, cls).setUpClass() boston = datasets.load_boston() data = boston['data'] X_train = data[0:500] @@ -158,23 +130,23 @@ class TestGPFGD(unittest.TestCase): cls.m = gpr_models.create_model('BasicGP', X=X_train, y=y_train, **model_kwargs) cls.gpr_result = tf_optimize(cls.m.model, X_test, **opt_kwargs) - def test_gprnp_ypreds(self): + def test_ypreds(self): ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds] expected_ypreds = [0.5272] self.assertEqual(ypreds_round, expected_ypreds) - def test_gprnp_sigmas(self): + def test_sigmas(self): sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas] expected_sigmas = [1.4153] self.assertEqual(sigmas_round, expected_sigmas) -# test Gradient Descent in Tensorflow GPRGD model -class TestGPRGDGD(unittest.TestCase): +# test Tensorflow version Gradient Descent +class TestGDTF(unittest.TestCase): @classmethod def setUpClass(cls): - super(TestGPRGDGD, cls).setUpClass() + super(TestGDTF, cls).setUpClass() boston = datasets.load_boston() data = boston['data'] X_train = data[0:500] @@ -191,12 +163,12 @@ class TestGPRGDGD(unittest.TestCase): cls.model.fit(X_train, y_train, Xmin, Xmax) cls.gpr_result = cls.model.predict(X_test) - def test_gprnp_ypreds(self): + def test_ypreds(self): ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds] expected_ypreds = [0.5272] self.assertEqual(ypreds_round, expected_ypreds) - def test_gprnp_sigmas(self): + def test_sigmas(self): sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas] expected_sigmas = [1.4153] self.assertEqual(sigmas_round, expected_sigmas) diff --git a/server/website/website/tasks/async_tasks.py b/server/website/website/tasks/async_tasks.py index 
b2cb870..85c35d6 100644 --- a/server/website/website/tasks/async_tasks.py +++ b/server/website/website/tasks/async_tasks.py @@ -22,7 +22,8 @@ from analysis.gp_tf import GPRGD from analysis.nn_tf import NeuralNet from analysis.gpr import gpr_models from analysis.gpr import ucb -from analysis.gpr.optimize import tf_optimize, gpflow_predict +from analysis.gpr.optimize import tf_optimize +from analysis.gpr.predict import gpflow_predict from analysis.preprocessing import Bin, DummyEncoder from analysis.constraints import ParamConstraintHelper from website.models import PipelineData, PipelineRun, Result, Workload, SessionKnob, MetricCatalog @@ -932,15 +933,23 @@ def map_workload(map_workload_input): # and then predict the performance of each metric for each of # the knob configurations attempted so far by the target. y_col = y_col.reshape(-1, 1) - model_kwargs = {'lengthscales': params['GPR_LENGTH_SCALE'], - 'variance': params['GPR_MAGNITUDE'], - 'noise_variance': params['GPR_RIDGE']} - tf.reset_default_graph() - graph = tf.get_default_graph() - gpflow.reset_default_session(graph=graph) - m = gpr_models.create_model(params['GPR_MODEL_NAME'], X=X_scaled, y=y_col, - **model_kwargs) - gpr_result = gpflow_predict(m.model, X_target) + if params['GPR_USE_GPFLOW']: + model_kwargs = {'lengthscales': params['GPR_LENGTH_SCALE'], + 'variance': params['GPR_MAGNITUDE'], + 'noise_variance': params['GPR_RIDGE']} + tf.reset_default_graph() + graph = tf.get_default_graph() + gpflow.reset_default_session(graph=graph) + m = gpr_models.create_model(params['GPR_MODEL_NAME'], X=X_scaled, y=y_col, + **model_kwargs) + gpr_result = gpflow_predict(m.model, X_target) + else: + model = GPRNP(length_scale=params['GPR_LENGTH_SCALE'], + magnitude=params['GPR_MAGNITUDE'], + max_train_size=params['GPR_MAX_TRAIN_SIZE'], + batch_size=params['GPR_BATCH_SIZE']) + model.fit(X_scaled, y_col, ridge=params['GPR_RIDGE']) + gpr_result = model.predict(X_target) predictions[:, j] = gpr_result.ypreds.ravel() # Bin 
each of the predicted metric columns by deciles and then # compute the score (i.e., distance) between the target workload