diff --git a/server/analysis/gp.py b/server/analysis/gp.py index 9e5be69..f01ad0f 100644 --- a/server/analysis/gp.py +++ b/server/analysis/gp.py @@ -33,6 +33,7 @@ class GPRNP(object): self.K = None self.K_inv = None self.y_best = None + self.ridge = None def __repr__(self): rep = "" @@ -77,7 +78,7 @@ class GPRNP(object): raise Exception("Input contains non-finite values: {}" .format(X[~finite_els])) - def fit(self, X_train, y_train, ridge=0.01): + def fit(self, X_train, y_train, ridge=1.0): self._reset() X_train, y_train = self.check_X_y(X_train, y_train) if X_train.ndim != 2 or y_train.ndim != 2: @@ -86,6 +87,7 @@ class GPRNP(object): self.X_train = np.float32(X_train) self.y_train = np.float32(y_train) sample_size = self.X_train.shape[0] + self.ridge = ridge if np.isscalar(ridge): ridge = np.ones(sample_size) * ridge assert isinstance(ridge, np.ndarray) @@ -120,8 +122,12 @@ class GPRNP(object): K3 = self.magnitude * np.exp(-ed(xt_, xt_) / length_scale) K2_trans = np.transpose(K2) yhat = np.matmul(K2_trans, np.matmul(self.K_inv, self.y_train)) - sigma = np.sqrt(np.diag(K3 - np.matmul(K2_trans, np.matmul(self.K_inv, K2)))) \ - .reshape(xt_.shape[0], 1) + xt_size = K3.shape[0] + ridge = self.ridge + if np.isscalar(ridge): + ridge = np.ones(xt_size) * ridge + sigma = np.sqrt(np.diag(K3 - np.matmul(K2_trans, np.matmul(self.K_inv, K2)) + + np.diag(ridge))).reshape(xt_.shape[0], 1) u = (self.y_best - yhat) / sigma phi1 = 0.5 * special.erf(u / np.sqrt(2.0)) + 0.5 phi2 = (1.0 / np.sqrt(2.0 * np.pi)) * np.exp(np.square(u) * (-0.5)) diff --git a/server/analysis/gp_tf.py b/server/analysis/gp_tf.py index d3ff7d1..704f4bb 100644 --- a/server/analysis/gp_tf.py +++ b/server/analysis/gp_tf.py @@ -56,6 +56,7 @@ class GPR(object): self.graph = None self.vars = None self.ops = None + self.ridge = None def build_graph(self): self.vars = {} @@ -117,13 +118,15 @@ class GPR(object): # Nodes for yhat/sigma computation K2 = tf.placeholder(tf.float32, name="K2") K3 = tf.placeholder(tf.float32, name="K3") + ridge_test_ph = tf.placeholder(tf.float32, name="ridge_test_ph") yhat_ = tf.cast(tf.matmul(tf.transpose(K2), xy_), tf.float32) if self.check_numerics: yhat_ = tf.check_numerics(yhat_, "yhat_: ") sv1 = tf.matmul(tf.transpose(K2), tf.matmul(K_inv, K2)) if self.check_numerics: sv1 = tf.check_numerics(sv1, "sv1: ") - sig_val = tf.cast((tf.sqrt(tf.diag_part(K3 - sv1))), tf.float32) + sig_val = tf.cast((tf.sqrt(tf.diag_part(K3 + tf.diag(ridge_test_ph) - sv1))), + tf.float32) if self.check_numerics: sig_val = tf.check_numerics(sig_val, "sig_val: ") @@ -131,6 +134,7 @@ class GPR(object): self.vars['K3_h'] = K3 self.ops['yhat_op'] = yhat_ self.ops['sig_op'] = sig_val + self.ops['ridge_test_h'] = ridge_test_ph # Compute y_best (min y) y_best_op = tf.cast(tf.reduce_min(yt_, 0, True), tf.float32) @@ -186,6 +190,7 @@ class GPR(object): self.X_train = np.float32(X_train) self.y_train = np.float32(y_train) sample_size = self.X_train.shape[0] + self.ridge = ridge if np.isscalar(ridge): ridge = np.ones(sample_size) * ridge @@ -224,7 +229,9 @@ class GPR(object): X_test = np.float32(GPR.check_array(X_test)) test_size = X_test.shape[0] sample_size = self.X_train.shape[0] - + ridge = self.ridge + if np.isscalar(ridge): + ridge_test = np.ones(test_size) * ridge arr_offset = 0 yhats = np.zeros([test_size, 1]) sigmas = np.zeros([test_size, 1]) @@ -246,7 +253,7 @@ class GPR(object): K2 = self.vars['K2_h'] K3 = self.vars['K3_h'] xy_ph = self.vars['xy_h'] - + ridge_test_ph = self.ops['ridge_test_h'] while arr_offset < test_size: if arr_offset + self.batch_size_ > test_size: end_offset = test_size @@ -270,7 +277,8 @@ class GPR(object): K3_ = sess.run(K_op, feed_dict={X_dists: dists2}) sigma = np.zeros([1, batch_len], np.float32) - sigma[0] = sess.run(sig_val, feed_dict={K_inv_ph: self.K_inv, K2: K2_, K3: K3_}) + sigma[0] = sess.run(sig_val, feed_dict={K_inv_ph: self.K_inv, K2: K2_, + K3: K3_, ridge_test_ph: ridge_test}) sigma = np.transpose(sigma) yhats[arr_offset: end_offset] = yhat sigmas[arr_offset: end_offset] = sigma @@ -356,7 +364,7 @@ class GPRGD(GPR): yhat_gd = tf.cast(tf.matmul(tf.transpose(K2__), self.xy_), tf.float32) if self.check_numerics is True: yhat_gd = tf.check_numerics(yhat_gd, message="yhat: ") - sig_val = tf.cast((tf.sqrt(self.magnitude - tf.matmul( + sig_val = tf.cast((tf.sqrt(self.magnitude + ridge - tf.matmul( tf.transpose(K2__), tf.matmul(self.K_inv, K2__)))), tf.float32) if self.check_numerics is True: sig_val = tf.check_numerics(sig_val, message="sigma: ") diff --git a/server/analysis/tests/test_gpr.py b/server/analysis/tests/test_gpr.py index 43ca1b1..27f7fec 100644 --- a/server/analysis/tests/test_gpr.py +++ b/server/analysis/tests/test_gpr.py @@ -4,10 +4,11 @@ # Copyright (c) 2017-18, Carnegie Mellon University Database Group # import unittest +import numpy as np from sklearn import datasets from analysis.gp import GPRNP from analysis.gp_tf import GPR - +from analysis.gp_tf import GPRGD # test numpy version GPR class TestGPRNP(unittest.TestCase): @@ -31,7 +32,7 @@ class TestGPRNP(unittest.TestCase): def test_gprnp_sigmas(self): sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas] - expected_sigmas = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142] self.assertEqual(sigmas_round, expected_sigmas) @@ -57,5 +58,33 @@ class TestGPRTF(unittest.TestCase): def test_gprnp_sigmas(self): sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas] - expected_sigmas = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142] + self.assertEqual(sigmas_round, expected_sigmas) + + +# test Tensorflow GPRGD model +class TestGPRGD(unittest.TestCase): + + @classmethod + def setUpClass(cls): + super(TestGPRGD, cls).setUpClass() + boston = datasets.load_boston() + data = boston['data'] + X_train = data[0:500] + X_test = data[500:] + y_train = boston['target'][0:500].reshape(500, 1) + Xmin = np.min(X_train, 0) + Xmax = np.max(X_train, 0) + cls.model = GPRGD(length_scale=1.0, magnitude=1.0, max_iter=1, learning_rate=0) + cls.model.fit(X_train, y_train, Xmin, Xmax, ridge=1.0) + cls.gpr_result = cls.model.predict(X_test) + + def test_gprnp_ypreds(self): + ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds] + expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014] + self.assertEqual(ypreds_round, expected_ypreds) + + def test_gprnp_sigmas(self): + sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas] + expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142] self.assertEqual(sigmas_round, expected_sigmas)