fix old gpr model, add noise variance

This commit is contained in:
bohanjason 2019-11-29 19:39:20 -05:00 committed by Dana Van Aken
parent 7be5b89975
commit 5e51cf7025
3 changed files with 54 additions and 11 deletions

View File

@ -33,6 +33,7 @@ class GPRNP(object):
self.K = None self.K = None
self.K_inv = None self.K_inv = None
self.y_best = None self.y_best = None
self.ridge = None
def __repr__(self): def __repr__(self):
rep = "" rep = ""
@ -77,7 +78,7 @@ class GPRNP(object):
raise Exception("Input contains non-finite values: {}" raise Exception("Input contains non-finite values: {}"
.format(X[~finite_els])) .format(X[~finite_els]))
def fit(self, X_train, y_train, ridge=0.01): def fit(self, X_train, y_train, ridge=1.0):
self._reset() self._reset()
X_train, y_train = self.check_X_y(X_train, y_train) X_train, y_train = self.check_X_y(X_train, y_train)
if X_train.ndim != 2 or y_train.ndim != 2: if X_train.ndim != 2 or y_train.ndim != 2:
@ -86,6 +87,7 @@ class GPRNP(object):
self.X_train = np.float32(X_train) self.X_train = np.float32(X_train)
self.y_train = np.float32(y_train) self.y_train = np.float32(y_train)
sample_size = self.X_train.shape[0] sample_size = self.X_train.shape[0]
self.ridge = ridge
if np.isscalar(ridge): if np.isscalar(ridge):
ridge = np.ones(sample_size) * ridge ridge = np.ones(sample_size) * ridge
assert isinstance(ridge, np.ndarray) assert isinstance(ridge, np.ndarray)
@ -120,8 +122,12 @@ class GPRNP(object):
K3 = self.magnitude * np.exp(-ed(xt_, xt_) / length_scale) K3 = self.magnitude * np.exp(-ed(xt_, xt_) / length_scale)
K2_trans = np.transpose(K2) K2_trans = np.transpose(K2)
yhat = np.matmul(K2_trans, np.matmul(self.K_inv, self.y_train)) yhat = np.matmul(K2_trans, np.matmul(self.K_inv, self.y_train))
sigma = np.sqrt(np.diag(K3 - np.matmul(K2_trans, np.matmul(self.K_inv, K2)))) \ xt_size = K3.shape[0]
.reshape(xt_.shape[0], 1) ridge = self.ridge
if np.isscalar(ridge):
ridge = np.ones(xt_size) * ridge
sigma = np.sqrt(np.diag(K3 - np.matmul(K2_trans, np.matmul(self.K_inv, K2))
+ np.diag(ridge))).reshape(xt_.shape[0], 1)
u = (self.y_best - yhat) / sigma u = (self.y_best - yhat) / sigma
phi1 = 0.5 * special.erf(u / np.sqrt(2.0)) + 0.5 phi1 = 0.5 * special.erf(u / np.sqrt(2.0)) + 0.5
phi2 = (1.0 / np.sqrt(2.0 * np.pi)) * np.exp(np.square(u) * (-0.5)) phi2 = (1.0 / np.sqrt(2.0 * np.pi)) * np.exp(np.square(u) * (-0.5))

View File

@ -56,6 +56,7 @@ class GPR(object):
self.graph = None self.graph = None
self.vars = None self.vars = None
self.ops = None self.ops = None
self.ridge = None
def build_graph(self): def build_graph(self):
self.vars = {} self.vars = {}
@ -117,13 +118,15 @@ class GPR(object):
# Nodes for yhat/sigma computation # Nodes for yhat/sigma computation
K2 = tf.placeholder(tf.float32, name="K2") K2 = tf.placeholder(tf.float32, name="K2")
K3 = tf.placeholder(tf.float32, name="K3") K3 = tf.placeholder(tf.float32, name="K3")
ridge_test_ph = tf.placeholder(tf.float32, name="ridge_test_ph")
yhat_ = tf.cast(tf.matmul(tf.transpose(K2), xy_), tf.float32) yhat_ = tf.cast(tf.matmul(tf.transpose(K2), xy_), tf.float32)
if self.check_numerics: if self.check_numerics:
yhat_ = tf.check_numerics(yhat_, "yhat_: ") yhat_ = tf.check_numerics(yhat_, "yhat_: ")
sv1 = tf.matmul(tf.transpose(K2), tf.matmul(K_inv, K2)) sv1 = tf.matmul(tf.transpose(K2), tf.matmul(K_inv, K2))
if self.check_numerics: if self.check_numerics:
sv1 = tf.check_numerics(sv1, "sv1: ") sv1 = tf.check_numerics(sv1, "sv1: ")
sig_val = tf.cast((tf.sqrt(tf.diag_part(K3 - sv1))), tf.float32) sig_val = tf.cast((tf.sqrt(tf.diag_part(K3 + tf.diag(ridge_test_ph) - sv1))),
tf.float32)
if self.check_numerics: if self.check_numerics:
sig_val = tf.check_numerics(sig_val, "sig_val: ") sig_val = tf.check_numerics(sig_val, "sig_val: ")
@ -131,6 +134,7 @@ class GPR(object):
self.vars['K3_h'] = K3 self.vars['K3_h'] = K3
self.ops['yhat_op'] = yhat_ self.ops['yhat_op'] = yhat_
self.ops['sig_op'] = sig_val self.ops['sig_op'] = sig_val
self.ops['ridge_test_h'] = ridge_test_ph
# Compute y_best (min y) # Compute y_best (min y)
y_best_op = tf.cast(tf.reduce_min(yt_, 0, True), tf.float32) y_best_op = tf.cast(tf.reduce_min(yt_, 0, True), tf.float32)
@ -186,6 +190,7 @@ class GPR(object):
self.X_train = np.float32(X_train) self.X_train = np.float32(X_train)
self.y_train = np.float32(y_train) self.y_train = np.float32(y_train)
sample_size = self.X_train.shape[0] sample_size = self.X_train.shape[0]
self.ridge = ridge
if np.isscalar(ridge): if np.isscalar(ridge):
ridge = np.ones(sample_size) * ridge ridge = np.ones(sample_size) * ridge
@ -224,7 +229,9 @@ class GPR(object):
X_test = np.float32(GPR.check_array(X_test)) X_test = np.float32(GPR.check_array(X_test))
test_size = X_test.shape[0] test_size = X_test.shape[0]
sample_size = self.X_train.shape[0] sample_size = self.X_train.shape[0]
ridge = self.ridge
if np.isscalar(ridge):
ridge_test = np.ones(test_size) * ridge
arr_offset = 0 arr_offset = 0
yhats = np.zeros([test_size, 1]) yhats = np.zeros([test_size, 1])
sigmas = np.zeros([test_size, 1]) sigmas = np.zeros([test_size, 1])
@ -246,7 +253,7 @@ class GPR(object):
K2 = self.vars['K2_h'] K2 = self.vars['K2_h']
K3 = self.vars['K3_h'] K3 = self.vars['K3_h']
xy_ph = self.vars['xy_h'] xy_ph = self.vars['xy_h']
ridge_test_ph = self.ops['ridge_test_h']
while arr_offset < test_size: while arr_offset < test_size:
if arr_offset + self.batch_size_ > test_size: if arr_offset + self.batch_size_ > test_size:
end_offset = test_size end_offset = test_size
@ -270,7 +277,8 @@ class GPR(object):
K3_ = sess.run(K_op, feed_dict={X_dists: dists2}) K3_ = sess.run(K_op, feed_dict={X_dists: dists2})
sigma = np.zeros([1, batch_len], np.float32) sigma = np.zeros([1, batch_len], np.float32)
sigma[0] = sess.run(sig_val, feed_dict={K_inv_ph: self.K_inv, K2: K2_, K3: K3_}) sigma[0] = sess.run(sig_val, feed_dict={K_inv_ph: self.K_inv, K2: K2_,
K3: K3_, ridge_test_ph: ridge_test})
sigma = np.transpose(sigma) sigma = np.transpose(sigma)
yhats[arr_offset: end_offset] = yhat yhats[arr_offset: end_offset] = yhat
sigmas[arr_offset: end_offset] = sigma sigmas[arr_offset: end_offset] = sigma
@ -356,7 +364,7 @@ class GPRGD(GPR):
yhat_gd = tf.cast(tf.matmul(tf.transpose(K2__), self.xy_), tf.float32) yhat_gd = tf.cast(tf.matmul(tf.transpose(K2__), self.xy_), tf.float32)
if self.check_numerics is True: if self.check_numerics is True:
yhat_gd = tf.check_numerics(yhat_gd, message="yhat: ") yhat_gd = tf.check_numerics(yhat_gd, message="yhat: ")
sig_val = tf.cast((tf.sqrt(self.magnitude - tf.matmul( sig_val = tf.cast((tf.sqrt(self.magnitude + ridge - tf.matmul(
tf.transpose(K2__), tf.matmul(self.K_inv, K2__)))), tf.float32) tf.transpose(K2__), tf.matmul(self.K_inv, K2__)))), tf.float32)
if self.check_numerics is True: if self.check_numerics is True:
sig_val = tf.check_numerics(sig_val, message="sigma: ") sig_val = tf.check_numerics(sig_val, message="sigma: ")

View File

@ -4,10 +4,11 @@
# Copyright (c) 2017-18, Carnegie Mellon University Database Group # Copyright (c) 2017-18, Carnegie Mellon University Database Group
# #
import unittest import unittest
import numpy as np
from sklearn import datasets from sklearn import datasets
from analysis.gp import GPRNP from analysis.gp import GPRNP
from analysis.gp_tf import GPR from analysis.gp_tf import GPR
from analysis.gp_tf import GPRGD
# test numpy version GPR # test numpy version GPR
class TestGPRNP(unittest.TestCase): class TestGPRNP(unittest.TestCase):
@ -31,7 +32,7 @@ class TestGPRNP(unittest.TestCase):
def test_gprnp_sigmas(self): def test_gprnp_sigmas(self):
sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas] sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas]
expected_sigmas = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142]
self.assertEqual(sigmas_round, expected_sigmas) self.assertEqual(sigmas_round, expected_sigmas)
@ -57,5 +58,33 @@ class TestGPRTF(unittest.TestCase):
def test_gprnp_sigmas(self): def test_gprnp_sigmas(self):
sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas] sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas]
expected_sigmas = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142]
self.assertEqual(sigmas_round, expected_sigmas)
# test Tensorflow GPRGD model
class TestGPRGD(unittest.TestCase):
@classmethod
def setUpClass(cls):
super(TestGPRGD, cls).setUpClass()
boston = datasets.load_boston()
data = boston['data']
X_train = data[0:500]
X_test = data[500:]
y_train = boston['target'][0:500].reshape(500, 1)
Xmin = np.min(X_train, 0)
Xmax = np.max(X_train, 0)
cls.model = GPRGD(length_scale=1.0, magnitude=1.0, max_iter=1, learning_rate=0)
cls.model.fit(X_train, y_train, Xmin, Xmax, ridge=1.0)
cls.gpr_result = cls.model.predict(X_test)
def test_gprnp_ypreds(self):
ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds]
expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014]
self.assertEqual(ypreds_round, expected_ypreds)
def test_gprnp_sigmas(self):
sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas]
expected_sigmas = [1.4142, 1.4142, 1.4142, 1.4142, 1.4142, 1.4142]
self.assertEqual(sigmas_round, expected_sigmas) self.assertEqual(sigmas_round, expected_sigmas)