From 7ebb2ea47395fa10ad68552580f23f9a5deea921 Mon Sep 17 00:00:00 2001 From: bohanjason Date: Sat, 30 Nov 2019 03:30:35 -0500 Subject: [PATCH] update old gpr model --- server/analysis/gp_tf.py | 96 +++++++++++++++------ server/analysis/gpr/gpr_models.py | 2 +- server/analysis/simulation.py | 11 ++- server/analysis/tests/test_gpr.py | 8 +- server/website/website/tasks/async_tasks.py | 5 +- 5 files changed, 84 insertions(+), 38 deletions(-) diff --git a/server/analysis/gp_tf.py b/server/analysis/gp_tf.py index 704f4bb..b1916ee 100644 --- a/server/analysis/gp_tf.py +++ b/server/analysis/gp_tf.py @@ -36,18 +36,21 @@ class GPRGDResult(GPRResult): class GPR(object): - def __init__(self, length_scale=1.0, magnitude=1.0, max_train_size=7000, - batch_size=3000, num_threads=4, check_numerics=True, debug=False): + def __init__(self, length_scale=1.0, magnitude=1.0, ridge=1.0, max_train_size=7000, + batch_size=3000, num_threads=4, check_numerics=True, debug=False, + hyperparameter_trainable=False): assert np.isscalar(length_scale) assert np.isscalar(magnitude) assert length_scale > 0 and magnitude > 0 self.length_scale = length_scale self.magnitude = magnitude + self.ridge = ridge self.max_train_size_ = max_train_size self.batch_size_ = batch_size self.num_threads_ = num_threads self.check_numerics = check_numerics self.debug = debug + self.hyperparameter_trainable = hyperparameter_trainable self.X_train = None self.y_train = None self.xy_ = None @@ -56,20 +59,36 @@ class GPR(object): self.graph = None self.vars = None self.ops = None - self.ridge = None + def build_graph(self): self.vars = {} self.ops = {} self.graph = tf.Graph() with self.graph.as_default(): - mag_const = tf.constant(self.magnitude, - dtype=np.float32, - name='magnitude') - ls_const = tf.constant(self.length_scale, - dtype=np.float32, - name='length_scale') - + if self.hyperparameter_trainable: + mag_ = np.log(np.exp(self.magnitude) - 1) + ls_ = np.log(np.exp(self.length_scale) - 1) + noise_ = np.log(np.exp(self.ridge) - 1) + mag_var = tf.nn.softplus(tf.Variable(mag_, + dtype=np.float32, + name='magnitude')) + ls_var = tf.nn.softplus(tf.Variable(ls_, + dtype=np.float32, + name='length_scale')) + noise_var = tf.nn.softplus(tf.Variable(noise_, + dtype=np.float32, + name='noise_scale')) + else: + mag_var = tf.constant(self.magnitude, + dtype=np.float32, + name='magnitude') + ls_var = tf.constant(self.length_scale, + dtype=np.float32, + name='length_scale') + noise_var = tf.constant(self.ridge, + dtype=np.float32, + name='noise_scale') # Nodes for distance computation v1 = tf.placeholder(tf.float32, name="v1") v2 = tf.placeholder(tf.float32, name="v2") @@ -80,11 +99,14 @@ class GPR(object): self.vars['v1_h'] = v1 self.vars['v2_h'] = v2 self.ops['dist_op'] = dist_op + self.vars['mag_v'] = mag_var + self.vars['ls_v'] = ls_var + self.vars['noise_v'] = noise_var # Nodes for kernel computation X_dists = tf.placeholder(tf.float32, name='X_dists') ridge_ph = tf.placeholder(tf.float32, name='ridge') - K_op = mag_const * tf.exp(-X_dists / ls_const) + K_op = mag_var * tf.exp(-X_dists / ls_var) # pylint: disable=invalid-name if self.check_numerics: K_op = tf.check_numerics(K_op, "K_op: ") K_ridge_op = K_op + tf.diag(ridge_ph) @@ -184,23 +206,27 @@ class GPR(object): raise Exception("Input contains non-finite values: {}" .format(X[~finite_els])) - def fit(self, X_train, y_train, ridge=1.0): + def fit(self, X_train, y_train): self._reset() X_train, y_train = self.check_X_y(X_train, y_train) self.X_train = np.float32(X_train) self.y_train = np.float32(y_train) sample_size = self.X_train.shape[0] - self.ridge = ridge - - if np.isscalar(ridge): - ridge = np.ones(sample_size) * ridge - assert isinstance(ridge, np.ndarray) - assert ridge.ndim == 1 + ridge = self.ridge X_dists = np.zeros((sample_size, sample_size), dtype=np.float32) with tf.Session(graph=self.graph, config=tf.ConfigProto( intra_op_parallelism_threads=self.num_threads_)) as sess: + init = tf.global_variables_initializer() + sess.run(init) + + noise_var = self.vars['noise_v'] + if np.isscalar(ridge): + ridge = np.ones(sample_size) * sess.run(noise_var) + assert isinstance(ridge, np.ndarray) + assert ridge.ndim == 1 + dist_op = self.ops['dist_op'] v1, v2 = self.vars['v1_h'], self.vars['v2_h'] for i in range(sample_size): @@ -230,14 +256,18 @@ class GPR(object): test_size = X_test.shape[0] sample_size = self.X_train.shape[0] ridge = self.ridge - if np.isscalar(ridge): - ridge_test = np.ones(test_size) * ridge arr_offset = 0 yhats = np.zeros([test_size, 1]) sigmas = np.zeros([test_size, 1]) with tf.Session(graph=self.graph, config=tf.ConfigProto( intra_op_parallelism_threads=self.num_threads_)) as sess: + init = tf.global_variables_initializer() + sess.run(init) + + noise_var = self.vars['noise_v'] + if np.isscalar(ridge): + ridge_test = np.ones(test_size) * sess.run(noise_var) # Nodes for distance operation dist_op = self.ops['dist_op'] v1 = self.vars['v1_h'] @@ -320,6 +350,7 @@ class GPRGD(GPR): def __init__(self, length_scale=1.0, magnitude=1.0, + ridge=1.0, max_train_size=7000, batch_size=3000, num_threads=4, @@ -327,12 +358,19 @@ class GPRGD(GPR): epsilon=1e-6, max_iter=100, sigma_multiplier=3.0, - mu_multiplier=1.0): + mu_multiplier=1.0, + check_numerics=True, + debug=False, + hyperparameter_trainable=False): super(GPRGD, self).__init__(length_scale=length_scale, magnitude=magnitude, + ridge=ridge, max_train_size=max_train_size, batch_size=batch_size, - num_threads=num_threads) + num_threads=num_threads, + check_numerics=check_numerics, + debug=debug, + hyperparameter_trainable=hyperparameter_trainable) self.learning_rate = learning_rate self.epsilon = epsilon self.max_iter = max_iter @@ -341,8 +379,8 @@ class GPRGD(GPR): self.X_min = None self.X_max = None - def fit(self, X_train, y_train, X_min, X_max, ridge): # pylint: disable=arguments-differ - super(GPRGD, self).fit(X_train, y_train, ridge) + def fit(self, X_train, y_train, X_min, X_max): # pylint: disable=arguments-differ + super(GPRGD, self).fit(X_train, y_train) self.X_min = X_min self.X_max = X_max @@ -354,17 +392,21 @@ class GPRGD(GPR): xt_assign_op = xt_.assign(xt_ph) init = tf.global_variables_initializer() sess.run(init) + + mag_var = self.vars['mag_v'] + ls_var = self.vars['ls_v'] + noise_var = self.vars['noise_v'] K2_mat = tf.transpose(tf.expand_dims(tf.sqrt(tf.reduce_sum(tf.pow( tf.subtract(xt_, self.X_train), 2), 1)), 0)) if self.check_numerics is True: K2_mat = tf.check_numerics(K2_mat, "K2_mat: ") - K2__ = tf.cast(self.magnitude * tf.exp(-K2_mat / self.length_scale), tf.float32) + K2__ = tf.cast(mag_var * tf.exp(-K2_mat / ls_var), tf.float32) # pylint: disable=invalid-name if self.check_numerics is True: K2__ = tf.check_numerics(K2__, "K2__: ") yhat_gd = tf.cast(tf.matmul(tf.transpose(K2__), self.xy_), tf.float32) if self.check_numerics is True: yhat_gd = tf.check_numerics(yhat_gd, message="yhat: ") - sig_val = tf.cast((tf.sqrt(self.magnitude + ridge - tf.matmul( + sig_val = tf.cast((tf.sqrt(mag_var + noise_var - tf.matmul( tf.transpose(K2__), tf.matmul(self.K_inv, K2__)))), tf.float32) if self.check_numerics is True: sig_val = tf.check_numerics(sig_val, message="sigma: ") @@ -438,7 +480,7 @@ class GPRGD(GPR): sigmas_it = np.empty((self.max_iter + 1,)) * np.nan losses_it = np.empty((self.max_iter + 1,)) * np.nan confs_it = np.empty((self.max_iter + 1, nfeats)) * np.nan - + sess.run(init) sess.run(assign_op, feed_dict={xt_ph: X_test_batch[i]}) step = 0 for step in range(self.max_iter): diff --git a/server/analysis/gpr/gpr_models.py b/server/analysis/gpr/gpr_models.py index ec47c7d..82b04bf 100644 --- a/server/analysis/gpr/gpr_models.py +++ b/server/analysis/gpr/gpr_models.py @@ -111,7 +111,7 @@ class BasicGP(BaseModel): ] def _build_kernel(self, kernel_kwargs, **kwargs): - k = gpflow.kernels.Exponential(**kernel_kwargs[0]) + k = gpflow.kernels.Matern12(lengthscales=2, **kernel_kwargs[0]) if kwargs.pop('optimize_hyperparameters'): k.lengthscales.transform = gpflow.transforms.Logistic( *self._LENGTHSCALE_BOUNDS) diff --git a/server/analysis/simulation.py b/server/analysis/simulation.py index 77cb738..7fb6274 100644 --- a/server/analysis/simulation.py +++ b/server/analysis/simulation.py @@ -228,7 +228,7 @@ def gpr(env, config, n_loops=100): # Tensorflow get broken if we use the training data points as # starting points for GPRGD. X_samples = np.vstack((X_samples, np.array(entry[0]) * 0.97 + 0.01)) - model = GPRGD(length_scale=1.0, + model = GPRGD(length_scale=2.0, magnitude=1.0, max_train_size=2000, batch_size=100, @@ -236,11 +236,14 @@ def gpr(env, config, n_loops=100): learning_rate=0.01, epsilon=1e-6, max_iter=500, - sigma_multiplier=3.0, - mu_multiplier=1.0) + sigma_multiplier=1.0, + mu_multiplier=1.0, + ridge=1.0, + debug=False, + hyperparameter_trainable=True) actions, rewards = memory.get_all() - model.fit(np.array(actions), -np.array(rewards), X_min, X_max, ridge=0.01) + model.fit(np.array(actions), -np.array(rewards), X_min, X_max) res = model.predict(X_samples) best_config_idx = np.argmin(res.minl.ravel()) best_config = res.minl_conf[best_config_idx, :] diff --git a/server/analysis/tests/test_gpr.py b/server/analysis/tests/test_gpr.py index 27f7fec..fa60018 100644 --- a/server/analysis/tests/test_gpr.py +++ b/server/analysis/tests/test_gpr.py @@ -47,8 +47,8 @@ class TestGPRTF(unittest.TestCase): X_train = data[0:500] X_test = data[500:] y_train = boston['target'][0:500].reshape(500, 1) - cls.model = GPR(length_scale=1.0, magnitude=1.0) - cls.model.fit(X_train, y_train, ridge=1.0) + cls.model = GPR(length_scale=1.0, magnitude=1.0, ridge=1.0) + cls.model.fit(X_train, y_train) cls.gpr_result = cls.model.predict(X_test) def test_gprnp_ypreds(self): @@ -75,8 +75,8 @@ class TestGPRGD(unittest.TestCase): y_train = boston['target'][0:500].reshape(500, 1) Xmin = np.min(X_train, 0) Xmax = np.max(X_train, 0) - cls.model = GPRGD(length_scale=1.0, magnitude=1.0, max_iter=1, learning_rate=0) - cls.model.fit(X_train, y_train, Xmin, Xmax, ridge=1.0) + cls.model = GPRGD(length_scale=1.0, magnitude=1.0, max_iter=1, learning_rate=0, ridge=1.0) + cls.model.fit(X_train, y_train, Xmin, Xmax) cls.gpr_result = cls.model.predict(X_test) def test_gprnp_ypreds(self): diff --git a/server/website/website/tasks/async_tasks.py b/server/website/website/tasks/async_tasks.py index 9712054..de3b915 100644 --- a/server/website/website/tasks/async_tasks.py +++ b/server/website/website/tasks/async_tasks.py @@ -645,8 +645,9 @@ def configuration_recommendation(recommendation_input): epsilon=DEFAULT_EPSILON, max_iter=MAX_ITER, sigma_multiplier=DEFAULT_SIGMA_MULTIPLIER, - mu_multiplier=DEFAULT_MU_MULTIPLIER) - model.fit(X_scaled, y_scaled, X_min, X_max, ridge=DEFAULT_RIDGE) + mu_multiplier=DEFAULT_MU_MULTIPLIER, + ridge=DEFAULT_RIDGE) + model.fit(X_scaled, y_scaled, X_min, X_max) res = model.predict(X_samples, constraint_helper=constraint_helper) best_config_idx = np.argmin(res.minl.ravel())