# # OtterTune - gp_tf.py # # Copyright (c) 2017-18, Carnegie Mellon University Database Group # ''' Created on Aug 18, 2016 @author: Bohan Zhang, Dana Van Aken ''' import gc import numpy as np import tensorflow as tf from .util import get_analysis_logger LOG = get_analysis_logger(__name__) class GPRResult(object): def __init__(self, ypreds=None, sigmas=None): self.ypreds = ypreds self.sigmas = sigmas class GPRGDResult(GPRResult): def __init__(self, ypreds=None, sigmas=None, minl=None, minl_conf=None): super(GPRGDResult, self).__init__(ypreds, sigmas) self.minl = minl self.minl_conf = minl_conf class GPR(object): def __init__(self, length_scale=2.0, magnitude=1.0, ridge=1.0, max_train_size=7000, batch_size=3000, num_threads=4, check_numerics=True, debug=False, hyperparameter_trainable=False): assert np.isscalar(length_scale) assert np.isscalar(magnitude) assert length_scale > 0 and magnitude > 0 self.length_scale = length_scale self.magnitude = magnitude self.ridge = ridge self.max_train_size_ = max_train_size self.batch_size_ = batch_size self.num_threads_ = num_threads self.check_numerics = check_numerics self.debug = debug self.hyperparameter_trainable = hyperparameter_trainable self.X_train = None self.y_train = None self.xy_ = None self.K = None self.K_inv = None self.graph = None self.vars = None self.ops = None def build_graph(self): self.vars = {} self.ops = {} self.graph = tf.Graph() with self.graph.as_default(): if self.hyperparameter_trainable: mag_ = np.log(np.exp(self.magnitude) - 1) ls_ = np.log(np.exp(self.length_scale) - 1) noise_ = np.log(np.exp(self.ridge) - 1) mag_var = tf.nn.softplus(tf.Variable(mag_, dtype=np.float32, name='magnitude')) ls_var = tf.nn.softplus(tf.Variable(ls_, dtype=np.float32, name='length_scale')) noise_var = tf.nn.softplus(tf.Variable(noise_, dtype=np.float32, name='noise_scale')) else: mag_var = tf.constant(self.magnitude, dtype=np.float32, name='magnitude') ls_var = tf.constant(self.length_scale, dtype=np.float32, name='length_scale') noise_var = tf.constant(self.ridge, dtype=np.float32, name='noise_scale') # Nodes for distance computation v1 = tf.placeholder(tf.float32, name="v1") v2 = tf.placeholder(tf.float32, name="v2") dist_op = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(v1, v2), 2), 1), name='dist_op') if self.check_numerics: dist_op = tf.check_numerics(dist_op, "dist_op: ") self.vars['v1_h'] = v1 self.vars['v2_h'] = v2 self.ops['dist_op'] = dist_op self.vars['mag_v'] = mag_var self.vars['ls_v'] = ls_var self.vars['noise_v'] = noise_var # Nodes for kernel computation X_dists = tf.placeholder(tf.float32, name='X_dists') ridge_ph = tf.placeholder(tf.float32, name='ridge') K_op = mag_var * tf.exp(-X_dists / ls_var) # pylint: disable=invalid-name if self.check_numerics: K_op = tf.check_numerics(K_op, "K_op: ") K_ridge_op = K_op + tf.diag(ridge_ph) if self.check_numerics: K_ridge_op = tf.check_numerics(K_ridge_op, "K_ridge_op: ") self.vars['X_dists_h'] = X_dists self.vars['ridge_h'] = ridge_ph self.ops['K_op'] = K_op self.ops['K_ridge_op'] = K_ridge_op # Nodes for xy computation K = tf.placeholder(tf.float32, name='K') K_inv = tf.placeholder(tf.float32, name='K_inv') xy_ = tf.placeholder(tf.float32, name='xy_') yt_ = tf.placeholder(tf.float32, name='yt_') K_inv_op = tf.matrix_inverse(K) if self.check_numerics: K_inv_op = tf.check_numerics(K_inv_op, "K_inv: ") xy_op = tf.matmul(K_inv, yt_) if self.check_numerics: xy_op = tf.check_numerics(xy_op, "xy_: ") self.vars['K_h'] = K self.vars['K_inv_h'] = K_inv self.vars['xy_h'] = xy_ self.vars['yt_h'] = yt_ self.ops['K_inv_op'] = K_inv_op self.ops['xy_op'] = xy_op # Nodes for yhat/sigma computation K2 = tf.placeholder(tf.float32, name="K2") K3 = tf.placeholder(tf.float32, name="K3") ridge_test_ph = tf.placeholder(tf.float32, name="ridge_test_ph") yhat_ = tf.cast(tf.matmul(tf.transpose(K2), xy_), tf.float32) if self.check_numerics: yhat_ = tf.check_numerics(yhat_, "yhat_: ") sv1 = tf.matmul(tf.transpose(K2), tf.matmul(K_inv, K2)) if self.check_numerics: sv1 = tf.check_numerics(sv1, "sv1: ") sig_val = tf.cast((tf.sqrt(tf.diag_part(K3 + tf.diag(ridge_test_ph) - sv1))), tf.float32) if self.check_numerics: sig_val = tf.check_numerics(sig_val, "sig_val: ") self.vars['K2_h'] = K2 self.vars['K3_h'] = K3 self.ops['yhat_op'] = yhat_ self.ops['sig_op'] = sig_val self.ops['ridge_test_h'] = ridge_test_ph # Compute y_best (min y) y_best_op = tf.cast(tf.reduce_min(yt_, 0, True), tf.float32) if self.check_numerics: y_best_op = tf.check_numerics(y_best_op, "y_best_op: ") self.ops['y_best_op'] = y_best_op sigma = tf.placeholder(tf.float32, name='sigma') yhat = tf.placeholder(tf.float32, name='yhat') self.vars['sigma_h'] = sigma self.vars['yhat_h'] = yhat def __repr__(self): rep = "" for k, v in sorted(self.__dict__.items()): rep += "{} = {}\n".format(k, v) return rep def __str__(self): return self.__repr__() def check_X_y(self, X, y): from sklearn.utils.validation import check_X_y if X.shape[0] > self.max_train_size_: raise Exception("X_train size cannot exceed {} ({})" .format(self.max_train_size_, X.shape[0])) return check_X_y(X, y, multi_output=True, allow_nd=True, y_numeric=True, estimator="GPR") def check_fitted(self): if self.X_train is None or self.y_train is None \ or self.xy_ is None or self.K is None: raise Exception("The model must be trained before making predictions!") @staticmethod def check_array(X): from sklearn.utils.validation import check_array return check_array(X, allow_nd=True, estimator="GPR") @staticmethod def check_output(X): finite_els = np.isfinite(X) if not np.all(finite_els): raise Exception("Input contains non-finite values: {}" .format(X[~finite_els])) def fit(self, X_train, y_train): self._reset() X_train, y_train = self.check_X_y(X_train, y_train) self.X_train = np.float32(X_train) self.y_train = np.float32(y_train) sample_size = self.X_train.shape[0] ridge = self.ridge X_dists = np.zeros((sample_size, sample_size), dtype=np.float32) with tf.Session(graph=self.graph, config=tf.ConfigProto( intra_op_parallelism_threads=self.num_threads_)) as sess: init = tf.global_variables_initializer() sess.run(init) noise_var = self.vars['noise_v'] if np.isscalar(ridge): ridge = np.ones(sample_size) * sess.run(noise_var) assert isinstance(ridge, np.ndarray) assert ridge.ndim == 1 dist_op = self.ops['dist_op'] v1, v2 = self.vars['v1_h'], self.vars['v2_h'] for i in range(sample_size): X_dists[i] = sess.run(dist_op, feed_dict={v1: self.X_train[i], v2: self.X_train}) K_ridge_op = self.ops['K_ridge_op'] X_dists_ph = self.vars['X_dists_h'] ridge_ph = self.vars['ridge_h'] self.K = sess.run(K_ridge_op, feed_dict={X_dists_ph: X_dists, ridge_ph: ridge}) K_ph = self.vars['K_h'] K_inv_op = self.ops['K_inv_op'] self.K_inv = sess.run(K_inv_op, feed_dict={K_ph: self.K}) xy_op = self.ops['xy_op'] K_inv_ph = self.vars['K_inv_h'] yt_ph = self.vars['yt_h'] self.xy_ = sess.run(xy_op, feed_dict={K_inv_ph: self.K_inv, yt_ph: self.y_train}) return self def predict(self, X_test): self.check_fitted() X_test = np.float32(GPR.check_array(X_test)) test_size = X_test.shape[0] sample_size = self.X_train.shape[0] ridge = self.ridge arr_offset = 0 yhats = np.zeros([test_size, 1]) sigmas = np.zeros([test_size, 1]) with tf.Session(graph=self.graph, config=tf.ConfigProto( intra_op_parallelism_threads=self.num_threads_)) as sess: init = tf.global_variables_initializer() sess.run(init) noise_var = self.vars['noise_v'] if np.isscalar(ridge): ridge_test = np.ones(test_size) * sess.run(noise_var) # Nodes for distance operation dist_op = self.ops['dist_op'] v1 = self.vars['v1_h'] v2 = self.vars['v2_h'] # Nodes for kernel computation K_op = self.ops['K_op'] X_dists = self.vars['X_dists_h'] # Nodes to compute yhats/sigmas yhat_ = self.ops['yhat_op'] K_inv_ph = self.vars['K_inv_h'] K2 = self.vars['K2_h'] K3 = self.vars['K3_h'] xy_ph = self.vars['xy_h'] ridge_test_ph = self.ops['ridge_test_h'] while arr_offset < test_size: if arr_offset + self.batch_size_ > test_size: end_offset = test_size else: end_offset = arr_offset + self.batch_size_ X_test_batch = X_test[arr_offset:end_offset] batch_len = end_offset - arr_offset dists1 = np.zeros([sample_size, batch_len]) for i in range(sample_size): dists1[i] = sess.run(dist_op, feed_dict={v1: self.X_train[i], v2: X_test_batch}) sig_val = self.ops['sig_op'] K2_ = sess.run(K_op, feed_dict={X_dists: dists1}) yhat = sess.run(yhat_, feed_dict={K2: K2_, xy_ph: self.xy_}) dists2 = np.zeros([batch_len, batch_len]) for i in range(batch_len): dists2[i] = sess.run(dist_op, feed_dict={v1: X_test_batch[i], v2: X_test_batch}) K3_ = sess.run(K_op, feed_dict={X_dists: dists2}) sigma = np.zeros([1, batch_len], np.float32) sigma[0] = sess.run(sig_val, feed_dict={K_inv_ph: self.K_inv, K2: K2_, K3: K3_, ridge_test_ph: ridge_test}) sigma = np.transpose(sigma) yhats[arr_offset: end_offset] = yhat sigmas[arr_offset: end_offset] = sigma arr_offset = end_offset GPR.check_output(yhats) GPR.check_output(sigmas) return GPRResult(yhats, sigmas) def get_params(self, deep=True): return {"length_scale": self.length_scale, "magnitude": self.magnitude, "X_train": self.X_train, "y_train": self.y_train, "xy_": self.xy_, "K": self.K, "K_inv": self.K_inv} def set_params(self, **parameters): for param, val in list(parameters.items()): setattr(self, param, val) return self def _reset(self): self.X_train = None self.y_train = None self.xy_ = None self.K = None self.K_inv = None self.graph = None self.build_graph() gc.collect() class GPRGD(GPR): GP_BETA_UCB = "UCB" GP_BETA_CONST = "CONST" def __init__(self, length_scale=2.0, magnitude=1.0, ridge=1.0, max_train_size=7000, batch_size=3000, num_threads=4, learning_rate=0.01, epsilon=1e-6, max_iter=100, sigma_multiplier=3.0, mu_multiplier=1.0, check_numerics=True, debug=False, hyperparameter_trainable=False): super(GPRGD, self).__init__(length_scale=length_scale, magnitude=magnitude, ridge=ridge, max_train_size=max_train_size, batch_size=batch_size, num_threads=num_threads, check_numerics=check_numerics, debug=debug, hyperparameter_trainable=hyperparameter_trainable) self.learning_rate = learning_rate self.epsilon = epsilon self.max_iter = max_iter self.sigma_multiplier = sigma_multiplier self.mu_multiplier = mu_multiplier self.X_min = None self.X_max = None def fit(self, X_train, y_train, X_min, X_max): # pylint: disable=arguments-differ super(GPRGD, self).fit(X_train, y_train) self.X_min = X_min self.X_max = X_max with tf.Session(graph=self.graph, config=tf.ConfigProto( intra_op_parallelism_threads=self.num_threads_)) as sess: xt_ = tf.Variable(self.X_train[0], tf.float32) xt_ph = tf.placeholder(tf.float32) xt_assign_op = xt_.assign(xt_ph) init = tf.global_variables_initializer() sess.run(init) mag_var = self.vars['mag_v'] ls_var = self.vars['ls_v'] noise_var = self.vars['noise_v'] K2_mat = tf.transpose(tf.expand_dims(tf.sqrt(tf.reduce_sum(tf.pow( tf.subtract(xt_, self.X_train), 2), 1)), 0)) if self.check_numerics is True: K2_mat = tf.check_numerics(K2_mat, "K2_mat: ") K2__ = tf.cast(mag_var * tf.exp(-K2_mat / ls_var), tf.float32) # pylint: disable=invalid-name if self.check_numerics is True: K2__ = tf.check_numerics(K2__, "K2__: ") yhat_gd = tf.cast(tf.matmul(tf.transpose(K2__), self.xy_), tf.float32) if self.check_numerics is True: yhat_gd = tf.check_numerics(yhat_gd, message="yhat: ") sig_val = tf.cast((tf.sqrt(mag_var + noise_var - tf.matmul( tf.transpose(K2__), tf.matmul(self.K_inv, K2__)))), tf.float32) if self.check_numerics is True: sig_val = tf.check_numerics(sig_val, message="sigma: ") LOG.debug("\nyhat_gd : %s", str(sess.run(yhat_gd))) LOG.debug("\nsig_val : %s", str(sess.run(sig_val))) loss = tf.squeeze(tf.subtract(self.mu_multiplier * yhat_gd, self.sigma_multiplier * sig_val)) if self.check_numerics is True: loss = tf.check_numerics(loss, "loss: ") optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, epsilon=self.epsilon) # optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate) train = optimizer.minimize(loss) self.vars['xt_'] = xt_ self.vars['xt_ph'] = xt_ph self.ops['xt_assign_op'] = xt_assign_op self.ops['yhat_gd'] = yhat_gd self.ops['sig_val2'] = sig_val self.ops['loss_op'] = loss self.ops['train_op'] = train return self def predict(self, X_test, constraint_helper=None, # pylint: disable=arguments-differ categorical_feature_method='hillclimbing', categorical_feature_steps=3): self.check_fitted() X_test = np.float32(GPR.check_array(X_test)) test_size = X_test.shape[0] nfeats = self.X_train.shape[1] arr_offset = 0 yhats = np.zeros([test_size, 1]) sigmas = np.zeros([test_size, 1]) minls = np.zeros([test_size, 1]) minl_confs = np.zeros([test_size, nfeats]) with tf.Session(graph=self.graph, config=tf.ConfigProto( intra_op_parallelism_threads=self.num_threads_)) as sess: while arr_offset < test_size: if arr_offset + self.batch_size_ > test_size: end_offset = test_size else: end_offset = arr_offset + self.batch_size_ X_test_batch = X_test[arr_offset:end_offset] batch_len = end_offset - arr_offset xt_ = self.vars['xt_'] init = tf.global_variables_initializer() sess.run(init) sig_val = self.ops['sig_val2'] yhat_gd = self.ops['yhat_gd'] loss = self.ops['loss_op'] train = self.ops['train_op'] xt_ph = self.vars['xt_ph'] assign_op = self.ops['xt_assign_op'] yhat = np.empty((batch_len, 1)) sigma = np.empty((batch_len, 1)) minl = np.empty((batch_len, 1)) minl_conf = np.empty((batch_len, nfeats)) for i in range(batch_len): if self.debug is True: LOG.info("-------------------------------------------") yhats_it = np.empty((self.max_iter + 1,)) * np.nan sigmas_it = np.empty((self.max_iter + 1,)) * np.nan losses_it = np.empty((self.max_iter + 1,)) * np.nan confs_it = np.empty((self.max_iter + 1, nfeats)) * np.nan sess.run(init) sess.run(assign_op, feed_dict={xt_ph: X_test_batch[i]}) step = 0 for step in range(self.max_iter): if self.debug is True: LOG.info("Batch %d, iter %d:", i, step) yhats_it[step] = sess.run(yhat_gd)[0][0] sigmas_it[step] = sess.run(sig_val)[0][0] losses_it[step] = sess.run(loss) confs_it[step] = sess.run(xt_) if self.debug is True: LOG.info(" yhat: %s", str(yhats_it[step])) LOG.info(" sigma: %s", str(sigmas_it[step])) LOG.info(" loss: %s", str(losses_it[step])) LOG.info(" conf: %s", str(confs_it[step])) sess.run(train) # constraint Projected Gradient Descent xt = sess.run(xt_) xt_valid = np.minimum(xt, self.X_max) xt_valid = np.maximum(xt_valid, self.X_min) sess.run(assign_op, feed_dict={xt_ph: xt_valid}) if constraint_helper is not None: xt_valid = constraint_helper.apply_constraints(sess.run(xt_)) sess.run(assign_op, feed_dict={xt_ph: xt_valid}) if categorical_feature_method == 'hillclimbing': if step % categorical_feature_steps == 0: current_xt = sess.run(xt_) current_loss = sess.run(loss) new_xt = \ constraint_helper.randomize_categorical_features( current_xt) sess.run(assign_op, feed_dict={xt_ph: new_xt}) new_loss = sess.run(loss) if current_loss < new_loss: sess.run(assign_op, feed_dict={xt_ph: new_xt}) else: raise Exception("Unknown categorial feature method: {}".format( categorical_feature_method)) if step == self.max_iter - 1: # Record results from final iteration yhats_it[-1] = sess.run(yhat_gd)[0][0] sigmas_it[-1] = sess.run(sig_val)[0][0] losses_it[-1] = sess.run(loss) confs_it[-1] = sess.run(xt_) assert np.all(np.isfinite(yhats_it)) assert np.all(np.isfinite(sigmas_it)) assert np.all(np.isfinite(losses_it)) assert np.all(np.isfinite(confs_it)) # Store info for conf with min loss from all iters if np.all(~np.isfinite(losses_it)): min_loss_idx = 0 else: min_loss_idx = np.nanargmin(losses_it) yhat[i] = yhats_it[min_loss_idx] sigma[i] = sigmas_it[min_loss_idx] minl[i] = losses_it[min_loss_idx] minl_conf[i] = confs_it[min_loss_idx] minls[arr_offset:end_offset] = minl minl_confs[arr_offset:end_offset] = minl_conf yhats[arr_offset:end_offset] = yhat sigmas[arr_offset:end_offset] = sigma arr_offset = end_offset GPR.check_output(yhats) GPR.check_output(sigmas) GPR.check_output(minls) GPR.check_output(minl_confs) return GPRGDResult(yhats, sigmas, minls, minl_confs) @staticmethod def calculate_sigma_multiplier(t, ndim, bound=0.1): assert t > 0 assert ndim > 0 assert bound > 0 and bound <= 1 beta = 2 * np.log(ndim * (t**2) * (np.pi**2) / 6 * bound) if beta > 0: beta = np.sqrt(beta) else: beta = 1 return beta # def gp_tf(X_train, y_train, X_test, ridge, length_scale, magnitude, batch_size=3000): # with tf.Graph().as_default(): # y_best = tf.cast(tf.reduce_min(y_train, 0, True), tf.float32) # sample_size = X_train.shape[0] # train_size = X_test.shape[0] # arr_offset = 0 # yhats = np.zeros([train_size, 1]) # sigmas = np.zeros([train_size, 1]) # eips = np.zeros([train_size, 1]) # X_train = np.float32(X_train) # y_train = np.float32(y_train) # X_test = np.float32(X_test) # ridge = np.float32(ridge) # # v1 = tf.placeholder(tf.float32,name="v1") # v2 = tf.placeholder(tf.float32,name="v2") # dist_op = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(v1, v2), 2), 1)) # try: # sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) # # dists = np.zeros([sample_size,sample_size]) # for i in range(sample_size): # dists[i] = sess.run(dist_op,feed_dict={v1:X_train[i], v2:X_train}) # # # dists = tf.cast(dists, tf.float32) # K = magnitude * tf.exp(-dists/length_scale) + tf.diag(ridge); # # K2 = tf.placeholder(tf.float32, name="K2") # K3 = tf.placeholder(tf.float32, name="K3") # # x = tf.matmul(tf.matrix_inverse(K), y_train) # yhat_ = tf.cast(tf.matmul(tf.transpose(K2), x), tf.float32); # sig_val = tf.cast((tf.sqrt(tf.diag_part(K3 - tf.matmul(tf.transpose(K2), # tf.matmul(tf.matrix_inverse(K), # K2))))), # tf.float32) # # u = tf.placeholder(tf.float32, name="u") # phi1 = 0.5 * tf.erf(u / np.sqrt(2.0)) + 0.5 # phi2 = (1.0 / np.sqrt(2.0 * np.pi)) * tf.exp(tf.square(u) * (-0.5)); # eip = (tf.multiply(u, phi1) + phi2); # # while arr_offset < train_size: # if arr_offset + batch_size > train_size: # end_offset = train_size # else: # end_offset = arr_offset + batch_size; # # xt_ = X_test[arr_offset:end_offset]; # batch_len = end_offset - arr_offset # # dists = np.zeros([sample_size, batch_len]) # for i in range(sample_size): # dists[i] = sess.run(dist_op, feed_dict={v1:X_train[i], v2:xt_}) # # K2_ = magnitude * tf.exp(-dists / length_scale); # K2_ = sess.run(K2_) # # dists = np.zeros([batch_len, batch_len]) # for i in range(batch_len): # dists[i] = sess.run(dist_op, feed_dict={v1:xt_[i], v2:xt_}) # K3_ = magnitude * tf.exp(-dists / length_scale); # K3_ = sess.run(K3_) # # yhat = sess.run(yhat_, feed_dict={K2:K2_}) # # sigma = np.zeros([1, batch_len], np.float32) # sigma[0] = (sess.run(sig_val, feed_dict={K2:K2_, K3:K3_})) # sigma = np.transpose(sigma) # # u_ = tf.cast(tf.div(tf.subtract(y_best, yhat), sigma), tf.float32) # u_ = sess.run(u_) # eip_p = sess.run(eip, feed_dict={u:u_}) # eip_ = tf.multiply(sigma, eip_p) # yhats[arr_offset:end_offset] = yhat # sigmas[arr_offset:end_offset] = sigma; # eips[arr_offset:end_offset] = sess.run(eip_); # arr_offset = end_offset # # finally: # sess.close() # # return yhats, sigmas, eips def euclidean_mat(X, y, sess): x_n = X.shape[0] y_n = y.shape[0] z = np.zeros([x_n, y_n]) for i in range(x_n): v1 = X[i] tmp = [] for j in range(y_n): v2 = y[j] tmp.append(tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(v1, v2), 2)))) z[i] = (sess.run(tmp)) return z def gd_tf(xs, ys, xt, ridge, length_scale=1.0, magnitude=1.0, max_iter=50): LOG.debug("xs shape: %s", str(xs.shape)) LOG.debug("ys shape: %s", str(ys.shape)) LOG.debug("xt shape: %s", str(xt.shape)) with tf.Graph().as_default(): # y_best = tf.cast(tf.reduce_min(ys,0,True),tf.float32); #array # yhat_gd = tf.check_numerics(yhat_gd, message="yhat: ") sample_size = xs.shape[0] nfeats = xs.shape[1] test_size = xt.shape[0] # arr_offset = 0 ini_size = xt.shape[0] yhats = np.zeros([test_size, 1]) sigmas = np.zeros([test_size, 1]) minl = np.zeros([test_size, 1]) new_conf = np.zeros([test_size, nfeats]) xs = np.float32(xs) ys = np.float32(ys) xt_ = tf.Variable(xt[0], tf.float32) sess = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=8)) init = tf.global_variables_initializer() sess.run(init) ridge = np.float32(ridge) v1 = tf.placeholder(tf.float32, name="v1") v2 = tf.placeholder(tf.float32, name="v2") dist = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(v1, v2), 2), 1)) tmp = np.zeros([sample_size, sample_size]) for i in range(sample_size): tmp[i] = sess.run(dist, feed_dict={v1: xs[i], v2: xs}) tmp = tf.cast(tmp, tf.float32) K = magnitude * tf.exp(-tmp / length_scale) + tf.diag(ridge) LOG.debug("K shape: %s", str(sess.run(K).shape)) K2_mat = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(xt_, xs), 2), 1)) K2_mat = tf.transpose(tf.expand_dims(K2_mat, 0)) K2 = tf.cast(tf.exp(-K2_mat / length_scale), tf.float32) x = tf.matmul(tf.matrix_inverse(K), ys) x = sess.run(x) yhat_ = tf.cast(tf.matmul(tf.transpose(K2), x), tf.float32) sig_val = tf.cast((tf.sqrt(magnitude - tf.matmul( tf.transpose(K2), tf.matmul(tf.matrix_inverse(K), K2)))), tf.float32) LOG.debug('yhat shape: %s', str(sess.run(yhat_).shape)) LOG.debug('sig_val shape: %s', str(sess.run(sig_val).shape)) yhat_ = tf.check_numerics(yhat_, message='yhat: ') sig_val = tf.check_numerics(sig_val, message='sig_val: ') loss = tf.squeeze(tf.subtract(yhat_, sig_val)) loss = tf.check_numerics(loss, message='loss: ') # optimizer = tf.train.GradientDescentOptimizer(0.1) LOG.debug('loss: %s', str(sess.run(loss))) optimizer = tf.train.AdamOptimizer(0.1) train = optimizer.minimize(loss) init = tf.global_variables_initializer() sess.run(init) for i in range(ini_size): assign_op = xt_.assign(xt[i]) sess.run(assign_op) for step in range(max_iter): LOG.debug('sample #: %d, iter #: %d, loss: %s', i, step, str(sess.run(loss))) sess.run(train) yhats[i] = sess.run(yhat_)[0][0] sigmas[i] = sess.run(sig_val)[0][0] minl[i] = sess.run(loss) new_conf[i] = sess.run(xt_) return yhats, sigmas, minl, new_conf def main(): pass def create_random_matrices(n_samples=3000, n_feats=12, n_test=4444): X_train = np.random.rand(n_samples, n_feats) y_train = np.random.rand(n_samples, 1) X_test = np.random.rand(n_test, n_feats) length_scale = np.random.rand() magnitude = np.random.rand() ridge = np.ones(n_samples) * np.random.rand() return X_train, y_train, X_test, length_scale, magnitude, ridge if __name__ == "__main__": main()