diff --git a/client/driver/lhs.sh b/client/driver/lhs.sh
index 5fa85ba..3360465 100755
--- a/client/driver/lhs.sh
+++ b/client/driver/lhs.sh
@@ -1 +1 @@
-sudo -b nohup fab run_lhs > lhs.log 2>&1
+sudo -b nohup fab run_lhs > lhs.log 2>&1 < /dev/null
diff --git a/server/analysis/nn_tf.py b/server/analysis/nn_tf.py
new file mode 100644
index 0000000..896a943
--- /dev/null
+++ b/server/analysis/nn_tf.py
@@ -0,0 +1,200 @@
+#
+# OtterTune - nn_tf.py
+#
+# Copyright (c) 2017-18, Carnegie Mellon University Database Group
+#
+'''
+Created on Sep 16, 2019
+@author: Bohan Zhang
+'''
+
+import numpy as np
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras import layers
+
+from .util import get_analysis_logger
+
+LOG = get_analysis_logger(__name__)
+
+
+class NeuralNetResult(object):
+    def __init__(self, minl=None, minl_conf=None):
+        self.minl = minl
+        self.minl_conf = minl_conf
+
+
+class NeuralNet(object):
+
+    def __init__(self,
+                 n_input,
+                 weights_file,
+                 learning_rate=0.01,
+                 debug=False,
+                 debug_interval=100,
+                 batch_size=2,
+                 explore_iters=500,
+                 noise_scale_begin=0.5,
+                 noise_scale_end=0.01):
+        # absolute path of the model weights file;
+        # one model for each (project, session)
+        self.weights_file = weights_file
+
+        self.recommend_iters = 0
+        self.n_input = n_input
+        self.debug = debug
+        self.debug_interval = debug_interval
+        self.learning_rate = learning_rate
+        self.batch_size = batch_size
+        self.explore_iters = explore_iters
+        self.noise_scale_begin = noise_scale_begin
+        self.noise_scale_end = noise_scale_end
+        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
+        # input X is a placeholder, the weights are variables
+        self.model = keras.Sequential([
+            layers.Dense(64, activation=tf.nn.relu, input_shape=[n_input]),
+            layers.Dropout(0.5),
+            layers.Dense(64, activation=tf.nn.relu),
+            layers.Dense(1)
+        ])
+        self.load_weights()
+        self.model.compile(loss='mean_squared_error',
+                           optimizer=self.optimizer,
+                           metrics=['mean_squared_error', 'mean_absolute_error'])
+        self.vars = {}
+        self.ops = {}
+        self.build_graph()
+
+    def save_weights(self):
+        self.model.save_weights(self.weights_file)
+
+    def load_weights(self):
+        try:
+            self.model.load_weights(self.weights_file)
+            if self.debug:
+                LOG.info('Neural network model weights file exists, loading weights from the file')
+        except Exception:
+            LOG.info('Weights file does not match the neural network model, training it from scratch')
+
+    # Build the same neural network as self.model, but with input X as a
+    # variable and the weights as placeholders. Find the optimal X using
+    # gradient descent.
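+    # Keras only optimizes the weights, so the roles are inverted here: the
+    # trained weights are fed in through the placeholders and held fixed, while
+    # gradient descent updates x_ itself, searching the configuration space for
+    # an input that minimizes the predicted objective y_.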
+    def build_graph(self):
+        batch_size = self.batch_size
+        self.graph = tf.Graph()
+        with self.graph.as_default():
+            x_ = tf.Variable(tf.ones([batch_size, self.n_input]))
+            w1_ = tf.placeholder(tf.float32, [self.n_input, 64])
+            b1_ = tf.placeholder(tf.float32, [64])
+            w2_ = tf.placeholder(tf.float32, [64, 64])
+            b2_ = tf.placeholder(tf.float32, [64])
+            w3_ = tf.placeholder(tf.float32, [64, 1])
+            b3_ = tf.placeholder(tf.float32, [1])
+            l1_ = tf.nn.relu(tf.add(tf.matmul(x_, w1_), b1_))
+            l2_ = tf.nn.relu(tf.add(tf.matmul(l1_, w2_), b2_))
+            y_ = tf.add(tf.matmul(l2_, w3_), b3_)
+            optimizer_ = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
+            train_ = optimizer_.minimize(y_)
+
+            self.vars['x_'] = x_
+            self.vars['y_'] = y_
+            self.vars['w1_'] = w1_
+            self.vars['w2_'] = w2_
+            self.vars['w3_'] = w3_
+            self.vars['b1_'] = b1_
+            self.vars['b2_'] = b2_
+            self.vars['b3_'] = b3_
+            self.ops['train_'] = train_
+
+    def fit(self, X_train, y_train, fit_epochs=500):
+        self.history = self.model.fit(
+            X_train, y_train, epochs=fit_epochs, verbose=0)
+        # save model weights
+        self.save_weights()
+        if self.debug:
+            mses = self.history.history['mean_squared_error']
+            size = len(mses)
+            for i in range(0, size, self.debug_interval):
+                LOG.info("Neural network training phase, epoch %d: mean_squared_error %f",
+                         i, mses[i])
+            LOG.info("Neural network training phase, epoch %d: mean_squared_error %f",
+                     size - 1, mses[size - 1])
+
+    def predict(self, X_pred):
+        return self.model.predict(X_pred)
+
+    def add_noise(self, weights, scale=1):
+        size = weights.shape[-1]
+        noise = scale * np.random.normal(size=size)
+        return weights + noise
+
+    def adaptive_noise_scale(self):
+        # anneal the exploration noise linearly from noise_scale_begin down to
+        # noise_scale_end over the first explore_iters recommendation calls
+        if self.recommend_iters > self.explore_iters:
+            scale = self.noise_scale_end
+        else:
+            scale = self.noise_scale_begin - (self.noise_scale_begin - self.noise_scale_end) \
+                * 1.0 * self.recommend_iters / self.explore_iters
+        return scale
+
+    def recommend(self, X_start, X_min=None, X_max=None, recommend_epochs=500, explore=False):
+        batch_size = len(X_start)
+        assert batch_size == self.batch_size
+        w1, b1 = self.model.get_layer(index=0).get_weights()
+        w2, b2 = self.model.get_layer(index=2).get_weights()
+        w3, b3 = self.model.get_layer(index=3).get_weights()
+
+        if self.debug:
+            y_predict = self.predict(X_start)
+            LOG.info("Recommend phase, y prediction: min %f, max %f, mean %f",
+                     np.min(y_predict), np.max(y_predict), np.mean(y_predict))
+
+        with tf.Session(graph=self.graph) as sess:
+            init = tf.global_variables_initializer()
+            sess.run(init)
+            assign_x_op = self.vars['x_'].assign(X_start)
+            sess.run(assign_x_op)
+            y_before = sess.run(self.vars['y_'],
+                                feed_dict={self.vars['w1_']: w1, self.vars['w2_']: w2,
+                                           self.vars['w3_']: w3, self.vars['b1_']: b1,
+                                           self.vars['b2_']: b2, self.vars['b3_']: b3})
+            if self.debug:
+                LOG.info("Recommend phase, y before gradient descent: min %f, max %f, mean %f",
+                         np.min(y_before), np.max(y_before), np.mean(y_before))
+
+            for i in range(recommend_epochs):
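+                # one Adam step on the input variable x_; the trained weights are
+                # fed in via feed_dict and stay fixed, so only the candidate
+                # configurations in x_ move downhill on the predicted objective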
+                sess.run(self.ops['train_'],
+                         feed_dict={self.vars['w1_']: w1, self.vars['w2_']: w2,
+                                    self.vars['w3_']: w3, self.vars['b1_']: b1,
+                                    self.vars['b2_']: b2, self.vars['b3_']: b3})
+
+                # constrain x_ by X_min and X_max
+                if X_min is not None and X_max is not None:
+                    X_train = sess.run(self.vars['x_'])
+                    X_train = np.minimum(X_train, X_max)
+                    X_train = np.maximum(X_train, X_min)
+                    constraint_x_op = self.vars['x_'].assign(X_train)
+                    sess.run(constraint_x_op)
+
+                if self.debug and i % self.debug_interval == 0:
+                    y_train = sess.run(self.vars['y_'],
+                                       feed_dict={self.vars['w1_']: w1, self.vars['w2_']: w2,
+                                                  self.vars['w3_']: w3, self.vars['b1_']: b1,
+                                                  self.vars['b2_']: b2, self.vars['b3_']: b3})
+                    LOG.info("Recommend phase, epoch %d, y: min %f, max %f, mean %f",
+                             i, np.min(y_train), np.max(y_train), np.mean(y_train))
+
+            y_recommend = sess.run(self.vars['y_'],
+                                   feed_dict={self.vars['w1_']: w1, self.vars['w2_']: w2,
+                                              self.vars['w3_']: w3, self.vars['b1_']: b1,
+                                              self.vars['b2_']: b2, self.vars['b3_']: b3})
+            X_recommend = sess.run(self.vars['x_'])
+            res = NeuralNetResult(minl=y_recommend, minl_conf=X_recommend)
+
+            if self.debug:
+                LOG.info("Recommend phase, epoch %d, y after gradient descent: "
+                         "min %f, max %f, mean %f", recommend_epochs, np.min(y_recommend),
+                         np.max(y_recommend), np.mean(y_recommend))
+
+        self.recommend_iters += 1
+        return res
diff --git a/server/website/website/settings/common.py b/server/website/website/settings/common.py
index 93e140c..d7bd3dd 100644
--- a/server/website/website/settings/common.py
+++ b/server/website/website/settings/common.py
@@ -33,6 +33,9 @@ CONFIG_DIR = join(PROJECT_ROOT, 'config')
 # Where the log files are stored
 LOG_DIR = join(PROJECT_ROOT, 'log')
 
+# Where the model weight files are stored
+MODEL_DIR = join(PROJECT_ROOT, 'model')
+
 # File/directory upload permissions
 FILE_UPLOAD_DIRECTORY_PERMISSIONS = 0o664
 FILE_UPLOAD_PERMISSIONS = 0o664
@@ -54,6 +57,13 @@ try:
 except OSError:  # Invalid permissions
     pass
 
+# Try to create the model directory
+try:
+    if not exists(MODEL_DIR):
+        os.mkdir(MODEL_DIR)
+except OSError:  # Invalid permissions
+    pass
+
 # ==============================================
 # DEBUG CONFIGURATION
 # ==============================================
diff --git a/server/website/website/tasks/async_tasks.py b/server/website/website/tasks/async_tasks.py
index b7eb009..b055f28 100644
--- a/server/website/website/tasks/async_tasks.py
+++ b/server/website/website/tasks/async_tasks.py
@@ -3,6 +3,7 @@
 #
 # Copyright (c) 2017-18, Carnegie Mellon University Database Group
 #
+import os
 import random
 import queue
 import numpy as np
@@ -15,6 +16,7 @@ from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from analysis.ddpg.ddpg import DDPG
 from analysis.gp import GPRNP
 from analysis.gp_tf import GPRGD
+from analysis.nn_tf import NeuralNet
 from analysis.preprocessing import Bin, DummyEncoder
 from analysis.constraints import ParamConstraintHelper
 from website.models import (PipelineData, PipelineRun, Result, Workload, KnobCatalog,
@@ -32,6 +34,7 @@ from website.settings import (DEFAULT_LENGTH_SCALE, DEFAULT_MAGNITUDE,
                               CRITIC_LEARNING_RATE, GAMMA, TAU)
 
 from website.settings import INIT_FLIP_PROB, FLIP_PROB_DECAY
+from website.settings import MODEL_DIR
 
 from website.types import VarType
 
@@ -536,6 +539,18 @@ def configuration_recommendation(target_data):
         except queue.Empty:
             break
 
+    # one model for each (project, session)
+    session = newest_result.session.pk
+    project = newest_result.session.project.pk
+    full_path = os.path.join(MODEL_DIR, 'p' + str(project) + '_s' + str(session) + '_nn.weights')
+
+    # neural network model
+    # FIXME: choose algorithm based on the session option
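+    # NOTE: recommend() asserts len(X_start) == batch_size, so the NeuralNet is
+    # built with batch_size=X_samples.shape[0]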
+    model_nn = NeuralNet(weights_file=full_path, n_input=X_samples.shape[1],
+                         batch_size=X_samples.shape[0], debug=True)
+    model_nn.fit(X_scaled, y_scaled)
+    res = model_nn.recommend(X_samples, X_min, X_max, explore=False)
+
     model = GPRGD(length_scale=DEFAULT_LENGTH_SCALE,
                   magnitude=DEFAULT_MAGNITUDE,
                   max_train_size=MAX_TRAIN_SIZE,
@@ -546,8 +561,8 @@
                   max_iter=MAX_ITER,
                   sigma_multiplier=DEFAULT_SIGMA_MULTIPLIER,
                   mu_multiplier=DEFAULT_MU_MULTIPLIER)
-    model.fit(X_scaled, y_scaled, X_min, X_max, ridge=DEFAULT_RIDGE)
-    res = model.predict(X_samples, constraint_helper=constraint_helper)
+    # model.fit(X_scaled, y_scaled, X_min, X_max, ridge=DEFAULT_RIDGE)
+    # res = model.predict(X_samples, constraint_helper=constraint_helper)
 
     best_config_idx = np.argmin(res.minl.ravel())
     best_config = res.minl_conf[best_config_idx, :]