ottertune/server/analysis/nn_tf.py

254 lines
12 KiB
Python
Raw Normal View History

2019-09-22 18:03:29 -07:00
#
# OtterTune - nn_tf.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Sep 16, 2019
@author: Bohan Zhang
'''
2019-09-29 17:53:23 -07:00
import pickle
2019-09-22 18:03:29 -07:00
import numpy as np
import tensorflow as tf
from tensorflow import keras
from .util import get_analysis_logger
LOG = get_analysis_logger(__name__)
class NeuralNetResult(object):
    """Plain holder for the outcome of a recommendation run.

    Attributes are stored exactly as passed in:
      minl      -- value given for the minimised objective (default None)
      minl_conf -- value given for the configuration reaching it (default None)
    """

    def __init__(self, minl=None, minl_conf=None):
        self.minl, self.minl_conf = minl, minl_conf
class NeuralNet(object):
    """Keras regression model plus a TF1 graph that searches for a minimising input.

    Two computations live in the same graph/session:

    * ``self.model`` -- a Keras ``Sequential`` network (Dense 64 -> Dropout ->
      Dense 64 -> Dense 1) trained by :meth:`fit`; its inputs are data and its
      weights are variables.
    * the graph built by :meth:`_build_graph` -- the same dense layers (without
      dropout), but the *input* is a variable and the weights are placeholders,
      so :meth:`recommend` can run gradient descent on the input while the
      weights copied from ``self.model`` stay fixed.
    """

    def __init__(self,
                 n_input,
                 learning_rate=0.01,
                 debug=False,
                 debug_interval=100,
                 batch_size=1,
                 explore_iters=500,
                 noise_scale_begin=0.1,
                 noise_scale_end=0,
                 reset_seed=False):
        """Create the session, the Keras model and the input-search graph.

        Args:
            n_input: number of input features of the network.
            learning_rate: Adam learning rate, used both for training the model
                and for the input search.
            debug: when true, progress is written to ``LOG``.
            debug_interval: number of epochs between two debug log lines.
            batch_size: number of start points optimised at once in
                :meth:`recommend`; fixes the shape of the searched variable.
            explore_iters: number of :meth:`recommend` calls over which the
                noise scale moves linearly from begin to end.
            noise_scale_begin: noise scale at the first recommendation.
            noise_scale_end: noise scale once exploration is over.
            reset_seed: when true, the TensorFlow graph-level seed is set to 0.
        """
        self.history = None
        self.recommend_iters = 0
        self.n_input = n_input
        self.debug = debug
        self.debug_interval = debug_interval
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.explore_iters = explore_iters
        self.noise_scale_begin = noise_scale_begin
        self.noise_scale_end = noise_scale_end
        # Tensors (vars) and operations (ops) of the input-search graph, by name.
        self.vars = {}
        self.ops = {}

        # NOTE: this resets the process-wide default graph.
        tf.reset_default_graph()
        if reset_seed:
            tf.set_random_seed(0)
        self.session = tf.Session()
        self.graph = tf.get_default_graph()
        with self.graph.as_default(), \
                self.session.as_default():  # pylint: disable=not-context-manager
            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
            # input X is placeholder, weights are variables.
            self.model = keras.Sequential([
                keras.layers.Dense(64, activation=tf.nn.relu, input_shape=[n_input]),
                keras.layers.Dropout(0.5),
                keras.layers.Dense(64, activation=tf.nn.relu),
                keras.layers.Dense(1)
            ])
            self.model.compile(loss='mean_squared_error',
                               optimizer=self.optimizer,
                               metrics=['mean_squared_error', 'mean_absolute_error'])
        self._build_graph()

    def save_weights_file(self, weights_file):
        """Write the Keras model weights to ``weights_file``."""
        with self.graph.as_default(), \
                self.session.as_default():  # pylint: disable=not-context-manager
            self.model.save_weights(weights_file)

    def load_weights_file(self, weights_file):
        """Load Keras model weights from ``weights_file`` (best effort).

        Any failure is logged and otherwise ignored, leaving the model with
        whatever weights it currently has.
        """
        try:
            with self.graph.as_default(), \
                    self.session.as_default():  # pylint: disable=not-context-manager
                self.model.load_weights(weights_file)
            if self.debug:
                LOG.info('Neural Network Model weights file exists, load weights from the file')
        except Exception:  # pylint: disable=broad-except
            LOG.info('Weights file does not match neural network model, train model from scratch')

    def get_weights_bin(self):
        """Return the Keras model weights serialised with ``pickle.dumps``."""
        with self.graph.as_default(), \
                self.session.as_default():  # pylint: disable=not-context-manager
            weights = self.model.get_weights()
        return pickle.dumps(weights)

    def set_weights_bin(self, weights):
        """Restore model weights from a blob made by :meth:`get_weights_bin` (best effort).

        Any failure is logged and otherwise ignored.
        """
        # NOTE(review): pickle.loads can execute arbitrary code for a crafted
        # payload -- only pass blobs that come from a trusted store.
        try:
            with self.graph.as_default(), \
                    self.session.as_default():  # pylint: disable=not-context-manager
                self.model.set_weights(pickle.loads(weights))
            if self.debug:
                LOG.info('Neural Network Model weights exists, load the existing weights')
        except Exception:  # pylint: disable=broad-except
            LOG.info('Weights does not match neural network model, train model from scratch')

    # Build the same neural network as self.model, but input X is a variable and
    # the weights are placeholders. Find the optimal X using gradient descent.
    def _build_graph(self):
        """Create the input-search graph and register its tensors and ops.

        Populates ``self.vars`` (``x_``, ``x_start_``, ``x_bounded_``, ``y_``,
        ``X_min_``, ``X_max_`` and the weight placeholders ``w1_..b3_``) and
        ``self.ops`` (``train_``, ``init_``, ``assign_x_``).
        """
        batch_size = self.batch_size
        with self.graph.as_default(), \
                self.session.as_default():  # pylint: disable=not-context-manager
            x_ = tf.Variable(tf.ones([batch_size, self.n_input]))
            # Placeholder + assign op built once, so recommend() does not add a
            # new assign node (with an embedded constant) on every call.
            x_start_ = tf.placeholder(tf.float32, [batch_size, self.n_input])
            assign_x_ = x_.assign(x_start_)

            X_min_ = tf.placeholder(tf.float32, [self.n_input])
            X_max_ = tf.placeholder(tf.float32, [self.n_input])
            x_bounded_ = tf.maximum(tf.minimum(x_, X_max_), X_min_)
            x_bounded_ = tf.cast(x_bounded_, tf.float32)

            w1_ = tf.placeholder(tf.float32, [self.n_input, 64])
            b1_ = tf.placeholder(tf.float32, [64])
            w2_ = tf.placeholder(tf.float32, [64, 64])
            b2_ = tf.placeholder(tf.float32, [64])
            w3_ = tf.placeholder(tf.float32, [64, 1])
            b3_ = tf.placeholder(tf.float32, [1])

            # The network consumes the *clipped* input, so y_ is always the
            # prediction for the configuration that recommend() returns.
            l1_ = tf.nn.relu(tf.add(tf.matmul(x_bounded_, w1_), b1_))
            l2_ = tf.nn.relu(tf.add(tf.matmul(l1_, w2_), b2_))
            y_ = tf.add(tf.matmul(l2_, w3_), b3_)

            optimizer_ = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
            # Only x_ is optimised; the weights are fed through placeholders.
            train_ = optimizer_.minimize(y_, var_list=[x_])
            # Initialise only the search state (x_ and this optimizer's
            # variables) so recommend() never resets the Keras model weights.
            init_ = tf.variables_initializer([x_] + optimizer_.variables())

            self.vars['x_'] = x_
            self.vars['x_start_'] = x_start_
            self.vars['y_'] = y_
            self.vars['w1_'] = w1_
            self.vars['w2_'] = w2_
            self.vars['w3_'] = w3_
            self.vars['b1_'] = b1_
            self.vars['b2_'] = b2_
            self.vars['b3_'] = b3_
            self.vars['X_min_'] = X_min_
            self.vars['X_max_'] = X_max_
            self.vars['x_bounded_'] = x_bounded_
            self.ops['train_'] = train_
            self.ops['init_'] = init_
            self.ops['assign_x_'] = assign_x_

    def fit(self, X_train, y_train, fit_epochs=500):
        """Train the Keras model and keep the returned history in ``self.history``.

        In debug mode the mean squared error is logged every ``debug_interval``
        epochs, followed by the value of the last epoch.
        """
        with self.graph.as_default(), \
                self.session.as_default():  # pylint: disable=not-context-manager
            self.history = self.model.fit(
                X_train, y_train, epochs=fit_epochs, verbose=0)
            if self.debug:
                mse = self.history.history.get('mean_squared_error', [])
                size = len(mse)
                for i in range(0, size, self.debug_interval):
                    LOG.info("Neural network training phase, epoch %d: mean_squared_error %f",
                             i, mse[i])
                if size:  # nothing to report when no epoch was run
                    LOG.info("Neural network training phase, epoch %d: mean_squared_error %f",
                             size - 1, mse[size - 1])

    def predict(self, X_pred):
        """Return the Keras model's predictions for ``X_pred``."""
        with self.graph.as_default(), \
                self.session.as_default():  # pylint: disable=not-context-manager
            return self.model.predict(X_pred)

    # Reference: Parameter Space Noise for Exploration.ICLR 2018, https://arxiv.org/abs/1706.01905
    def _add_noise(self, weights):
        """Return ``weights`` plus Gaussian noise at the current adaptive scale.

        One noise value is drawn per entry of the last axis and broadcast over
        the remaining axes.
        """
        scale = self._adaptive_noise_scale()
        size = weights.shape[-1]
        noise = scale * np.random.normal(size=size)
        return weights + noise

    def _adaptive_noise_scale(self):
        """Return the noise scale for the current recommendation count.

        The scale moves linearly from ``noise_scale_begin`` (at 0 calls) to
        ``noise_scale_end`` (at ``explore_iters`` calls) and stays there.
        """
        # ">=" (not ">") so that explore_iters == 0 cannot divide by zero; at
        # recommend_iters == explore_iters the linear formula yields the end
        # scale anyway.
        if self.recommend_iters >= self.explore_iters:
            return self.noise_scale_end
        progress = 1.0 * self.recommend_iters / self.explore_iters
        return self.noise_scale_begin - (self.noise_scale_begin - self.noise_scale_end) * progress

    def recommend(self, X_start, X_min=None, X_max=None, recommend_epochs=500, explore=False):
        """Run gradient descent on the input to minimise the model's prediction.

        Args:
            X_start: start points; ``len(X_start)`` must equal ``self.batch_size``.
            X_min: per-feature lower bounds (length ``n_input``); unbounded if None.
            X_max: per-feature upper bounds (length ``n_input``); unbounded if None.
            recommend_epochs: number of Adam steps taken on the input.
            explore: when truthy, noise is added to the copied weights first.

        Returns:
            NeuralNetResult whose ``minl`` is the prediction at the final
            clipped input and whose ``minl_conf`` is that clipped input.
        """
        batch_size = len(X_start)
        assert batch_size == self.batch_size, \
            "len(X_start) must equal the batch_size given at construction"
        if X_min is None:
            X_min = np.full(self.n_input, -np.inf)
        if X_max is None:
            X_max = np.full(self.n_input, np.inf)

        with self.graph.as_default(), \
                self.session.as_default():  # pylint: disable=not-context-manager
            sess = self.session
            # Layer index 1 is the Dropout layer, which has no weights.
            w1, b1 = self.model.get_layer(index=0).get_weights()
            w2, b2 = self.model.get_layer(index=2).get_weights()
            w3, b3 = self.model.get_layer(index=3).get_weights()

            if explore:
                w1 = self._add_noise(w1)
                b1 = self._add_noise(b1)
                w2 = self._add_noise(w2)
                b2 = self._add_noise(b2)
                w3 = self._add_noise(w3)
                b3 = self._add_noise(b3)

            if self.debug:
                y_predict = self.predict(X_start)
                LOG.info("Recommend phase, y prediction: min %f, max %f, mean %f",
                         np.min(y_predict), np.max(y_predict), np.mean(y_predict))

            # Reset the search state only; the trained model weights are kept.
            sess.run(self.ops['init_'])
            sess.run(self.ops['assign_x_'], feed_dict={self.vars['x_start_']: X_start})

            feed = {
                self.vars['w1_']: w1, self.vars['w2_']: w2, self.vars['w3_']: w3,
                self.vars['b1_']: b1, self.vars['b2_']: b2, self.vars['b3_']: b3,
                self.vars['X_max_']: X_max, self.vars['X_min_']: X_min,
            }

            if self.debug:
                y_before = sess.run(self.vars['y_'], feed_dict=feed)
                LOG.info("Recommend phase, y before gradient descent: min %f, max %f, mean %f",
                         np.min(y_before), np.max(y_before), np.mean(y_before))

            for i in range(recommend_epochs):
                sess.run(self.ops['train_'], feed_dict=feed)
                if self.debug and i % self.debug_interval == 0:
                    y_train = sess.run(self.vars['y_'], feed_dict=feed)
                    LOG.info("Recommend phase, epoch %d, y: min %f, max %f, mean %f",
                             i, np.min(y_train), np.max(y_train), np.mean(y_train))

            # Fetch both in one run so the value and the configuration agree.
            y_recommend, X_recommend = sess.run(
                [self.vars['y_'], self.vars['x_bounded_']], feed_dict=feed)
            res = NeuralNetResult(minl=y_recommend, minl_conf=X_recommend)

            if self.debug:
                LOG.info("Recommend phase, epoch %d, y after gradient descent: "
                         "min %f, max %f, mean %f", recommend_epochs, np.min(y_recommend),
                         np.max(y_recommend), np.mean(y_recommend))

        self.recommend_iters += 1
        return res