From e86fce59d9f18548d2535782e81d61b38ea992c3 Mon Sep 17 00:00:00 2001
From: yangdsh
Date: Fri, 1 Nov 2019 19:04:42 +0000
Subject: [PATCH] import Dana's new gpr model

---
 server/analysis/gpr/gpr_models.py | 271 ++++++++++++++++++++++++++++++
 server/analysis/gpr/gprc.py       |  48 ++++++
 server/analysis/gpr/optimize.py   |  64 +++++++
 server/analysis/gpr/ucb.py        |  40 +++++
 server/analysis/simulation.py     | 161 +++++++++++++++---
 server/website/requirements.txt   |   1 +
 6 files changed, 557 insertions(+), 28 deletions(-)
 create mode 100644 server/analysis/gpr/gpr_models.py
 create mode 100644 server/analysis/gpr/gprc.py
 create mode 100644 server/analysis/gpr/optimize.py
 create mode 100644 server/analysis/gpr/ucb.py

diff --git a/server/analysis/gpr/gpr_models.py b/server/analysis/gpr/gpr_models.py
new file mode 100644
index 0000000..0fa3b77
--- /dev/null
+++ b/server/analysis/gpr/gpr_models.py
@@ -0,0 +1,271 @@
+#
+# OtterTune - analysis/gpr_models.py
+#
+# Copyright (c) 2017-18, Carnegie Mellon University Database Group
+#
+# Author: Dana Van Aken
+
+import copy
+import json
+import os
+
+import gpflow
+import numpy as np
+import tensorflow as tf
+
+from .gprc import GPRC
+
+
+class BaseModel(object):
+
+    # Min/max bounds for the kernel lengthscales
+    _LENGTHSCALE_BOUNDS = (0.1, 10.)
+
+    # Keys for each kernel's hyperparameters
+    _KERNEL_HP_KEYS = []
+
+    # The key for the likelihood parameter
+    _LIKELIHOOD_HP_KEY = 'GPRC/likelihood/variance'
+
+    def __init__(self, X, y, hyperparameters=None, optimize_hyperparameters=False,
+                 learning_rate=0.001, maxiter=5000, **kwargs):
+        # Store model kwargs
+        self._model_kwargs = {
+            'hyperparameters': hyperparameters,
+            'optimize_hyperparameters': optimize_hyperparameters,
+            'learning_rate': learning_rate,
+            'maxiter': maxiter,
+        }
+
+        # Store kernel kwargs
+        kernel_kwargs = self._get_kernel_kwargs(X_dim=X.shape[1], **kwargs)
+        if hyperparameters is not None:
+            self._assign_kernel_hyperparams(hyperparameters, kernel_kwargs)
+        self._kernel_kwargs = copy.deepcopy(kernel_kwargs)
+
+        # Build the kernels and the model
+        with gpflow.defer_build():
+            k = self._build_kernel(kernel_kwargs,
+                                   optimize_hyperparameters=optimize_hyperparameters,
+                                   **kwargs)
+            m = GPRC(X, y, kern=k)
+            if hyperparameters is not None and self._LIKELIHOOD_HP_KEY in hyperparameters:
+                m.likelihood.variance = hyperparameters[self._LIKELIHOOD_HP_KEY]
+        m.compile()
+
+        # If enabled, optimize the hyperparameters
+        if optimize_hyperparameters:
+            opt = gpflow.train.AdamOptimizer(learning_rate)
+            opt.minimize(m, maxiter=maxiter)
+        self._model = m
+
+    def _get_kernel_kwargs(self, **kwargs):
+        return []
+
+    def _build_kernel(self, kernel_kwargs, **kwargs):
+        return None
+
+    def get_hyperparameters(self):
+        return {k: float(v) if v.ndim == 0 else v.tolist()
+                for k, v in self._model.read_values().items()}
+
+    def get_model_parameters(self):
+        return {
+            'model_params': copy.deepcopy(self._model_kwargs),
+            'kernel_params': copy.deepcopy(self._kernel_kwargs)
+        }
+
+    def _assign_kernel_hyperparams(self, hyperparams, kernel_kwargs):
+        for i, kernel_keys in enumerate(self._KERNEL_HP_KEYS):
+            for key in kernel_keys:
+                if key in hyperparams:
+                    argname = key.rsplit('/', 1)[-1]
+                    kernel_kwargs[i][argname] = hyperparams[key]
+
+    @staticmethod
+    def load_hyperparameters(path, hp_idx=0):
+        with open(path, 'r') as f:
+            hyperparams = json.load(f)['hyperparameters']
+        if isinstance(hyperparams, list):
+            assert hp_idx >= 0, 'hp_idx: {} (expected >= 0)'.format(hp_idx)
+            if hp_idx >= len(hyperparams):
+                hp_idx = -1
+            hyperparams = hyperparams[hp_idx]
+        return hyperparams
+
+
+class BasicGP(BaseModel):
+
+    _KERNEL_HP_KEYS = [
+        [
+            'GPRC/kern/kernels/0/variance',
+            'GPRC/kern/kernels/0/lengthscales',
+        ],
+        [
+            'GPRC/kern/kernels/1/variance',
+        ],
+    ]
+
+    def _get_kernel_kwargs(self, **kwargs):
+        X_dim = kwargs.pop('X_dim')
+        return [
+            {
+                'input_dim': X_dim,
+                'ARD': True
+            },
+            {
+                'input_dim': X_dim,
+            },
+        ]
+
+    def _build_kernel(self, kernel_kwargs, **kwargs):
+        k0 = gpflow.kernels.Exponential(**kernel_kwargs[0])
+        k1 = gpflow.kernels.White(**kernel_kwargs[1])
+        if kwargs.pop('optimize_hyperparameters'):
+            k0.lengthscales.transform = gpflow.transforms.Logistic(
+                *self._LENGTHSCALE_BOUNDS)
+        k = k0 + k1
+        return k
+
+
+class ContextualGP(BaseModel):
+
+    _KERNEL_HP_KEYS = [
+        [
+            'GPRC/kern/kernels/0/kernels/0/variance',
+            'GPRC/kern/kernels/0/kernels/0/lengthscales',
+        ],
+        [
+            'GPRC/kern/kernels/0/kernels/1/variance',
+            'GPRC/kern/kernels/0/kernels/1/lengthscales',
+        ],
+        [
+            'GPRC/kern/kernels/1/variance',
+        ]
+    ]
+
+    def _get_kernel_kwargs(self, **kwargs):
+        k0_active_dims = kwargs.pop('k0_active_dims')
+        k1_active_dims = kwargs.pop('k1_active_dims')
+        return [
+            {
+                'input_dim': len(k0_active_dims),
+                'active_dims': k0_active_dims,
+                'ARD': True,
+            },
+            {
+                'input_dim': len(k1_active_dims),
+                'active_dims': k1_active_dims,
+                'ARD': True,
+            },
+            {
+                'input_dim': kwargs.pop('X_dim'),
+            }
+        ]
+
+    def _build_kernel(self, kernel_kwargs, **kwargs):
+        k0 = gpflow.kernels.Exponential(**kernel_kwargs[0])
+        k1 = gpflow.kernels.Exponential(**kernel_kwargs[1])
+        k2 = gpflow.kernels.White(**kernel_kwargs[2])
+        if kwargs['optimize_hyperparameters']:
+            k0.lengthscales.transform = gpflow.transforms.Logistic(
+                *self._LENGTHSCALE_BOUNDS)
+            k1.lengthscales.transform = gpflow.transforms.Logistic(
+                *self._LENGTHSCALE_BOUNDS)
+        k = k0 * k1 + k2
+        return k
+
+
+class ContextualGP_Alt0(ContextualGP):
+
+    def __init__(self, X, y, hyperparameters=None, optimize_hyperparameters=False,
+                 learning_rate=0.001, maxiter=5000, **kwargs):
+        self._context_lengthscale_const = kwargs.pop('context_lengthscale_const', 9.0)
+        super(ContextualGP_Alt0, self).__init__(
+            X, y, hyperparameters=hyperparameters,
+            optimize_hyperparameters=optimize_hyperparameters,
+            learning_rate=learning_rate, maxiter=maxiter, **kwargs)
+
+    def _build_kernel(self, kernel_kwargs, **kwargs):
+        kernel_kwargs[1]['lengthscales'] = np.ones((kernel_kwargs[1]['input_dim'],)) * \
+            self._context_lengthscale_const
+        k0 = gpflow.kernels.Exponential(**kernel_kwargs[0])
+        k1 = gpflow.kernels.Exponential(**kernel_kwargs[1])
+        k1.lengthscales.trainable = False
+        k2 = gpflow.kernels.White(**kernel_kwargs[2])
+        if kwargs['optimize_hyperparameters']:
+            k0.lengthscales.transform = gpflow.transforms.Logistic(
+                *self._LENGTHSCALE_BOUNDS)
+        k = k0 * k1 + k2
+        return k
+
+
+class ContextualGP_Alt1(ContextualGP):
+
+    def __init__(self, X, y, hyperparameters=None, optimize_hyperparameters=False,
+                 learning_rate=0.001, maxiter=5000, **kwargs):
+        self._hyperparams_path = kwargs.pop('hyperparameters_path')
+        self._hyperparams_idx = kwargs.pop('hyperparameters_idx', 0)
+        self._context_only = kwargs.pop('context_only', True)
+        super(ContextualGP_Alt1, self).__init__(
+            X, y, hyperparameters=hyperparameters,
+            optimize_hyperparameters=optimize_hyperparameters,
+            learning_rate=learning_rate, maxiter=maxiter, **kwargs)
+
+    def _build_kernel(self, kernel_kwargs, **kwargs):
+        hyperparams = self.load_hyperparameters(self._hyperparams_path,
+                                                self._hyperparams_idx)
+        if not self._context_only:
+            kernel_kwargs[0]['lengthscales'] = np.array(
+                hyperparams['GPRC/kern/kernels/0/kernels/0/lengthscales'])
+        kernel_kwargs[1]['lengthscales'] = np.array(
+            hyperparams['GPRC/kern/kernels/0/kernels/1/lengthscales'])
+
+        k0 = gpflow.kernels.Exponential(**kernel_kwargs[0])
+        k1 = gpflow.kernels.Exponential(**kernel_kwargs[1])
+        k2 = gpflow.kernels.White(**kernel_kwargs[2])
+
+        if not self._context_only:
+            k0.lengthscales.trainable = False
+        k1.lengthscales.trainable = False
+
+        if self._context_only and kwargs['optimize_hyperparameters']:
+            k0.lengthscales.transform = gpflow.transforms.Logistic(
+                *self._LENGTHSCALE_BOUNDS)
+        k = k0 * k1 + k2
+        return k
+
+
+class AdditiveContextualGP(ContextualGP):
+
+    def _build_kernel(self, kernel_kwargs, **kwargs):
+        k0 = gpflow.kernels.Exponential(**kernel_kwargs[0])
+        k1 = gpflow.kernels.Exponential(**kernel_kwargs[1])
+        k2 = gpflow.kernels.White(**kernel_kwargs[2])
+        if kwargs['optimize_hyperparameters']:
+            k0.lengthscales.transform = gpflow.transforms.Logistic(
+                *self._LENGTHSCALE_BOUNDS)
+            k1.lengthscales.transform = gpflow.transforms.Logistic(
+                *self._LENGTHSCALE_BOUNDS)
+        k = k0 + k1 + k2
+        return k
+
+
+_MODEL_MAP = {
+    'BasicGP': BasicGP,
+    'ContextualGP': ContextualGP,
+    'ContextualGP_Alt0': ContextualGP_Alt0,
+    'ContextualGP_Alt1': ContextualGP_Alt1,
+    'AdditiveContextualGP': AdditiveContextualGP,
+}
+
+
+def create_model(model_name, **kwargs):
+    # Update tensorflow session settings to enable GPU sharing
+    gpflow.settings.session.update(gpu_options=tf.GPUOptions(allow_growth=True))
+    check_valid(model_name)
+    return _MODEL_MAP[model_name](**kwargs)
+
+
+def check_valid(model_name):
+    if model_name not in _MODEL_MAP:
+        raise ValueError('Invalid GPR model name: {}'.format(model_name))
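A minimal usage sketch for the factory above (the arrays, sizes, and keyword values here are illustrative only; create_model and the accessor methods are the ones defined in gpr_models.py):

    import numpy as np
    from analysis.gpr import gpr_models

    X = np.random.rand(20, 5)   # 20 observed configurations, 5 knobs
    y = np.random.rand(20, 1)   # one target metric per row (2-D, as gpflow expects)

    # 'BasicGP' needs no extra kernel kwargs; BaseModel derives X_dim from X.shape[1]
    m = gpr_models.create_model('BasicGP', X=X, y=y,
                                optimize_hyperparameters=True,
                                learning_rate=0.001, maxiter=500)

    # Learned values can be replayed into a later model to skip re-optimization
    hyperparams = m.get_hyperparameters()
    m2 = gpr_models.create_model('BasicGP', X=X, y=y, hyperparameters=hyperparams)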
diff --git a/server/analysis/gpr/gprc.py b/server/analysis/gpr/gprc.py
new file mode 100644
index 0000000..341650e
--- /dev/null
+++ b/server/analysis/gpr/gprc.py
@@ -0,0 +1,48 @@
+#
+# OtterTune - analysis/gprc.py
+#
+# Copyright (c) 2017-18, Carnegie Mellon University Database Group
+#
+# Author: Dana Van Aken
+
+from __future__ import absolute_import
+
+import tensorflow as tf
+from gpflow import settings
+from gpflow.decors import autoflow, name_scope, params_as_tensors
+from gpflow.models import GPR
+
+
+class GPRC(GPR):
+
+    def __init__(self, X, Y, kern, mean_function=None, **kwargs):
+        super(GPRC, self).__init__(X, Y, kern, mean_function, **kwargs)
+        self.cholesky = None
+        self.alpha = None
+
+    @autoflow()
+    def _compute_cache(self):
+        K = self.kern.K(self.X) + tf.eye(tf.shape(self.X)[0], dtype=settings.float_type) * self.likelihood.variance
+        L = tf.cholesky(K, name='gp_cholesky')
+        V = tf.matrix_triangular_solve(L, self.Y - self.mean_function(self.X), name='gp_alpha')
+        return L, V
+
+    def update_cache(self):
+        self.cholesky, self.alpha = self._compute_cache()
+
+    @name_scope('predict')
+    @params_as_tensors
+    def _build_predict(self, Xnew, full_cov=False):
+        if self.cholesky is None:
+            self.update_cache()
+        Kx = self.kern.K(self.X, Xnew)
+        A = tf.matrix_triangular_solve(self.cholesky, Kx, lower=True)
+        fmean = tf.matmul(A, self.alpha, transpose_a=True) + self.mean_function(Xnew)
+        if full_cov:
+            fvar = self.kern.K(Xnew) - tf.matmul(A, A, transpose_a=True)
+            shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
+            fvar = tf.tile(tf.expand_dims(fvar, 2), shape)
+        else:
+            fvar = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
+            fvar = tf.tile(tf.reshape(fvar, (-1, 1)), [1, tf.shape(self.Y)[1]])
+        return fmean, fvar
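GPRC's only change over gpflow's stock GPR is that it caches the Cholesky factor of K + sigma^2*I (and the corresponding alpha term), so repeated predictions reuse one O(n^3) factorization. A rough sketch of the call pattern, with made-up shapes:

    import gpflow
    import numpy as np
    from analysis.gpr.gprc import GPRC

    X, Y = np.random.rand(50, 4), np.random.rand(50, 1)
    m = GPRC(X, Y, kern=gpflow.kernels.Exponential(4))
    m.update_cache()                                  # factor the kernel matrix once
    mean, var = m.predict_f(np.random.rand(10, 4))    # reuses cholesky/alpha

Note that the cache is not invalidated automatically; after changing the data or the kernel hyperparameters, update_cache() must be called again.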
diff --git a/server/analysis/gpr/optimize.py b/server/analysis/gpr/optimize.py
new file mode 100644
index 0000000..e8bc3c1
--- /dev/null
+++ b/server/analysis/gpr/optimize.py
@@ -0,0 +1,64 @@
+#
+# OtterTune - analysis/optimize.py
+#
+# Copyright (c) 2017-18, Carnegie Mellon University Database Group
+#
+# Author: Dana Van Aken
+
+import numpy as np
+import tensorflow as tf
+from gpflow import settings
+from sklearn.utils import assert_all_finite, check_array
+from sklearn.utils.validation import FLOAT_DTYPES
+
+from analysis.util import get_analysis_logger
+
+LOG = get_analysis_logger(__name__)
+
+
+def tf_optimize(model, Xnew_arr, learning_rate=0.01, maxiter=100, ucb_beta=3.,
+                active_dims=None, bounds=None):
+    Xnew_arr = check_array(Xnew_arr, copy=False, warn_on_dtype=True, dtype=FLOAT_DTYPES)
+
+    Xnew = tf.Variable(Xnew_arr, name='Xnew', dtype=settings.float_type)
+    if bounds is None:
+        lower_bound = tf.constant(-np.infty, dtype=settings.float_type)
+        upper_bound = tf.constant(np.infty, dtype=settings.float_type)
+    else:
+        lower_bound = tf.constant(bounds[0], dtype=settings.float_type)
+        upper_bound = tf.constant(bounds[1], dtype=settings.float_type)
+    Xnew_bounded = tf.minimum(tf.maximum(Xnew, lower_bound), upper_bound)
+
+    if active_dims:
+        indices = []
+        updates = []
+        n_rows = Xnew_arr.shape[0]
+        for c in active_dims:
+            for r in range(n_rows):
+                indices.append([r, c])
+                updates.append(Xnew_bounded[r, c])
+        part_X = tf.scatter_nd(indices, updates, Xnew_arr.shape)
+        Xin = part_X + tf.stop_gradient(-part_X + Xnew_bounded)
+    else:
+        Xin = Xnew_bounded
+
+    beta_t = tf.constant(ucb_beta, name='ucb_beta', dtype=settings.float_type)
+    y_mean_var = model.likelihood.predict_mean_and_var(*model._build_predict(Xin))
+    loss = tf.subtract(y_mean_var[0], tf.multiply(beta_t, y_mean_var[1]), name='loss_fn')
+    opt = tf.train.AdamOptimizer(learning_rate)
+    train_op = opt.minimize(loss)
+    variables = opt.variables()
+    init_op = tf.variables_initializer([Xnew] + variables)
+    session = model.enquire_session(session=None)
+    with session.as_default():
+        session.run(init_op)
+        for i in range(maxiter):
+            session.run(train_op)
+        Xnew_value = session.run(Xnew_bounded)
+        y_mean_value, y_var_value = session.run(y_mean_var)
+        loss_value = session.run(loss)
+    assert_all_finite(Xnew_value)
+    assert_all_finite(y_mean_value)
+    assert_all_finite(y_var_value)
+    assert_all_finite(loss_value)
+    return Xnew_value, y_mean_value, y_var_value, loss_value
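tf_optimize treats each row of Xnew_arr as an independent starting point and runs Adam on the UCB objective mean - beta * variance, clamping the iterates to bounds. Since the models in this patch are trained on negated rewards, the row with the smallest final loss is the most promising configuration. A hedged sketch (sizes and values are illustrative):

    import numpy as np
    from analysis.gpr import gpr_models
    from analysis.gpr.optimize import tf_optimize

    X = np.random.rand(30, 5)
    y = -np.random.rand(30, 1)            # negated rewards, as in gpr_new below
    m = gpr_models.create_model('BasicGP', X=X, y=y)

    starts = np.random.rand(10, 5)        # 10 candidate starting points
    X_new, y_mean, y_var, loss = tf_optimize(
        m._model, starts, learning_rate=0.01, maxiter=100,
        ucb_beta=3.0, bounds=[np.zeros(5), np.ones(5)])
    best = X_new[np.argmin(loss.squeeze())]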
diff --git a/server/analysis/gpr/ucb.py b/server/analysis/gpr/ucb.py
new file mode 100644
index 0000000..c227faa
--- /dev/null
+++ b/server/analysis/gpr/ucb.py
@@ -0,0 +1,40 @@
+import numpy as np
+
+
+def get_beta_t(t, **kwargs):
+    assert t > 0.
+    return 2. * np.log(t / np.sqrt(np.log(2. * t)))
+
+
+def get_beta_td(t, ndim, bound=1.0, **kwargs):
+    assert t > 0.
+    assert ndim > 0.
+    assert bound > 0.
+    bt = 2. * np.log(float(ndim) * t**2 * np.pi**2 / (6. * bound))
+    return np.sqrt(bt) if bt > 0. else 0.
+
+
+_UCB_MAP = {
+    'get_beta_t': get_beta_t,
+    'get_beta_td': get_beta_td,
+}
+
+
+def get_ucb_beta(ucb_beta, **kwargs):
+    check_valid(ucb_beta)
+    if not isinstance(ucb_beta, float):
+        ucb_beta = _UCB_MAP[ucb_beta](**kwargs)
+    assert isinstance(ucb_beta, float), type(ucb_beta)
+    assert ucb_beta >= 0.0
+    return ucb_beta
+
+
+def check_valid(ucb_beta):
+    if isinstance(ucb_beta, float):
+        if ucb_beta < 0.0:
+            raise ValueError(("Invalid value for 'ucb_beta': {} "
+                              "(expected >= 0.0)").format(ucb_beta))
+    else:
+        if ucb_beta not in _UCB_MAP:
+            raise ValueError(("Invalid value for 'ucb_beta': {} "
+                              "(expected 'get_beta_t' or 'get_beta_td')").format(ucb_beta))
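get_ucb_beta either passes a fixed float straight through or looks up one of the two schedules by name, so a caller can switch between a constant and a time-dependent beta without changing the call site:

    from analysis.gpr import ucb

    ucb.get_ucb_beta(3.0)                            # constant -> 3.0
    ucb.get_ucb_beta('get_beta_td', t=10., ndim=24)  # scheduled; grows with t and ndim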
diff --git a/server/analysis/simulation.py b/server/analysis/simulation.py
index 90fc285..eeed454 100644
--- a/server/analysis/simulation.py
+++ b/server/analysis/simulation.py
@@ -16,11 +16,14 @@ except (ModuleNotFoundError, ImportError):
 import numpy as np
 import torch
 sys.path.append("../")
-from analysis.util import get_analysis_logger  # noqa
+from analysis.util import get_analysis_logger, TimerStruct  # noqa
 from analysis.ddpg.ddpg import DDPG  # noqa
 from analysis.ddpg.ou_process import OUProcess  # noqa
 from analysis.gp_tf import GPRGD  # noqa
 from analysis.nn_tf import NeuralNet  # noqa
+from analysis.gpr import gpr_models  # noqa
+from analysis.gpr import ucb  # noqa
+from analysis.gpr.optimize import tf_optimize  # noqa
 
 LOG = get_analysis_logger(__name__)
 
@@ -98,25 +101,31 @@ class Environment(object):
 
 def ddpg(env, config, n_loops=100):
     results = []
     x_axis = []
+    num_collections = config['num_collections']
     gamma = config['gamma']
-    tau = config['tau']
     a_lr = config['a_lr']
     c_lr = config['c_lr']
     n_epochs = config['n_epochs']
-    model_ddpg = DDPG(n_actions=env.knob_dim, n_states=env.metric_dim, gamma=gamma, tau=tau,
-                      clr=c_lr, alr=a_lr)
+    model_ddpg = DDPG(n_actions=env.knob_dim, n_states=env.metric_dim, gamma=gamma,
+                      clr=c_lr, alr=a_lr, shift=0.1)
     knob_data = np.random.rand(env.knob_dim)
     prev_metric_data = np.zeros(env.metric_dim)
-    for i in range(n_loops):
-        reward, metric_data = env.simulate(knob_data)
+
+    for i in range(num_collections):
+        action = np.random.rand(env.knob_dim)
+        reward, metric_data = env.simulate(action)
         if i > 0:
             model_ddpg.add_sample(prev_metric_data, prev_knob_data, prev_reward, metric_data)
         prev_metric_data = metric_data
         prev_knob_data = knob_data
        prev_reward = reward
-        if i == 0:
-            continue
+
+    for i in range(n_loops):
+        reward, metric_data = env.simulate(knob_data)
+        model_ddpg.add_sample(prev_metric_data, prev_knob_data, prev_reward, metric_data)
+        prev_metric_data = metric_data
+        prev_knob_data = knob_data
+        prev_reward = reward
         for _ in range(n_epochs):
             model_ddpg.update()
         results.append(reward)
@@ -144,11 +153,18 @@ def dnn(env, config, n_loops=100):
     results = []
     x_axis = []
     memory = ReplayMemory()
+    num_collections = config['num_collections']
     num_samples = config['num_samples']
-    ou_process = config['ou_process']
+    ou_process = False
     Xmin = np.zeros(env.knob_dim)
     Xmax = np.ones(env.knob_dim)
     noise = OUProcess(env.knob_dim)
+
+    for _ in range(num_collections):
+        action = np.random.rand(env.knob_dim)
+        reward, _ = env.simulate(action)
+        memory.push(action, reward)
+
     for i in range(n_loops):
         X_samples = np.random.rand(num_samples, env.knob_dim)
         if i >= 10:
@@ -165,9 +181,8 @@ def dnn(env, config, n_loops=100):
                          noise_scale_end=0.0,
                          debug=False,
                          debug_interval=100)
-        if i >= 5:
-            actions, rewards = memory.get_all()
-            model_nn.fit(np.array(actions), -np.array(rewards), fit_epochs=50)
+        actions, rewards = memory.get_all()
+        model_nn.fit(np.array(actions), -np.array(rewards), fit_epochs=50)
         res = model_nn.recommend(X_samples, Xmin, Xmax, recommend_epochs=10, explore=False)
         best_config_idx = np.argmin(res.minl.ravel())
         best_config = res.minl_conf[best_config_idx, :]
@@ -182,7 +197,7 @@
     return np.array(results), np.array(x_axis)
 
 
-def gprgd(env, config, n_loops=100):
+def gpr(env, config, n_loops=100):
     results = []
     x_axis = []
     memory = ReplayMemory()
@@ -194,6 +209,7 @@
         action = np.random.rand(env.knob_dim)
         reward, _ = env.simulate(action)
         memory.push(action, reward)
+
     for i in range(n_loops):
         X_samples = np.random.rand(num_samples, env.knob_dim)
         if i >= 10:
@@ -206,13 +222,13 @@
                 X_samples = np.vstack((X_samples, np.array(entry[0]) * 0.97 + 0.01))
 
         model = GPRGD(length_scale=1.0,
                       magnitude=1.0,
-                      max_train_size=100,
+                      max_train_size=2000,
                       batch_size=100,
                       num_threads=4,
                       learning_rate=0.01,
                       epsilon=1e-6,
                       max_iter=500,
-                      sigma_multiplier=30.0,
+                      sigma_multiplier=3.0,
                       mu_multiplier=1.0)
 
         actions, rewards = memory.get_all()
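With these hunks, ddpg and dnn adopt the same random bootstrap phase that gpr already uses: evaluate num_collections random configurations and push them into the replay memory before the tuning loop starts. In isolation the pattern is just:

    memory = ReplayMemory()
    for _ in range(config['num_collections']):
        action = np.random.rand(env.knob_dim)
        reward, _ = env.simulate(action)
        memory.push(action, reward)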
@@ -224,7 +240,97 @@
         memory.push(best_config, reward)
         LOG.info('loop: %d reward: %f', i, reward[0])
         results.append(reward)
-        x_axis.append(i)
+        x_axis.append(i + 1)
     return np.array(results), np.array(x_axis)
 
 
+def run_optimize(X, y, X_sample, model_name, opt_kwargs, model_kwargs):
+    timer = TimerStruct()
+
+    # Create the model (this also optimizes the hyperparameters if that option is enabled)
+    timer.start()
+    m = gpr_models.create_model(model_name, X=X, y=y, **model_kwargs)
+    timer.stop()
+    model_creation_sec = timer.elapsed_seconds
+    LOG.info(m._model.as_pandas_table())
+
+    # Optimize the DBMS's configuration knobs
+    timer.start()
+    X_new, ypred, yvar, loss = tf_optimize(m._model, X_sample, **opt_kwargs)
+    timer.stop()
+    config_optimize_sec = timer.elapsed_seconds
+
+    return X_new, ypred, m.get_model_parameters(), m.get_hyperparameters()
+
+
+def gpr_new(env, config, n_loops=100):
+    model_name = 'BasicGP'
+    model_opt_frequency = 5
+    model_kwargs = {}
+    model_kwargs['learning_rate'] = 0.001
+    model_kwargs['maxiter'] = 5000
+    opt_kwargs = {}
+    opt_kwargs['learning_rate'] = 0.001
+    opt_kwargs['maxiter'] = 100
+    opt_kwargs['ucb_beta'] = 3.0
+
+    results = []
+    x_axis = []
+    memory = ReplayMemory()
+    num_samples = config['num_samples']
+    num_collections = config['num_collections']
+    X_min = np.zeros(env.knob_dim)
+    X_max = np.ones(env.knob_dim)
+    X_bounds = [X_min, X_max]
+    opt_kwargs['bounds'] = X_bounds
+
+    for _ in range(num_collections):
+        action = np.random.rand(env.knob_dim)
+        reward, _ = env.simulate(action)
+        memory.push(action, reward)
+
+    for i in range(n_loops):
+        X_samples = np.random.rand(num_samples, env.knob_dim)
+        if i >= 5:
+            actions, rewards = memory.get_all()
+            tuples = tuple(zip(actions, rewards))
+            top10 = heapq.nlargest(10, tuples, key=lambda e: e[1])
+            for entry in top10:
+                # TensorFlow breaks if we use the training data points themselves
+                # as starting points for GPRGD, so perturb them slightly.
+                X_samples = np.vstack((X_samples, np.array(entry[0]) * 0.97 + 0.01))
+
+        actions, rewards = memory.get_all()
+
+        ucb_beta = opt_kwargs.pop('ucb_beta')
+        opt_kwargs['ucb_beta'] = ucb.get_ucb_beta(ucb_beta, t=i + 1., ndim=env.knob_dim)
+        if model_opt_frequency > 0:
+            optimize_hyperparams = i % model_opt_frequency == 0
+            if not optimize_hyperparams:
+                model_kwargs['hyperparameters'] = hyperparameters
+        else:
+            optimize_hyperparams = False
+            model_kwargs['hyperparameters'] = None
+        model_kwargs['optimize_hyperparameters'] = optimize_hyperparams
+
+        X_new, ypred, model_params, hyperparameters = run_optimize(np.array(actions),
+                                                                   -np.array(rewards),
+                                                                   X_samples,
+                                                                   model_name,
+                                                                   opt_kwargs,
+                                                                   model_kwargs)
+
+        sort_index = np.argsort(ypred.squeeze())
+        X_new = X_new[sort_index]
+        ypred = ypred[sort_index].squeeze()
+
+        action = X_new[0]
+        reward, _ = env.simulate(action)
+        memory.push(action, reward)
+        LOG.info('loop: %d reward: %f', i, reward[0])
+        results.append(reward)
+        x_axis.append(i + 1)
+    return np.array(results), np.array(x_axis)
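gpr_new takes the same config dict as the other tuners, so a direct invocation consistent with main() below would be:

    env = Environment(knob_dim=24, metric_dim=60, modes=[2], reward_variance=0.05)
    results, x_axis = gpr_new(env, {'num_samples': 30, 'num_collections': 50}, n_loops=80)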
@@ -233,7 +339,7 @@ def plotlines(xs, results, labels, title, path):
     figsize = 13, 10
     figure, ax = plt.subplots(figsize=figsize)
     lines = []
-    N = 20
+    N = 1
     weights = np.ones(N)
     for x_axis, result, label in zip(xs, results, labels):
-        result = np.convolve(weights/weights.sum(), result.flatten())[N-1:-N+1]
+        result = np.convolve(weights / weights.sum(), result.flatten(), mode='valid')
@@ -279,17 +385,16 @@ def run(tuners, configs, labels, title, env, n_loops, n_repeats):
 
 
 def main():
-    env = Environment(knob_dim=192, metric_dim=60, modes=[0, 1], reward_variance=0.05)
-    n_loops = 2000
-    configs = [{'gamma': 0, 'tau': 0.002, 'a_lr': 0.01, 'c_lr': 0.01, 'n_epochs': 1},
-               {'gamma': 0, 'tau': 0.002, 'a_lr': 0.01, 'c_lr': 0.001, 'n_epochs': 1},
-               {'gamma': 0., 'tau': 0.002, 'a_lr': 0.001, 'c_lr': 0.001, 'n_epochs': 1},
-               # {'num_samples': 100, 'ou_process': False},
-               ]
-    tuners = [ddpg, ddpg, ddpg]
-    labels = ['1', '2', '3']
-    title = 'varing_workloads'
-    n_repeats = [3, 3, 3]
+    env = Environment(knob_dim=24, metric_dim=60, modes=[2], reward_variance=0.05)
+    title = 'compare'
+    n_repeats = [1, 1, 1, 1]
+    n_loops = 80
+    configs = [{'gamma': 0., 'c_lr': 0.001, 'a_lr': 0.01, 'num_collections': 50, 'n_epochs': 50},
+               {'num_samples': 30, 'num_collections': 50},
+               {'num_samples': 30, 'num_collections': 50},
+               {'num_samples': 30, 'num_collections': 50}]
+    tuners = [ddpg, gpr_new, dnn, gpr]
+    labels = [tuner.__name__ for tuner in tuners]
     run(tuners, configs, labels, title, env, n_loops, n_repeats)
diff --git a/server/website/requirements.txt b/server/website/requirements.txt
index 6593ae7..000875d 100644
--- a/server/website/requirements.txt
+++ b/server/website/requirements.txt
@@ -8,6 +8,7 @@ django-request-logging==0.4.6
 mock==2.0.0
 Fabric3>=1.13.1.post1
 git-lint==0.1.2
+gpflow==1.5.0
 hurry.filesize>=0.9
 numpy==1.14.0
 requests==2.20.0
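One note on the plotlines change: with N = 1 the old smoothing slice [N-1:-N+1] would evaluate to [0:0] and return an empty array, which is why the hunk above switches np.convolve to mode='valid'. That mode yields the same L - N + 1 moving-average points as the slice does for any N > 1 and degenerates correctly for N = 1, e.g.:

    import numpy as np
    w = np.ones(3) / 3.
    np.convolve(w, np.arange(6.), mode='valid')   # -> array([1., 2., 3., 4.])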