# ottertune/server/analysis/simulation.py

#
# OtterTune - simulation.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#


import heapq
import random
import os
import sys
try:
    import matplotlib.pyplot as plt
except (ModuleNotFoundError, ImportError):
    plt = None
import numpy as np
import torch
sys.path.append("../")
from analysis.util import get_analysis_logger, TimerStruct  # noqa
from analysis.ddpg.ddpg import DDPG  # noqa
from analysis.ddpg.ou_process import OUProcess  # noqa
from analysis.gp_tf import GPRGD  # noqa
from analysis.nn_tf import NeuralNet  # noqa
from analysis.gpr import gpr_models  # noqa
from analysis.gpr import ucb  # noqa
from analysis.gpr.optimize import tf_optimize  # noqa

# Module-level logger, obtained from the analysis package's logging helper.
LOG = get_analysis_logger(__name__)


class Environment(object):
    """Synthetic tuning environment that maps a knob vector to a reward.

    Each "mode" selects one of three reward functions (see ``simulate_mode``).
    Every simulation also returns a metric vector whose values are centred on
    the mode id, so the metrics identify which mode produced them.
    """

    def __init__(self, knob_dim, metric_dim, modes=None, reward_variance=0,
                 metrics_variance=0.2):
        """Create an environment.

        Args:
            knob_dim: number of knobs in an action vector.
            metric_dim: length of the metric vector returned per simulation.
            modes: sequence of mode ids to sample from; defaults to ``[0]``.
            reward_variance: scale of the multiplicative uniform noise applied
                to rewards (0 disables it).
            metrics_variance: scale of the additive uniform noise on metrics.
        """
        self.knob_dim = knob_dim
        self.metric_dim = metric_dim
        # A fresh list per instance: a literal ``[0]`` default would be one
        # shared object mutated across all instances.
        self.modes = [0] if modes is None else modes
        self.mode = np.random.choice(self.modes)
        self.counter = 0
        self.reward_variance = reward_variance
        self.metrics_variance = metrics_variance

    def identity_sqrt(self, knob_data):
        """Reward = (sum of 1st quarter + sum of sqrt of 2nd quarter) / (knob_dim // 2).

        Returns a 1-element array.
        """
        n1 = self.knob_dim // 4
        n2 = self.knob_dim // 4
        part1 = np.sum(knob_data[0: n1])
        part2 = np.sum(np.sqrt(knob_data[n1: n1 + n2]))
        reward = np.array([part1 + part2]) / (self.knob_dim // 2)
        return reward

    def threshold(self, knob_data):
        """Reward counts 1st-quarter knobs above 0.9 and 2nd-quarter knobs below 0.1.

        The count is divided by ``knob_dim // 2``; returns a 1-element array.
        """
        n1 = self.knob_dim // 4
        n2 = self.knob_dim // 4
        part1 = np.sum(knob_data[0: n1] > 0.9)
        part2 = np.sum(knob_data[n1: n1 + n2] < 0.1)
        reward = np.array([part1 + part2]) / (self.knob_dim // 2)
        return reward

    def borehole(self, knob_data):
        """Evaluate the borehole function on exactly eight knob values.

        Each knob is linearly rescaled from its normalized value to the range
        used below before the formula is applied. Returns a 1-element array.
        """
        # ref: http://www.sfu.ca/~ssurjano/borehole.html
        # pylint: disable=invalid-name
        rw = knob_data[0] * (0.15 - 0.05) + 0.05
        r = knob_data[1] * (50000 - 100) + 100
        Tu = knob_data[2] * (115600 - 63070) + 63070
        Hu = knob_data[3] * (1110 - 990) + 990
        Tl = knob_data[4] * (116 - 63.1) + 63.1
        Hl = knob_data[5] * (820 - 700) + 700
        L = knob_data[6] * (1680 - 1120) + 1120
        Kw = knob_data[7] * (12045 - 9855) + 9855

        frac = 2 * L * Tu / (np.log(r / rw) * rw ** 2 * Kw)
        # NOTE(review): the trailing / 310 looks like a normalization constant -- confirm.
        reward = 2 * np.pi * Tu * (Hu - Hl) / (np.log(r / rw) * (1 + frac + Tu / Tl)) / 310
        return np.array([reward])

    def get_metrics(self, mode):
        """Return ``metric_dim`` values equal to ``mode`` plus uniform noise."""
        metrics = np.ones(self.metric_dim) * mode
        metrics += np.random.rand(self.metric_dim) * self.metrics_variance
        return metrics

    def simulate_mode(self, knob_data, mode):
        """Compute the (noisy) reward and metrics for ``knob_data`` under ``mode``.

        Mode 0 uses ``identity_sqrt``, mode 1 uses ``threshold`` and mode 2
        averages ``borehole`` over consecutive groups of eight knobs (so the
        knob vector length must be a multiple of eight for mode 2).

        Returns:
            tuple ``(reward, metrics)`` where ``reward`` is a 1-element array.

        Raises:
            ValueError: if ``mode`` is not 0, 1 or 2.
        """
        if mode == 0:
            reward = self.identity_sqrt(knob_data)
        elif mode == 1:
            reward = self.threshold(knob_data)
        elif mode == 2:
            reward = np.zeros(1)
            for i in range(0, len(knob_data), 8):
                reward += self.borehole(knob_data[i: i+8])[0] / len(knob_data) * 8
        else:
            # Previously fell through and failed with an UnboundLocalError.
            raise ValueError("Unsupported simulation mode: {}".format(mode))
        reward = reward * (1.0 + self.reward_variance * np.random.rand(1)[0])
        return reward, self.get_metrics(mode)

    def simulate(self, knob_data):
        """Run one simulation, re-sampling the mode every ``k`` calls."""
        self.counter += 1
        k = 1
        # every k runs, sample a new workload
        if self.counter >= k:
            self.counter = 0
            self.mode = np.random.choice(self.modes)
        return self.simulate_mode(knob_data, self.mode)


def ddpg(env, config, n_loops=100):
    """Tune ``env`` with a DDPG agent and record the reward of every loop.

    A warm-up phase first simulates ``config['num_collections']`` random
    actions to seed the replay buffer. The agent then runs for ``n_loops``
    iterations, training ``config['n_epochs']`` times per iteration.

    Each stored sample is ``(previous metrics, previous action, previous
    reward, current metrics)``.

    Args:
        env: environment exposing ``knob_dim``, ``metric_dim`` and ``simulate``.
        config: dict with keys ``num_collections``, ``gamma``, ``a_lr``,
            ``c_lr`` and ``n_epochs``.
        n_loops: number of tuning iterations.

    Returns:
        tuple ``(rewards, x_axis)`` of arrays; ``x_axis`` holds the 1-based
        loop numbers.
    """
    results = []
    x_axis = []
    num_collections = config['num_collections']
    gamma = config['gamma']
    a_lr = config['a_lr']
    c_lr = config['c_lr']
    n_epochs = config['n_epochs']
    model_ddpg = DDPG(n_actions=env.knob_dim, n_states=env.metric_dim, gamma=gamma,
                      clr=c_lr, alr=a_lr, shift=0.1)
    knob_data = np.random.rand(env.knob_dim)
    prev_metric_data = np.zeros(env.metric_dim)
    # None until at least one simulation has been run.
    prev_knob_data = None
    prev_reward = None

    for _ in range(num_collections):
        action = np.random.rand(env.knob_dim)
        reward, metric_data = env.simulate(action)
        if prev_knob_data is not None:
            model_ddpg.add_sample(prev_metric_data, prev_knob_data, prev_reward, metric_data)
        prev_metric_data = metric_data
        # Remember the action that was actually simulated; storing the
        # unrelated initial ``knob_data`` here paired rewards with a knob
        # vector that never produced them.
        prev_knob_data = action
        prev_reward = reward

    for i in range(n_loops):
        reward, metric_data = env.simulate(knob_data)
        # With no warm-up there is no previous transition on the first loop,
        # so there is nothing to store or train on yet.
        if prev_knob_data is not None:
            model_ddpg.add_sample(prev_metric_data, prev_knob_data, prev_reward, metric_data)
            for _ in range(n_epochs):
                model_ddpg.update()
        prev_metric_data = metric_data
        prev_knob_data = knob_data
        prev_reward = reward
        results.append(reward)
        x_axis.append(i+1)
        LOG.info('loop: %d reward: %f', i, reward[0])
        knob_data = model_ddpg.choose_action(metric_data)
    return np.array(results), np.array(x_axis)


class ReplayMemory(object):
    """Append-only store of the (action, reward) observations seen so far."""

    def __init__(self):
        # Parallel lists: rewards[k] is the reward recorded with actions[k].
        self.actions, self.rewards = [], []

    def push(self, action, reward):
        """Record one observation; both values are stored via ``tolist()``."""
        for store, value in ((self.actions, action), (self.rewards, reward)):
            store.append(value.tolist())

    def get_all(self):
        """Return the live ``(actions, rewards)`` lists (not copies)."""
        return (self.actions, self.rewards)


def dnn(env, config, n_loops=100):
    """Tune ``env`` with a neural-network surrogate that is rebuilt every loop.

    After ``config['num_collections']`` random warm-up simulations, each loop
    draws ``config['num_samples']`` random candidate configurations (plus,
    from the tenth loop on, the ten best configurations seen so far), fits a
    ``NeuralNet`` on the negated rewards, asks it for recommendations and
    simulates the one with the lowest predicted value.

    Returns:
        tuple ``(rewards, x_axis)`` of arrays; ``x_axis`` holds the 1-based
        loop numbers.
    """
    memory = ReplayMemory()
    warmup_runs = config['num_collections']
    n_candidates = config['num_samples']
    knob_dim = env.knob_dim
    add_ou_noise = False
    lower_bound = np.zeros(knob_dim)
    upper_bound = np.ones(knob_dim)
    ou_noise = OUProcess(knob_dim)

    # Warm-up: seed the memory with uniformly random configurations.
    for _ in range(warmup_runs):
        random_knobs = np.random.rand(knob_dim)
        observed, _ = env.simulate(random_knobs)
        memory.push(random_knobs, observed)

    rewards_log = []
    for loop_idx in range(n_loops):
        candidates = np.random.rand(n_candidates, knob_dim)
        past_actions, past_rewards = memory.get_all()
        if loop_idx >= 10:
            # Also start the search from the ten best configurations so far.
            elite = heapq.nlargest(10, zip(past_actions, past_rewards),
                                   key=lambda pair: pair[1])
            candidates = np.vstack([candidates] + [np.array(knobs) for knobs, _ in elite])
        surrogate = NeuralNet(n_input=candidates.shape[1],
                              batch_size=candidates.shape[0],
                              learning_rate=0.01,
                              explore_iters=100,
                              noise_scale_begin=0.1,
                              noise_scale_end=0.0,
                              debug=False,
                              debug_interval=100)
        # Rewards are negated because the lowest predicted value is selected below.
        surrogate.fit(np.array(past_actions), -np.array(past_rewards), fit_epochs=50)
        outcome = surrogate.recommend(candidates, lower_bound, upper_bound,
                                      recommend_epochs=10, explore=False)
        winner_idx = np.argmin(outcome.minl.ravel())
        winner = outcome.minl_conf[winner_idx, :]
        if add_ou_noise:
            winner += ou_noise.noise()
            winner = winner.clip(0, 1)
        observed, _ = env.simulate(winner)
        memory.push(winner, observed)
        LOG.info('loop: %d reward: %f', loop_idx, observed[0])
        rewards_log.append(observed)
    return np.array(rewards_log), np.array(list(range(1, n_loops + 1)))


def gpr(env, config, n_loops=100):
    """Tune ``env`` with the ``GPRGD`` Gaussian-process model, rebuilt every loop.

    After ``config['num_collections']`` random warm-up simulations, each loop
    draws ``config['num_samples']`` random starting points (plus, from the
    tenth loop on, slightly shifted copies of the ten best configurations seen
    so far), fits the model on the negated rewards and simulates the
    configuration with the lowest predicted value.

    Returns:
        tuple ``(rewards, x_axis)`` of arrays; ``x_axis`` holds the 1-based
        loop numbers.
    """
    memory = ReplayMemory()
    warmup_runs = config['num_collections']
    n_candidates = config['num_samples']
    knob_dim = env.knob_dim
    lower_bound = np.zeros(knob_dim)
    upper_bound = np.ones(knob_dim)
    model_settings = dict(length_scale=1.0,
                          magnitude=1.0,
                          max_train_size=2000,
                          batch_size=100,
                          num_threads=4,
                          learning_rate=0.01,
                          epsilon=1e-6,
                          max_iter=500,
                          sigma_multiplier=3.0,
                          mu_multiplier=1.0)

    # Warm-up: seed the memory with uniformly random configurations.
    for _ in range(warmup_runs):
        random_knobs = np.random.rand(knob_dim)
        observed, _ = env.simulate(random_knobs)
        memory.push(random_knobs, observed)

    rewards_log = []
    for loop_idx in range(n_loops):
        candidates = np.random.rand(n_candidates, knob_dim)
        past_actions, past_rewards = memory.get_all()
        if loop_idx >= 10:
            elite = heapq.nlargest(10, zip(past_actions, past_rewards),
                                   key=lambda pair: pair[1])
            # Tensorflow breaks if training data points are used verbatim as
            # starting points for GPRGD, so nudge them before adding them.
            nudged = [np.array(knobs) * 0.97 + 0.01 for knobs, _ in elite]
            candidates = np.vstack([candidates] + nudged)
        surrogate = GPRGD(**model_settings)

        # Rewards are negated because the lowest predicted value is selected below.
        surrogate.fit(np.array(past_actions), -np.array(past_rewards),
                      lower_bound, upper_bound, ridge=0.01)
        outcome = surrogate.predict(candidates)
        winner_idx = np.argmin(outcome.minl.ravel())
        winner = outcome.minl_conf[winner_idx, :]
        observed, _ = env.simulate(winner)
        memory.push(winner, observed)
        LOG.info('loop: %d reward: %f', loop_idx, observed[0])
        rewards_log.append(observed)
    return np.array(rewards_log), np.array(list(range(1, n_loops + 1)))


def run_optimize(X, y, X_sample, model_name, opt_kwargs, model_kwargs):
    """Build a GPR model on ``(X, y)`` and optimize configurations from ``X_sample``.

    Args:
        X: training inputs passed to ``gpr_models.create_model``.
        y: training targets passed to ``gpr_models.create_model``.
        X_sample: starting points handed to ``tf_optimize``.
        model_name: name of the model to create.
        opt_kwargs: keyword arguments forwarded to ``tf_optimize``.
        model_kwargs: keyword arguments forwarded to ``create_model``.

    Returns:
        tuple ``(X_new, ypred, model_parameters, hyperparameters)``: the first
        two come from ``tf_optimize``, the last two from the created model.
    """
    timer = TimerStruct()

    # Create model (this also optimizes the hyperparameters if that option is enabled)
    timer.start()
    m = gpr_models.create_model(model_name, X=X, y=y, **model_kwargs)
    timer.stop()
    model_creation_sec = timer.elapsed_seconds
    LOG.info(m._model.as_pandas_table())

    # Optimize the DBMS's configuration knobs; the third and fourth return
    # values of tf_optimize are not needed here.
    timer.start()
    X_new, ypred, _, _ = tf_optimize(m._model, X_sample, **opt_kwargs)
    timer.stop()
    config_optimize_sec = timer.elapsed_seconds

    # The timings were previously measured and then thrown away.
    LOG.debug('model creation: %s sec, config optimization: %s sec',
              model_creation_sec, config_optimize_sec)

    return X_new, ypred, m.get_model_parameters(), m.get_hyperparameters()


def gpr_new(env, config, n_loops=100):
    """Tune ``env`` with the ``BasicGP`` model from ``analysis.gpr``.

    After ``config['num_collections']`` random warm-up simulations, each loop
    draws ``config['num_samples']`` random starting points (plus, from the
    fifth loop on, slightly shifted copies of the ten best configurations seen
    so far), builds a model on the negated rewards via ``run_optimize`` and
    simulates the optimized point with the lowest prediction.

    Hyperparameters are re-optimized every ``model_opt_frequency`` loops; on
    the other loops the hyperparameters returned by the previous loop are
    passed back in.

    Returns:
        tuple ``(rewards, x_axis)`` of arrays; ``x_axis`` holds the 1-based
        loop numbers.
    """
    model_name = 'BasicGP'
    model_opt_frequency = 5
    model_kwargs = {}
    model_kwargs['model_learning_rate'] = 0.001
    model_kwargs['model_maxiter'] = 5000
    opt_kwargs = {}
    opt_kwargs['learning_rate'] = 0.001
    opt_kwargs['maxiter'] = 100
    # Keep the configured setting separate from the per-loop value derived
    # from it. Feeding each loop's result back in as the next loop's setting
    # meant the original setting was lost after the first loop.
    # NOTE(review): assumes get_ucb_beta returns a constant setting unchanged,
    # in which case behavior is identical for 3.0 -- confirm against analysis.gpr.ucb.
    base_ucb_beta = 3.0

    results = []
    x_axis = []
    memory = ReplayMemory()
    num_samples = config['num_samples']
    num_collections = config['num_collections']
    X_min = np.zeros(env.knob_dim)
    X_max = np.ones(env.knob_dim)
    X_bounds = [X_min, X_max]
    opt_kwargs['bounds'] = X_bounds

    for _ in range(num_collections):
        action = np.random.rand(env.knob_dim)
        reward, _ = env.simulate(action)
        memory.push(action, reward)

    # Set by the first loop (i == 0 always optimizes when the frequency is positive).
    hyperparameters = None
    for i in range(n_loops):
        X_samples = np.random.rand(num_samples, env.knob_dim)
        if i >= 5:
            actions, rewards = memory.get_all()
            tuples = tuple(zip(actions, rewards))
            top10 = heapq.nlargest(10, tuples, key=lambda e: e[1])
            for entry in top10:
                # Tensorflow get broken if we use the training data points as
                # starting points for GPRGD.
                X_samples = np.vstack((X_samples, np.array(entry[0]) * 0.97 + 0.01))

        actions, rewards = memory.get_all()

        opt_kwargs['ucb_beta'] = ucb.get_ucb_beta(base_ucb_beta, t=i + 1., ndim=env.knob_dim)
        if model_opt_frequency > 0:
            optimize_hyperparams = i % model_opt_frequency == 0
            if not optimize_hyperparams:
                model_kwargs['hyperparameters'] = hyperparameters
        else:
            optimize_hyperparams = False
            model_kwargs['hyperparameters'] = None
        model_kwargs['optimize_hyperparameters'] = optimize_hyperparams

        # Rewards are negated because the lowest prediction is selected below.
        X_new, ypred, _, hyperparameters = run_optimize(np.array(actions),
                                                        -np.array(rewards),
                                                        X_samples,
                                                        model_name,
                                                        opt_kwargs,
                                                        model_kwargs)

        # Order the optimized points by prediction and take the lowest one.
        sort_index = np.argsort(ypred.squeeze())
        X_new = X_new[sort_index]

        action = X_new[0]
        reward, _ = env.simulate(action)
        memory.push(action, reward)
        LOG.info('loop: %d reward: %f', i, reward[0])
        results.append(reward)
        x_axis.append(i+1)

    return np.array(results), np.array(x_axis)


def plotlines(xs, results, labels, title, path, window=1):
    """Plot one reward curve per tuner and save the figure to ``path``.

    Does nothing when matplotlib could not be imported.

    Args:
        xs: sequence of x-axis arrays, one per curve.
        results: sequence of reward arrays, one per curve (flattened here).
        labels: legend label for each curve.
        title: figure title.
        path: output file path passed to ``plt.savefig``.
        window: moving-average window length; 1 (the default) plots the raw
            rewards. Curves shorter than the window are plotted unsmoothed.
    """
    if not plt:
        return
    window = max(int(window), 1)
    kernel = np.ones(window) / window
    figure, ax = plt.subplots(figsize=(13, 10))
    lines = []
    for x_axis, result, label in zip(xs, results, labels):
        values = np.asarray(result).flatten()
        if values.size >= window:
            # 'valid' keeps only fully-overlapping windows. The previous
            # manual slice [N-1:-N+1] collapsed to [0:0] for N == 1 and so
            # plotted nothing at all.
            values = np.convolve(values, kernel, mode='valid')
        lines.append(plt.plot(x_axis[:values.size], values, label=label, lw=4)[0])
    plt.legend(handles=lines, fontsize=30)
    plt.title(title, fontsize=25)
    plt.xticks(fontsize=25)
    plt.yticks(fontsize=25)
    ax.set_xlabel("loops", fontsize=30)
    ax.set_ylabel("rewards", fontsize=30)
    plt.savefig(path)
    # Close rather than just clear, so repeated calls do not pile up figures.
    plt.close(figure)


def run(tuners, configs, labels, title, env, n_loops, n_repeats):
    """Run every tuner on ``env``, average its repeats and write the results.

    Rewards are averaged over ``n_repeats[j]`` runs of ``tuners[j]``. A PDF
    figure is written to ``simulation_figures/`` when matplotlib is available,
    and one CSV per tuner (``loop,reward`` rows) is always written to
    ``simulation_results/``.

    Args:
        tuners: callables with signature ``tuner(env, config, n_loops=...)``
            returning ``(rewards, x_axis)``.
        configs: per-tuner config dicts, parallel to ``tuners``.
        labels: per-tuner labels used in the legend and CSV file names.
        title: figure title and file-name prefix.
        env: environment passed to every tuner.
        n_loops: number of tuning loops per run.
        n_repeats: per-tuner repeat counts, parallel to ``tuners``.

    Raises:
        ValueError: if any repeat count is smaller than 1.
    """
    # A tuner that is never run would leave ``results`` shorter than
    # ``tuners`` and misalign every later index, so reject that up front.
    for j, _ in enumerate(tuners):
        if n_repeats[j] < 1:
            raise ValueError("n_repeats[{}] must be at least 1".format(j))

    if not plt:
        LOG.info("Cannot import matplotlib. Will write results to files instead of figures.")
    random.seed(0)
    np.random.seed(1)
    torch.manual_seed(0)
    results = []
    xs = []
    for j, tuner in enumerate(tuners):
        repeats = n_repeats[j]
        for i in range(repeats):
            result, x_axis = tuner(env, configs[j], n_loops=n_loops)
            # Value comparison; ``i is 0`` relied on int object identity.
            if i == 0:
                results.append(result / repeats)
                xs.append(x_axis)
            else:
                results[j] += result / repeats

    if plt:
        os.makedirs("simulation_figures", exist_ok=True)
        filename = "simulation_figures/{}.pdf".format(title)
        plotlines(xs, results, labels, title, filename)
    os.makedirs("simulation_results", exist_ok=True)
    for j, (x_axis, averaged) in enumerate(zip(xs, results)):
        with open("simulation_results/" + title + '_' + labels[j] + '.csv', 'w') as f:
            f.writelines(str(loop) + ',' + str(reward[0]) + '\n'
                         for loop, reward in zip(x_axis, averaged))


def main():
    """Compare the four tuners on a 24-knob, borehole-mode environment."""
    env = Environment(knob_dim=24, metric_dim=60, modes=[2], reward_variance=0.05)
    tuners = [ddpg, gpr_new, dnn, gpr]

    # ddpg has its own settings; the three surrogate-based tuners share the
    # same values (each gets its own dict).
    ddpg_config = {
        'gamma': 0.,
        'c_lr': 0.001,
        'a_lr': 0.01,
        'num_collections': 50,
        'n_epochs': 50,
    }
    surrogate_configs = [{'num_samples': 30, 'num_collections': 50} for _ in range(3)]

    run(tuners,
        [ddpg_config] + surrogate_configs,
        [tuner.__name__ for tuner in tuners],
        'compare',
        env,
        80,
        [1, 1, 1, 1])


# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00			`#`
			`# OtterTune - simulation.py`
			`#`
			`# Copyright (c) 2017-18, Carnegie Mellon University Database Group`
			`#`

work around a pylint & astroid bug 2019-10-22 15:22:18 -07:00
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`import heapq`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00			`import random`
			`import os`
			`import sys`
			`try:`
			`import matplotlib.pyplot as plt`
			`except (ModuleNotFoundError, ImportError):`
			`plt = None`
			`import numpy as np`
			`import torch`
			`sys.path.append("../")`
import Dana's new gpr model 2019-11-01 12:04:42 -07:00			`from analysis.util import get_analysis_logger, TimerStruct # noqa`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00			`from analysis.ddpg.ddpg import DDPG # noqa`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`from analysis.ddpg.ou_process import OUProcess # noqa`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`from analysis.gp_tf import GPRGD # noqa`
			`from analysis.nn_tf import NeuralNet # noqa`
import Dana's new gpr model 2019-11-01 12:04:42 -07:00			`from analysis.gpr import gpr_models # noqa`
			`from analysis.gpr import ucb # noqa`
			`from analysis.gpr.optimize import tf_optimize # noqa`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00
			`LOG = get_analysis_logger(__name__)`


			`class Environment(object):`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`def __init__(self, knob_dim, metric_dim, modes=[0], reward_variance=0,`
			`metrics_variance=0.2):`
			`self.knob_dim = knob_dim`
			`self.metric_dim = metric_dim`
			`self.modes = modes`
			`self.mode = np.random.choice(self.modes)`
			`self.counter = 0`
			`self.reward_variance = reward_variance`
			`self.metrics_variance = metrics_variance`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00
			`def identity_sqrt(self, knob_data):`
			`n1 = self.knob_dim // 4`
			`n2 = self.knob_dim // 4`
			`part1 = np.sum(knob_data[0: n1])`
			`part2 = np.sum(np.sqrt(knob_data[n1: n1 + n2]))`
			`reward = np.array([part1 + part2]) / (self.knob_dim // 2)`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`return reward`

			`def threshold(self, knob_data):`
			`n1 = self.knob_dim // 4`
			`n2 = self.knob_dim // 4`
			`part1 = np.sum(knob_data[0: n1] > 0.9)`
			`part2 = np.sum(knob_data[n1: n1 + n2] < 0.1)`
			`reward = np.array([part1 + part2]) / (self.knob_dim // 2)`
			`return reward`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00
			`def borehole(self, knob_data):`
			`# ref: http://www.sfu.ca/~ssurjano/borehole.html`
			`# pylint: disable=invalid-name`
			`rw = knob_data[0] * (0.15 - 0.05) + 0.05`
			`r = knob_data[1] * (50000 - 100) + 100`
			`Tu = knob_data[2] * (115600 - 63070) + 63070`
			`Hu = knob_data[3] * (1110 - 990) + 990`
			`Tl = knob_data[4] * (116 - 63.1) + 63.1`
			`Hl = knob_data[5] * (820 - 700) + 700`
			`L = knob_data[6] * (1680 - 1120) + 1120`
			`Kw = knob_data[7] * (12045 - 9855) + 9855`

			`frac = 2 * L * Tu / (np.log(r / rw) * rw ** 2 * Kw)`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`reward = 2 * np.pi * Tu * (Hu - Hl) / (np.log(r / rw) * (1 + frac + Tu / Tl)) / 310`
			`return np.array([reward])`

			`def get_metrics(self, mode):`
			`metrics = np.ones(self.metric_dim) * mode`
			`metrics += np.random.rand(self.metric_dim) * self.metrics_variance`
			`return metrics`

			`def simulate_mode(self, knob_data, mode):`
			`if mode == 0:`
			`reward = self.identity_sqrt(knob_data)`
			`elif mode == 1:`
			`reward = self.threshold(knob_data)`
			`elif mode == 2:`
			`reward = np.zeros(1)`
			`for i in range(0, len(knob_data), 8):`
			`reward += self.borehole(knob_data[i: i+8])[0] / len(knob_data) * 8`
			`reward = reward * (1.0 + self.reward_variance * np.random.rand(1)[0])`
			`return reward, self.get_metrics(mode)`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00
			`def simulate(self, knob_data):`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`self.counter += 1`
			`k = 1`
			`# every k runs, sample a new workload`
			`if self.counter >= k:`
			`self.counter = 0`
			`self.mode = np.random.choice(self.modes)`
			`return self.simulate_mode(knob_data, self.mode)`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00

improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`def ddpg(env, config, n_loops=100):`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00			`results = []`
			`x_axis = []`
import Dana's new gpr model 2019-11-01 12:04:42 -07:00			`num_collections = config['num_collections']`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`gamma = config['gamma']`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`a_lr = config['a_lr']`
			`c_lr = config['c_lr']`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`n_epochs = config['n_epochs']`
import Dana's new gpr model 2019-11-01 12:04:42 -07:00			`model_ddpg = DDPG(n_actions=env.knob_dim, n_states=env.metric_dim, gamma=gamma,`
			`clr=c_lr, alr=a_lr, shift=0.1)`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00			`knob_data = np.random.rand(env.knob_dim)`
			`prev_metric_data = np.zeros(env.metric_dim)`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00
import Dana's new gpr model 2019-11-01 12:04:42 -07:00			`for i in range(num_collections):`
			`action = np.random.rand(env.knob_dim)`
			`reward, metric_data = env.simulate(action)`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`if i > 0:`
			`model_ddpg.add_sample(prev_metric_data, prev_knob_data, prev_reward, metric_data)`
			`prev_metric_data = metric_data`
			`prev_knob_data = knob_data`
			`prev_reward = reward`
import Dana's new gpr model 2019-11-01 12:04:42 -07:00
			`for i in range(n_loops):`
			`reward, metric_data = env.simulate(knob_data)`
			`model_ddpg.add_sample(prev_metric_data, prev_knob_data, prev_reward, metric_data)`
			`prev_metric_data = metric_data`
			`prev_knob_data = knob_data`
			`prev_reward = reward`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`for _ in range(n_epochs):`
			`model_ddpg.update()`
			`results.append(reward)`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`x_axis.append(i+1)`
			`LOG.info('loop: %d reward: %f', i, reward[0])`
			`knob_data = model_ddpg.choose_action(metric_data)`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00			`return np.array(results), np.array(x_axis)`


add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`class ReplayMemory(object):`

			`def __init__(self):`
			`self.actions = []`
			`self.rewards = []`

			`def push(self, action, reward):`
			`self.actions.append(action.tolist())`
			`self.rewards.append(reward.tolist())`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`def get_all(self):`
			`return self.actions, self.rewards`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00
			`def dnn(env, config, n_loops=100):`
			`results = []`
			`x_axis = []`
			`memory = ReplayMemory()`
import Dana's new gpr model 2019-11-01 12:04:42 -07:00			`num_collections = config['num_collections']`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`num_samples = config['num_samples']`
import Dana's new gpr model 2019-11-01 12:04:42 -07:00			`ou_process = False`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`Xmin = np.zeros(env.knob_dim)`
			`Xmax = np.ones(env.knob_dim)`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`noise = OUProcess(env.knob_dim)`
import Dana's new gpr model 2019-11-01 12:04:42 -07:00
			`for _ in range(num_collections):`
			`action = np.random.rand(env.knob_dim)`
			`reward, _ = env.simulate(action)`
			`memory.push(action, reward)`

add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`for i in range(n_loops):`
			`X_samples = np.random.rand(num_samples, env.knob_dim)`
			`if i >= 10:`
			`actions, rewards = memory.get_all()`
			`tuples = tuple(zip(actions, rewards))`
			`top10 = heapq.nlargest(10, tuples, key=lambda e: e[1])`
			`for entry in top10:`
			`X_samples = np.vstack((X_samples, np.array(entry[0])))`
			`model_nn = NeuralNet(n_input=X_samples.shape[1],`
			`batch_size=X_samples.shape[0],`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`learning_rate=0.01,`
			`explore_iters=100,`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`noise_scale_begin=0.1,`
			`noise_scale_end=0.0,`
			`debug=False,`
			`debug_interval=100)`
import Dana's new gpr model 2019-11-01 12:04:42 -07:00			`actions, rewards = memory.get_all()`
			`model_nn.fit(np.array(actions), -np.array(rewards), fit_epochs=50)`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`res = model_nn.recommend(X_samples, Xmin, Xmax, recommend_epochs=10, explore=False)`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`best_config_idx = np.argmin(res.minl.ravel())`
			`best_config = res.minl_conf[best_config_idx, :]`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`if ou_process:`
			`best_config += noise.noise()`
			`best_config = best_config.clip(0, 1)`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`reward, _ = env.simulate(best_config)`
			`memory.push(best_config, reward)`
			`LOG.info('loop: %d reward: %f', i, reward[0])`
			`results.append(reward)`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`x_axis.append(i+1)`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`return np.array(results), np.array(x_axis)`


import Dana's new gpr model 2019-11-01 12:04:42 -07:00			`def gpr(env, config, n_loops=100):`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`results = []`
			`x_axis = []`
			`memory = ReplayMemory()`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`num_collections = config['num_collections']`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`num_samples = config['num_samples']`
			`X_min = np.zeros(env.knob_dim)`
			`X_max = np.ones(env.knob_dim)`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`for _ in range(num_collections):`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`action = np.random.rand(env.knob_dim)`
			`reward, _ = env.simulate(action)`
			`memory.push(action, reward)`
import Dana's new gpr model 2019-11-01 12:04:42 -07:00
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`for i in range(n_loops):`
			`X_samples = np.random.rand(num_samples, env.knob_dim)`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`if i >= 10:`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`actions, rewards = memory.get_all()`
			`tuples = tuple(zip(actions, rewards))`
			`top10 = heapq.nlargest(10, tuples, key=lambda e: e[1])`
			`for entry in top10:`
			`# Tensorflow get broken if we use the training data points as`
			`# starting points for GPRGD.`
			`X_samples = np.vstack((X_samples, np.array(entry[0]) * 0.97 + 0.01))`
			`model = GPRGD(length_scale=1.0,`
			`magnitude=1.0,`
import Dana's new gpr model 2019-11-01 12:04:42 -07:00			`max_train_size=2000,`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`batch_size=100,`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`num_threads=4,`
			`learning_rate=0.01,`
			`epsilon=1e-6,`
			`max_iter=500,`
import Dana's new gpr model 2019-11-01 12:04:42 -07:00			`sigma_multiplier=3.0,`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`mu_multiplier=1.0)`

			`actions, rewards = memory.get_all()`
			`model.fit(np.array(actions), -np.array(rewards), X_min, X_max, ridge=0.01)`
			`res = model.predict(X_samples)`
			`best_config_idx = np.argmin(res.minl.ravel())`
			`best_config = res.minl_conf[best_config_idx, :]`
			`reward, _ = env.simulate(best_config)`
			`memory.push(best_config, reward)`
			`LOG.info('loop: %d reward: %f', i, reward[0])`
			`results.append(reward)`
import Dana's new gpr model 2019-11-01 12:04:42 -07:00			`x_axis.append(i+1)`
			`return np.array(results), np.array(x_axis)`


			`def run_optimize(X, y, X_sample, model_name, opt_kwargs, model_kwargs):`
			`timer = TimerStruct()`

			`# Create model (this also optimizes the hyperparameters if that option is enabled`
			`timer.start()`
			`m = gpr_models.create_model(model_name, X=X, y=y, **model_kwargs)`
			`timer.stop()`
			`model_creation_sec = timer.elapsed_seconds`
			`LOG.info(m._model.as_pandas_table())`

			`# Optimize the DBMS's configuration knobs`
			`timer.start()`
			`X_new, ypred, yvar, loss = tf_optimize(m._model, X_sample, **opt_kwargs)`
			`timer.stop()`
			`config_optimize_sec = timer.elapsed_seconds`

			`return X_new, ypred, m.get_model_parameters(), m.get_hyperparameters()`


			`def gpr_new(env, config, n_loops=100):`
			`model_name = 'BasicGP'`
			`model_opt_frequency = 5`
			`model_kwargs = {}`
			`model_kwargs['model_learning_rate'] = 0.001`
			`model_kwargs['model_maxiter'] = 5000`
			`opt_kwargs = {}`
			`opt_kwargs['learning_rate'] = 0.001`
			`opt_kwargs['maxiter'] = 100`
			`opt_kwargs['ucb_beta'] = 3.0`

			`results = []`
			`x_axis = []`
			`memory = ReplayMemory()`
			`num_samples = config['num_samples']`
			`num_collections = config['num_collections']`
			`X_min = np.zeros(env.knob_dim)`
			`X_max = np.ones(env.knob_dim)`
			`X_bounds = [X_min, X_max]`
			`opt_kwargs['bounds'] = X_bounds`

			`for _ in range(num_collections):`
			`action = np.random.rand(env.knob_dim)`
			`reward, _ = env.simulate(action)`
			`memory.push(action, reward)`

			`for i in range(n_loops):`
			`X_samples = np.random.rand(num_samples, env.knob_dim)`
			`if i >= 5:`
			`actions, rewards = memory.get_all()`
			`tuples = tuple(zip(actions, rewards))`
			`top10 = heapq.nlargest(10, tuples, key=lambda e: e[1])`
			`for entry in top10:`
			`# Tensorflow get broken if we use the training data points as`
			`# starting points for GPRGD.`
			`X_samples = np.vstack((X_samples, np.array(entry[0]) * 0.97 + 0.01))`

			`actions, rewards = memory.get_all()`

			`ucb_beta = opt_kwargs.pop('ucb_beta')`
			`opt_kwargs['ucb_beta'] = ucb.get_ucb_beta(ucb_beta, t=i + 1., ndim=env.knob_dim)`
			`if model_opt_frequency > 0:`
			`optimize_hyperparams = i % model_opt_frequency == 0`
			`if not optimize_hyperparams:`
			`model_kwargs['hyperparameters'] = hyperparameters`
			`else:`
			`optimize_hyperparams = False`
			`model_kwargs['hyperparameters'] = None`
			`model_kwargs['optimize_hyperparameters'] = optimize_hyperparams`

			`X_new, ypred, model_params, hyperparameters = run_optimize(np.array(actions),`
			`-np.array(rewards),`
			`X_samples,`
			`model_name,`
			`opt_kwargs,`
			`model_kwargs)`

			`sort_index = np.argsort(ypred.squeeze())`
			`X_new = X_new[sort_index]`
			`ypred = ypred[sort_index].squeeze()`

			`action = X_new[0]`
			`reward, _ = env.simulate(action)`
			`memory.push(action, reward)`
			`LOG.info('loop: %d reward: %f', i, reward[0])`
			`results.append(reward)`
			`x_axis.append(i+1)`

add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`return np.array(results), np.array(x_axis)`


improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`def plotlines(xs, results, labels, title, path):`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00			`if plt:`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`figsize = 13, 10`
			`figure, ax = plt.subplots(figsize=figsize)`
			`lines = []`
import Dana's new gpr model 2019-11-01 12:04:42 -07:00			`N = 1`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`weights = np.ones(N)`
			`for x_axis, result, label in zip(xs, results, labels):`
			`result = np.convolve(weights/weights.sum(), result.flatten())[N-1:-N+1]`
			`lines.append(plt.plot(x_axis[:-N+1], result, label=label, lw=4)[0])`
			`plt.legend(handles=lines, fontsize=30)`
			`plt.title(title, fontsize=25)`
			`plt.xticks(fontsize=25)`
			`plt.yticks(fontsize=25)`
			`ax.set_xlabel("loops", fontsize=30)`
			`ax.set_ylabel("rewards", fontsize=30)`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00			`plt.savefig(path)`
			`plt.clf()`


improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`def run(tuners, configs, labels, title, env, n_loops, n_repeats):`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00			`if not plt:`
			`LOG.info("Cannot import matplotlib. Will write results to files instead of figures.")`
			`random.seed(0)`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`np.random.seed(1)`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00			`torch.manual_seed(0)`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`results = []`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`xs = []`
			`for j, _ in enumerate(tuners):`
			`for i in range(n_repeats[j]):`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`result, x_axis = tuners[j](env, configs[j], n_loops=n_loops)`
			`if i is 0:`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`results.append(result / n_repeats[j])`
			`xs.append(x_axis)`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`else:`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`results[j] += result / n_repeats[j]`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00			`if plt:`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`if not os.path.exists("simulation_figures"):`
			`os.mkdir("simulation_figures")`
			`filename = "simulation_figures/{}.pdf".format(title)`
			`plotlines(xs, results, labels, title, filename)`
			`if not os.path.exists("simulation_results"):`
			`os.mkdir("simulation_results")`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`for j in range(len(tuners)):`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`with open("simulation_results/" + title + '_' + labels[j] + '.csv', 'w') as f:`
			`for i, result in zip(xs[j], results[j]):`
add dnn and gpr to simulation 2019-10-24 13:42:39 -07:00			`f.write(str(i) + ',' + str(result[0]) + '\n')`


			`def main():`
import Dana's new gpr model 2019-11-01 12:04:42 -07:00			`env = Environment(knob_dim=24, metric_dim=60, modes=[2], reward_variance=0.05)`
			`title = 'compare'`
			`n_repeats = [1, 1, 1, 1]`
			`n_loops = 80`
			`configs = [{'gamma': 0., 'c_lr': 0.001, 'a_lr': 0.01, 'num_collections': 50, 'n_epochs': 50},`
			`{'num_samples': 30, 'num_collections': 50},`
			`{'num_samples': 30, 'num_collections': 50},`
			`{'num_samples': 30, 'num_collections': 50}]`
			`tuners = [ddpg, gpr_new, dnn, gpr]`
			`labels = [tuner.__name__ for tuner in tuners]`
improve simulator and ddpg 2019-10-28 10:37:08 -07:00			`run(tuners, configs, labels, title, env, n_loops, n_repeats)`
simulate a DB tuning environment to test DDPG performance 2019-10-21 12:23:27 -07:00

			`if __name__ == '__main__':`
			`main()`