add dnn and gpr to simulation
This commit is contained in:
parent
2974cdab2b
commit
5431154784
|
@ -5,6 +5,7 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
|
|
||||||
|
import heapq
|
||||||
import random
|
import random
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
@ -17,6 +18,8 @@ import torch
|
||||||
sys.path.append("../")
|
sys.path.append("../")
|
||||||
from analysis.util import get_analysis_logger # noqa
|
from analysis.util import get_analysis_logger # noqa
|
||||||
from analysis.ddpg.ddpg import DDPG # noqa
|
from analysis.ddpg.ddpg import DDPG # noqa
|
||||||
|
from analysis.gp_tf import GPRGD # noqa
|
||||||
|
from analysis.nn_tf import NeuralNet # noqa
|
||||||
|
|
||||||
LOG = get_analysis_logger(__name__)
|
LOG = get_analysis_logger(__name__)
|
||||||
|
|
||||||
|
@ -70,40 +73,130 @@ class Environment(object):
|
||||||
return self.borehole(knob_data)
|
return self.borehole(knob_data)
|
||||||
|
|
||||||
|
|
||||||
def train_ddpg(env, gamma=0.99, tau=0.002, lr=0.01, batch_size=32, n_loops=1000):
|
def ddpg(env, config, n_loops=1000):
    """Drive the simulated environment with a DDPG agent and log every reward.

    Args:
        env: environment exposing ``knob_dim``, ``metric_dim`` and
            ``simulate(knob_data)``, which returns ``(reward, metric_data)``.
        config: mapping with the keys 'gamma', 'tau', 'lr', 'batch_size'
            and 'n_epochs'.
        n_loops: number of simulate/learn iterations to run.

    Returns:
        Tuple ``(rewards, loop_indices)`` of numpy arrays, one entry per loop.
    """
    gamma, tau, lr, batch_size, n_epochs = (
        config[key] for key in ('gamma', 'tau', 'lr', 'batch_size', 'n_epochs'))
    # The same learning rate is passed for both ``clr`` and ``alr``.
    agent = DDPG(n_actions=env.knob_dim, n_states=env.metric_dim, gamma=gamma, tau=tau,
                 clr=lr, alr=lr, batch_size=batch_size)

    action = np.random.rand(env.knob_dim)  # random first configuration
    state = np.zeros(env.metric_dim)  # nothing observed before the first loop
    rewards = []
    steps = []
    for step in range(n_loops):
        reward, next_state = env.simulate(action)
        agent.add_sample(state, action, reward, next_state)
        # Perform n_epochs model updates for every newly collected sample.
        for _ in range(n_epochs):
            agent.update()
        rewards.append(reward)
        steps.append(step)
        state = next_state
        action = agent.choose_action(state)
    return np.array(rewards), np.array(steps)
|
||||||
|
|
||||||
|
|
||||||
def run_ddpg(env, ddpg):
|
class ReplayMemory(object):
    """Append-only store of the (action, reward) pairs observed so far."""

    def __init__(self):
        # Parallel lists: rewards[k] is the reward recorded for actions[k].
        self.actions = []
        self.rewards = []

    def push(self, action, reward):
        """Record one observation.

        Both arguments must provide ``tolist()`` (e.g. numpy arrays); they are
        stored as plain Python lists.
        """
        for store, value in ((self.actions, action), (self.rewards, reward)):
            store.append(value.tolist())

    def get_all(self):
        """Return the internal ``(actions, rewards)`` lists (not copies)."""
        return self.actions, self.rewards
|
||||||
|
|
||||||
|
|
||||||
def plotlines(x_axis, data1, data2, label1, label2, title, path):
|
def dnn(env, config, n_loops=100):
    """Tune the simulated environment with the neural-network recommender.

    Every loop draws ``config['num_samples']`` random starting points, builds
    a fresh ``NeuralNet``, asks it for a recommendation and simulates the
    recommended configuration.

    Args:
        env: environment exposing ``knob_dim`` and ``simulate(knob_data)``.
        config: mapping with the key 'num_samples'.
        n_loops: number of recommend/simulate iterations to run.

    Returns:
        Tuple ``(rewards, loop_indices)`` of numpy arrays, one entry per loop.
    """
    n_random = config['num_samples']
    history = ReplayMemory()
    lower = np.zeros(env.knob_dim)
    upper = np.ones(env.knob_dim)
    rewards_log = []
    steps = []
    for step in range(n_loops):
        starts = np.random.rand(n_random, env.knob_dim)
        if step >= 10:
            # Add the ten highest-reward configurations seen so far as
            # extra starting points.
            past_actions, past_rewards = history.get_all()
            best = heapq.nlargest(10, zip(past_actions, past_rewards),
                                  key=lambda pair: pair[1])
            starts = np.vstack([starts] + [np.array(act) for act, _ in best])

        net = NeuralNet(n_input=starts.shape[1],
                        batch_size=starts.shape[0],
                        explore_iters=500,
                        noise_scale_begin=0.1,
                        noise_scale_end=0.0,
                        debug=False,
                        debug_interval=100)
        if step >= 5:
            # Fit only once five results have been stored; rewards are
            # negated before fitting and the lowest predicted value is
            # picked below.
            past_actions, past_rewards = history.get_all()
            net.fit(np.array(past_actions), -np.array(past_rewards), fit_epochs=500)

        res = net.recommend(starts, lower, upper,
                            explore=500, recommend_epochs=500)
        winner = res.minl_conf[np.argmin(res.minl.ravel()), :]
        reward, _ = env.simulate(winner)
        history.push(winner, reward)
        LOG.info('loop: %d reward: %f', step, reward[0])
        rewards_log.append(reward)
        steps.append(step)
    return np.array(rewards_log), np.array(steps)
|
||||||
|
|
||||||
|
|
||||||
|
def gprgd(env, config, n_loops=100):
    """Tune the simulated environment with the GPRGD recommender.

    Five random configurations are simulated first to seed the history.
    Every loop then draws ``config['num_samples']`` random starting points,
    fits a fresh ``GPRGD`` model on the whole history and simulates the
    configuration with the lowest predicted value.

    Args:
        env: environment exposing ``knob_dim`` and ``simulate(knob_data)``.
        config: mapping with the key 'num_samples'.
        n_loops: number of fit/predict/simulate iterations to run.

    Returns:
        Tuple ``(rewards, loop_indices)`` of numpy arrays, one entry per loop.
    """
    n_random = config['num_samples']
    history = ReplayMemory()
    lower = np.zeros(env.knob_dim)
    upper = np.ones(env.knob_dim)

    # Seed the history so the model has data to fit on the first loop.
    for _ in range(5):
        seed_action = np.random.rand(env.knob_dim)
        seed_reward, _metrics = env.simulate(seed_action)
        history.push(seed_action, seed_reward)

    rewards_log = []
    steps = []
    for step in range(n_loops):
        starts = np.random.rand(n_random, env.knob_dim)
        if step >= 5:
            past_actions, past_rewards = history.get_all()
            best = heapq.nlargest(10, zip(past_actions, past_rewards),
                                  key=lambda pair: pair[1])
            # TensorFlow breaks when the training data points themselves are
            # used as starting points for GPRGD, so each of the best
            # configurations is shifted slightly before being added.
            starts = np.vstack(
                [starts] + [np.array(act) * 0.97 + 0.01 for act, _ in best])

        gpr = GPRGD(length_scale=1.0,
                    magnitude=1.0,
                    max_train_size=7000,
                    batch_size=3000,
                    num_threads=4,
                    learning_rate=0.01,
                    epsilon=1e-6,
                    max_iter=500,
                    sigma_multiplier=3.0,
                    mu_multiplier=1.0)

        # Rewards are negated before fitting; the lowest predicted value is
        # picked below.
        past_actions, past_rewards = history.get_all()
        gpr.fit(np.array(past_actions), -np.array(past_rewards), lower, upper, ridge=0.01)
        res = gpr.predict(starts)
        winner = res.minl_conf[np.argmin(res.minl.ravel()), :]
        reward, _ = env.simulate(winner)
        history.push(winner, reward)
        LOG.info('loop: %d reward: %f', step, reward[0])
        rewards_log.append(reward)
        steps.append(step)
    return np.array(rewards_log), np.array(steps)
|
||||||
|
|
||||||
|
|
||||||
|
def plotlines(x_axis, results, labels, title, path):
|
||||||
if plt:
|
if plt:
|
||||||
plt.plot(x_axis, data1, color='red', label=label1)
|
for result, label in zip(results, labels):
|
||||||
plt.plot(x_axis, data2, color='blue', label=label2)
|
plt.plot(x_axis, result, label=label)
|
||||||
plt.legend()
|
plt.legend()
|
||||||
plt.xlabel("loops")
|
plt.xlabel("loops")
|
||||||
plt.ylabel("rewards")
|
plt.ylabel("rewards")
|
||||||
|
@ -112,40 +205,50 @@ def plotlines(x_axis, data1, data2, label1, label2, title, path):
|
||||||
plt.clf()
|
plt.clf()
|
||||||
|
|
||||||
|
|
||||||
def main(knob_dim=8, metric_dim=60, lr=0.0001, mode=2, n_loops=2000):
|
def run(tuners, configs, labels, knob_dim, metric_dim, mode, n_loops, n_repeats):
    """Run every tuner ``n_repeats`` times and report the averaged rewards.

    Args:
        tuners: callables invoked as ``tuner(env, config, n_loops=n_loops)``
            that return ``(result, x_axis)``.
        configs: one configuration per tuner, matched by position.
        labels: one label per tuner, used for the plot and the CSV file names.
        knob_dim: passed to ``Environment``; also part of the output title.
        metric_dim: passed to ``Environment``.
        mode: passed to ``Environment``; also part of the output title.
        n_loops: forwarded to every tuner.
        n_repeats: number of repetitions the results are averaged over.

    Side effects:
        Seeds ``random``, ``numpy`` and ``torch`` with 0. When ``plt`` is
        available, creates the ``figures`` directory if needed and calls
        ``plotlines`` with the path ``figures/<title>.pdf``. Writes one
        ``<title>_<label>.csv`` file per tuner to the working directory.
    """
    if not plt:
        LOG.info("Cannot import matplotlib. Will write results to files instead of figures.")
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(0)
    env = Environment(knob_dim, metric_dim, mode=mode)

    results = []
    for i in range(n_repeats):
        for j, tuner in enumerate(tuners):
            result, x_axis = tuner(env, configs[j], n_loops=n_loops)
            # Compare by value: ``is`` tests object identity, which is not
            # guaranteed to hold for equal integers.
            if i == 0:
                results.append(result / n_repeats)
            else:
                # Accumulate the running average for tuner j.
                results[j] += result / n_repeats

    title = "mode_{}_knob_{}".format(mode, knob_dim)

    if plt:
        if not os.path.exists("figures"):
            os.mkdir("figures")
        filename = "figures/{}.pdf".format(title)
        plotlines(x_axis, results, labels, title, filename)
    for j, averaged in enumerate(results):
        with open(title + '_' + labels[j] + '.csv', 'w') as f:
            # One "loop,reward" row per recorded loop.
            for i, result in zip(x_axis, averaged):
                f.write(str(i) + ',' + str(result[0]) + '\n')
|
||||||
with open(title + '_2.csv', 'w') as f2:
|
|
||||||
for i, result in zip(x_axis, results2):
|
|
||||||
f2.write(str(i) + ',' + str(result[0]) + '\n')
|
def main():
    """Compare two DDPG settings, the DNN tuner and the GPRGD tuner via ``run``."""
    # (tuner, label suffix, tuner configuration) for every experiment.
    experiments = [
        (ddpg, '_gamma_0',
         {'gamma': 0., 'tau': 0.002, 'lr': 0.001, 'batch_size': 32, 'n_epochs': 30}),
        (ddpg, '_gamma_99',
         {'gamma': 0.99, 'tau': 0.002, 'lr': 0.001, 'batch_size': 32, 'n_epochs': 30}),
        (dnn, '', {'num_samples': 30}),
        (gprgd, '', {'num_samples': 30}),
    ]
    tuners = [tuner for tuner, _, _ in experiments]
    configs = [tuner_config for _, _, tuner_config in experiments]
    # Labels are the tuner function names plus the distinguishing suffix.
    labels = [tuner.__name__ + suffix for tuner, suffix, _ in experiments]
    run(tuners, configs, labels,
        knob_dim=192, metric_dim=60, mode=0, n_loops=2, n_repeats=1)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Loading…
Reference in New Issue