improve simulator and ddpg
This commit is contained in:
parent
5431154784
commit
9f71d1c8de
|
@ -32,6 +32,7 @@ class Actor(nn.Module):
|
||||||
nn.Linear(128, 128),
|
nn.Linear(128, 128),
|
||||||
nn.Tanh(),
|
nn.Tanh(),
|
||||||
nn.Dropout(0.3),
|
nn.Dropout(0.3),
|
||||||
|
nn.BatchNorm1d(128),
|
||||||
|
|
||||||
nn.Linear(128, 64),
|
nn.Linear(128, 64),
|
||||||
nn.Tanh(),
|
nn.Tanh(),
|
||||||
|
@ -99,7 +100,7 @@ class Critic(nn.Module):
|
||||||
class DDPG(object):
|
class DDPG(object):
|
||||||
|
|
||||||
def __init__(self, n_states, n_actions, model_name='', alr=0.001, clr=0.001,
|
def __init__(self, n_states, n_actions, model_name='', alr=0.001, clr=0.001,
|
||||||
gamma=0.9, batch_size=32, tau=0.002, memory_size=100000):
|
gamma=0.9, batch_size=32, tau=0.002, shift=0, memory_size=100000):
|
||||||
self.n_states = n_states
|
self.n_states = n_states
|
||||||
self.n_actions = n_actions
|
self.n_actions = n_actions
|
||||||
self.alr = alr
|
self.alr = alr
|
||||||
|
@ -108,6 +109,7 @@ class DDPG(object):
|
||||||
self.batch_size = batch_size
|
self.batch_size = batch_size
|
||||||
self.gamma = gamma
|
self.gamma = gamma
|
||||||
self.tau = tau
|
self.tau = tau
|
||||||
|
self.shift = shift
|
||||||
|
|
||||||
self._build_network()
|
self._build_network()
|
||||||
|
|
||||||
|
@ -184,7 +186,7 @@ class DDPG(object):
|
||||||
target_next_actions = self.target_actor(batch_next_states).detach()
|
target_next_actions = self.target_actor(batch_next_states).detach()
|
||||||
target_next_value = self.target_critic(batch_next_states, target_next_actions).detach()
|
target_next_value = self.target_critic(batch_next_states, target_next_actions).detach()
|
||||||
current_value = self.critic(batch_states, batch_actions)
|
current_value = self.critic(batch_states, batch_actions)
|
||||||
next_value = batch_rewards + target_next_value * self.gamma
|
next_value = batch_rewards + target_next_value * self.gamma + self.shift
|
||||||
|
|
||||||
# update prioritized memory
|
# update prioritized memory
|
||||||
if isinstance(self.replay_memory, PrioritizedReplayMemory):
|
if isinstance(self.replay_memory, PrioritizedReplayMemory):
|
||||||
|
|
|
@ -18,6 +18,7 @@ import torch
|
||||||
sys.path.append("../")
|
sys.path.append("../")
|
||||||
from analysis.util import get_analysis_logger # noqa
|
from analysis.util import get_analysis_logger # noqa
|
||||||
from analysis.ddpg.ddpg import DDPG # noqa
|
from analysis.ddpg.ddpg import DDPG # noqa
|
||||||
|
from analysis.ddpg.ou_process import OUProcess # noqa
|
||||||
from analysis.gp_tf import GPRGD # noqa
|
from analysis.gp_tf import GPRGD # noqa
|
||||||
from analysis.nn_tf import NeuralNet # noqa
|
from analysis.nn_tf import NeuralNet # noqa
|
||||||
|
|
||||||
|
@ -25,10 +26,15 @@ LOG = get_analysis_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class Environment(object):
|
class Environment(object):
|
||||||
def __init__(self, n_knob, n_metric, mode=0):
|
def __init__(self, knob_dim, metric_dim, modes=[0], reward_variance=0,
|
||||||
self.knob_dim = n_knob
|
metrics_variance=0.2):
|
||||||
self.metric_dim = n_metric
|
self.knob_dim = knob_dim
|
||||||
self.mode = mode
|
self.metric_dim = metric_dim
|
||||||
|
self.modes = modes
|
||||||
|
self.mode = np.random.choice(self.modes)
|
||||||
|
self.counter = 0
|
||||||
|
self.reward_variance = reward_variance
|
||||||
|
self.metrics_variance = metrics_variance
|
||||||
|
|
||||||
def identity_sqrt(self, knob_data):
|
def identity_sqrt(self, knob_data):
|
||||||
n1 = self.knob_dim // 4
|
n1 = self.knob_dim // 4
|
||||||
|
@ -36,8 +42,15 @@ class Environment(object):
|
||||||
part1 = np.sum(knob_data[0: n1])
|
part1 = np.sum(knob_data[0: n1])
|
||||||
part2 = np.sum(np.sqrt(knob_data[n1: n1 + n2]))
|
part2 = np.sum(np.sqrt(knob_data[n1: n1 + n2]))
|
||||||
reward = np.array([part1 + part2]) / (self.knob_dim // 2)
|
reward = np.array([part1 + part2]) / (self.knob_dim // 2)
|
||||||
metric_data = np.zeros(self.metric_dim)
|
return reward
|
||||||
return reward, metric_data
|
|
||||||
|
def threshold(self, knob_data):
|
||||||
|
n1 = self.knob_dim // 4
|
||||||
|
n2 = self.knob_dim // 4
|
||||||
|
part1 = np.sum(knob_data[0: n1] > 0.9)
|
||||||
|
part2 = np.sum(knob_data[n1: n1 + n2] < 0.1)
|
||||||
|
reward = np.array([part1 + part2]) / (self.knob_dim // 2)
|
||||||
|
return reward
|
||||||
|
|
||||||
def borehole(self, knob_data):
|
def borehole(self, knob_data):
|
||||||
# ref: http://www.sfu.ca/~ssurjano/borehole.html
|
# ref: http://www.sfu.ca/~ssurjano/borehole.html
|
||||||
|
@ -52,48 +65,64 @@ class Environment(object):
|
||||||
Kw = knob_data[7] * (12045 - 9855) + 9855
|
Kw = knob_data[7] * (12045 - 9855) + 9855
|
||||||
|
|
||||||
frac = 2 * L * Tu / (np.log(r / rw) * rw ** 2 * Kw)
|
frac = 2 * L * Tu / (np.log(r / rw) * rw ** 2 * Kw)
|
||||||
reward = 2 * np.pi * Tu * (Hu - Hl) / (np.log(r / rw) * (1 + frac + Tu / Tl))
|
reward = 2 * np.pi * Tu * (Hu - Hl) / (np.log(r / rw) * (1 + frac + Tu / Tl)) / 310
|
||||||
return np.array([reward]), np.zeros(self.metric_dim)
|
return np.array([reward])
|
||||||
|
|
||||||
def threshold(self, knob_data):
|
def get_metrics(self, mode):
|
||||||
n1 = self.knob_dim // 4
|
metrics = np.ones(self.metric_dim) * mode
|
||||||
n2 = self.knob_dim // 4
|
metrics += np.random.rand(self.metric_dim) * self.metrics_variance
|
||||||
part1 = np.sum(knob_data[0: n1] > 0.9)
|
return metrics
|
||||||
part2 = np.sum(knob_data[n1: n1 + n2] < 0.1)
|
|
||||||
reward = np.array([part1 + part2])
|
def simulate_mode(self, knob_data, mode):
|
||||||
metric_data = np.zeros(self.metric_dim)
|
if mode == 0:
|
||||||
return reward, metric_data
|
reward = self.identity_sqrt(knob_data)
|
||||||
|
elif mode == 1:
|
||||||
|
reward = self.threshold(knob_data)
|
||||||
|
elif mode == 2:
|
||||||
|
reward = np.zeros(1)
|
||||||
|
for i in range(0, len(knob_data), 8):
|
||||||
|
reward += self.borehole(knob_data[i: i+8])[0] / len(knob_data) * 8
|
||||||
|
reward = reward * (1.0 + self.reward_variance * np.random.rand(1)[0])
|
||||||
|
return reward, self.get_metrics(mode)
|
||||||
|
|
||||||
def simulate(self, knob_data):
|
def simulate(self, knob_data):
|
||||||
if self.mode == 0:
|
self.counter += 1
|
||||||
return self.identity_sqrt(knob_data)
|
k = 1
|
||||||
elif self.mode == 1:
|
# every k runs, sample a new workload
|
||||||
return self.threshold(knob_data)
|
if self.counter >= k:
|
||||||
elif self.mode == 2:
|
self.counter = 0
|
||||||
return self.borehole(knob_data)
|
self.mode = np.random.choice(self.modes)
|
||||||
|
return self.simulate_mode(knob_data, self.mode)
|
||||||
|
|
||||||
|
|
||||||
def ddpg(env, config, n_loops=1000):
|
def ddpg(env, config, n_loops=100):
|
||||||
results = []
|
results = []
|
||||||
x_axis = []
|
x_axis = []
|
||||||
gamma = config['gamma']
|
gamma = config['gamma']
|
||||||
tau = config['tau']
|
tau = config['tau']
|
||||||
lr = config['lr']
|
a_lr = config['a_lr']
|
||||||
batch_size = config['batch_size']
|
c_lr = config['c_lr']
|
||||||
n_epochs = config['n_epochs']
|
n_epochs = config['n_epochs']
|
||||||
model_ddpg = DDPG(n_actions=env.knob_dim, n_states=env.metric_dim, gamma=gamma, tau=tau,
|
model_ddpg = DDPG(n_actions=env.knob_dim, n_states=env.metric_dim, gamma=gamma, tau=tau,
|
||||||
clr=lr, alr=lr, batch_size=batch_size)
|
clr=c_lr, alr=a_lr)
|
||||||
knob_data = np.random.rand(env.knob_dim)
|
knob_data = np.random.rand(env.knob_dim)
|
||||||
prev_metric_data = np.zeros(env.metric_dim)
|
prev_metric_data = np.zeros(env.metric_dim)
|
||||||
|
|
||||||
for i in range(n_loops):
|
for i in range(n_loops):
|
||||||
reward, metric_data = env.simulate(knob_data)
|
reward, metric_data = env.simulate(knob_data)
|
||||||
model_ddpg.add_sample(prev_metric_data, knob_data, reward, metric_data)
|
if i > 0:
|
||||||
|
model_ddpg.add_sample(prev_metric_data, prev_knob_data, prev_reward, metric_data)
|
||||||
|
prev_metric_data = metric_data
|
||||||
|
prev_knob_data = knob_data
|
||||||
|
prev_reward = reward
|
||||||
|
if i == 0:
|
||||||
|
continue
|
||||||
for _ in range(n_epochs):
|
for _ in range(n_epochs):
|
||||||
model_ddpg.update()
|
model_ddpg.update()
|
||||||
results.append(reward)
|
results.append(reward)
|
||||||
x_axis.append(i)
|
x_axis.append(i+1)
|
||||||
prev_metric_data = metric_data
|
LOG.info('loop: %d reward: %f', i, reward[0])
|
||||||
knob_data = model_ddpg.choose_action(prev_metric_data)
|
knob_data = model_ddpg.choose_action(metric_data)
|
||||||
return np.array(results), np.array(x_axis)
|
return np.array(results), np.array(x_axis)
|
||||||
|
|
||||||
|
|
||||||
|
@ -116,8 +145,10 @@ def dnn(env, config, n_loops=100):
|
||||||
x_axis = []
|
x_axis = []
|
||||||
memory = ReplayMemory()
|
memory = ReplayMemory()
|
||||||
num_samples = config['num_samples']
|
num_samples = config['num_samples']
|
||||||
|
ou_process = config['ou_process']
|
||||||
Xmin = np.zeros(env.knob_dim)
|
Xmin = np.zeros(env.knob_dim)
|
||||||
Xmax = np.ones(env.knob_dim)
|
Xmax = np.ones(env.knob_dim)
|
||||||
|
noise = OUProcess(env.knob_dim)
|
||||||
for i in range(n_loops):
|
for i in range(n_loops):
|
||||||
X_samples = np.random.rand(num_samples, env.knob_dim)
|
X_samples = np.random.rand(num_samples, env.knob_dim)
|
||||||
if i >= 10:
|
if i >= 10:
|
||||||
|
@ -128,23 +159,26 @@ def dnn(env, config, n_loops=100):
|
||||||
X_samples = np.vstack((X_samples, np.array(entry[0])))
|
X_samples = np.vstack((X_samples, np.array(entry[0])))
|
||||||
model_nn = NeuralNet(n_input=X_samples.shape[1],
|
model_nn = NeuralNet(n_input=X_samples.shape[1],
|
||||||
batch_size=X_samples.shape[0],
|
batch_size=X_samples.shape[0],
|
||||||
explore_iters=500,
|
learning_rate=0.01,
|
||||||
|
explore_iters=100,
|
||||||
noise_scale_begin=0.1,
|
noise_scale_begin=0.1,
|
||||||
noise_scale_end=0.0,
|
noise_scale_end=0.0,
|
||||||
debug=False,
|
debug=False,
|
||||||
debug_interval=100)
|
debug_interval=100)
|
||||||
if i >= 5:
|
if i >= 5:
|
||||||
actions, rewards = memory.get_all()
|
actions, rewards = memory.get_all()
|
||||||
model_nn.fit(np.array(actions), -np.array(rewards), fit_epochs=500)
|
model_nn.fit(np.array(actions), -np.array(rewards), fit_epochs=50)
|
||||||
res = model_nn.recommend(X_samples, Xmin, Xmax,
|
res = model_nn.recommend(X_samples, Xmin, Xmax, recommend_epochs=10, explore=False)
|
||||||
explore=500, recommend_epochs=500)
|
|
||||||
best_config_idx = np.argmin(res.minl.ravel())
|
best_config_idx = np.argmin(res.minl.ravel())
|
||||||
best_config = res.minl_conf[best_config_idx, :]
|
best_config = res.minl_conf[best_config_idx, :]
|
||||||
|
if ou_process:
|
||||||
|
best_config += noise.noise()
|
||||||
|
best_config = best_config.clip(0, 1)
|
||||||
reward, _ = env.simulate(best_config)
|
reward, _ = env.simulate(best_config)
|
||||||
memory.push(best_config, reward)
|
memory.push(best_config, reward)
|
||||||
LOG.info('loop: %d reward: %f', i, reward[0])
|
LOG.info('loop: %d reward: %f', i, reward[0])
|
||||||
results.append(reward)
|
results.append(reward)
|
||||||
x_axis.append(i)
|
x_axis.append(i+1)
|
||||||
return np.array(results), np.array(x_axis)
|
return np.array(results), np.array(x_axis)
|
||||||
|
|
||||||
|
|
||||||
|
@ -152,16 +186,17 @@ def gprgd(env, config, n_loops=100):
|
||||||
results = []
|
results = []
|
||||||
x_axis = []
|
x_axis = []
|
||||||
memory = ReplayMemory()
|
memory = ReplayMemory()
|
||||||
|
num_collections = config['num_collections']
|
||||||
num_samples = config['num_samples']
|
num_samples = config['num_samples']
|
||||||
X_min = np.zeros(env.knob_dim)
|
X_min = np.zeros(env.knob_dim)
|
||||||
X_max = np.ones(env.knob_dim)
|
X_max = np.ones(env.knob_dim)
|
||||||
for _ in range(5):
|
for _ in range(num_collections):
|
||||||
action = np.random.rand(env.knob_dim)
|
action = np.random.rand(env.knob_dim)
|
||||||
reward, _ = env.simulate(action)
|
reward, _ = env.simulate(action)
|
||||||
memory.push(action, reward)
|
memory.push(action, reward)
|
||||||
for i in range(n_loops):
|
for i in range(n_loops):
|
||||||
X_samples = np.random.rand(num_samples, env.knob_dim)
|
X_samples = np.random.rand(num_samples, env.knob_dim)
|
||||||
if i >= 5:
|
if i >= 10:
|
||||||
actions, rewards = memory.get_all()
|
actions, rewards = memory.get_all()
|
||||||
tuples = tuple(zip(actions, rewards))
|
tuples = tuple(zip(actions, rewards))
|
||||||
top10 = heapq.nlargest(10, tuples, key=lambda e: e[1])
|
top10 = heapq.nlargest(10, tuples, key=lambda e: e[1])
|
||||||
|
@ -171,13 +206,13 @@ def gprgd(env, config, n_loops=100):
|
||||||
X_samples = np.vstack((X_samples, np.array(entry[0]) * 0.97 + 0.01))
|
X_samples = np.vstack((X_samples, np.array(entry[0]) * 0.97 + 0.01))
|
||||||
model = GPRGD(length_scale=1.0,
|
model = GPRGD(length_scale=1.0,
|
||||||
magnitude=1.0,
|
magnitude=1.0,
|
||||||
max_train_size=7000,
|
max_train_size=100,
|
||||||
batch_size=3000,
|
batch_size=100,
|
||||||
num_threads=4,
|
num_threads=4,
|
||||||
learning_rate=0.01,
|
learning_rate=0.01,
|
||||||
epsilon=1e-6,
|
epsilon=1e-6,
|
||||||
max_iter=500,
|
max_iter=500,
|
||||||
sigma_multiplier=3.0,
|
sigma_multiplier=30.0,
|
||||||
mu_multiplier=1.0)
|
mu_multiplier=1.0)
|
||||||
|
|
||||||
actions, rewards = memory.get_all()
|
actions, rewards = memory.get_all()
|
||||||
|
@ -193,62 +228,69 @@ def gprgd(env, config, n_loops=100):
|
||||||
return np.array(results), np.array(x_axis)
|
return np.array(results), np.array(x_axis)
|
||||||
|
|
||||||
|
|
||||||
def plotlines(x_axis, results, labels, title, path):
|
def plotlines(xs, results, labels, title, path):
|
||||||
if plt:
|
if plt:
|
||||||
for result, label in zip(results, labels):
|
figsize = 13, 10
|
||||||
plt.plot(x_axis, result, label=label)
|
figure, ax = plt.subplots(figsize=figsize)
|
||||||
plt.legend()
|
lines = []
|
||||||
plt.xlabel("loops")
|
N = 20
|
||||||
plt.ylabel("rewards")
|
weights = np.ones(N)
|
||||||
plt.title(title)
|
for x_axis, result, label in zip(xs, results, labels):
|
||||||
|
result = np.convolve(weights/weights.sum(), result.flatten())[N-1:-N+1]
|
||||||
|
lines.append(plt.plot(x_axis[:-N+1], result, label=label, lw=4)[0])
|
||||||
|
plt.legend(handles=lines, fontsize=30)
|
||||||
|
plt.title(title, fontsize=25)
|
||||||
|
plt.xticks(fontsize=25)
|
||||||
|
plt.yticks(fontsize=25)
|
||||||
|
ax.set_xlabel("loops", fontsize=30)
|
||||||
|
ax.set_ylabel("rewards", fontsize=30)
|
||||||
plt.savefig(path)
|
plt.savefig(path)
|
||||||
plt.clf()
|
plt.clf()
|
||||||
|
|
||||||
|
|
||||||
def run(tuners, configs, labels, knob_dim, metric_dim, mode, n_loops, n_repeats):
|
def run(tuners, configs, labels, title, env, n_loops, n_repeats):
|
||||||
if not plt:
|
if not plt:
|
||||||
LOG.info("Cannot import matplotlib. Will write results to files instead of figures.")
|
LOG.info("Cannot import matplotlib. Will write results to files instead of figures.")
|
||||||
random.seed(0)
|
random.seed(0)
|
||||||
np.random.seed(0)
|
np.random.seed(1)
|
||||||
torch.manual_seed(0)
|
torch.manual_seed(0)
|
||||||
env = Environment(knob_dim, metric_dim, mode=mode)
|
|
||||||
results = []
|
results = []
|
||||||
for i in range(n_repeats):
|
xs = []
|
||||||
for j, _ in enumerate(tuners):
|
for j, _ in enumerate(tuners):
|
||||||
|
for i in range(n_repeats[j]):
|
||||||
result, x_axis = tuners[j](env, configs[j], n_loops=n_loops)
|
result, x_axis = tuners[j](env, configs[j], n_loops=n_loops)
|
||||||
if i is 0:
|
if i is 0:
|
||||||
results.append(result / n_repeats)
|
results.append(result / n_repeats[j])
|
||||||
|
xs.append(x_axis)
|
||||||
else:
|
else:
|
||||||
results[j] += result / n_repeats
|
results[j] += result / n_repeats[j]
|
||||||
|
|
||||||
title = "mode_{}_knob_{}".format(mode, knob_dim)
|
|
||||||
|
|
||||||
if plt:
|
if plt:
|
||||||
if not os.path.exists("figures"):
|
if not os.path.exists("simulation_figures"):
|
||||||
os.mkdir("figures")
|
os.mkdir("simulation_figures")
|
||||||
filename = "figures/{}.pdf".format(title)
|
filename = "simulation_figures/{}.pdf".format(title)
|
||||||
plotlines(x_axis, results, labels, title, filename)
|
plotlines(xs, results, labels, title, filename)
|
||||||
|
if not os.path.exists("simulation_results"):
|
||||||
|
os.mkdir("simulation_results")
|
||||||
for j in range(len(tuners)):
|
for j in range(len(tuners)):
|
||||||
with open(title + '_' + labels[j] + '.csv', 'w') as f:
|
with open("simulation_results/" + title + '_' + labels[j] + '.csv', 'w') as f:
|
||||||
for i, result in zip(x_axis, results[j]):
|
for i, result in zip(xs[j], results[j]):
|
||||||
f.write(str(i) + ',' + str(result[0]) + '\n')
|
f.write(str(i) + ',' + str(result[0]) + '\n')
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
knob_dim = 192
|
env = Environment(knob_dim=192, metric_dim=60, modes=[0, 1], reward_variance=0.05)
|
||||||
metric_dim = 60
|
n_loops = 2000
|
||||||
mode = 0
|
configs = [{'gamma': 0, 'tau': 0.002, 'a_lr': 0.01, 'c_lr': 0.01, 'n_epochs': 1},
|
||||||
n_loops = 2
|
{'gamma': 0, 'tau': 0.002, 'a_lr': 0.01, 'c_lr': 0.001, 'n_epochs': 1},
|
||||||
n_repeats = 1
|
{'gamma': 0., 'tau': 0.002, 'a_lr': 0.001, 'c_lr': 0.001, 'n_epochs': 1},
|
||||||
configs = [{'gamma': 0., 'tau': 0.002, 'lr': 0.001, 'batch_size': 32, 'n_epochs': 30},
|
# {'num_samples': 100, 'ou_process': False},
|
||||||
{'gamma': 0.99, 'tau': 0.002, 'lr': 0.001, 'batch_size': 32, 'n_epochs': 30},
|
]
|
||||||
{'num_samples': 30},
|
tuners = [ddpg, ddpg, ddpg]
|
||||||
{'num_samples': 30}]
|
labels = ['1', '2', '3']
|
||||||
tuners = [ddpg, ddpg, dnn, gprgd]
|
title = 'varing_workloads'
|
||||||
labels = [tuner.__name__ for tuner in tuners]
|
n_repeats = [3, 3, 3]
|
||||||
labels[0] += '_gamma_0'
|
run(tuners, configs, labels, title, env, n_loops, n_repeats)
|
||||||
labels[1] += '_gamma_99'
|
|
||||||
run(tuners, configs, labels, knob_dim, metric_dim, mode, n_loops, n_repeats)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -80,4 +80,4 @@ DDPG_BATCH_SIZE = 32
|
||||||
ACTOR_LEARNING_RATE = 0.01
|
ACTOR_LEARNING_RATE = 0.01
|
||||||
|
|
||||||
# Learning rate of critic network
|
# Learning rate of critic network
|
||||||
CRITIC_LEARNING_RATE = 0.01
|
CRITIC_LEARNING_RATE = 0.001
|
||||||
|
|
|
@ -338,6 +338,7 @@ def train_ddpg(result_id):
|
||||||
if session.ddpg_reply_memory:
|
if session.ddpg_reply_memory:
|
||||||
ddpg.replay_memory.set(session.ddpg_reply_memory)
|
ddpg.replay_memory.set(session.ddpg_reply_memory)
|
||||||
ddpg.add_sample(normalized_metric_data, knob_data, reward, normalized_metric_data)
|
ddpg.add_sample(normalized_metric_data, knob_data, reward, normalized_metric_data)
|
||||||
|
for _ in range(25):
|
||||||
ddpg.update()
|
ddpg.update()
|
||||||
session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model()
|
session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model()
|
||||||
session.ddpg_reply_memory = ddpg.replay_memory.get()
|
session.ddpg_reply_memory = ddpg.replay_memory.get()
|
||||||
|
|
Loading…
Reference in New Issue