save ddpg model in database

parent c8fbaf6e4b
commit a3fcf59f07
@@ -1,10 +1,10 @@
 #
-# __init__.py
+# OtterTune - __init__.py
 #
-# Copyright
+# Copyright (c) 2017-18, Carnegie Mellon University Database Group
 #
 
 from analysis.ddpg.ddpg import DDPG
 
 __all__ = ["DDPG"]
@@ -1,509 +1,428 @@
 #
-# ddpg.py
+# OtterTune - ddpg.py
 #
-# Copyright
+# Copyright (c) 2017-18, Carnegie Mellon University Database Group
 #
-"""
-Deep Deterministic Policy Gradient Model
-
-"""
+# from: https://github.com/KqSMea8/CDBTune
+# Zhang, Ji, et al. "An end-to-end automatic cloud database tuning system using
+# deep reinforcement learning." Proceedings of the 2019 International Conference
+# on Management of Data. ACM, 2019
 
-import logging
 import os
-import sys
-import math
 import pickle
+import math
 import numpy as np
 import torch
 import torch.nn as nn
 from torch.nn import init, Parameter
 import torch.nn.functional as F
 import torch.optim as optimizer
 from torch.autograd import Variable
 
-from analysis.ddpg.OUProcess import OUProcess
+from analysis.ddpg.ou_process import OUProcess
 from analysis.ddpg.prioritized_replay_memory import PrioritizedReplayMemory
+from analysis.util import get_analysis_logger
 
-LOG = logging.getLogger(__name__)
-
-sys.path.append('../')
+LOG = get_analysis_logger(__name__)
 
 
 # code from https://github.com/Kaixhin/NoisyNet-A3C/blob/master/model.py
 class NoisyLinear(nn.Linear):
     def __init__(self, in_features, out_features, sigma_init=0.05, bias=True):
         super(NoisyLinear, self).__init__(in_features, out_features, bias=True)
         # reuse self.weight and self.bias
         self.sigma_init = sigma_init
         self.sigma_weight = Parameter(torch.Tensor(out_features, in_features))
         self.sigma_bias = Parameter(torch.Tensor(out_features))
+        self.epsilon_weight = None
+        self.epsilon_bias = None
         self.register_buffer('epsilon_weight', torch.zeros(out_features, in_features))
         self.register_buffer('epsilon_bias', torch.zeros(out_features))
         self.reset_parameters()
 
     def reset_parameters(self):
         # Only init after all params added (otherwise super().__init__() fails)
         if hasattr(self, 'sigma_weight'):
             init.uniform(self.weight, -math.sqrt(3 / self.in_features),
                          math.sqrt(3 / self.in_features))
             init.uniform(self.bias, -math.sqrt(3 / self.in_features),
                          math.sqrt(3 / self.in_features))
             init.constant(self.sigma_weight, self.sigma_init)
             init.constant(self.sigma_bias, self.sigma_init)
 
     def forward(self, x):
         return F.linear(x, self.weight + self.sigma_weight * Variable(self.epsilon_weight),
                         self.bias + self.sigma_bias * Variable(self.epsilon_bias))
 
+    # pylint: disable=attribute-defined-outside-init
     def sample_noise(self):
         self.epsilon_weight = torch.randn(self.out_features, self.in_features)
         self.epsilon_bias = torch.randn(self.out_features)
 
     def remove_noise(self):
         self.epsilon_weight = torch.zeros(self.out_features, self.in_features)
         self.epsilon_bias = torch.zeros(self.out_features)
+    # pylint: enable=attribute-defined-outside-init
 
 
 class Normalizer(object):
 
     def __init__(self, mean, variance):
         if isinstance(mean, list):
             mean = np.array(mean)
         if isinstance(variance, list):
             variance = np.array(variance)
         self.mean = mean
         self.std = np.sqrt(variance + 0.00001)
 
     def normalize(self, x):
         if isinstance(x, list):
             x = np.array(x)
         x = x - self.mean
         x = x / self.std
 
         return Variable(torch.FloatTensor(x))
 
     def __call__(self, x, *args, **kwargs):
         return self.normalize(x)
 
 
+class ActorLow(nn.Module):
+
+    def __init__(self, n_states, n_actions, ):
+        super(ActorLow, self).__init__()
+        self.layers = nn.Sequential(
+            nn.BatchNorm1d(n_states),
+            nn.Linear(n_states, 32),
+            nn.LeakyReLU(negative_slope=0.2),
+            nn.BatchNorm1d(32),
+            nn.Linear(32, n_actions),
+            nn.LeakyReLU(negative_slope=0.2)
+        )
+        self._init_weights()
+        self.out_func = nn.Tanh()
+
+    def _init_weights(self):
+
+        for m in self.layers:
+            if isinstance(m, nn.Linear):
+                m.weight.data.normal_(0.0, 1e-3)
+                m.bias.data.uniform_(-0.1, 0.1)
+
+    def forward(self, x):  # pylint: disable=arguments-differ
+
+        out = self.layers(x)
+
+        return self.out_func(out)
+
+
+class CriticLow(nn.Module):
+
+    def __init__(self, n_states, n_actions):
+        super(CriticLow, self).__init__()
+        self.state_input = nn.Linear(n_states, 32)
+        self.action_input = nn.Linear(n_actions, 32)
+        self.act = nn.LeakyReLU(negative_slope=0.2)
+        self.state_bn = nn.BatchNorm1d(n_states)
+        self.layers = nn.Sequential(
+            nn.Linear(64, 1),
+            nn.LeakyReLU(negative_slope=0.2),
+        )
+        self._init_weights()
+
+    def _init_weights(self):
+        self.state_input.weight.data.normal_(0.0, 1e-3)
+        self.state_input.bias.data.uniform_(-0.1, 0.1)
+
+        self.action_input.weight.data.normal_(0.0, 1e-3)
+        self.action_input.bias.data.uniform_(-0.1, 0.1)
+
+        for m in self.layers:
+            if isinstance(m, nn.Linear):
+                m.weight.data.normal_(0.0, 1e-3)
+                m.bias.data.uniform_(-0.1, 0.1)
+
+    def forward(self, x, action):  # pylint: disable=arguments-differ
+        x = self.state_bn(x)
+        x = self.act(self.state_input(x))
+        action = self.act(self.action_input(action))
+
+        _input = torch.cat([x, action], dim=1)
+        value = self.layers(_input)
+        return value
+
+
 class Actor(nn.Module):
 
     def __init__(self, n_states, n_actions, noisy=False):
         super(Actor, self).__init__()
         self.layers = nn.Sequential(
             nn.Linear(n_states, 128),
             nn.LeakyReLU(negative_slope=0.2),
             nn.BatchNorm1d(128),
             nn.Linear(128, 128),
             nn.Tanh(),
             nn.Dropout(0.3),
 
             nn.Linear(128, 64),
             nn.Tanh(),
             nn.BatchNorm1d(64),
         )
         if noisy:
             self.out = NoisyLinear(64, n_actions)
         else:
             self.out = nn.Linear(64, n_actions)
         self._init_weights()
         self.act = nn.Sigmoid()
 
     def _init_weights(self):
 
         for m in self.layers:
             if isinstance(m, nn.Linear):
                 m.weight.data.normal_(0.0, 1e-2)
                 m.bias.data.uniform_(-0.1, 0.1)
 
     def sample_noise(self):
         self.out.sample_noise()
 
     def forward(self, x):  # pylint: disable=arguments-differ
 
         out = self.act(self.out(self.layers(x)))
         return out
 
 
 class Critic(nn.Module):
 
     def __init__(self, n_states, n_actions):
         super(Critic, self).__init__()
         self.state_input = nn.Linear(n_states, 128)
         self.action_input = nn.Linear(n_actions, 128)
         self.act = nn.Tanh()
         self.layers = nn.Sequential(
             nn.Linear(256, 256),
             nn.LeakyReLU(negative_slope=0.2),
             nn.BatchNorm1d(256),
 
             nn.Linear(256, 64),
             nn.Tanh(),
             nn.Dropout(0.3),
             nn.BatchNorm1d(64),
             nn.Linear(64, 1),
         )
         self._init_weights()
 
     def _init_weights(self):
         self.state_input.weight.data.normal_(0.0, 1e-2)
         self.state_input.bias.data.uniform_(-0.1, 0.1)
 
         self.action_input.weight.data.normal_(0.0, 1e-2)
         self.action_input.bias.data.uniform_(-0.1, 0.1)
 
         for m in self.layers:
             if isinstance(m, nn.Linear):
                 m.weight.data.normal_(0.0, 1e-2)
                 m.bias.data.uniform_(-0.1, 0.1)
 
     def forward(self, x, action):  # pylint: disable=arguments-differ
         x = self.act(self.state_input(x))
         action = self.act(self.action_input(action))
 
         _input = torch.cat([x, action], dim=1)
         value = self.layers(_input)
         return value
 
 
 class DDPG(object):
 
-    def __init__(self, n_states, n_actions, opt=None, ouprocess=True, mean_var_path=None,
-                 supervised=False):
-        """ DDPG Algorithms
-        Args:
-            n_states: int, dimension of states
-            n_actions: int, dimension of actions
-            opt: dict, params
-            supervised, bool, pre-train the actor with supervised learning
-        """
+    def __init__(self, n_states, n_actions, model_name='', alr=0.001, clr=0.001,
+                 gamma=0.9, batch_size=32, tau=0.002, memory_size=100000,
+                 ouprocess=True, mean_var_path=None, supervised=False):
         self.n_states = n_states
         self.n_actions = n_actions
-
-        if opt is None:
-            opt = {
-                'model': '',
-                'alr': 0.001,
-                'clr': 0.001,
-                'gamma': 0.9,
-                'batch_size': 32,
-                'tau': 0.002,
-                'memory_size': 100000
-            }
-
-        # Params
-        self.alr = opt['alr']
-        self.clr = opt['clr']
-        self.model_name = opt['model']
-        self.batch_size = opt['batch_size']
-        self.gamma = opt['gamma']
-        self.tau = opt['tau']
+        self.alr = alr
+        self.clr = clr
+        self.model_name = model_name
+        self.batch_size = batch_size
+        self.gamma = gamma
+        self.tau = tau
         self.ouprocess = ouprocess
 
         if mean_var_path is None:
             mean = np.zeros(n_states)
             var = np.zeros(n_states)
         elif not os.path.exists(mean_var_path):
             mean = np.zeros(n_states)
             var = np.zeros(n_states)
         else:
             with open(mean_var_path, 'rb') as f:
                 mean, var = pickle.load(f)
 
         self.normalizer = Normalizer(mean, var)
 
         if supervised:
             self._build_actor()
             LOG.info("Supervised Learning Initialized")
         else:
             # Build Network
             self._build_network()
             LOG.info('Finish Initializing Networks')
 
-        self.replay_memory = PrioritizedReplayMemory(capacity=opt['memory_size'])
+        self.replay_memory = PrioritizedReplayMemory(capacity=memory_size)
         self.noise = OUProcess(n_actions)
-        # LOG.info('DDPG Initialzed!')
 
     @staticmethod
     def totensor(x):
         return Variable(torch.FloatTensor(x))
 
     def _build_actor(self):
         if self.ouprocess:
             noisy = False
         else:
             noisy = True
         self.actor = Actor(self.n_states, self.n_actions, noisy=noisy)
         self.actor_criterion = nn.MSELoss()
         self.actor_optimizer = optimizer.Adam(lr=self.alr, params=self.actor.parameters())
 
     def _build_network(self):
         if self.ouprocess:
             noisy = False
         else:
             noisy = True
         self.actor = Actor(self.n_states, self.n_actions, noisy=noisy)
         self.target_actor = Actor(self.n_states, self.n_actions)
         self.critic = Critic(self.n_states, self.n_actions)
         self.target_critic = Critic(self.n_states, self.n_actions)
 
         # if model params are provided, load them
         if len(self.model_name):
             self.load_model(model_name=self.model_name)
             LOG.info("Loading model from file: %s", self.model_name)
 
         # Copy actor's parameters
         self._update_target(self.target_actor, self.actor, tau=1.0)
 
         # Copy critic's parameters
         self._update_target(self.target_critic, self.critic, tau=1.0)
 
         self.loss_criterion = nn.MSELoss()
         self.actor_optimizer = optimizer.Adam(lr=self.alr, params=self.actor.parameters(),
                                               weight_decay=1e-5)
         self.critic_optimizer = optimizer.Adam(lr=self.clr, params=self.critic.parameters(),
                                                weight_decay=1e-5)
 
     @staticmethod
     def _update_target(target, source, tau):
         for (target_param, param) in zip(target.parameters(), source.parameters()):
             target_param.data.copy_(
                 target_param.data * (1 - tau) + param.data * tau
             )
 
     def reset(self, sigma):
         self.noise.reset(sigma)
 
     def _sample_batch(self):
         batch, idx = self.replay_memory.sample(self.batch_size)
         # batch = self.replay_memory.sample(self.batch_size)
         states = list(map(lambda x: x[0].tolist(), batch))  # pylint: disable=W0141
         next_states = list(map(lambda x: x[3].tolist(), batch))  # pylint: disable=W0141
         actions = list(map(lambda x: x[1].tolist(), batch))  # pylint: disable=W0141
         rewards = list(map(lambda x: x[2], batch))  # pylint: disable=W0141
         terminates = list(map(lambda x: x[4], batch))  # pylint: disable=W0141
 
         return idx, states, next_states, actions, rewards, terminates
 
     def add_sample(self, state, action, reward, next_state, terminate):
         self.critic.eval()
         self.actor.eval()
         self.target_critic.eval()
         self.target_actor.eval()
         batch_state = self.normalizer([state.tolist()])
         batch_next_state = self.normalizer([next_state.tolist()])
         current_value = self.critic(batch_state, self.totensor([action.tolist()]))
         target_action = self.target_actor(batch_next_state)
         target_value = self.totensor([reward]) \
             + self.totensor([0 if x else 1 for x in [terminate]]) \
             * self.target_critic(batch_next_state, target_action) * self.gamma
         error = float(torch.abs(current_value - target_value).data.numpy()[0])
 
         self.target_actor.train()
         self.actor.train()
         self.critic.train()
         self.target_critic.train()
         self.replay_memory.add(error, (state, action, reward, next_state, terminate))
 
     def update(self):
         idxs, states, next_states, actions, rewards, terminates = self._sample_batch()
         batch_states = self.normalizer(states)
         batch_next_states = self.normalizer(next_states)
         batch_actions = self.totensor(actions)
         batch_rewards = self.totensor(rewards)
         mask = [0 if x else 1 for x in terminates]
         mask = self.totensor(mask)
 
         target_next_actions = self.target_actor(batch_next_states).detach()
         target_next_value = self.target_critic(batch_next_states, target_next_actions).detach()
         current_value = self.critic(batch_states, batch_actions)
         # TODO (dongshen): This clause is the original clause, but it has some mistakes
         # next_value = batch_rewards + mask * target_next_value * self.gamma
         # Since terminate is always false, I remove the mask here.
         next_value = batch_rewards + target_next_value * self.gamma
         # Update Critic
 
         # update prioritized memory
         error = torch.abs(current_value - next_value).data.numpy()
         for i in range(self.batch_size):
             idx = idxs[i]
             self.replay_memory.update(idx, error[i][0])
 
         loss = self.loss_criterion(current_value, next_value)
         self.critic_optimizer.zero_grad()
         loss.backward()
         self.critic_optimizer.step()
 
         # Update Actor
         self.critic.eval()
         policy_loss = -self.critic(batch_states, self.actor(batch_states))
         policy_loss = policy_loss.mean()
         self.actor_optimizer.zero_grad()
         policy_loss.backward()
 
         self.actor_optimizer.step()
         self.critic.train()
 
         self._update_target(self.target_critic, self.critic, tau=self.tau)
         self._update_target(self.target_actor, self.actor, tau=self.tau)
 
         return loss.data, policy_loss.data
 
     def choose_action(self, x):
         """ Select Action according to the current state
         Args:
             x: np.array, current state
         """
         self.actor.eval()
         act = self.actor(self.normalizer([x.tolist()])).squeeze(0)
         self.actor.train()
         action = act.data.numpy()
         if self.ouprocess:
             action += self.noise.noise()
         return action.clip(0, 1)
 
     def sample_noise(self):
         self.actor.sample_noise()
 
     def load_model(self, model_name):
         """ Load Torch Model from files
         Args:
             model_name: str, model path
         """
         self.actor.load_state_dict(
             torch.load('{}_actor.pth'.format(model_name))
         )
         self.critic.load_state_dict(
             torch.load('{}_critic.pth'.format(model_name))
         )
 
     def save_model(self, model_name):
         """ Save Torch Model from files
         Args:
             model_dir: str, model dir
             title: str, model name
         """
         torch.save(
             self.actor.state_dict(),
             '{}_actor.pth'.format(model_name)
         )
 
         torch.save(
             self.critic.state_dict(),
             '{}_critic.pth'.format(model_name)
         )
 
+    def set_model(self, actor_dict, critic_dict):
+        self.actor.load_state_dict(pickle.loads(actor_dict))
+        self.critic.load_state_dict(pickle.loads(critic_dict))
+
+    def get_model(self):
+        return pickle.dumps(self.actor.state_dict()), pickle.dumps(self.critic.state_dict())
+
     def save_actor(self, path):
         """ save actor network
         Args:
              path, str, path to save
         """
         torch.save(
             self.actor.state_dict(),
             path
         )
 
     def load_actor(self, path):
         """ load actor network
         Args:
              path, str, path to load
         """
         self.actor.load_state_dict(
             torch.load(path)
         )
 
     def train_actor(self, batch_data, is_train=True):
         """ Train the actor separately with data
         Args:
             batch_data: tuple, (states, actions)
             is_train: bool
         Return:
             _loss: float, training loss
         """
         states, action = batch_data
 
         if is_train:
             self.actor.train()
             pred = self.actor(self.normalizer(states))
             action = self.totensor(action)
 
             _loss = self.actor_criterion(pred, action)
 
             self.actor_optimizer.zero_grad()
             _loss.backward()
             self.actor_optimizer.step()
 
         else:
             self.actor.eval()
             pred = self.actor(self.normalizer(states))
             action = self.totensor(action)
             _loss = self.actor_criterion(pred, action)
 
         return _loss.data[0]
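The heart of this change is the new set_model/get_model pair, which moves model persistence from .pth checkpoint files on disk to pickled state_dict bytes that can live in a database column. A minimal round-trip sketch (the state/action dimensions here are invented for illustration):

    import numpy as np
    from analysis.ddpg.ddpg import DDPG

    # Hypothetical dimensions: 8 metrics (states), 3 tunable knobs (actions).
    ddpg = DDPG(n_states=8, n_actions=3)
    actor_bytes, critic_bytes = ddpg.get_model()    # pickled state_dicts, ready for a BinaryField

    # ... store actor_bytes / critic_bytes, then later rebuild an equivalent agent ...
    restored = DDPG(n_states=8, n_actions=3)
    restored.set_model(actor_bytes, critic_bytes)   # same weights as the original agent
    action = restored.choose_action(np.zeros(8))    # recommendation in [0, 1], plus OU exploration noise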
@@ -1,41 +1,33 @@
 #
-# OUProcess.py
+# OtterTune - ou_process.py
 #
-# Copyright
+# Copyright (c) 2017-18, Carnegie Mellon University Database Group
 #
+# from: https://github.com/KqSMea8/CDBTune
+# Zhang, Ji, et al. "An end-to-end automatic cloud database tuning system using
+# deep reinforcement learning." Proceedings of the 2019 International Conference
+# on Management of Data. ACM, 2019
 
 import numpy as np
 
 
-# from https://github.com/songrotek/DDPG/blob/master/ou_noise.py
 class OUProcess(object):
 
     def __init__(self, n_actions, theta=0.15, mu=0, sigma=0.1, ):
 
         self.n_actions = n_actions
         self.theta = theta
         self.mu = mu
         self.sigma = sigma
         self.current_value = np.ones(self.n_actions) * self.mu
 
     def reset(self, sigma=0):
         self.current_value = np.ones(self.n_actions) * self.mu
         if sigma != 0:
             self.sigma = sigma
 
     def noise(self):
         x = self.current_value
         dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(len(x))
         self.current_value = x + dx
         return self.current_value
-
-
-if __name__ == '__main__':
-    import matplotlib.pyplot as plt  # pylint: disable=wrong-import-position
-    ou = OUProcess(3, theta=0.3)  # pylint: disable=invalid-name
-    states = []  # pylint: disable=invalid-name
-    for i in range(1000):
-        states.append(ou.noise())
-
-    plt.plot(states)
-    plt.show()
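The __main__ demo that plotted the noise trace was dropped in the rename. For reference, a short sketch of how this Ornstein-Uhlenbeck noise is consumed by the agent (the dimensions and values are illustrative, not from the commit):

    import numpy as np
    from analysis.ddpg.ou_process import OUProcess

    ou = OUProcess(n_actions=3, theta=0.15, mu=0, sigma=0.1)
    action = np.array([0.5, 0.5, 0.5])        # deterministic actor output in [0, 1]
    noisy = (action + ou.noise()).clip(0, 1)  # temporally correlated exploration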
@@ -1,121 +1,132 @@
 #
-# prioritized_replay_memory.py
+# OtterTune - prioritized_replay_memory.py
 #
-# Copyright
+# Copyright (c) 2017-18, Carnegie Mellon University Database Group
 #
+# from: https://github.com/KqSMea8/CDBTune
+# Zhang, Ji, et al. "An end-to-end automatic cloud database tuning system using
+# deep reinforcement learning." Proceedings of the 2019 International Conference
+# on Management of Data. ACM, 2019
+
 import random
 import pickle
 import numpy as np
 
 
 class SumTree(object):
     write = 0
 
     def __init__(self, capacity):
         self.capacity = capacity
         self.tree = np.zeros(2 * capacity - 1)
         self.data = np.zeros(capacity, dtype=object)
         self.num_entries = 0
 
     def _propagate(self, idx, change):
         parent = (idx - 1) // 2
         self.tree[parent] += change
         if parent != 0:
             self._propagate(parent, change)
 
     def _retrieve(self, idx, s):
         left = 2 * idx + 1
         right = left + 1
 
         if left >= len(self.tree):
             return idx
 
         if s <= self.tree[left]:
             return self._retrieve(left, s)
         else:
             return self._retrieve(right, s - self.tree[left])
 
     def total(self):
         return self.tree[0]
 
     def add(self, p, data):
         idx = self.write + self.capacity - 1
 
         self.data[self.write] = data
         self.update(idx, p)
 
         self.write += 1
         if self.write >= self.capacity:
             self.write = 0
         if self.num_entries < self.capacity:
             self.num_entries += 1
 
     def update(self, idx, p):
         change = p - self.tree[idx]
 
         self.tree[idx] = p
         self._propagate(idx, change)
 
     def get(self, s):
         idx = self._retrieve(0, s)
         data_idx = idx - self.capacity + 1
         return [idx, self.tree[idx], self.data[data_idx]]
 
 
 class PrioritizedReplayMemory(object):
 
     def __init__(self, capacity):
         self.tree = SumTree(capacity)
         self.capacity = capacity
         self.e = 0.01  # pylint: disable=invalid-name
         self.a = 0.6  # pylint: disable=invalid-name
         self.beta = 0.4
         self.beta_increment_per_sampling = 0.001
 
     def _get_priority(self, error):
         return (error + self.e) ** self.a
 
     def add(self, error, sample):
         # (s, a, r, s, t)
         p = self._get_priority(error)
         self.tree.add(p, sample)
 
     def __len__(self):
         return self.tree.num_entries
 
     def sample(self, n):
         batch = []
         idxs = []
         segment = self.tree.total() / n
         priorities = []
 
         self.beta = np.min([1., self.beta + self.beta_increment_per_sampling])
 
         for i in range(n):
             a = segment * i
             b = segment * (i + 1)
 
             s = random.uniform(a, b)
             (idx, p, data) = self.tree.get(s)
             priorities.append(p)
             batch.append(data)
             idxs.append(idx)
         return batch, idxs
 
         # sampling_probabilities = priorities / self.tree.total()
         # is_weight = np.power(self.tree.num_entries * sampling_probabilities, -self.beta)
         # is_weight /= is_weight.max()
 
     def update(self, idx, error):
         p = self._get_priority(error)
         self.tree.update(idx, p)
 
     def save(self, path):
         f = open(path, 'wb')
         pickle.dump({"tree": self.tree}, f)
         f.close()
 
     def load_memory(self, path):
         with open(path, 'rb') as f:
             _memory = pickle.load(f)
         self.tree = _memory['tree']
+
+    def get(self):
+        return pickle.dumps({"tree": self.tree})
+
+    def set(self, binary):
+        self.tree = pickle.loads(binary)['tree']
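The new get/set methods mirror the existing file-based save/load_memory, but return and accept raw bytes so the whole SumTree can be stored alongside the model blobs. A small round-trip sketch (the sample shapes are made up):

    import numpy as np
    from analysis.ddpg.prioritized_replay_memory import PrioritizedReplayMemory

    memory = PrioritizedReplayMemory(capacity=100000)
    state, action = np.zeros(8), np.zeros(3)          # hypothetical shapes
    memory.add(error=0.5, sample=(state, action, 1.0, state, False))

    blob = memory.get()                   # pickled {'tree': SumTree}, fits a BinaryField

    restored = PrioritizedReplayMemory(capacity=100000)
    restored.set(blob)                    # len(restored) == len(memory)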
@@ -185,6 +185,9 @@ class Migration(migrations.Migration):
                 ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                 ('name', models.CharField(max_length=64, verbose_name=b'session name')),
                 ('description', models.TextField(blank=True, null=True)),
+                ('ddpg_actor_model', models.BinaryField(null=True, blank=True)),
+                ('ddpg_critic_model', models.BinaryField(null=True, blank=True)),
+                ('ddpg_reply_memory', models.BinaryField(null=True, blank=True)),
                 ('creation_time', models.DateTimeField()),
                 ('last_update', models.DateTimeField()),
                 ('upload_code', models.CharField(max_length=30, unique=True)),
@@ -187,6 +187,9 @@ class Session(BaseModel):
     hardware = models.ForeignKey(Hardware)
     algorithm = models.IntegerField(choices=AlgorithmType.choices(),
                                     default=AlgorithmType.OTTERTUNE)
+    ddpg_actor_model = models.BinaryField(null=True, blank=True)
+    ddpg_critic_model = models.BinaryField(null=True, blank=True)
+    ddpg_reply_memory = models.BinaryField(null=True, blank=True)
 
     project = models.ForeignKey(Project)
     creation_time = models.DateTimeField()
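With these three BinaryField columns on Session, the per-session DDPG state is saved and restored entirely through the ORM, which is exactly what train_ddpg does below. A condensed sketch of that flow (session is a Session instance, ddpg a trained DDPG):

    session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model()
    session.ddpg_reply_memory = ddpg.replay_memory.get()
    session.save()                        # all three blobs land in the database

    # ... later, possibly in another worker process ...
    ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
    ddpg.replay_memory.set(session.ddpg_reply_memory)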
@@ -353,7 +353,7 @@ class BaseParser(object, metaclass=ABCMeta):
 
     def format_enum(self, enum_value, metadata):
         enumvals = metadata.enumvals.split(',')
-        return enumvals[enum_value]
+        return enumvals[int(round(enum_value))]
 
     def format_integer(self, int_value, metadata):
         return int(round(int_value))
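This fix matters because the DDPG recommender emits continuous knob values, so enum_value can arrive as a float, and indexing a Python list with a float raises TypeError. A tiny illustration (the enum values are invented):

    enumvals = 'off,on,auto'.split(',')
    enumvals[int(round(1.7))]   # -> 'auto': rounds to the nearest valid index
    enumvals[1.7]               # TypeError: list indices must be integers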
@@ -35,7 +35,7 @@ MAX_TRAIN_SIZE = 7000
 # Batch size in GPR model
 BATCH_SIZE = 3000
 
 # Threads for TensorFlow config
 NUM_THREADS = 4
 
 # ---GRADIENT DESCENT CONSTANTS---
@@ -54,3 +54,19 @@ DEFAULT_EPSILON = 1e-6
 DEFAULT_SIGMA_MULTIPLIER = 3.0
 
 DEFAULT_MU_MULTIPLIER = 1.0
+
+# ---CONSTRAINTS CONSTANTS---
+# Batch size in DDPG model
+DDPG_BATCH_SIZE = 32
+
+# Learning rate of actor network
+ACTOR_LEARNING_RATE = 0.001
+
+# Learning rate of critic network
+CRITIC_LEARNING_RATE = 0.001
+
+# The impact of future reward on the decision
+GAMMA = 0.1
+
+# The changing rate of the target network
+TAU = 0.002
@@ -7,7 +7,7 @@ from .async_tasks import (aggregate_target_results,
                           configuration_recommendation,
                           map_workload,
                           train_ddpg,
-                          run_ddpg)
+                          configuration_recommendation_ddpg)
 
 
 from .periodic_tasks import (run_background_tasks)
@@ -5,14 +5,12 @@
 #
 import random
 import queue
-from os.path import dirname, abspath, join
-import os
 import numpy as np
 
 from celery.task import task, Task
 from celery.utils.log import get_task_logger
 from djcelery.models import TaskMeta
-from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
 
 from analysis.ddpg.ddpg import DDPG
 from analysis.gp import GPRNP
@@ -29,7 +27,10 @@ from website.settings import (DEFAULT_LENGTH_SCALE, DEFAULT_MAGNITUDE,
                               MAX_TRAIN_SIZE, BATCH_SIZE, NUM_THREADS,
                               DEFAULT_RIDGE, DEFAULT_LEARNING_RATE,
                               DEFAULT_EPSILON, MAX_ITER, GPR_EPS,
-                              DEFAULT_SIGMA_MULTIPLIER, DEFAULT_MU_MULTIPLIER)
+                              DEFAULT_SIGMA_MULTIPLIER, DEFAULT_MU_MULTIPLIER,
+                              DDPG_BATCH_SIZE, ACTOR_LEARNING_RATE,
+                              CRITIC_LEARNING_RATE, GAMMA, TAU)
 
 from website.settings import INIT_FLIP_PROB, FLIP_PROB_DECAY
 from website.types import VarType
@@ -235,10 +236,10 @@ def train_ddpg(result_id):
 
     # Clean knob data
     cleaned_agg_data = clean_knob_data(agg_data['X_matrix'], agg_data['X_columnlabels'], session)
-    agg_data['X_matrix'] = np.array(cleaned_agg_data[0]).flatten()
-    agg_data['X_columnlabels'] = np.array(cleaned_agg_data[1]).flatten()
-    knob_data = DataUtil.normalize_knob_data(agg_data['X_matrix'],
-                                             agg_data['X_columnlabels'], session)
+    knob_data = np.array(cleaned_agg_data[0])
+    knob_labels = np.array(cleaned_agg_data[1])
+    knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels.flatten(), session))
+    knob_data = MinMaxScaler().fit(knob_bounds).transform(knob_data)[0]
     knob_num = len(knob_data)
     metric_num = len(metric_data)
     LOG.info('knob_num: %d, metric_num: %d', knob_num, metric_num)
@@ -276,26 +277,23 @@ def train_ddpg(result_id):
              * (2 * prev_objective - objective) / prev_objective
 
     # Update ddpg
-    project_root = dirname(dirname(dirname(abspath(__file__))))
-    saved_memory = join(project_root, 'checkpoint/reply_memory_' + session.project.name)
-    saved_model = join(project_root, 'checkpoint/ddpg_' + session.project.name)
-    ddpg = DDPG(n_actions=knob_num, n_states=metric_num)
-    if os.path.exists(saved_memory):
-        ddpg.replay_memory.load_memory(saved_memory)
-        ddpg.load_model(saved_model)
+    ddpg = DDPG(n_actions=knob_num, n_states=metric_num, alr=ACTOR_LEARNING_RATE,
+                clr=CRITIC_LEARNING_RATE, gamma=GAMMA, batch_size=DDPG_BATCH_SIZE, tau=TAU)
+    if session.ddpg_actor_model and session.ddpg_critic_model:
+        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
+    if session.ddpg_reply_memory:
+        ddpg.replay_memory.set(session.ddpg_reply_memory)
     ddpg.add_sample(prev_metric_data, knob_data, reward, metric_data, False)
     if len(ddpg.replay_memory) > 32:
         ddpg.update()
-    checkpoint_dir = join(project_root, 'checkpoint')
-    if not os.path.exists(checkpoint_dir):
-        os.makedirs(checkpoint_dir)
-    ddpg.replay_memory.save(saved_memory)
-    ddpg.save_model(saved_model)
+    session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model()
+    session.ddpg_reply_memory = ddpg.replay_memory.get()
+    session.save()
 
     return result_info
 
 
-@task(base=ConfigurationRecommendation, name='run_ddpg')
-def run_ddpg(result_info):
+@task(base=ConfigurationRecommendation, name='configuration_recommendation_ddpg')
+def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-name
     LOG.info('Use ddpg to recommend configuration')
     result_id = result_info['newest_result_id']
     result = Result.objects.filter(pk=result_id)
@@ -305,20 +303,20 @@ def run_ddpg(result_info):
     cleaned_agg_data = clean_knob_data(agg_data['X_matrix'], agg_data['X_columnlabels'],
                                        session)
     knob_labels = np.array(cleaned_agg_data[1]).flatten()
-    knob_data = np.array(cleaned_agg_data[0]).flatten()
-    knob_num = len(knob_data)
+    knob_num = len(knob_labels)
+
     metric_num = len(metric_data)
 
-    project_root = dirname(dirname(dirname(abspath(__file__))))
-    saved_memory = join(project_root, 'checkpoint/reply_memory_' + session.project.name)
-    saved_model = join(project_root, 'checkpoint/ddpg_' + session.project.name)
-    ddpg = DDPG(n_actions=knob_num, n_states=metric_num)
-    if os.path.exists(saved_memory):
-        ddpg.replay_memory.load_memory(saved_memory)
-        ddpg.load_model(saved_model)
+    ddpg = DDPG(n_actions=knob_num, n_states=metric_num, alr=ACTOR_LEARNING_RATE,
+                clr=CRITIC_LEARNING_RATE, gamma=GAMMA, batch_size=DDPG_BATCH_SIZE, tau=TAU)
+    if session.ddpg_actor_model is not None and session.ddpg_critic_model is not None:
+        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
+    if session.ddpg_reply_memory is not None:
+        ddpg.replay_memory.set(session.ddpg_reply_memory)
     knob_data = ddpg.choose_action(metric_data)
     LOG.info('recommended knob: %s', knob_data)
-    knob_data = DataUtil.denormalize_knob_data(knob_data, knob_labels, session)
+
+    knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels, session))
+    knob_data = MinMaxScaler().fit(knob_bounds).inverse_transform(knob_data.reshape(1, -1))[0]
     conf_map = {k: knob_data[i] for i, k in enumerate(knob_labels)}
     conf_map_res = {}
     conf_map_res['status'] = 'good'
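The normalization scheme on both sides of the agent is symmetric: train_ddpg scales knob values into [0, 1] with a MinMaxScaler fit on the knobs' min/max bounds, and configuration_recommendation_ddpg runs the inverse transform on the recommended action to recover DBMS units. A toy sketch with invented bounds for two knobs:

    import numpy as np
    from sklearn.preprocessing import MinMaxScaler

    knob_bounds = np.vstack((np.array([0.0, 128.0]),     # minvals (hypothetical)
                             np.array([1.0, 4096.0])))   # maxvals (hypothetical)
    scaler = MinMaxScaler().fit(knob_bounds)

    normalized = scaler.transform(np.array([[0.25, 1024.0]]))[0]        # ~[0.25, 0.226]
    restored = scaler.inverse_transform(normalized.reshape(1, -1))[0]   # back to raw knob values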
@@ -93,30 +93,35 @@ class TaskUtil(object):
 class DataUtil(object):
 
     @staticmethod
-    def normalize_knob_data(knob_values, knob_labels, session):
-        for i, knob in enumerate(knob_labels):
+    def get_knob_bounds(knob_labels, session):
+        minvals = []
+        maxvals = []
+        for _, knob in enumerate(knob_labels):
             knob_object = KnobCatalog.objects.get(dbms=session.dbms, name=knob, tunable=True)
-            minval = float(knob_object.minval)
-            maxval = float(knob_object.maxval)
-            knob_new = SessionKnob.objects.filter(knob=knob_object, session=session, tunable=True)
-            if knob_new.exists():
-                minval = float(knob_new[0].minval)
-                maxval = float(knob_new[0].maxval)
-            knob_values[i] = (knob_values[i] - minval) / (maxval - minval)
-            knob_values[i] = max(0, min(knob_values[i], 1))
-        return knob_values
-
-    @staticmethod
-    def denormalize_knob_data(knob_values, knob_labels, session):
-        for i, knob in enumerate(knob_labels):
-            knob_object = KnobCatalog.objects.get(dbms=session.dbms, name=knob, tunable=True)
-            minval = float(knob_object.minval)
-            maxval = float(knob_object.maxval)
             knob_session_object = SessionKnob.objects.filter(knob=knob_object, session=session,
                                                              tunable=True)
             if knob_session_object.exists():
                 minval = float(knob_session_object[0].minval)
                 maxval = float(knob_session_object[0].maxval)
+            else:
+                minval = float(knob_object.minval)
+                maxval = float(knob_object.maxval)
+            minvals.append(minval)
+            maxvals.append(maxval)
+        return np.array(minvals), np.array(maxvals)
+
+    @staticmethod
+    def denormalize_knob_data(knob_values, knob_labels, session):
+        for i, knob in enumerate(knob_labels):
+            knob_object = KnobCatalog.objects.get(dbms=session.dbms, name=knob, tunable=True)
+            knob_session_object = SessionKnob.objects.filter(knob=knob_object, session=session,
+                                                             tunable=True)
+            if knob_session_object.exists():
+                minval = float(knob_session_object[0].minval)
+                maxval = float(knob_session_object[0].maxval)
+            else:
+                minval = float(knob_object.minval)
+                maxval = float(knob_object.maxval)
             knob_values[i] = knob_values[i] * (maxval - minval) + minval
         return knob_values
@@ -30,8 +30,8 @@ from .models import (BackupData, DBMSCatalog, KnobCatalog, KnobData, MetricCatal
                      MetricData, MetricManager, Project, Result, Session, Workload,
                      SessionKnob)
 from .parser import Parser
-from .tasks import (aggregate_target_results, map_workload, train_ddpg, run_ddpg,
-                    configuration_recommendation)
+from .tasks import (aggregate_target_results, map_workload, train_ddpg,
+                    configuration_recommendation, configuration_recommendation_ddpg)
 from .types import (DBMSType, KnobUnitType, MetricType,
                     TaskType, VarType, WorkloadStatusType, AlgorithmType)
 from .utils import JSONUtil, LabelUtil, MediaUtil, TaskUtil