save ddpg model in database

This commit is contained in:
Dongsheng Yang 2019-09-26 14:44:28 -04:00 committed by Dana Van Aken
parent c8fbaf6e4b
commit a3fcf59f07
12 changed files with 683 additions and 736 deletions

View File

@@ -1,10 +1,10 @@
 #
-# __init__.py
+# OtterTune - __init__.py
 #
-# Copyright
+# Copyright (c) 2017-18, Carnegie Mellon University Database Group
 #
 from analysis.ddpg.ddpg import DDPG
 
 __all__ = ["DDPG"]

View File

@@ -1,509 +1,428 @@
 #
-# ddpg.py
+# OtterTune - ddpg.py
 #
-# Copyright
+# Copyright (c) 2017-18, Carnegie Mellon University Database Group
 #
-"""
-Deep Deterministic Policy Gradient Model
-
-"""
-import logging
+# from: https://github.com/KqSMea8/CDBTune
+# Zhang, Ji, et al. "An end-to-end automatic cloud database tuning system using
+# deep reinforcement learning." Proceedings of the 2019 International Conference
+# on Management of Data. ACM, 2019
 import os
-import sys
-import math
 import pickle
+import math
 import numpy as np
 import torch
 import torch.nn as nn
 from torch.nn import init, Parameter
 import torch.nn.functional as F
 import torch.optim as optimizer
 from torch.autograd import Variable
 
-from analysis.ddpg.OUProcess import OUProcess
+from analysis.ddpg.ou_process import OUProcess
 from analysis.ddpg.prioritized_replay_memory import PrioritizedReplayMemory
+from analysis.util import get_analysis_logger
 
-LOG = logging.getLogger(__name__)
-sys.path.append('../')
+LOG = get_analysis_logger(__name__)
 
 
 # code from https://github.com/Kaixhin/NoisyNet-A3C/blob/master/model.py
 class NoisyLinear(nn.Linear):
     def __init__(self, in_features, out_features, sigma_init=0.05, bias=True):
         super(NoisyLinear, self).__init__(in_features, out_features, bias=True)
         # reuse self.weight and self.bias
         self.sigma_init = sigma_init
         self.sigma_weight = Parameter(torch.Tensor(out_features, in_features))
         self.sigma_bias = Parameter(torch.Tensor(out_features))
+        self.epsilon_weight = None
+        self.epsilon_bias = None
         self.register_buffer('epsilon_weight', torch.zeros(out_features, in_features))
         self.register_buffer('epsilon_bias', torch.zeros(out_features))
         self.reset_parameters()
 
     def reset_parameters(self):
         # Only init after all params added (otherwise super().__init__() fails)
         if hasattr(self, 'sigma_weight'):
             init.uniform(self.weight, -math.sqrt(3 / self.in_features),
                          math.sqrt(3 / self.in_features))
             init.uniform(self.bias, -math.sqrt(3 / self.in_features),
                          math.sqrt(3 / self.in_features))
             init.constant(self.sigma_weight, self.sigma_init)
             init.constant(self.sigma_bias, self.sigma_init)
 
     def forward(self, x):
         return F.linear(x, self.weight + self.sigma_weight * Variable(self.epsilon_weight),
                         self.bias + self.sigma_bias * Variable(self.epsilon_bias))
 
-    # pylint: disable=attribute-defined-outside-init
     def sample_noise(self):
         self.epsilon_weight = torch.randn(self.out_features, self.in_features)
         self.epsilon_bias = torch.randn(self.out_features)
 
     def remove_noise(self):
         self.epsilon_weight = torch.zeros(self.out_features, self.in_features)
         self.epsilon_bias = torch.zeros(self.out_features)
-    # pylint: enable=attribute-defined-outside-init
 
 
 class Normalizer(object):
 
     def __init__(self, mean, variance):
         if isinstance(mean, list):
             mean = np.array(mean)
         if isinstance(variance, list):
             variance = np.array(variance)
         self.mean = mean
         self.std = np.sqrt(variance + 0.00001)
 
     def normalize(self, x):
         if isinstance(x, list):
             x = np.array(x)
         x = x - self.mean
         x = x / self.std
 
         return Variable(torch.FloatTensor(x))
 
     def __call__(self, x, *args, **kwargs):
         return self.normalize(x)
 
 
-class ActorLow(nn.Module):
-
-    def __init__(self, n_states, n_actions, ):
-        super(ActorLow, self).__init__()
-        self.layers = nn.Sequential(
-            nn.BatchNorm1d(n_states),
-            nn.Linear(n_states, 32),
-            nn.LeakyReLU(negative_slope=0.2),
-            nn.BatchNorm1d(32),
-            nn.Linear(32, n_actions),
-            nn.LeakyReLU(negative_slope=0.2)
-        )
-        self._init_weights()
-        self.out_func = nn.Tanh()
-
-    def _init_weights(self):
-        for m in self.layers:
-            if isinstance(m, nn.Linear):
-                m.weight.data.normal_(0.0, 1e-3)
-                m.bias.data.uniform_(-0.1, 0.1)
-
-    def forward(self, x):  # pylint: disable=arguments-differ
-        out = self.layers(x)
-        return self.out_func(out)
-
-
-class CriticLow(nn.Module):
-
-    def __init__(self, n_states, n_actions):
-        super(CriticLow, self).__init__()
-        self.state_input = nn.Linear(n_states, 32)
-        self.action_input = nn.Linear(n_actions, 32)
-        self.act = nn.LeakyReLU(negative_slope=0.2)
-        self.state_bn = nn.BatchNorm1d(n_states)
-        self.layers = nn.Sequential(
-            nn.Linear(64, 1),
-            nn.LeakyReLU(negative_slope=0.2),
-        )
-        self._init_weights()
-
-    def _init_weights(self):
-        self.state_input.weight.data.normal_(0.0, 1e-3)
-        self.state_input.bias.data.uniform_(-0.1, 0.1)
-
-        self.action_input.weight.data.normal_(0.0, 1e-3)
-        self.action_input.bias.data.uniform_(-0.1, 0.1)
-
-        for m in self.layers:
-            if isinstance(m, nn.Linear):
-                m.weight.data.normal_(0.0, 1e-3)
-                m.bias.data.uniform_(-0.1, 0.1)
-
-    def forward(self, x, action):  # pylint: disable=arguments-differ
-        x = self.state_bn(x)
-        x = self.act(self.state_input(x))
-        action = self.act(self.action_input(action))
-
-        _input = torch.cat([x, action], dim=1)
-        value = self.layers(_input)
-        return value
-
-
 class Actor(nn.Module):
 
     def __init__(self, n_states, n_actions, noisy=False):
         super(Actor, self).__init__()
         self.layers = nn.Sequential(
             nn.Linear(n_states, 128),
             nn.LeakyReLU(negative_slope=0.2),
             nn.BatchNorm1d(128),
             nn.Linear(128, 128),
             nn.Tanh(),
             nn.Dropout(0.3),
             nn.Linear(128, 64),
             nn.Tanh(),
             nn.BatchNorm1d(64),
         )
         if noisy:
             self.out = NoisyLinear(64, n_actions)
         else:
             self.out = nn.Linear(64, n_actions)
         self._init_weights()
         self.act = nn.Sigmoid()
 
     def _init_weights(self):
         for m in self.layers:
             if isinstance(m, nn.Linear):
                 m.weight.data.normal_(0.0, 1e-2)
                 m.bias.data.uniform_(-0.1, 0.1)
 
     def sample_noise(self):
         self.out.sample_noise()
 
     def forward(self, x):  # pylint: disable=arguments-differ
         out = self.act(self.out(self.layers(x)))
         return out
 
 
 class Critic(nn.Module):
 
     def __init__(self, n_states, n_actions):
         super(Critic, self).__init__()
         self.state_input = nn.Linear(n_states, 128)
         self.action_input = nn.Linear(n_actions, 128)
         self.act = nn.Tanh()
         self.layers = nn.Sequential(
             nn.Linear(256, 256),
             nn.LeakyReLU(negative_slope=0.2),
             nn.BatchNorm1d(256),
             nn.Linear(256, 64),
             nn.Tanh(),
             nn.Dropout(0.3),
             nn.BatchNorm1d(64),
             nn.Linear(64, 1),
         )
         self._init_weights()
 
     def _init_weights(self):
         self.state_input.weight.data.normal_(0.0, 1e-2)
         self.state_input.bias.data.uniform_(-0.1, 0.1)
         self.action_input.weight.data.normal_(0.0, 1e-2)
         self.action_input.bias.data.uniform_(-0.1, 0.1)
 
         for m in self.layers:
             if isinstance(m, nn.Linear):
                 m.weight.data.normal_(0.0, 1e-2)
                 m.bias.data.uniform_(-0.1, 0.1)
 
     def forward(self, x, action):  # pylint: disable=arguments-differ
         x = self.act(self.state_input(x))
         action = self.act(self.action_input(action))
 
         _input = torch.cat([x, action], dim=1)
         value = self.layers(_input)
         return value
 
 
 class DDPG(object):
 
-    def __init__(self, n_states, n_actions, opt=None, ouprocess=True, mean_var_path=None,
-                 supervised=False):
-        """ DDPG Algorithms
-        Args:
-            n_states: int, dimension of states
-            n_actions: int, dimension of actions
-            opt: dict, params
-            supervised, bool, pre-train the actor with supervised learning
-        """
+    def __init__(self, n_states, n_actions, model_name='', alr=0.001, clr=0.001,
+                 gamma=0.9, batch_size=32, tau=0.002, memory_size=100000,
+                 ouprocess=True, mean_var_path=None, supervised=False):
         self.n_states = n_states
         self.n_actions = n_actions
-
-        if opt is None:
-            opt = {
-                'model': '',
-                'alr': 0.001,
-                'clr': 0.001,
-                'gamma': 0.9,
-                'batch_size': 32,
-                'tau': 0.002,
-                'memory_size': 100000
-            }
-
-        # Params
-        self.alr = opt['alr']
-        self.clr = opt['clr']
-        self.model_name = opt['model']
-        self.batch_size = opt['batch_size']
-        self.gamma = opt['gamma']
-        self.tau = opt['tau']
+        self.alr = alr
+        self.clr = clr
+        self.model_name = model_name
+        self.batch_size = batch_size
+        self.gamma = gamma
+        self.tau = tau
         self.ouprocess = ouprocess
 
         if mean_var_path is None:
             mean = np.zeros(n_states)
             var = np.zeros(n_states)
         elif not os.path.exists(mean_var_path):
             mean = np.zeros(n_states)
             var = np.zeros(n_states)
         else:
             with open(mean_var_path, 'rb') as f:
                 mean, var = pickle.load(f)
 
         self.normalizer = Normalizer(mean, var)
 
         if supervised:
             self._build_actor()
             LOG.info("Supervised Learning Initialized")
         else:
             # Build Network
             self._build_network()
             LOG.info('Finish Initializing Networks')
 
-        self.replay_memory = PrioritizedReplayMemory(capacity=opt['memory_size'])
+        self.replay_memory = PrioritizedReplayMemory(capacity=memory_size)
         self.noise = OUProcess(n_actions)
-        # LOG.info('DDPG Initialzed!')
 
     @staticmethod
     def totensor(x):
         return Variable(torch.FloatTensor(x))
 
     def _build_actor(self):
         if self.ouprocess:
             noisy = False
         else:
             noisy = True
         self.actor = Actor(self.n_states, self.n_actions, noisy=noisy)
         self.actor_criterion = nn.MSELoss()
         self.actor_optimizer = optimizer.Adam(lr=self.alr, params=self.actor.parameters())
 
     def _build_network(self):
         if self.ouprocess:
             noisy = False
         else:
             noisy = True
         self.actor = Actor(self.n_states, self.n_actions, noisy=noisy)
         self.target_actor = Actor(self.n_states, self.n_actions)
         self.critic = Critic(self.n_states, self.n_actions)
         self.target_critic = Critic(self.n_states, self.n_actions)
 
         # if model params are provided, load them
         if len(self.model_name):
             self.load_model(model_name=self.model_name)
             LOG.info("Loading model from file: %s", self.model_name)
 
         # Copy actor's parameters
         self._update_target(self.target_actor, self.actor, tau=1.0)
 
         # Copy critic's parameters
         self._update_target(self.target_critic, self.critic, tau=1.0)
 
         self.loss_criterion = nn.MSELoss()
         self.actor_optimizer = optimizer.Adam(lr=self.alr, params=self.actor.parameters(),
                                               weight_decay=1e-5)
         self.critic_optimizer = optimizer.Adam(lr=self.clr, params=self.critic.parameters(),
                                                weight_decay=1e-5)
 
     @staticmethod
     def _update_target(target, source, tau):
         for (target_param, param) in zip(target.parameters(), source.parameters()):
             target_param.data.copy_(
                 target_param.data * (1 - tau) + param.data * tau
             )
 
     def reset(self, sigma):
         self.noise.reset(sigma)
 
     def _sample_batch(self):
         batch, idx = self.replay_memory.sample(self.batch_size)
         # batch = self.replay_memory.sample(self.batch_size)
         states = list(map(lambda x: x[0].tolist(), batch))  # pylint: disable=W0141
         next_states = list(map(lambda x: x[3].tolist(), batch))  # pylint: disable=W0141
         actions = list(map(lambda x: x[1].tolist(), batch))  # pylint: disable=W0141
         rewards = list(map(lambda x: x[2], batch))  # pylint: disable=W0141
         terminates = list(map(lambda x: x[4], batch))  # pylint: disable=W0141
 
         return idx, states, next_states, actions, rewards, terminates
 
     def add_sample(self, state, action, reward, next_state, terminate):
         self.critic.eval()
         self.actor.eval()
         self.target_critic.eval()
         self.target_actor.eval()
         batch_state = self.normalizer([state.tolist()])
         batch_next_state = self.normalizer([next_state.tolist()])
         current_value = self.critic(batch_state, self.totensor([action.tolist()]))
         target_action = self.target_actor(batch_next_state)
         target_value = self.totensor([reward]) \
             + self.totensor([0 if x else 1 for x in [terminate]]) \
             * self.target_critic(batch_next_state, target_action) * self.gamma
         error = float(torch.abs(current_value - target_value).data.numpy()[0])
 
         self.target_actor.train()
         self.actor.train()
         self.critic.train()
         self.target_critic.train()
         self.replay_memory.add(error, (state, action, reward, next_state, terminate))
 
     def update(self):
         idxs, states, next_states, actions, rewards, terminates = self._sample_batch()
         batch_states = self.normalizer(states)
         batch_next_states = self.normalizer(next_states)
         batch_actions = self.totensor(actions)
         batch_rewards = self.totensor(rewards)
         mask = [0 if x else 1 for x in terminates]
         mask = self.totensor(mask)
 
         target_next_actions = self.target_actor(batch_next_states).detach()
         target_next_value = self.target_critic(batch_next_states, target_next_actions).detach()
         current_value = self.critic(batch_states, batch_actions)
         # TODO (dongshen): This clause is the original clause, but it has some mistakes
         # next_value = batch_rewards + mask * target_next_value * self.gamma
         # Since terminate is always false, I remove the mask here.
         next_value = batch_rewards + target_next_value * self.gamma
         # Update Critic
 
         # update prioritized memory
         error = torch.abs(current_value - next_value).data.numpy()
         for i in range(self.batch_size):
             idx = idxs[i]
             self.replay_memory.update(idx, error[i][0])
 
         loss = self.loss_criterion(current_value, next_value)
         self.critic_optimizer.zero_grad()
         loss.backward()
         self.critic_optimizer.step()
 
         # Update Actor
         self.critic.eval()
         policy_loss = -self.critic(batch_states, self.actor(batch_states))
         policy_loss = policy_loss.mean()
         self.actor_optimizer.zero_grad()
         policy_loss.backward()
 
         self.actor_optimizer.step()
         self.critic.train()
 
         self._update_target(self.target_critic, self.critic, tau=self.tau)
         self._update_target(self.target_actor, self.actor, tau=self.tau)
 
         return loss.data, policy_loss.data
 
     def choose_action(self, x):
         """ Select Action according to the current state
         Args:
             x: np.array, current state
         """
         self.actor.eval()
         act = self.actor(self.normalizer([x.tolist()])).squeeze(0)
         self.actor.train()
         action = act.data.numpy()
         if self.ouprocess:
             action += self.noise.noise()
         return action.clip(0, 1)
 
     def sample_noise(self):
         self.actor.sample_noise()
 
     def load_model(self, model_name):
         """ Load Torch Model from files
         Args:
             model_name: str, model path
         """
         self.actor.load_state_dict(
             torch.load('{}_actor.pth'.format(model_name))
         )
         self.critic.load_state_dict(
             torch.load('{}_critic.pth'.format(model_name))
         )
 
     def save_model(self, model_name):
         """ Save Torch Model to files
         Args:
             model_name: str, model path
         """
         torch.save(
             self.actor.state_dict(),
             '{}_actor.pth'.format(model_name)
         )
 
         torch.save(
             self.critic.state_dict(),
             '{}_critic.pth'.format(model_name)
         )
 
+    def set_model(self, actor_dict, critic_dict):
+        self.actor.load_state_dict(pickle.loads(actor_dict))
+        self.critic.load_state_dict(pickle.loads(critic_dict))
+
+    def get_model(self):
+        return pickle.dumps(self.actor.state_dict()), pickle.dumps(self.critic.state_dict())
+
     def save_actor(self, path):
         """ save actor network
         Args:
             path: str, path to save
         """
         torch.save(
             self.actor.state_dict(),
             path
         )
 
     def load_actor(self, path):
         """ load actor network
         Args:
             path: str, path to load
         """
         self.actor.load_state_dict(
             torch.load(path)
         )
 
     def train_actor(self, batch_data, is_train=True):
         """ Train the actor separately with data
         Args:
             batch_data: tuple, (states, actions)
             is_train: bool
         Return:
             _loss: float, training loss
         """
         states, action = batch_data
         if is_train:
             self.actor.train()
             pred = self.actor(self.normalizer(states))
             action = self.totensor(action)
 
             _loss = self.actor_criterion(pred, action)
 
             self.actor_optimizer.zero_grad()
             _loss.backward()
             self.actor_optimizer.step()
 
         else:
             self.actor.eval()
             pred = self.actor(self.normalizer(states))
             action = self.totensor(action)
             _loss = self.actor_criterion(pred, action)
 
         return _loss.data[0]
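The new set_model()/get_model() pair is what makes it possible to keep the learned networks in the database rather than in checkpoint files: each network's state_dict is pickled to a bytes blob that fits in a binary column, and the same blob can be loaded back into a freshly constructed agent. A minimal sketch of that round trip, with hypothetical dimensions (10 metrics, 5 knobs) standing in for a real session:

    from analysis.ddpg.ddpg import DDPG

    # Hypothetical sizes: 10 metric dimensions (states), 5 tunable knobs (actions).
    ddpg = DDPG(n_states=10, n_actions=5)

    actor_blob, critic_blob = ddpg.get_model()   # pickled state_dicts as bytes

    # Later (e.g. in another Celery task), rebuild the agent from the stored blobs.
    restored = DDPG(n_states=10, n_actions=5)
    restored.set_model(actor_blob, critic_blob)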

View File

@@ -1,41 +1,33 @@
 #
-# OUProcess.py
+# OtterTune - ou_process.py
 #
-# Copyright
+# Copyright (c) 2017-18, Carnegie Mellon University Database Group
 #
+# from: https://github.com/KqSMea8/CDBTune
+# Zhang, Ji, et al. "An end-to-end automatic cloud database tuning system using
+# deep reinforcement learning." Proceedings of the 2019 International Conference
+# on Management of Data. ACM, 2019
+# from https://github.com/songrotek/DDPG/blob/master/ou_noise.py
 
 import numpy as np
 
 
-# from https://github.com/songrotek/DDPG/blob/master/ou_noise.py
 class OUProcess(object):
 
     def __init__(self, n_actions, theta=0.15, mu=0, sigma=0.1, ):
 
         self.n_actions = n_actions
         self.theta = theta
         self.mu = mu
         self.sigma = sigma
         self.current_value = np.ones(self.n_actions) * self.mu
 
     def reset(self, sigma=0):
         self.current_value = np.ones(self.n_actions) * self.mu
         if sigma != 0:
             self.sigma = sigma
 
     def noise(self):
         x = self.current_value
         dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(len(x))
         self.current_value = x + dx
         return self.current_value
-
-
-if __name__ == '__main__':
-    import matplotlib.pyplot as plt  # pylint: disable=wrong-import-position
-    ou = OUProcess(3, theta=0.3)  # pylint: disable=invalid-name
-    states = []  # pylint: disable=invalid-name
-    for i in range(1000):
-        states.append(ou.noise())
-    plt.plot(states)
-    plt.show()
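DDPG.choose_action() adds this Ornstein-Uhlenbeck noise to the actor's output before clipping it back to [0, 1]. A small sketch of that usage, with a made-up three-knob action vector:

    import numpy as np
    from analysis.ddpg.ou_process import OUProcess

    ou = OUProcess(n_actions=3, theta=0.15, sigma=0.1)   # one noise dimension per knob
    action = np.array([0.4, 0.6, 0.5])                   # hypothetical actor output
    noisy_action = (action + ou.noise()).clip(0, 1)      # exploration, as in DDPG.choose_action()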

View File

@@ -1,121 +1,132 @@
 #
-# prioritized_replay_memory.py
+# OtterTune - prioritized_replay_memory.py
 #
-# Copyright
+# Copyright (c) 2017-18, Carnegie Mellon University Database Group
 #
+# from: https://github.com/KqSMea8/CDBTune
+# Zhang, Ji, et al. "An end-to-end automatic cloud database tuning system using
+# deep reinforcement learning." Proceedings of the 2019 International Conference
+# on Management of Data. ACM, 2019
+
 import random
 import pickle
 import numpy as np
 
 
 class SumTree(object):
     write = 0
 
     def __init__(self, capacity):
         self.capacity = capacity
         self.tree = np.zeros(2 * capacity - 1)
         self.data = np.zeros(capacity, dtype=object)
         self.num_entries = 0
 
     def _propagate(self, idx, change):
         parent = (idx - 1) // 2
         self.tree[parent] += change
         if parent != 0:
             self._propagate(parent, change)
 
     def _retrieve(self, idx, s):
         left = 2 * idx + 1
         right = left + 1
 
         if left >= len(self.tree):
             return idx
 
         if s <= self.tree[left]:
             return self._retrieve(left, s)
         else:
             return self._retrieve(right, s - self.tree[left])
 
     def total(self):
         return self.tree[0]
 
     def add(self, p, data):
         idx = self.write + self.capacity - 1
 
         self.data[self.write] = data
         self.update(idx, p)
 
         self.write += 1
         if self.write >= self.capacity:
             self.write = 0
         if self.num_entries < self.capacity:
             self.num_entries += 1
 
     def update(self, idx, p):
         change = p - self.tree[idx]
 
         self.tree[idx] = p
         self._propagate(idx, change)
 
     def get(self, s):
         idx = self._retrieve(0, s)
         data_idx = idx - self.capacity + 1
         return [idx, self.tree[idx], self.data[data_idx]]
 
 
 class PrioritizedReplayMemory(object):
 
     def __init__(self, capacity):
         self.tree = SumTree(capacity)
         self.capacity = capacity
         self.e = 0.01  # pylint: disable=invalid-name
         self.a = 0.6  # pylint: disable=invalid-name
         self.beta = 0.4
         self.beta_increment_per_sampling = 0.001
 
     def _get_priority(self, error):
         return (error + self.e) ** self.a
 
     def add(self, error, sample):
         # (s, a, r, s, t)
         p = self._get_priority(error)
         self.tree.add(p, sample)
 
     def __len__(self):
         return self.tree.num_entries
 
     def sample(self, n):
         batch = []
         idxs = []
         segment = self.tree.total() / n
         priorities = []
 
         self.beta = np.min([1., self.beta + self.beta_increment_per_sampling])
 
         for i in range(n):
             a = segment * i
             b = segment * (i + 1)
 
             s = random.uniform(a, b)
             (idx, p, data) = self.tree.get(s)
             priorities.append(p)
             batch.append(data)
             idxs.append(idx)
         return batch, idxs
 
         # sampling_probabilities = priorities / self.tree.total()
         # is_weight = np.power(self.tree.num_entries * sampling_probabilities, -self.beta)
         # is_weight /= is_weight.max()
 
     def update(self, idx, error):
         p = self._get_priority(error)
         self.tree.update(idx, p)
 
     def save(self, path):
         f = open(path, 'wb')
         pickle.dump({"tree": self.tree}, f)
         f.close()
 
     def load_memory(self, path):
         with open(path, 'rb') as f:
             _memory = pickle.load(f)
         self.tree = _memory['tree']
+
+    def get(self):
+        return pickle.dumps({"tree": self.tree})
+
+    def set(self, binary):
+        self.tree = pickle.loads(binary)['tree']
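Like the DDPG networks, the replay memory now has an in-memory get()/set() pair alongside the file-based save()/load_memory(): the whole SumTree is pickled to bytes so it can live in the session's binary column. A minimal sketch with hypothetical state/action arrays and TD error:

    import numpy as np
    from analysis.ddpg.prioritized_replay_memory import PrioritizedReplayMemory

    memory = PrioritizedReplayMemory(capacity=100000)
    # (state, action, reward, next_state, terminate) with a hypothetical TD error of 0.5
    memory.add(0.5, (np.zeros(10), np.zeros(5), 0.1, np.ones(10), False))

    blob = memory.get()                    # pickled SumTree, ready for a BinaryField
    restored = PrioritizedReplayMemory(capacity=100000)
    restored.set(blob)                     # same contents; len(restored) == 1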

View File

@@ -185,6 +185,9 @@ class Migration(migrations.Migration):
                 ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                 ('name', models.CharField(max_length=64, verbose_name=b'session name')),
                 ('description', models.TextField(blank=True, null=True)),
+                ('ddpg_actor_model', models.BinaryField(null=True, blank=True)),
+                ('ddpg_critic_model', models.BinaryField(null=True, blank=True)),
+                ('ddpg_reply_memory', models.BinaryField(null=True, blank=True)),
                 ('creation_time', models.DateTimeField()),
                 ('last_update', models.DateTimeField()),
                 ('upload_code', models.CharField(max_length=30, unique=True)),

View File

@@ -187,6 +187,9 @@ class Session(BaseModel):
     hardware = models.ForeignKey(Hardware)
     algorithm = models.IntegerField(choices=AlgorithmType.choices(),
                                     default=AlgorithmType.OTTERTUNE)
+    ddpg_actor_model = models.BinaryField(null=True, blank=True)
+    ddpg_critic_model = models.BinaryField(null=True, blank=True)
+    ddpg_reply_memory = models.BinaryField(null=True, blank=True)
     project = models.ForeignKey(Project)
     creation_time = models.DateTimeField()

View File

@@ -353,7 +353,7 @@ class BaseParser(object, metaclass=ABCMeta):
     def format_enum(self, enum_value, metadata):
         enumvals = metadata.enumvals.split(',')
-        return enumvals[enum_value]
+        return enumvals[int(round(enum_value))]
 
     def format_integer(self, int_value, metadata):
         return int(round(int_value))

View File

@@ -35,7 +35,7 @@ MAX_TRAIN_SIZE = 7000
 # Batch size in GPR model
 BATCH_SIZE = 3000
 # Threads for TensorFlow config
 NUM_THREADS = 4
 
 # ---GRADIENT DESCENT CONSTANTS---
@@ -54,3 +54,19 @@ DEFAULT_EPSILON = 1e-6
 DEFAULT_SIGMA_MULTIPLIER = 3.0
 DEFAULT_MU_MULTIPLIER = 1.0
+
+# ---CONSTRAINTS CONSTANTS---
+# Batch size in DDPG model
+DDPG_BATCH_SIZE = 32
+
+# Learning rate of actor network
+ACTOR_LEARNING_RATE = 0.001
+
+# Learning rate of critic network
+CRITIC_LEARNING_RATE = 0.001
+
+# The impact of future reward on the decision
+GAMMA = 0.1
+
+# The changing rate of the target network
+TAU = 0.002
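These constants are passed straight into the DDPG constructor by the tasks further down in this commit. A sketch of that wiring, with knob_num and metric_num as hypothetical placeholders for the session's knob and metric counts:

    from analysis.ddpg.ddpg import DDPG
    from website.settings import (ACTOR_LEARNING_RATE, CRITIC_LEARNING_RATE,
                                  DDPG_BATCH_SIZE, GAMMA, TAU)

    knob_num, metric_num = 5, 10   # hypothetical dimensions for illustration
    ddpg = DDPG(n_actions=knob_num, n_states=metric_num, alr=ACTOR_LEARNING_RATE,
                clr=CRITIC_LEARNING_RATE, gamma=GAMMA, batch_size=DDPG_BATCH_SIZE, tau=TAU)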

View File

@@ -7,7 +7,7 @@ from .async_tasks import (aggregate_target_results,
                           configuration_recommendation,
                           map_workload,
                           train_ddpg,
-                          run_ddpg)
+                          configuration_recommendation_ddpg)
 
 from .periodic_tasks import (run_background_tasks)

View File

@@ -5,14 +5,12 @@
 #
 import random
 import queue
-from os.path import dirname, abspath, join
-import os
 
 import numpy as np
 from celery.task import task, Task
 from celery.utils.log import get_task_logger
 from djcelery.models import TaskMeta
-from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
 
 from analysis.ddpg.ddpg import DDPG
 from analysis.gp import GPRNP
@@ -29,7 +27,10 @@ from website.settings import (DEFAULT_LENGTH_SCALE, DEFAULT_MAGNITUDE,
                               MAX_TRAIN_SIZE, BATCH_SIZE, NUM_THREADS,
                               DEFAULT_RIDGE, DEFAULT_LEARNING_RATE,
                               DEFAULT_EPSILON, MAX_ITER, GPR_EPS,
-                              DEFAULT_SIGMA_MULTIPLIER, DEFAULT_MU_MULTIPLIER)
+                              DEFAULT_SIGMA_MULTIPLIER, DEFAULT_MU_MULTIPLIER,
+                              DDPG_BATCH_SIZE, ACTOR_LEARNING_RATE,
+                              CRITIC_LEARNING_RATE, GAMMA, TAU)
 
 from website.settings import INIT_FLIP_PROB, FLIP_PROB_DECAY
 from website.types import VarType
@@ -235,10 +236,10 @@ def train_ddpg(result_id):
 
     # Clean knob data
     cleaned_agg_data = clean_knob_data(agg_data['X_matrix'], agg_data['X_columnlabels'], session)
-    agg_data['X_matrix'] = np.array(cleaned_agg_data[0]).flatten()
-    agg_data['X_columnlabels'] = np.array(cleaned_agg_data[1]).flatten()
-    knob_data = DataUtil.normalize_knob_data(agg_data['X_matrix'],
-                                             agg_data['X_columnlabels'], session)
+    knob_data = np.array(cleaned_agg_data[0])
+    knob_labels = np.array(cleaned_agg_data[1])
+    knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels.flatten(), session))
+    knob_data = MinMaxScaler().fit(knob_bounds).transform(knob_data)[0]
     knob_num = len(knob_data)
     metric_num = len(metric_data)
     LOG.info('knob_num: %d, metric_num: %d', knob_num, metric_num)
@@ -276,26 +277,23 @@ def train_ddpg(result_id):
         * (2 * prev_objective - objective) / prev_objective
 
     # Update ddpg
-    project_root = dirname(dirname(dirname(abspath(__file__))))
-    saved_memory = join(project_root, 'checkpoint/reply_memory_' + session.project.name)
-    saved_model = join(project_root, 'checkpoint/ddpg_' + session.project.name)
-    ddpg = DDPG(n_actions=knob_num, n_states=metric_num)
-    if os.path.exists(saved_memory):
-        ddpg.replay_memory.load_memory(saved_memory)
-        ddpg.load_model(saved_model)
+    ddpg = DDPG(n_actions=knob_num, n_states=metric_num, alr=ACTOR_LEARNING_RATE,
+                clr=CRITIC_LEARNING_RATE, gamma=GAMMA, batch_size=DDPG_BATCH_SIZE, tau=TAU)
+    if session.ddpg_actor_model and session.ddpg_critic_model:
+        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
+    if session.ddpg_reply_memory:
+        ddpg.replay_memory.set(session.ddpg_reply_memory)
     ddpg.add_sample(prev_metric_data, knob_data, reward, metric_data, False)
     if len(ddpg.replay_memory) > 32:
        ddpg.update()
-    checkpoint_dir = join(project_root, 'checkpoint')
-    if not os.path.exists(checkpoint_dir):
-        os.makedirs(checkpoint_dir)
-    ddpg.replay_memory.save(saved_memory)
-    ddpg.save_model(saved_model)
+    session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model()
+    session.ddpg_reply_memory = ddpg.replay_memory.get()
+    session.save()
     return result_info
 
 
-@task(base=ConfigurationRecommendation, name='run_ddpg')
-def run_ddpg(result_info):
+@task(base=ConfigurationRecommendation, name='configuration_recommendation_ddpg')
+def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-name
     LOG.info('Use ddpg to recommend configuration')
     result_id = result_info['newest_result_id']
     result = Result.objects.filter(pk=result_id)
@@ -305,20 +303,20 @@ def configuration_recommendation_ddpg(result_info):
     cleaned_agg_data = clean_knob_data(agg_data['X_matrix'], agg_data['X_columnlabels'],
                                        session)
     knob_labels = np.array(cleaned_agg_data[1]).flatten()
-    knob_data = np.array(cleaned_agg_data[0]).flatten()
-    knob_num = len(knob_data)
+    knob_num = len(knob_labels)
     metric_num = len(metric_data)
 
-    project_root = dirname(dirname(dirname(abspath(__file__))))
-    saved_memory = join(project_root, 'checkpoint/reply_memory_' + session.project.name)
-    saved_model = join(project_root, 'checkpoint/ddpg_' + session.project.name)
-    ddpg = DDPG(n_actions=knob_num, n_states=metric_num)
-    if os.path.exists(saved_memory):
-        ddpg.replay_memory.load_memory(saved_memory)
-        ddpg.load_model(saved_model)
+    ddpg = DDPG(n_actions=knob_num, n_states=metric_num, alr=ACTOR_LEARNING_RATE,
+                clr=CRITIC_LEARNING_RATE, gamma=GAMMA, batch_size=DDPG_BATCH_SIZE, tau=TAU)
+    if session.ddpg_actor_model is not None and session.ddpg_critic_model is not None:
+        ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
+    if session.ddpg_reply_memory is not None:
+        ddpg.replay_memory.set(session.ddpg_reply_memory)
     knob_data = ddpg.choose_action(metric_data)
     LOG.info('recommended knob: %s', knob_data)
 
-    knob_data = DataUtil.denormalize_knob_data(knob_data, knob_labels, session)
+    knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels, session))
+    knob_data = MinMaxScaler().fit(knob_bounds).inverse_transform(knob_data.reshape(1, -1))[0]
     conf_map = {k: knob_data[i] for i, k in enumerate(knob_labels)}
     conf_map_res = {}
     conf_map_res['status'] = 'good'

View File

@@ -93,30 +93,35 @@ class TaskUtil(object):
 class DataUtil(object):
 
     @staticmethod
-    def normalize_knob_data(knob_values, knob_labels, session):
-        for i, knob in enumerate(knob_labels):
+    def get_knob_bounds(knob_labels, session):
+        minvals = []
+        maxvals = []
+        for _, knob in enumerate(knob_labels):
             knob_object = KnobCatalog.objects.get(dbms=session.dbms, name=knob, tunable=True)
-            minval = float(knob_object.minval)
-            maxval = float(knob_object.maxval)
-            knob_new = SessionKnob.objects.filter(knob=knob_object, session=session, tunable=True)
-            if knob_new.exists():
-                minval = float(knob_new[0].minval)
-                maxval = float(knob_new[0].maxval)
-            knob_values[i] = (knob_values[i] - minval) / (maxval - minval)
-            knob_values[i] = max(0, min(knob_values[i], 1))
-        return knob_values
+            knob_session_object = SessionKnob.objects.filter(knob=knob_object, session=session,
+                                                             tunable=True)
+            if knob_session_object.exists():
+                minval = float(knob_session_object[0].minval)
+                maxval = float(knob_session_object[0].maxval)
+            else:
+                minval = float(knob_object.minval)
+                maxval = float(knob_object.maxval)
+            minvals.append(minval)
+            maxvals.append(maxval)
+        return np.array(minvals), np.array(maxvals)
 
     @staticmethod
     def denormalize_knob_data(knob_values, knob_labels, session):
         for i, knob in enumerate(knob_labels):
             knob_object = KnobCatalog.objects.get(dbms=session.dbms, name=knob, tunable=True)
-            minval = float(knob_object.minval)
-            maxval = float(knob_object.maxval)
             knob_session_object = SessionKnob.objects.filter(knob=knob_object, session=session,
                                                              tunable=True)
             if knob_session_object.exists():
                 minval = float(knob_session_object[0].minval)
                 maxval = float(knob_session_object[0].maxval)
+            else:
+                minval = float(knob_object.minval)
+                maxval = float(knob_object.maxval)
             knob_values[i] = knob_values[i] * (maxval - minval) + minval
         return knob_values
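get_knob_bounds() returns the per-knob min/max arrays that the tasks stack into a (2, n_knobs) matrix and hand to scikit-learn's MinMaxScaler, so the DDPG actions in [0, 1] can be mapped back to real knob settings (and knob settings mapped forward for training). A small sketch with made-up bounds for three knobs:

    import numpy as np
    from sklearn.preprocessing import MinMaxScaler

    # Hypothetical bounds for three knobs, as returned by DataUtil.get_knob_bounds().
    knob_bounds = np.vstack((np.array([0., 1., 64.]),       # minvals
                             np.array([100., 8., 4096.])))  # maxvals
    scaler = MinMaxScaler().fit(knob_bounds)

    action = np.array([0.5, 0.25, 1.0])                     # DDPG output in [0, 1]
    knob_values = scaler.inverse_transform(action.reshape(1, -1))[0]
    # -> [50., 2.75, 4096.]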

View File

@@ -30,8 +30,8 @@ from .models import (BackupData, DBMSCatalog, KnobCatalog, KnobData, MetricCatalog,
                      MetricData, MetricManager, Project, Result, Session, Workload,
                      SessionKnob)
 from .parser import Parser
-from .tasks import (aggregate_target_results, map_workload, train_ddpg, run_ddpg,
-                    configuration_recommendation)
+from .tasks import (aggregate_target_results, map_workload, train_ddpg,
+                    configuration_recommendation, configuration_recommendation_ddpg)
 from .types import (DBMSType, KnobUnitType, MetricType,
                     TaskType, VarType, WorkloadStatusType, AlgorithmType)
 from .utils import JSONUtil, LabelUtil, MediaUtil, TaskUtil