modify the hyperparameters and reward of DDPG

parent: a5f393cdc1
commit: f071a0e62c
@@ -77,13 +77,7 @@ DNN_DEBUG_INTERVAL = 100
 DDPG_BATCH_SIZE = 32
 
 # Learning rate of actor network
-ACTOR_LEARNING_RATE = 0.001
+ACTOR_LEARNING_RATE = 0.01
 
 # Learning rate of critic network
-CRITIC_LEARNING_RATE = 0.001
-
-# The impact of future reward on the decision
-GAMMA = 0.9
-
-# The changing rate of the target network
-TAU = 0.002
+CRITIC_LEARNING_RATE = 0.01
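Both learning rates are raised tenfold, and the GAMMA and TAU settings are dropped entirely; the call sites further down now pass gamma=0.0 and tau=0.0 as literals. For context, assuming the conventional DDPG update rules, a zero discount factor collapses the critic's TD target to the immediate reward, and a zero Polyak rate freezes the target networks. A minimal sketch of both effects (illustrative names, not code from this repo):

import numpy as np

def critic_target(reward, next_q, gamma=0.0):
    # Standard DDPG TD target: y = r + gamma * Q'(s', a').
    # With gamma = 0.0 the future term vanishes, so y == r.
    return reward + gamma * next_q

def soft_update(target_param, param, tau=0.0):
    # Polyak averaging: theta_target <- tau * theta + (1 - tau) * theta_target.
    # With tau = 0.0 the target network never moves.
    return tau * param + (1 - tau) * target_param

assert critic_target(1.5, next_q=10.0) == 1.5
assert np.allclose(soft_update(np.zeros(3), np.ones(3)), np.zeros(3))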
@@ -30,7 +30,7 @@ from website.settings import (DEFAULT_LENGTH_SCALE, DEFAULT_MAGNITUDE,
                               DEFAULT_EPSILON, MAX_ITER, GPR_EPS,
                               DEFAULT_SIGMA_MULTIPLIER, DEFAULT_MU_MULTIPLIER,
                               DDPG_BATCH_SIZE, ACTOR_LEARNING_RATE,
-                              CRITIC_LEARNING_RATE, GAMMA, TAU,
+                              CRITIC_LEARNING_RATE,
                               DNN_TRAIN_ITER, DNN_EXPLORE, DNN_EXPLORE_ITER,
                               DNN_NOISE_SCALE_BEGIN, DNN_NOISE_SCALE_END,
                               DNN_DEBUG, DNN_DEBUG_INTERVAL)
@@ -232,22 +232,18 @@ def train_ddpg(result_id):
     if len(session_results) == 0:
         LOG.info('No previous result. Abort.')
         return result_info
-    prev_result_id = session_results[len(session_results) - 1].pk
-    base_result_id = session_results[0].pk
-    prev_result = Result.objects.filter(pk=prev_result_id)
-    base_result = Result.objects.filter(pk=base_result_id)
 
     # Extract data from result
     result = Result.objects.filter(pk=result_id)
     agg_data = DataUtil.aggregate_data(result)
     metric_data = agg_data['y_matrix'].flatten()
-    prev_metric_data = (DataUtil.aggregate_data(prev_result))['y_matrix'].flatten()
-    base_metric_data = (DataUtil.aggregate_data(base_result))['y_matrix'].flatten()
+    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
+    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
 
     # Clean knob data
-    cleaned_agg_data = clean_knob_data(agg_data['X_matrix'], agg_data['X_columnlabels'], session)
-    knob_data = np.array(cleaned_agg_data[0])
-    knob_labels = np.array(cleaned_agg_data[1])
+    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'], agg_data['X_columnlabels'], session)
+    knob_data = np.array(cleaned_knob_data[0])
+    knob_labels = np.array(cleaned_knob_data[1])
     knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels.flatten(), session))
     knob_data = MinMaxScaler().fit(knob_bounds).transform(knob_data)[0]
     knob_num = len(knob_data)
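The previous/base result lookups are replaced by per-result metric normalization. One caveat worth flagging: MinMaxScaler is fitted here on a single sample (metric_data.reshape(1, -1) is one row of features), so every feature has zero range, and scikit-learn's default handling of constant features maps the fitted sample itself to the bottom of the feature range. A minimal reproduction with made-up metric values:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

metric_data = np.array([3.0, 150.0, 0.25])
scaler = MinMaxScaler().fit(metric_data.reshape(1, -1))
# Each column has min == max, so transforming the same sample
# yields the feature_range minimum (0.0 by default) everywhere.
print(scaler.transform(metric_data.reshape(1, -1))[0])  # [0. 0. 0.]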
@@ -266,34 +262,24 @@ def train_ddpg(result_id):
                          'metrics (target_obj={})').format(len(target_obj_idx),
                                                            target_objective))
     objective = metric_data[target_obj_idx]
-    prev_objective = prev_metric_data[target_obj_idx]
-    base_objective = base_metric_data[target_obj_idx]
     metric_meta = MetricCatalog.objects.get_metric_meta(result.session.dbms,
                                                         result.session.target_objective)
 
     # Calculate the reward
-    reward = 0
     if metric_meta[target_objective].improvement == '(less is better)':
-        if objective - base_objective <= 0:
-            reward = -(np.square(objective / base_objective) - 1) * objective / prev_objective
-        else:
-            reward = (np.square((2 * base_objective - objective) / base_objective) - 1)\
-                * (2 * prev_objective - objective) / prev_objective
+        reward = -objective
     else:
-        if objective - base_objective > 0:
-            reward = (np.square(objective / base_objective) - 1) * objective / prev_objective
-        else:
-            reward = -(np.square((2 * base_objective - objective) / base_objective) - 1)\
-                * (2 * prev_objective - objective) / prev_objective
+        reward = objective
+    LOG.info('reward: %f', reward)
 
     # Update ddpg
     ddpg = DDPG(n_actions=knob_num, n_states=metric_num, alr=ACTOR_LEARNING_RATE,
-                clr=CRITIC_LEARNING_RATE, gamma=GAMMA, batch_size=DDPG_BATCH_SIZE, tau=TAU)
+                clr=CRITIC_LEARNING_RATE, gamma=0.0, batch_size=DDPG_BATCH_SIZE, tau=0.0)
     if session.ddpg_actor_model and session.ddpg_critic_model:
         ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
     if session.ddpg_reply_memory:
         ddpg.replay_memory.set(session.ddpg_reply_memory)
-    ddpg.add_sample(prev_metric_data, knob_data, reward, metric_data, False)
+    ddpg.add_sample(normalized_metric_data, knob_data, reward, normalized_metric_data, False)
     if len(ddpg.replay_memory) > 32:
         ddpg.update()
     session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model()
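The reward is thus simplified from a squared relative improvement over the base and previous results to the raw objective value, signed so that larger is always better. A side-by-side sketch of the removed and new schemes (standalone illustration, not repo code):

import numpy as np

def old_reward(objective, base, prev, less_is_better):
    # Removed scheme: squared relative change vs. the first (base)
    # result, weighted by the change vs. the previous result.
    if less_is_better:
        if objective <= base:
            return -(np.square(objective / base) - 1) * objective / prev
        return (np.square((2 * base - objective) / base) - 1) \
            * (2 * prev - objective) / prev
    if objective > base:
        return (np.square(objective / base) - 1) * objective / prev
    return -(np.square((2 * base - objective) / base) - 1) \
        * (2 * prev - objective) / prev

def new_reward(objective, less_is_better):
    # New scheme: the signed objective itself; with gamma = 0.0 the
    # critic regresses directly onto this value.
    return -objective if less_is_better else objective

# Example: latency (less is better) improves from 10.0 to 8.0.
print(old_reward(8.0, base=10.0, prev=10.0, less_is_better=True))  # 0.288
print(new_reward(8.0, less_is_better=True))                        # -8.0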
@@ -310,20 +296,21 @@ def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-name
     session = Result.objects.get(pk=result_id).session
     agg_data = DataUtil.aggregate_data(result)
     metric_data = agg_data['y_matrix'].flatten()
-    cleaned_agg_data = clean_knob_data(agg_data['X_matrix'], agg_data['X_columnlabels'],
-                                       session)
-    knob_labels = np.array(cleaned_agg_data[1]).flatten()
+    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
+    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
+    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'], agg_data['X_columnlabels'],
+                                        session)
+    knob_labels = np.array(cleaned_knob_data[1]).flatten()
     knob_num = len(knob_labels)
     metric_num = len(metric_data)
 
     ddpg = DDPG(n_actions=knob_num, n_states=metric_num, alr=ACTOR_LEARNING_RATE,
-                clr=CRITIC_LEARNING_RATE, gamma=GAMMA, batch_size=DDPG_BATCH_SIZE, tau=TAU)
+                clr=CRITIC_LEARNING_RATE, gamma=0.0, batch_size=DDPG_BATCH_SIZE, tau=0.0)
     if session.ddpg_actor_model is not None and session.ddpg_critic_model is not None:
         ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
     if session.ddpg_reply_memory is not None:
         ddpg.replay_memory.set(session.ddpg_reply_memory)
-    knob_data = ddpg.choose_action(metric_data)
-    LOG.info('recommended knob: %s', knob_data)
+    knob_data = ddpg.choose_action(normalized_metric_data)
 
     knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels, session))
     knob_data = MinMaxScaler().fit(knob_bounds).inverse_transform(knob_data.reshape(1, -1))[0]
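Since the actor is now fed normalized metrics and emits actions in [0, 1], the recommendation path maps each action back to concrete knob values by inverse-transforming against the knob bounds. A small sketch of that rescaling, with hypothetical bounds for two knobs:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Made-up bounds: row 0 holds each knob's minimum, row 1 its maximum.
knob_bounds = np.array([[64.0, 0.0],
                        [8192.0, 100.0]])
scaler = MinMaxScaler().fit(knob_bounds)

action = np.array([0.5, 0.25])  # actor output in [0, 1]
knob_values = scaler.inverse_transform(action.reshape(1, -1))[0]
print(knob_values)  # [4128.   25.]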
@@ -333,8 +320,6 @@ def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-name
     conf_map_res['result_id'] = result_id
     conf_map_res['recommendation'] = conf_map
     conf_map_res['info'] = 'INFO: ddpg'
-    for k in knob_labels:
-        LOG.info('%s: %f', k, conf_map[k])
     return conf_map_res