From f071a0e62c359e747e06a21719fae074d080643a Mon Sep 17 00:00:00 2001
From: yangdsh
Date: Mon, 14 Oct 2019 02:58:50 +0000
Subject: [PATCH] Modify the hyperparameters and reward of DDPG

---
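Notes (editor commentary, ignored by git am): this change trains DDPG with
gamma=0.0 and tau=0.0, so the critic's one-step TD target
y = r + gamma * Q'(s', a') collapses to the immediate reward and the stored
next_state is never read by the update; under the standard DDPG soft-update
convention, tau=0.0 likewise leaves the target networks untouched. That is
why add_sample() below can safely pass the same normalized metric vector as
both state and next_state. A minimal standalone sketch of the simplified
reward and the collapsed target (the objective value and q_next are
illustrative placeholders, not values from the patch):

    def simple_reward(objective, less_is_better):
        # Mirrors the patched logic: the raw target objective,
        # sign-flipped so that a larger reward is always better.
        return -objective if less_is_better else objective

    # Hypothetical latency-style objective (less is better).
    reward = simple_reward(objective=431.0, less_is_better=True)
    assert reward == -431.0

    gamma = 0.0
    q_next = 123.45  # arbitrary critic estimate; ignored when gamma == 0
    td_target = reward + gamma * q_next
    assert td_target == reward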
 server/website/website/settings/constants.py | 10 +---
 server/website/website/tasks/async_tasks.py  | 49 +++++++-------------
 2 files changed, 19 insertions(+), 40 deletions(-)

diff --git a/server/website/website/settings/constants.py b/server/website/website/settings/constants.py
index 02a0f92..fd040e8 100644
--- a/server/website/website/settings/constants.py
+++ b/server/website/website/settings/constants.py
@@ -77,13 +77,7 @@ DNN_DEBUG_INTERVAL = 100
 DDPG_BATCH_SIZE = 32
 
 # Learning rate of actor network
-ACTOR_LEARNING_RATE = 0.001
+ACTOR_LEARNING_RATE = 0.01
 
 # Learning rate of critic network
-CRITIC_LEARNING_RATE = 0.001
-
-# The impact of future reward on the decision
-GAMMA = 0.9
-
-# The changing rate of the target network
-TAU = 0.002
+CRITIC_LEARNING_RATE = 0.01
diff --git a/server/website/website/tasks/async_tasks.py b/server/website/website/tasks/async_tasks.py
index 118a735..510b888 100644
--- a/server/website/website/tasks/async_tasks.py
+++ b/server/website/website/tasks/async_tasks.py
@@ -30,7 +30,7 @@ from website.settings import (DEFAULT_LENGTH_SCALE, DEFAULT_MAGNITUDE,
                               DEFAULT_EPSILON, MAX_ITER, GPR_EPS,
                               DEFAULT_SIGMA_MULTIPLIER, DEFAULT_MU_MULTIPLIER,
                               DDPG_BATCH_SIZE, ACTOR_LEARNING_RATE,
-                              CRITIC_LEARNING_RATE, GAMMA, TAU,
+                              CRITIC_LEARNING_RATE,
                               DNN_TRAIN_ITER, DNN_EXPLORE, DNN_EXPLORE_ITER,
                               DNN_NOISE_SCALE_BEGIN, DNN_NOISE_SCALE_END,
                               DNN_DEBUG, DNN_DEBUG_INTERVAL)
@@ -232,22 +232,18 @@ def train_ddpg(result_id):
     if len(session_results) == 0:
         LOG.info('No previous result. Abort.')
         return result_info
-    prev_result_id = session_results[len(session_results) - 1].pk
-    base_result_id = session_results[0].pk
-    prev_result = Result.objects.filter(pk=prev_result_id)
-    base_result = Result.objects.filter(pk=base_result_id)
 
     # Extract data from result
     result = Result.objects.filter(pk=result_id)
     agg_data = DataUtil.aggregate_data(result)
     metric_data = agg_data['y_matrix'].flatten()
-    prev_metric_data = (DataUtil.aggregate_data(prev_result))['y_matrix'].flatten()
-    base_metric_data = (DataUtil.aggregate_data(base_result))['y_matrix'].flatten()
+    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
+    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
 
     # Clean knob data
-    cleaned_agg_data = clean_knob_data(agg_data['X_matrix'], agg_data['X_columnlabels'], session)
-    knob_data = np.array(cleaned_agg_data[0])
-    knob_labels = np.array(cleaned_agg_data[1])
+    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'], agg_data['X_columnlabels'], session)
+    knob_data = np.array(cleaned_knob_data[0])
+    knob_labels = np.array(cleaned_knob_data[1])
     knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels.flatten(), session))
     knob_data = MinMaxScaler().fit(knob_bounds).transform(knob_data)[0]
     knob_num = len(knob_data)
@@ -266,34 +262,24 @@ def train_ddpg(result_id):
                          'metrics (target_obj={})').format(len(target_obj_idx),
                                                            target_objective))
     objective = metric_data[target_obj_idx]
-    prev_objective = prev_metric_data[target_obj_idx]
-    base_objective = base_metric_data[target_obj_idx]
     metric_meta = MetricCatalog.objects.get_metric_meta(result.session.dbms,
                                                         result.session.target_objective)
 
     # Calculate the reward
-    reward = 0
     if metric_meta[target_objective].improvement == '(less is better)':
-        if objective - base_objective <= 0:
-            reward = -(np.square(objective / base_objective) - 1) * objective / prev_objective
-        else:
-            reward = (np.square((2 * base_objective - objective) / base_objective) - 1)\
-                * (2 * prev_objective - objective) / prev_objective
+        reward = -objective
     else:
-        if objective - base_objective > 0:
-            reward = (np.square(objective / base_objective) - 1) * objective / prev_objective
-        else:
-            reward = -(np.square((2 * base_objective - objective) / base_objective) - 1)\
-                * (2 * prev_objective - objective) / prev_objective
+        reward = objective
+    LOG.info('reward: %f', reward)
 
     # Update ddpg
     ddpg = DDPG(n_actions=knob_num, n_states=metric_num, alr=ACTOR_LEARNING_RATE,
-                clr=CRITIC_LEARNING_RATE, gamma=GAMMA, batch_size=DDPG_BATCH_SIZE, tau=TAU)
+                clr=CRITIC_LEARNING_RATE, gamma=0.0, batch_size=DDPG_BATCH_SIZE, tau=0.0)
     if session.ddpg_actor_model and session.ddpg_critic_model:
         ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
     if session.ddpg_reply_memory:
         ddpg.replay_memory.set(session.ddpg_reply_memory)
-    ddpg.add_sample(prev_metric_data, knob_data, reward, metric_data, False)
+    ddpg.add_sample(normalized_metric_data, knob_data, reward, normalized_metric_data, False)
     if len(ddpg.replay_memory) > 32:
         ddpg.update()
     session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model()
@@ -310,20 +296,21 @@ def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-name
     session = Result.objects.get(pk=result_id).session
     agg_data = DataUtil.aggregate_data(result)
     metric_data = agg_data['y_matrix'].flatten()
-    cleaned_agg_data = clean_knob_data(agg_data['X_matrix'], agg_data['X_columnlabels'],
+    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
+    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
+    cleaned_knob_data = clean_knob_data(agg_data['X_matrix'], agg_data['X_columnlabels'],
                                         session)
-    knob_labels = np.array(cleaned_agg_data[1]).flatten()
+    knob_labels = np.array(cleaned_knob_data[1]).flatten()
     knob_num = len(knob_labels)
     metric_num = len(metric_data)
 
     ddpg = DDPG(n_actions=knob_num, n_states=metric_num, alr=ACTOR_LEARNING_RATE,
-                clr=CRITIC_LEARNING_RATE, gamma=GAMMA, batch_size=DDPG_BATCH_SIZE, tau=TAU)
+                clr=CRITIC_LEARNING_RATE, gamma=0.0, batch_size=DDPG_BATCH_SIZE, tau=0.0)
     if session.ddpg_actor_model is not None and session.ddpg_critic_model is not None:
         ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model)
     if session.ddpg_reply_memory is not None:
         ddpg.replay_memory.set(session.ddpg_reply_memory)
-    knob_data = ddpg.choose_action(metric_data)
-    LOG.info('recommended knob: %s', knob_data)
+    knob_data = ddpg.choose_action(normalized_metric_data)
 
     knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels, session))
     knob_data = MinMaxScaler().fit(knob_bounds).inverse_transform(knob_data.reshape(1, -1))[0]
@@ -333,8 +320,6 @@ def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-name
     conf_map = {k: knob_data[i] for i, k in enumerate(knob_labels)}
     conf_map_res = {}
     conf_map_res['result_id'] = result_id
     conf_map_res['recommendation'] = conf_map
     conf_map_res['info'] = 'INFO: ddpg'
-    for k in knob_labels:
-        LOG.info('%s: %f', k, conf_map[k])
     return conf_map_res