improve ddpg
This commit is contained in:
		
							parent
							
								
									67a4a70c09
								
							
						
					
					
						commit
						21fce27291
					
				|  | @ -23,21 +23,21 @@ LOG = get_analysis_logger(__name__) | ||||||
| 
 | 
 | ||||||
| class Actor(nn.Module): | class Actor(nn.Module): | ||||||
| 
 | 
 | ||||||
|     def __init__(self, n_states, n_actions): |     def __init__(self, n_states, n_actions, hidden_sizes): | ||||||
|         super(Actor, self).__init__() |         super(Actor, self).__init__() | ||||||
|         self.layers = nn.Sequential( |         self.layers = nn.Sequential( | ||||||
|             nn.Linear(n_states, 128), |             nn.Linear(n_states, hidden_sizes[0]), | ||||||
|             nn.LeakyReLU(negative_slope=0.2), |             nn.LeakyReLU(negative_slope=0.2), | ||||||
|             nn.BatchNorm1d(128), |             nn.BatchNorm1d(hidden_sizes[0]), | ||||||
|             nn.Linear(128, 128), |             nn.Linear(hidden_sizes[0], hidden_sizes[1]), | ||||||
|             nn.Tanh(), |             nn.Tanh(), | ||||||
|             nn.Dropout(0.3), |             nn.Dropout(0.3), | ||||||
|             nn.BatchNorm1d(128), |             nn.BatchNorm1d(hidden_sizes[1]), | ||||||
| 
 |             nn.Linear(hidden_sizes[1], hidden_sizes[2]), | ||||||
|             nn.Linear(128, 64), |  | ||||||
|             nn.Tanh(), |             nn.Tanh(), | ||||||
|             nn.BatchNorm1d(64), |             nn.Dropout(0.3), | ||||||
|             nn.Linear(64, n_actions) |             nn.BatchNorm1d(hidden_sizes[2]), | ||||||
|  |             nn.Linear(hidden_sizes[2], n_actions) | ||||||
|         ) |         ) | ||||||
|         # This act layer maps the output to (0, 1) |         # This act layer maps the output to (0, 1) | ||||||
|         self.act = nn.Sigmoid() |         self.act = nn.Sigmoid() | ||||||
|  | @ -58,21 +58,21 @@ class Actor(nn.Module): | ||||||
| 
 | 
 | ||||||
| class Critic(nn.Module): | class Critic(nn.Module): | ||||||
| 
 | 
 | ||||||
|     def __init__(self, n_states, n_actions): |     def __init__(self, n_states, n_actions, hidden_sizes): | ||||||
|         super(Critic, self).__init__() |         super(Critic, self).__init__() | ||||||
|         self.state_input = nn.Linear(n_states, 128) |         self.state_input = nn.Linear(n_states, hidden_sizes[0]) | ||||||
|         self.action_input = nn.Linear(n_actions, 128) |         self.action_input = nn.Linear(n_actions, hidden_sizes[0]) | ||||||
|         self.act = nn.Tanh() |         self.act = nn.Tanh() | ||||||
|         self.layers = nn.Sequential( |         self.layers = nn.Sequential( | ||||||
|             nn.Linear(256, 256), |             nn.Linear(hidden_sizes[0] * 2, hidden_sizes[1]), | ||||||
|             nn.LeakyReLU(negative_slope=0.2), |             nn.LeakyReLU(negative_slope=0.2), | ||||||
|             nn.BatchNorm1d(256), |             nn.Dropout(0.3), | ||||||
| 
 |             nn.BatchNorm1d(hidden_sizes[1]), | ||||||
|             nn.Linear(256, 64), |             nn.Linear(hidden_sizes[1], hidden_sizes[2]), | ||||||
|             nn.Tanh(), |             nn.Tanh(), | ||||||
|             nn.Dropout(0.3), |             nn.Dropout(0.3), | ||||||
|             nn.BatchNorm1d(64), |             nn.BatchNorm1d(hidden_sizes[2]), | ||||||
|             nn.Linear(64, 1), |             nn.Linear(hidden_sizes[2], 1), | ||||||
|         ) |         ) | ||||||
|         self._init_weights() |         self._init_weights() | ||||||
| 
 | 
 | ||||||
|  | @ -100,7 +100,8 @@ class Critic(nn.Module): | ||||||
| class DDPG(object): | class DDPG(object): | ||||||
| 
 | 
 | ||||||
|     def __init__(self, n_states, n_actions, model_name='', alr=0.001, clr=0.001, |     def __init__(self, n_states, n_actions, model_name='', alr=0.001, clr=0.001, | ||||||
|                  gamma=0.9, batch_size=32, tau=0.002, shift=0, memory_size=100000): |                  gamma=0.9, batch_size=32, tau=0.002, shift=0, memory_size=100000, | ||||||
|  |                  a_hidden_sizes=[128, 128, 64], c_hidden_sizes=[128, 256, 64]): | ||||||
|         self.n_states = n_states |         self.n_states = n_states | ||||||
|         self.n_actions = n_actions |         self.n_actions = n_actions | ||||||
|         self.alr = alr |         self.alr = alr | ||||||
|  | @ -109,6 +110,8 @@ class DDPG(object): | ||||||
|         self.batch_size = batch_size |         self.batch_size = batch_size | ||||||
|         self.gamma = gamma |         self.gamma = gamma | ||||||
|         self.tau = tau |         self.tau = tau | ||||||
|  |         self.a_hidden_sizes = a_hidden_sizes | ||||||
|  |         self.c_hidden_sizes = c_hidden_sizes | ||||||
|         self.shift = shift |         self.shift = shift | ||||||
| 
 | 
 | ||||||
|         self._build_network() |         self._build_network() | ||||||
|  | @ -121,10 +124,10 @@ class DDPG(object): | ||||||
|         return Variable(torch.FloatTensor(x)) |         return Variable(torch.FloatTensor(x)) | ||||||
| 
 | 
 | ||||||
|     def _build_network(self): |     def _build_network(self): | ||||||
|         self.actor = Actor(self.n_states, self.n_actions) |         self.actor = Actor(self.n_states, self.n_actions, self.a_hidden_sizes) | ||||||
|         self.target_actor = Actor(self.n_states, self.n_actions) |         self.target_actor = Actor(self.n_states, self.n_actions, self.a_hidden_sizes) | ||||||
|         self.critic = Critic(self.n_states, self.n_actions) |         self.critic = Critic(self.n_states, self.n_actions, self.c_hidden_sizes) | ||||||
|         self.target_critic = Critic(self.n_states, self.n_actions) |         self.target_critic = Critic(self.n_states, self.n_actions, self.c_hidden_sizes) | ||||||
| 
 | 
 | ||||||
|         # Copy actor's parameters |         # Copy actor's parameters | ||||||
|         self._update_target(self.target_actor, self.actor, tau=1.0) |         self._update_target(self.target_actor, self.actor, tau=1.0) | ||||||
|  |  | ||||||
|  | @ -25,6 +25,7 @@ from analysis.gpr import gpr_models  # noqa | ||||||
| from analysis.gpr import ucb  # noqa | from analysis.gpr import ucb  # noqa | ||||||
| from analysis.gpr.optimize import tf_optimize  # noqa | from analysis.gpr.optimize import tf_optimize  # noqa | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| LOG = get_analysis_logger(__name__) | LOG = get_analysis_logger(__name__) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -106,8 +107,10 @@ def ddpg(env, config, n_loops=100): | ||||||
|     a_lr = config['a_lr'] |     a_lr = config['a_lr'] | ||||||
|     c_lr = config['c_lr'] |     c_lr = config['c_lr'] | ||||||
|     n_epochs = config['n_epochs'] |     n_epochs = config['n_epochs'] | ||||||
|  |     ahs = config['a_hidden_sizes'] | ||||||
|  |     chs = config['c_hidden_sizes'] | ||||||
|     model_ddpg = DDPG(n_actions=env.knob_dim, n_states=env.metric_dim, gamma=gamma, |     model_ddpg = DDPG(n_actions=env.knob_dim, n_states=env.metric_dim, gamma=gamma, | ||||||
|                       clr=c_lr, alr=a_lr, shift=0.1) |                       clr=c_lr, alr=a_lr, shift=0, a_hidden_sizes=ahs, c_hidden_sizes=chs) | ||||||
|     knob_data = np.random.rand(env.knob_dim) |     knob_data = np.random.rand(env.knob_dim) | ||||||
|     prev_metric_data = np.zeros(env.metric_dim) |     prev_metric_data = np.zeros(env.metric_dim) | ||||||
| 
 | 
 | ||||||
|  | @ -122,7 +125,7 @@ def ddpg(env, config, n_loops=100): | ||||||
| 
 | 
 | ||||||
|     for i in range(n_loops): |     for i in range(n_loops): | ||||||
|         reward, metric_data = env.simulate(knob_data) |         reward, metric_data = env.simulate(knob_data) | ||||||
|         model_ddpg.add_sample(prev_metric_data, prev_knob_data, prev_reward, metric_data) |         model_ddpg.add_sample(prev_metric_data, prev_knob_data, prev_reward, prev_metric_data) | ||||||
|         prev_metric_data = metric_data |         prev_metric_data = metric_data | ||||||
|         prev_knob_data = knob_data |         prev_knob_data = knob_data | ||||||
|         prev_reward = reward |         prev_reward = reward | ||||||
|  | @ -184,6 +187,7 @@ def dnn(env, config, n_loops=100): | ||||||
|         actions, rewards = memory.get_all() |         actions, rewards = memory.get_all() | ||||||
|         model_nn.fit(np.array(actions), -np.array(rewards), fit_epochs=50) |         model_nn.fit(np.array(actions), -np.array(rewards), fit_epochs=50) | ||||||
|         res = model_nn.recommend(X_samples, Xmin, Xmax, recommend_epochs=10, explore=False) |         res = model_nn.recommend(X_samples, Xmin, Xmax, recommend_epochs=10, explore=False) | ||||||
|  | 
 | ||||||
|         best_config_idx = np.argmin(res.minl.ravel()) |         best_config_idx = np.argmin(res.minl.ravel()) | ||||||
|         best_config = res.minl_conf[best_config_idx, :] |         best_config = res.minl_conf[best_config_idx, :] | ||||||
|         if ou_process: |         if ou_process: | ||||||
|  | @ -313,7 +317,7 @@ def gpr_new(env, config, n_loops=100): | ||||||
|             model_kwargs['hyperparameters'] = None |             model_kwargs['hyperparameters'] = None | ||||||
|         model_kwargs['optimize_hyperparameters'] = optimize_hyperparams |         model_kwargs['optimize_hyperparameters'] = optimize_hyperparams | ||||||
| 
 | 
 | ||||||
|         X_new, ypred, model_params, hyperparameters = run_optimize(np.array(actions), |         X_new, ypred, _, hyperparameters = run_optimize(np.array(actions), | ||||||
|                                                         -np.array(rewards), |                                                         -np.array(rewards), | ||||||
|                                                         X_samples, |                                                         X_samples, | ||||||
|                                                         model_name, |                                                         model_name, | ||||||
|  | @ -342,8 +346,8 @@ def plotlines(xs, results, labels, title, path): | ||||||
|         N = 1 |         N = 1 | ||||||
|         weights = np.ones(N) |         weights = np.ones(N) | ||||||
|         for x_axis, result, label in zip(xs, results, labels): |         for x_axis, result, label in zip(xs, results, labels): | ||||||
|             result = np.convolve(weights/weights.sum(), result.flatten())[N-1:-N+1] |             result = np.convolve(weights/weights.sum(), result.flatten())[N-1:-N] | ||||||
|             lines.append(plt.plot(x_axis[:-N+1], result, label=label, lw=4)[0]) |             lines.append(plt.plot(x_axis[:-N], result, label=label, lw=4)[0]) | ||||||
|         plt.legend(handles=lines, fontsize=30) |         plt.legend(handles=lines, fontsize=30) | ||||||
|         plt.title(title, fontsize=25) |         plt.title(title, fontsize=25) | ||||||
|         plt.xticks(fontsize=25) |         plt.xticks(fontsize=25) | ||||||
|  | @ -357,8 +361,8 @@ def plotlines(xs, results, labels, title, path): | ||||||
| def run(tuners, configs, labels, title, env, n_loops, n_repeats): | def run(tuners, configs, labels, title, env, n_loops, n_repeats): | ||||||
|     if not plt: |     if not plt: | ||||||
|         LOG.info("Cannot import matplotlib. Will write results to files instead of figures.") |         LOG.info("Cannot import matplotlib. Will write results to files instead of figures.") | ||||||
|     random.seed(0) |     random.seed(2) | ||||||
|     np.random.seed(1) |     np.random.seed(2) | ||||||
|     torch.manual_seed(0) |     torch.manual_seed(0) | ||||||
|     results = [] |     results = [] | ||||||
|     xs = [] |     xs = [] | ||||||
|  | @ -385,16 +389,17 @@ def run(tuners, configs, labels, title, env, n_loops, n_repeats): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def main(): | def main(): | ||||||
|     env = Environment(knob_dim=24, metric_dim=60, modes=[2], reward_variance=0.05) |     env = Environment(knob_dim=8, metric_dim=60, modes=[2], reward_variance=0.15) | ||||||
|     title = 'compare' |     title = 'ddpg_structure_nodrop' | ||||||
|     n_repeats = [1, 1, 1, 1] |     n_repeats = [2, 2] | ||||||
|     n_loops = 80 |     n_loops = 100 | ||||||
|     configs = [{'gamma': 0., 'c_lr': 0.001, 'a_lr': 0.01, 'num_collections': 50, 'n_epochs': 50}, |     configs = [{'gamma': 0., 'c_lr': 0.001, 'a_lr': 0.02, 'num_collections': 1, 'n_epochs': 30, | ||||||
|                {'num_samples': 30, 'num_collections': 50}, |                 'a_hidden_sizes': [128, 128, 64], 'c_hidden_sizes': [64, 128, 64]}, | ||||||
|                {'num_samples': 30, 'num_collections': 50}, |                {'gamma': 0., 'c_lr': 0.001, 'a_lr': 0.02, 'num_collections': 1, 'n_epochs': 30, | ||||||
|                {'num_samples': 30, 'num_collections': 50}] |                 'a_hidden_sizes': [64, 64, 32], 'c_hidden_sizes': [64, 128, 64]}, | ||||||
|     tuners = [ddpg, gpr_new, dnn, gpr] |                ] | ||||||
|     labels = [tuner.__name__ for tuner in tuners] |     tuners = [ddpg, ddpg] | ||||||
|  |     labels = ['1', '2'] | ||||||
|     run(tuners, configs, labels, title, env, n_loops, n_repeats) |     run(tuners, configs, labels, title, env, n_loops, n_repeats) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -25,15 +25,18 @@ class TestDDPG(unittest.TestCase): | ||||||
|         np.random.seed(0) |         np.random.seed(0) | ||||||
|         torch.manual_seed(0) |         torch.manual_seed(0) | ||||||
|         super(TestDDPG, cls).setUpClass() |         super(TestDDPG, cls).setUpClass() | ||||||
|         cls.ddpg = DDPG(n_actions=1, n_states=1, gamma=0) |         cls.ddpg = DDPG(n_actions=1, n_states=1, gamma=0, alr=0.02) | ||||||
|         for _ in range(700): |         knob_data = np.zeros(1) | ||||||
|             knob_data = np.array([random.random()]) |         metric_data = np.array([random.random()]) | ||||||
|             prev_metric_data = np.array([random.random()]) |         for _ in range(100): | ||||||
|  |             prev_metric_data = metric_data | ||||||
|             metric_data = np.array([random.random()]) |             metric_data = np.array([random.random()]) | ||||||
|             reward = 1.0 if (prev_metric_data[0] - 0.5) * (knob_data[0] - 0.5) > 0 else 0.0 |             reward = 1.0 if (prev_metric_data[0] - 0.5) * (knob_data[0] - 0.5) > 0 else 0.0 | ||||||
|             reward = np.array([reward]) |             reward = np.array([reward]) | ||||||
|             cls.ddpg.add_sample(prev_metric_data, knob_data, reward, metric_data) |             cls.ddpg.add_sample(prev_metric_data, knob_data, reward, metric_data) | ||||||
|  |             for _ in range(10): | ||||||
|                 cls.ddpg.update() |                 cls.ddpg.update() | ||||||
|  |             knob_data = cls.ddpg.choose_action(metric_data) | ||||||
| 
 | 
 | ||||||
|     def test_ddpg_ypreds(self): |     def test_ddpg_ypreds(self): | ||||||
|         total_reward = 0.0 |         total_reward = 0.0 | ||||||
|  |  | ||||||
|  | @ -77,7 +77,16 @@ DNN_DEBUG_INTERVAL = 100 | ||||||
| DDPG_BATCH_SIZE = 32 | DDPG_BATCH_SIZE = 32 | ||||||
| 
 | 
 | ||||||
| #  Learning rate of actor network | #  Learning rate of actor network | ||||||
| ACTOR_LEARNING_RATE = 0.01 | ACTOR_LEARNING_RATE = 0.02 | ||||||
| 
 | 
 | ||||||
| #  Learning rate of critic network | #  Learning rate of critic network | ||||||
| CRITIC_LEARNING_RATE = 0.001 | CRITIC_LEARNING_RATE = 0.001 | ||||||
|  | 
 | ||||||
|  | #  Number of update epochs per iteration | ||||||
|  | UPDATE_EPOCHS = 30 | ||||||
|  | 
 | ||||||
|  | #  The number of hidden units in each layer of the actor MLP | ||||||
|  | ACTOR_HIDDEN_SIZES = [128, 128, 64] | ||||||
|  | 
 | ||||||
|  | #  The number of hidden units in each layer of the critic MLP | ||||||
|  | CRITIC_HIDDEN_SIZES = [64, 128, 64] | ||||||
|  |  | ||||||
|  | @ -31,7 +31,8 @@ from website.settings import (DEFAULT_LENGTH_SCALE, DEFAULT_MAGNITUDE, | ||||||
|                               DEFAULT_EPSILON, MAX_ITER, GPR_EPS, |                               DEFAULT_EPSILON, MAX_ITER, GPR_EPS, | ||||||
|                               DEFAULT_SIGMA_MULTIPLIER, DEFAULT_MU_MULTIPLIER, |                               DEFAULT_SIGMA_MULTIPLIER, DEFAULT_MU_MULTIPLIER, | ||||||
|                               DDPG_BATCH_SIZE, ACTOR_LEARNING_RATE, |                               DDPG_BATCH_SIZE, ACTOR_LEARNING_RATE, | ||||||
|                               CRITIC_LEARNING_RATE, |                               CRITIC_LEARNING_RATE, UPDATE_EPOCHS, | ||||||
|  |                               ACTOR_HIDDEN_SIZES, CRITIC_HIDDEN_SIZES, | ||||||
|                               DNN_TRAIN_ITER, DNN_EXPLORE, DNN_EXPLORE_ITER, |                               DNN_TRAIN_ITER, DNN_EXPLORE, DNN_EXPLORE_ITER, | ||||||
|                               DNN_NOISE_SCALE_BEGIN, DNN_NOISE_SCALE_END, |                               DNN_NOISE_SCALE_BEGIN, DNN_NOISE_SCALE_END, | ||||||
|                               DNN_DEBUG, DNN_DEBUG_INTERVAL) |                               DNN_DEBUG, DNN_DEBUG_INTERVAL) | ||||||
|  | @ -278,12 +279,9 @@ def train_ddpg(result_id): | ||||||
|     result = Result.objects.get(pk=result_id) |     result = Result.objects.get(pk=result_id) | ||||||
|     session = Result.objects.get(pk=result_id).session |     session = Result.objects.get(pk=result_id).session | ||||||
|     session_results = Result.objects.filter(session=session, |     session_results = Result.objects.filter(session=session, | ||||||
|                                             creation_time__lt=result.creation_time) |                                             creation_time__lte=result.creation_time) | ||||||
|     result_info = {} |     result_info = {} | ||||||
|     result_info['newest_result_id'] = result_id |     result_info['newest_result_id'] = result_id | ||||||
|     if len(session_results) == 0: |  | ||||||
|         LOG.info('No previous result. Abort.') |  | ||||||
|         return result_info |  | ||||||
| 
 | 
 | ||||||
|     # Extract data from result |     # Extract data from result | ||||||
|     result = Result.objects.filter(pk=result_id) |     result = Result.objects.filter(pk=result_id) | ||||||
|  | @ -332,13 +330,14 @@ def train_ddpg(result_id): | ||||||
| 
 | 
 | ||||||
|     # Update ddpg |     # Update ddpg | ||||||
|     ddpg = DDPG(n_actions=knob_num, n_states=metric_num, alr=ACTOR_LEARNING_RATE, |     ddpg = DDPG(n_actions=knob_num, n_states=metric_num, alr=ACTOR_LEARNING_RATE, | ||||||
|                 clr=CRITIC_LEARNING_RATE, gamma=0, batch_size=DDPG_BATCH_SIZE) |                 clr=CRITIC_LEARNING_RATE, gamma=0, batch_size=DDPG_BATCH_SIZE, | ||||||
|  |                 a_hidden_sizes=ACTOR_HIDDEN_SIZES, c_hidden_sizes=CRITIC_HIDDEN_SIZES) | ||||||
|     if session.ddpg_actor_model and session.ddpg_critic_model: |     if session.ddpg_actor_model and session.ddpg_critic_model: | ||||||
|         ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model) |         ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model) | ||||||
|     if session.ddpg_reply_memory: |     if session.ddpg_reply_memory: | ||||||
|         ddpg.replay_memory.set(session.ddpg_reply_memory) |         ddpg.replay_memory.set(session.ddpg_reply_memory) | ||||||
|     ddpg.add_sample(normalized_metric_data, knob_data, reward, normalized_metric_data) |     ddpg.add_sample(normalized_metric_data, knob_data, reward, normalized_metric_data) | ||||||
|     for _ in range(25): |     for _ in range(UPDATE_EPOCHS): | ||||||
|         ddpg.update() |         ddpg.update() | ||||||
|     session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model() |     session.ddpg_actor_model, session.ddpg_critic_model = ddpg.get_model() | ||||||
|     session.ddpg_reply_memory = ddpg.replay_memory.get() |     session.ddpg_reply_memory = ddpg.replay_memory.get() | ||||||
|  | @ -362,7 +361,8 @@ def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-n | ||||||
|     knob_num = len(knob_labels) |     knob_num = len(knob_labels) | ||||||
|     metric_num = len(metric_data) |     metric_num = len(metric_data) | ||||||
| 
 | 
 | ||||||
|     ddpg = DDPG(n_actions=knob_num, n_states=metric_num) |     ddpg = DDPG(n_actions=knob_num, n_states=metric_num, a_hidden_sizes=ACTOR_HIDDEN_SIZES, | ||||||
|  |                 c_hidden_sizes=CRITIC_HIDDEN_SIZES) | ||||||
|     if session.ddpg_actor_model is not None and session.ddpg_critic_model is not None: |     if session.ddpg_actor_model is not None and session.ddpg_critic_model is not None: | ||||||
|         ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model) |         ddpg.set_model(session.ddpg_actor_model, session.ddpg_critic_model) | ||||||
|     if session.ddpg_reply_memory is not None: |     if session.ddpg_reply_memory is not None: | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue