diff --git a/server/analysis/ddpg/ddpg.py b/server/analysis/ddpg/ddpg.py
index 39f89bc..d3d00f5 100644
--- a/server/analysis/ddpg/ddpg.py
+++ b/server/analysis/ddpg/ddpg.py
@@ -198,7 +198,6 @@ class DDPG(object):
         else:
             # Build Network
             self._build_network()
-            LOG.info('Finish Initializing Networks')
 
         self.replay_memory = PrioritizedReplayMemory(capacity=memory_size)
         self.noise = OUProcess(n_actions)
diff --git a/server/analysis/tests/test_ddpg.py b/server/analysis/tests/test_ddpg.py
new file mode 100644
index 0000000..85a74f7
--- /dev/null
+++ b/server/analysis/tests/test_ddpg.py
@@ -0,0 +1,47 @@
+#
+# OtterTune - test_ddpg.py
+#
+# Copyright (c) 2017-18, Carnegie Mellon University Database Group
+#
+
+import random
+import unittest
+from sklearn import datasets
+import numpy as np
+import torch
+from analysis.ddpg.ddpg import DDPG
+
+
+# Test the DDPG model end to end on the Boston housing data.
+class TestDDPG(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        # Fix the torch, python, and numpy RNGs so training is deterministic.
+        torch.manual_seed(0)
+        random.seed(0)
+        np.random.seed(0)
+        super(TestDDPG, cls).setUpClass()
+        boston = datasets.load_boston()
+        data = boston['data']
+        X_train = data[0:500]
+        cls.X_test = data[500:]
+        y_train = boston['target'][0:500].reshape(500, 1)
+        cls.ddpg = DDPG(n_actions=1, n_states=13)
+        for i in range(500):
+            knob_data = np.array([random.random()])
+            # Note: at i = 0 the -1 indices wrap around to the last row.
+            prev_metric_data = X_train[i - 1]
+            metric_data = X_train[i]
+            reward = y_train[i - 1]
+            cls.ddpg.add_sample(prev_metric_data, knob_data, reward, metric_data, False)
+            # Start training once the replay memory can fill a batch.
+            if len(cls.ddpg.replay_memory) > 32:
+                cls.ddpg.update()
+
+    def test_ddpg_ypreds(self):
+        ypreds = [self.ddpg.choose_action(x)[0] for x in self.X_test]
+        expected_ypreds = [0.1778, 0.1914, 0.2607, 0.4459, 0.5660, 0.3836]
+        self.assertEqual(len(ypreds), len(expected_ypreds))
+        for ypred, expected_ypred in zip(ypreds, expected_ypreds):
+            self.assertAlmostEqual(ypred, expected_ypred, places=4)
diff --git a/server/website/website/settings/constants.py b/server/website/website/settings/constants.py
index 100058e..02a0f92 100644
--- a/server/website/website/settings/constants.py
+++ b/server/website/website/settings/constants.py
@@ -83,7 +83,7 @@
 ACTOR_LEARNING_RATE = 0.001
 CRITIC_LEARNING_RATE = 0.001
 
 # The impact of future reward on the decision
-GAMMA = 0.1
+GAMMA = 0.9
 # The changing rate of the target network
 TAU = 0.002
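Why bump GAMMA from 0.1 to 0.9: GAMMA is the discount factor in the critic's temporal-difference target, so a reward arriving n steps ahead is weighted by GAMMA ** n. A minimal sketch of where the constant enters a standard DDPG-style critic update (names like td_target and next_q are illustrative, not OtterTune's actual internals):

    import torch

    GAMMA = 0.9  # was 0.1

    def td_target(reward, next_q, done, gamma=GAMMA):
        # Bellman backup used by a DDPG critic (sketch): add the discounted
        # target-network estimate for the next state unless the episode ended.
        return reward + gamma * next_q * (1.0 - done)

    # A reward 3 steps ahead retains 0.1 ** 3 = 0.001 of its value under the
    # old setting but 0.9 ** 3 = 0.729 under the new one, so multi-step
    # effects of a knob change can now shape the learned policy.
    print(td_target(torch.tensor(0.0), torch.tensor(10.0), torch.tensor(0.0)))  # tensor(9.)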
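The hard-coded expected_ypreds in the new test only hold if seeding torch, random, and numpy fully determines training. A standalone sketch that checks this assumption, using only the DDPG constructor and choose_action call that appear in the diff (the all-zeros probe state is hypothetical):

    import random

    import numpy as np
    import torch

    from analysis.ddpg.ddpg import DDPG

    def seeded_action(seed=0):
        # Re-seed every RNG the model touches, then query an untrained policy.
        torch.manual_seed(seed)
        random.seed(seed)
        np.random.seed(seed)
        ddpg = DDPG(n_actions=1, n_states=13)
        return ddpg.choose_action(np.zeros(13))

    # Two identically seeded models should produce identical actions.
    assert np.allclose(seeded_action(), seeded_action())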