From ab17aafac49003ca324a432fd75c8f9ae19cf846 Mon Sep 17 00:00:00 2001 From: yangdsh Date: Tue, 15 Oct 2019 00:30:12 +0000 Subject: [PATCH] modify ddpg test change the expectation values in dnn test --- server/analysis/tests/test_ddpg.py | 44 ++++++++++++++++++------------ server/analysis/tests/test_nn.py | 10 +++++-- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/server/analysis/tests/test_ddpg.py b/server/analysis/tests/test_ddpg.py index 8e7808f..ea13bb2 100644 --- a/server/analysis/tests/test_ddpg.py +++ b/server/analysis/tests/test_ddpg.py @@ -6,13 +6,17 @@ import random import unittest -from sklearn import datasets import numpy as np import torch from analysis.ddpg.ddpg import DDPG -# test ddpg model +# test ddpg model: +# The environment has 1-dim state and 1-dim action, the reward is calculated as follows: +# if state < 0.5, taking action < 0.5 gets reward 1, taking action >= 0.5 gets reward 0 +# if state >= 0.5, taking action >= 0.5 gets reward 1, taking action < 0.5 gets reward 0 +# Train 700 iterations and test for 500 iterations +# If the average reward during test is larger than 0.9, this test passes class TestDDPG(unittest.TestCase): @classmethod @@ -21,22 +25,26 @@ class TestDDPG(unittest.TestCase): np.random.seed(0) torch.manual_seed(0) super(TestDDPG, cls).setUpClass() - boston = datasets.load_boston() - data = boston['data'] - X_train = data[0:500] - X_test = data[500:] - y_train = boston['target'][0:500].reshape(500, 1) - ddpg = DDPG(n_actions=1, n_states=13) - for i in range(500): + cls.ddpg = DDPG(n_actions=1, n_states=1, gamma=0) + for _ in range(700): knob_data = np.array([random.random()]) - prev_metric_data = X_train[i - 1] - metric_data = X_train[i] - reward = y_train[i - 1] - ddpg.add_sample(prev_metric_data, knob_data, reward, metric_data, False) - if len(ddpg.replay_memory) > 32: - ddpg.update() - cls.ypreds_round = ['%.4f' % ddpg.choose_action(x)[0] for x in X_test] + prev_metric_data = np.array([random.random()]) + 
metric_data = np.array([random.random()]) + reward = 1.0 if (prev_metric_data[0] - 0.5) * (knob_data[0] - 0.5) > 0 else 0.0 + reward = np.array([reward]) + cls.ddpg.add_sample(prev_metric_data, knob_data, reward, metric_data, False) + if len(cls.ddpg.replay_memory) > 32: + cls.ddpg.update() def test_ddpg_ypreds(self): - expected_ypreds = ['0.3169', '0.3240', '0.3934', '0.5787', '0.6988', '0.5163'] - self.assertEqual(self.ypreds_round, expected_ypreds) + total_reward = 0.0 + for _ in range(500): + prev_metric_data = np.array([random.random()]) + knob_data = self.ddpg.choose_action(prev_metric_data) + reward = 1.0 if (prev_metric_data[0] - 0.5) * (knob_data[0] - 0.5) > 0 else 0.0 + total_reward += reward + self.assertGreater(total_reward / 500, 0.9) + + +if __name__ == '__main__': + unittest.main() diff --git a/server/analysis/tests/test_nn.py b/server/analysis/tests/test_nn.py index b90fe81..0705435 100644 --- a/server/analysis/tests/test_nn.py +++ b/server/analysis/tests/test_nn.py @@ -3,7 +3,9 @@ # # Copyright (c) 2017-18, Carnegie Mellon University Database Group # +import random import unittest +import numpy as np from tensorflow import set_random_seed from sklearn import datasets from analysis.nn_tf import NeuralNet @@ -20,7 +22,9 @@ class TestNN(unittest.TestCase): X_train = data[0:500] X_test = data[500:] y_train = boston['target'][0:500].reshape(500, 1) - set_random_seed(1) + random.seed(0) + np.random.seed(0) + set_random_seed(0) cls.model = NeuralNet(n_input=X_test.shape[1], batch_size=X_test.shape[0]) cls.model.fit(X_train, y_train) @@ -29,10 +33,10 @@ class TestNN(unittest.TestCase): def test_nn_ypreds(self): ypreds_round = ['%.3f' % x[0] for x in self.nn_result] - expected_ypreds = ['20.503', '22.158', '22.158', '25.692', '24.536', '23.637'] + expected_ypreds = ['21.279', '22.668', '23.115', '27.228', '25.892', '23.967'] self.assertEqual(ypreds_round, expected_ypreds) def test_nn_yrecommend(self): recommends_round = ['%.3f' % x[0] for x in 
self.nn_recommend.minl] - expected_recommends = ['14.229', '22.158', '22.158', '23.591', '23.591', '23.593'] + expected_recommends = ['21.279', '21.279', '21.279', '21.279', '21.279', '21.279'] self.assertEqual(recommends_round, expected_recommends)