fix multithread issue in DNN

2019-12-16 22:27:14 -05:00 · 2019-12-16 22:27:14 -05:00 · c76c8e7bfb
parent 2a7bc6145f
commit c76c8e7bfb
2 changed files with 144 additions and 112 deletions
--- a/client/driver/fabfile.py
+++ b/client/driver/fabfile.py
@ -760,9 +760,21 @@ def integration_tests():
    response = get_result(upload_code='ottertuneTestTuningDNN')
    assert response['status'] == 'good'
    # 2nd iteration Test DNN
    upload_result(result_dir='./integrationTests/data/', prefix='1__',
                  upload_code='ottertuneTestTuningDNN')
    response = get_result(upload_code='ottertuneTestTuningDNN')
    assert response['status'] == 'good'
    # Test GPR
    LOG.info('Test GPR (gaussian process regression)')
    upload_result(result_dir='./integrationTests/data/', prefix='0__',
                  upload_code='ottertuneTestTuningGPR')
    response = get_result(upload_code='ottertuneTestTuningGPR')
    assert response['status'] == 'good'
    # 2nd iteration Test GPR
    upload_result(result_dir='./integrationTests/data/', prefix='1__',
                  upload_code='ottertuneTestTuningGPR')
    response = get_result(upload_code='ottertuneTestTuningGPR')
    assert response['status'] == 'good'
--- a/server/analysis/nn_tf.py
+++ b/server/analysis/nn_tf.py
@ -46,6 +46,13 @@ class NeuralNet(object):
        self.explore_iters = explore_iters
        self.noise_scale_begin = noise_scale_begin
        self.noise_scale_end = noise_scale_end
        self.vars = {}
        self.ops = {}
        self.session = tf.Session()
        self.graph = tf.get_default_graph()
        with self.graph.as_default():
            with self.session.as_default():   # pylint: disable=not-context-manager
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
                # input X is placeholder, weights are variables.
                self.model = keras.Sequential([
@ -57,15 +64,17 @@ class NeuralNet(object):
                self.model.compile(loss='mean_squared_error',
                                   optimizer=self.optimizer,
                                   metrics=['mean_squared_error', 'mean_absolute_error'])
-        self.vars = {}
+        self._build_graph()
        self.ops = {}
        self.build_graph()
-    def save_weights(self, weights_file):
+    def save_weights_file(self, weights_file):
        with self.graph.as_default():
            with self.session.as_default():  # pylint: disable=not-context-manager
                self.model.save_weights(weights_file)
-    def load_weights(self, weights_file):
+    def load_weights_file(self, weights_file):
        try:
            with self.graph.as_default():
                with self.session.as_default():  # pylint: disable=not-context-manager
                    self.model.load_weights(weights_file)
            if self.debug:
                LOG.info('Neural Network Model weights file exists, load weights from the file')
@ -73,10 +82,15 @@ class NeuralNet(object):
            LOG.info('Weights file does not match neural network model, train model from scratch')
    def get_weights_bin(self):
-        return pickle.dumps(self.model.get_weights())
+        with self.graph.as_default():
            with self.session.as_default():  # pylint: disable=not-context-manager
                weights = self.model.get_weights()
                return pickle.dumps(weights)
    def set_weights_bin(self, weights):
        try:
            with self.graph.as_default():
                with self.session.as_default():  # pylint: disable=not-context-manager
                    self.model.set_weights(pickle.loads(weights))
            if self.debug:
                LOG.info('Neural Network Model weights exists, load the existing weights')
@ -85,10 +99,10 @@ class NeuralNet(object):
    # Build the same neural network as self.model, but here the input X is a
    # variable and the weights are placeholders. Find the optimal X using gradient descent.
-    def build_graph(self):
+    def _build_graph(self):
        batch_size = self.batch_size
        self.graph = tf.Graph()
        with self.graph.as_default():
            with self.session.as_default():  # pylint: disable=not-context-manager
                x_ = tf.Variable(tf.ones([batch_size, self.n_input]))
                w1_ = tf.placeholder(tf.float32, [self.n_input, 64])
                b1_ = tf.placeholder(tf.float32, [64])
@ -113,6 +127,8 @@ class NeuralNet(object):
                self.ops['train_'] = train_
    def fit(self, X_train, y_train, fit_epochs=500):
        with self.graph.as_default():
            with self.session.as_default():  # pylint: disable=not-context-manager
                self.history = self.model.fit(
                    X_train, y_train, epochs=fit_epochs, verbose=0)
                if self.debug:
@ -127,16 +143,18 @@ class NeuralNet(object):
                             size - 1, mse[size - 1])
    def predict(self, X_pred):
        with self.graph.as_default():
            with self.session.as_default():  # pylint: disable=not-context-manager
                return self.model.predict(X_pred)
    # Reference: Parameter Space Noise for Exploration. ICLR 2018, https://arxiv.org/abs/1706.01905
-    def add_noise(self, weights):
+    def _add_noise(self, weights):
-        scale = self.adaptive_noise_scale()
+        scale = self._adaptive_noise_scale()
        size = weights.shape[-1]
        noise = scale * np.random.normal(size=size)
        return weights + noise
-    def adaptive_noise_scale(self):
+    def _adaptive_noise_scale(self):
        if self.recommend_iters > self.explore_iters:
            scale = self.noise_scale_end
        else:
@ -147,24 +165,26 @@ class NeuralNet(object):
    def recommend(self, X_start, X_min=None, X_max=None, recommend_epochs=500, explore=False):
        batch_size = len(X_start)
        assert(batch_size == self.batch_size)
        with self.graph.as_default():
            with self.session.as_default() as sess:  # pylint: disable=not-context-manager
                w1, b1 = self.model.get_layer(index=0).get_weights()
                w2, b2 = self.model.get_layer(index=2).get_weights()
                w3, b3 = self.model.get_layer(index=3).get_weights()
                if explore is True:
-            w1 = self.add_noise(w1)
+                    w1 = self._add_noise(w1)
-            b1 = self.add_noise(b1)
+                    b1 = self._add_noise(b1)
-            w2 = self.add_noise(w2)
+                    w2 = self._add_noise(w2)
-            b2 = self.add_noise(b2)
+                    b2 = self._add_noise(b2)
-            w3 = self.add_noise(w3)
+                    w3 = self._add_noise(w3)
-            b3 = self.add_noise(b3)
+                    b3 = self._add_noise(b3)
                y_predict = self.predict(X_start)
                if self.debug:
                    LOG.info("Recommend phase, y prediction: min %f, max %f, mean %f",
                             np.min(y_predict), np.max(y_predict), np.mean(y_predict))
        with tf.Session(graph=self.graph) as sess:
                init = tf.global_variables_initializer()
                sess.run(init)
                assign_x_op = self.vars['x_'].assign(X_start)