From b1cabc2d08580b86acbe36fd4979d31bcc3e39c4 Mon Sep 17 00:00:00 2001
From: rical730
Date: Mon, 23 Mar 2020 21:21:21 +0800
Subject: [PATCH] add SGD and Adam Optimizer for DeepES (#222)

* add SGD and Adam Optimizer for DeepES

* update deepes readme

* add warning when input different size in the same param update()

* add error return in update(), add optimizer.cc

* separate SGD and Adam, optimizer type in config is not case sensitive

* delete optimizer.cc

* config optimizer in deepes.proto

* more readable

* update maddpg readme, fixed gym version
---
 deepes/README.md                         |  7 +--
 deepes/deepes_config.prototxt            | 10 +++--
 deepes/demo/cartpole_solver_parallel.cpp |  2 +-
 deepes/include/adam_optimizer.h          | 50 +++++++++++++++++++++
 deepes/include/optimizer.h               | 55 +++++++++++++++---------
 deepes/include/sgd_optimizer.h           | 44 +++++++++++++++++++
 deepes/include/torch_predictor.h         | 21 +++++++--
 deepes/src/adam_optimizer.cc             | 49 +++++++++++++++++++++
 deepes/src/proto/deepes.proto            |  9 +++-
 deepes/src/sgd_optimizer.cc              | 39 +++++++++++++++++
 examples/MADDPG/README.md                |  2 +-
 11 files changed, 254 insertions(+), 34 deletions(-)
 create mode 100644 deepes/include/adam_optimizer.h
 create mode 100644 deepes/include/sgd_optimizer.h
 create mode 100644 deepes/src/adam_optimizer.cc
 create mode 100644 deepes/src/sgd_optimizer.cc

diff --git a/deepes/README.md b/deepes/README.md
index ae14819..75b9eae 100644
--- a/deepes/README.md
+++ b/deepes/README.md
@@ -26,9 +26,10 @@ predictor->update(noisy_keys, noisy_rewards);
 - **裸写网络**:
 
 ## 相关依赖:
-- Protobuf >= 2.4.2
-- glog
-- gflag
+- Protobuf2
+- OpenMP
+- [glog](https://github.com/google/glog)
+- [gflag](https://github.com/gflags/gflags/blob/master/INSTALL.md)
 
 ## 额外依赖:
 
diff --git a/deepes/deepes_config.prototxt b/deepes/deepes_config.prototxt
index db2608f..07e337c 100644
--- a/deepes/deepes_config.prototxt
+++ b/deepes/deepes_config.prototxt
@@ -1,10 +1,14 @@
 seed : 1024
 
 gaussian_sampling {
-  std: 0.3
+  std: 0.5
 }
 
 optimizer {
-  type: "SGD",
-  base_lr: 1e-2
+  type: "Adam",
+  base_lr: 0.05,
+  momentum: 0.9,
+  beta1: 0.9,
+  beta2: 0.999,
+  epsilon: 1e-8,
 }
diff --git a/deepes/demo/cartpole_solver_parallel.cpp b/deepes/demo/cartpole_solver_parallel.cpp
index f6f39a6..960a8f7 100644
--- a/deepes/demo/cartpole_solver_parallel.cpp
+++ b/deepes/demo/cartpole_solver_parallel.cpp
@@ -60,7 +60,7 @@ int main(int argc, char* argv[]) {
     std::vector<float> noisy_rewards(ITER, 0.0f);
     noisy_keys.resize(ITER);
 
-    for (int epoch = 0; epoch < 10000; ++epoch) {
+    for (int epoch = 0; epoch < 1000; ++epoch) {
 #pragma omp parallel for schedule(dynamic, 1)
         for (int i = 0; i < ITER; ++i) {
             auto noisy_predictor = noisy_predictors[i];
diff --git a/deepes/include/adam_optimizer.h b/deepes/include/adam_optimizer.h
new file mode 100644
index 0000000..995fa00
--- /dev/null
+++ b/deepes/include/adam_optimizer.h
@@ -0,0 +1,50 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <map>
+#include "optimizer.h"
+
+#ifndef ADAM_OPTIMIZER_H
+#define ADAM_OPTIMIZER_H
+namespace DeepES{
+
+/*@brief AdamOptimizer.
+ * Implements Adam algorithm.
+ *
+ *@Args:
+ *     base_lr: learning rate (default: 1e-3).
+ *     beta1: coefficient used for computing the running average of the gradient (default: 0.9).
+ *     beta2: coefficient used for computing the running average of the gradient's square (default: 0.999).
+ *     epsilon: term added to the denominator to improve numerical stability (default: 1e-8).
+ */
+class AdamOptimizer: public Optimizer {
+public:
+    AdamOptimizer(float base_lr, float beta1=0.9, float beta2=0.999, float epsilon=1e-8):Optimizer(base_lr), \
+        _beta1(beta1), _beta2(beta2), _epsilon(epsilon) {}
+    ~AdamOptimizer();
+
+protected:
+    void compute_step(float* gradient, int size, std::string param_name);
+
+private:
+    float _beta1;
+    float _beta2;
+    float _epsilon;
+    std::map<std::string, float*> _momentum;
+    std::map<std::string, float*> _velocity;
+};
+
+}//namespace
+
+#endif
diff --git a/deepes/include/optimizer.h b/deepes/include/optimizer.h
index 6aca758..eb790c5 100644
--- a/deepes/include/optimizer.h
+++ b/deepes/include/optimizer.h
@@ -12,52 +12,65 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <map>
+#include <glog/logging.h>
+
 #ifndef OPTIMIZER_H
 #define OPTIMIZER_H
 namespace DeepES{
 
-/* Base class for optimizers. Subclsses are required to implement the following functions:
+/*@brief Optimizer. Base class for optimizers.
+ *
+ *@Args:
+ *     base_lr: learning rate (default: 1e-3).
+ *
+ * .. warning: update() works at the parameter level;
+ *     you need to call update() for each parameter.
+ *
+ * Subclasses are required to implement the following functions:
  * 1. compute_steps
  */
-
 class Optimizer {
 public:
     Optimizer() : _base_lr(1e-3), _update_times(0) {}
     Optimizer(float base_lr) : _base_lr(base_lr), _update_times(0) {}
+    virtual ~Optimizer() {
+        _params_size.clear();
+    }
+
     template<typename T>
     bool update(T weights, float* gradient, int size, std::string param_name="") {
-        bool success = true;
+        /*@ Performs a single optimization step (parameter update) at the parameter level.
+         *
+         *@Args:
+         *     weights (array): parameter weights.
+         *     gradient (array): gradient for updating weights.
+         *     size: size of gradient.
+         *     param_name: the name corresponding to the weights.
+         */
+        if (_params_size.count(param_name) == 0) {
+            _params_size[param_name] = size;
+        } else if (_params_size[param_name] != size) {
+            LOG(WARNING) << "[Warning] Update times: "<< int(_update_times / _params_size.size()) \
+                << ". Size of weights[" << param_name << "] is " << _params_size[param_name] << ", not " << size;
+            return false;
+        }
+        ++_update_times;
         compute_step(gradient, size, param_name);
         for (int i = 0; i < size; ++i) {
             weights[i] -= _base_lr * gradient[i];
         }
-        return success;
+        return true;
     } // template function
 
 protected:
     virtual void compute_step(float* graident, int size, std::string param_name="") = 0;
     float _base_lr;
     float _update_times;
+    std::map<std::string, int> _params_size;
 };
 
-class SGDOptimizer: public Optimizer {
-public:
-    SGDOptimizer(float base_lr, float momentum=0.0):Optimizer(base_lr), _momentum(momentum) {}
-
-protected:
-    void compute_step(float* gradient, int size, std::string param_name="") {
-    }
-
-private:
-    float _momentum;
-
-}; //class
-
-//class AdamOptimizer: public Optimizer {
-//public:
-//    AdamOptimizer(float base)
-//};
 
 }//namespace
 
 #endif
diff --git a/deepes/include/sgd_optimizer.h b/deepes/include/sgd_optimizer.h
new file mode 100644
index 0000000..6176902
--- /dev/null
+++ b/deepes/include/sgd_optimizer.h
@@ -0,0 +1,44 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <map>
+#include "optimizer.h"
+
+#ifndef SGD_OPTIMIZER_H
+#define SGD_OPTIMIZER_H
+namespace DeepES{
+
+/*@brief SGDOptimizer.
+ * Implements stochastic gradient descent (optionally with momentum).
+ *
+ *@Args:
+ *     base_lr: learning rate (default: 1e-3).
+ *     momentum: momentum factor (default: 0.9).
+ */
+class SGDOptimizer: public Optimizer {
+public:
+    SGDOptimizer(float base_lr, float momentum=0.9):Optimizer(base_lr), _momentum(momentum) {}
+    ~SGDOptimizer();
+
+protected:
+    void compute_step(float* gradient, int size, std::string param_name);
+
+private:
+    float _momentum;
+    std::map<std::string, float*> _velocity;
+};
+
+}
+
+#endif
diff --git a/deepes/include/torch_predictor.h b/deepes/include/torch_predictor.h
index f17f65f..8001240 100644
--- a/deepes/include/torch_predictor.h
+++ b/deepes/include/torch_predictor.h
@@ -16,7 +16,9 @@
 #define TORCHPREDICTOR_H
 #include <memory>
 #include <string>
-#include "optimizer.h"
+#include <algorithm>
+#include "sgd_optimizer.h"
+#include "adam_optimizer.h"
 #include "utils.h"
 #include "gaussian_sampling.h"
 #include "deepes.pb.h"
@@ -40,7 +42,20 @@ public:
         load_proto_conf(config_path, *_config);
         _sampling_method = std::make_shared<GaussianSampling>();
         _sampling_method->load_config(*_config);
-        _optimizer = std::make_shared<SGDOptimizer>(_config->optimizer().base_lr());
+
+        std::string opt_type = _config->optimizer().type();
+        std::transform(opt_type.begin(),opt_type.end(),opt_type.begin(),::tolower);
+        if (opt_type == "sgd") {
+            _optimizer = std::make_shared<SGDOptimizer>(_config->optimizer().base_lr(), \
+                    _config->optimizer().momentum());
+        }else if (opt_type == "adam") {
+            _optimizer = std::make_shared<AdamOptimizer>(_config->optimizer().base_lr(), \
+                    _config->optimizer().beta1(), \
+                    _config->optimizer().beta2(), \
+                    _config->optimizer().epsilon());
+        }else {
+            // TODO: NotImplementedError
+        }
         _param_size = 0;
         _sampled_model = model;
         param_size();
@@ -111,7 +126,7 @@ public:
         for (auto& param: params) {
             torch::Tensor tensor = param.value().view({-1});
             auto tensor_a = tensor.accessor<float,1>();
-            _optimizer->update(tensor_a, neg_gradients+counter, tensor.size(0));
+            _optimizer->update(tensor_a, neg_gradients+counter, tensor.size(0), param.key());
             counter += tensor.size(0);
         }
         delete[] noise;
diff --git a/deepes/src/adam_optimizer.cc b/deepes/src/adam_optimizer.cc
new file mode 100644
index 0000000..608f916
--- /dev/null
+++ b/deepes/src/adam_optimizer.cc
@@ -0,0 +1,49 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cmath>
+#include "adam_optimizer.h"
+
+namespace DeepES {
+
+AdamOptimizer::~AdamOptimizer() {
+    for (auto iter = _momentum.begin(); iter != _momentum.end(); iter++) {
+        delete[] iter->second;
+    }
+    for (auto iter = _velocity.begin(); iter != _velocity.end(); iter++) {
+        delete[] iter->second;
+    }
+    _momentum.clear();
+    _velocity.clear();
+}
+
+void AdamOptimizer::compute_step(float* gradient, int size, std::string param_name="") {
+    if (_momentum.count(param_name) == 0) {
+        _momentum[param_name] = new float [size];
+        memset(_momentum[param_name], 0, size * sizeof(float));
+    }
+    if (_velocity.count(param_name) == 0) {
+        _velocity[param_name] = new float [size];
+        memset(_velocity[param_name], 0, size * sizeof(float));
+    }
+    int true_update_times = int(_update_times / _velocity.size());
+    float alpha = std::sqrt(1 - std::pow(_beta2, true_update_times)) / (1 - std::pow(_beta1, true_update_times));
+    for (int i = 0; i < size; ++i) {
+        _momentum[param_name][i] = _beta1 * _momentum[param_name][i] + (1 - _beta1) * gradient[i];
+        _velocity[param_name][i] = _beta2 * _velocity[param_name][i] + (1 - _beta2) * gradient[i] * gradient[i];
+        gradient[i] = alpha * _momentum[param_name][i] / (std::sqrt(_velocity[param_name][i]) + _epsilon);
+    }
+}
+
+}//namespace
diff --git a/deepes/src/proto/deepes.proto b/deepes/src/proto/deepes.proto
index 26a97a5..38abee9 100644
--- a/deepes/src/proto/deepes.proto
+++ b/deepes/src/proto/deepes.proto
@@ -31,8 +31,13 @@ message GaussianSamplingConfig {
 
 message OptimizerConfig{
     optional string type = 1 [default = "SGD"];
-    optional float base_lr = 2; // The base learning rate
-    optional float momentum = 3; // The momentum value.
+    optional float base_lr = 2 [default = 1e-3]; // The base learning rate.
+    optional float momentum = 3 [default = 0.9]; // The momentum value for SGD.
+
+    // ------------Adam Optimizer---------
+    optional float beta1 = 4 [default = 0.9];
+    optional float beta2 = 5 [default = 0.999];
+    optional float epsilon = 6 [default = 1e-8];
 }
 
 message SamplingKey{
diff --git a/deepes/src/sgd_optimizer.cc b/deepes/src/sgd_optimizer.cc
new file mode 100644
index 0000000..06a65b6
--- /dev/null
+++ b/deepes/src/sgd_optimizer.cc
@@ -0,0 +1,39 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstring>
+#include "sgd_optimizer.h"
+
+namespace DeepES {
+
+SGDOptimizer::~SGDOptimizer() {
+    for (auto iter = _velocity.begin(); iter != _velocity.end(); iter++) {
+        delete[] iter->second;
+    }
+    _velocity.clear();
+}
+
+void SGDOptimizer::compute_step(float* gradient, int size, std::string param_name="") {
+    if (_velocity.count(param_name) == 0) {
+        _velocity[param_name] = new float [size];
+        memset(_velocity[param_name], 0, size * sizeof(float));
+    }
+    for (int i = 0; i < size; ++i) {
+        _velocity[param_name][i] = _momentum * _velocity[param_name][i] + (1 - _momentum) * gradient[i];
+        gradient[i] = _velocity[param_name][i];
+    }
+}
+
+
+}//namespace
diff --git a/examples/MADDPG/README.md b/examples/MADDPG/README.md
index 55d1914..0bf3a59 100644
--- a/examples/MADDPG/README.md
+++ b/examples/MADDPG/README.md
@@ -98,7 +98,7 @@ simple_world_comm
+ [paddlepaddle>=1.6.1](https://github.com/PaddlePaddle/Paddle) + [parl](https://github.com/PaddlePaddle/PARL) + [multiagent-particle-envs](https://github.com/openai/multiagent-particle-envs) -+ gym ++ gym==0.10.5 ### Start Training: ``` -- GitLab
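
To see the optimizer math added in this patch in isolation, the following standalone C++ sketch applies the same bias-corrected Adam step as AdamOptimizer::compute_step, followed by the weights[i] -= base_lr * gradient[i] step from Optimizer::update, to a small toy parameter vector. It is an illustration only: the toy gradient, the hyperparameter values, and the main() scaffold are not part of the patch or of the DeepES API.

// adam_sketch.cpp -- standalone illustration of the Adam update used in this patch.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    const float base_lr = 0.05f, beta1 = 0.9f, beta2 = 0.999f, epsilon = 1e-8f;
    std::vector<float> weights = {1.0f, -2.0f, 0.5f};
    std::vector<float> momentum(weights.size(), 0.0f);   // running average of the gradient
    std::vector<float> velocity(weights.size(), 0.0f);   // running average of the squared gradient

    for (int t = 1; t <= 3; ++t) {
        // Toy gradient: the current weights, as if minimizing 0.5 * w^2.
        std::vector<float> gradient = weights;

        // Bias-corrected step size, matching AdamOptimizer::compute_step.
        float alpha = std::sqrt(1 - std::pow(beta2, t)) / (1 - std::pow(beta1, t));
        for (size_t i = 0; i < weights.size(); ++i) {
            momentum[i] = beta1 * momentum[i] + (1 - beta1) * gradient[i];
            velocity[i] = beta2 * velocity[i] + (1 - beta2) * gradient[i] * gradient[i];
            gradient[i] = alpha * momentum[i] / (std::sqrt(velocity[i]) + epsilon);
            // Optimizer::update then applies the rescaled gradient to the weights.
            weights[i] -= base_lr * gradient[i];
        }
        std::printf("step %d: w = {%f, %f, %f}\n", t, weights[0], weights[1], weights[2]);
    }
    return 0;
}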