Unverified commit b1cabc2d, authored by rical730 and committed by GitHub

add SGD and Adam Optimizer for DeepES (#222)

* add SGD and Adam Optimizer for DeepES

* update deepes readme

* add warning when update() is called with a different size for the same param

* add error return in update(), add optimizer.cc

* separate SGD and Adam, optimizer type in config is not case sensitive

* delete optimizer.cc

* config optimizer in deepes.proto

* more readable

* update maddpg readme, fixed gym version
Parent 7b5c5241
@@ -26,9 +26,10 @@ predictor->update(noisy_keys, noisy_rewards);
 - **Hand-written networks**
 ## Dependencies:
-- Protobuf >= 2.4.2
-- glog
-- gflag
+- Protobuf2
+- OpenMP
+- [glog](https://github.com/google/glog)
+- [gflag](https://github.com/gflags/gflags/blob/master/INSTALL.md)
 ## Additional dependencies:
......
 seed : 1024
 gaussian_sampling {
-    std: 0.3
+    std: 0.5
 }
 optimizer {
-    type: "SGD",
-    base_lr: 1e-2
+    type: "Adam",
+    base_lr: 0.05,
+    momentum: 0.9,
+    beta1: 0.9,
+    beta2: 0.999,
+    epsilon: 1e-8,
 }
@@ -60,7 +60,7 @@ int main(int argc, char* argv[]) {
   std::vector<float> noisy_rewards(ITER, 0.0f);
   noisy_keys.resize(ITER);
-  for (int epoch = 0; epoch < 10000; ++epoch) {
+  for (int epoch = 0; epoch < 1000; ++epoch) {
 #pragma omp parallel for schedule(dynamic, 1)
     for (int i = 0; i < ITER; ++i) {
       auto noisy_predictor = noisy_predictors[i];
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <map>
#include "optimizer.h"
#ifndef ADAM_OPTIMIZER_H
#define ADAM_OPTIMIZER_H
namespace DeepES{
/*@brief AdamOptimizer.
* Implements Adam algorithm.
*
*@Args:
* base_lr: learning rate (default: 1e-3).
* beta1: coefficient used for computing the running average of the gradient (default: 0.9).
* beta2: coefficient used for computing the running average of the squared gradient (default: 0.999).
* epsilon: term added to the denominator to improve numerical stability (default: 1e-8).
*/
class AdamOptimizer: public Optimizer {
public:
AdamOptimizer(float base_lr, float beta1=0.9, float beta2=0.999, float epsilon=1e-8):Optimizer(base_lr), \
_beta1(beta1), _beta2(beta2), _epsilon(epsilon) {}
~AdamOptimizer();
protected:
void compute_step(float* gradient, int size, std::string param_name);
private:
float _beta1;
float _beta2;
float _epsilon;
std::map<std::string, float*> _momentum;
std::map<std::string, float*> _velocity;
};
}//namespace
#endif
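For orientation, here is a minimal usage sketch of this interface (not part of the commit; it assumes adam_optimizer.cc and glog are compiled and linked in, and the parameter name "fc1.weight" is purely illustrative):

```cpp
#include <vector>
#include "adam_optimizer.h"

int main() {
    // base_lr = 0.05; beta1, beta2 and epsilon fall back to the defaults above.
    DeepES::AdamOptimizer opt(0.05);

    std::vector<float> weights(10, 1.0f);   // one flattened parameter
    std::vector<float> gradient(10, 0.1f);  // its gradient (rewritten in place by compute_step)

    // update() is inherited from Optimizer and applies one Adam step to the
    // parameter registered under the given name.
    opt.update(weights.data(), gradient.data(), static_cast<int>(weights.size()), "fc1.weight");
    return 0;
}
```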
@@ -12,52 +12,65 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include <map>
+#include <glog/logging.h>
 #ifndef OPTIMIZER_H
 #define OPTIMIZER_H
 namespace DeepES{
-/* Base class for optimizers. Subclasses are required to implement the following functions:
+/*@brief Optimizer. Base class for optimizers.
+ *
+ *@Args:
+ * base_lr: learning rate (default: 1e-3).
+ *
+ * .. warning: update() works at the parameter level;
+ * call update() separately for each parameter.
+ *
+ * Subclasses are required to implement the following functions:
  * 1. compute_step
  */
 class Optimizer {
 public:
   Optimizer() : _base_lr(1e-3), _update_times(0) {}
   Optimizer(float base_lr) : _base_lr(base_lr), _update_times(0) {}
+  virtual ~Optimizer() {
+    _params_size.clear();
+  }
   template<typename T>
   bool update(T weights, float* gradient, int size, std::string param_name="") {
-    bool success = true;
+    /*@ Performs a single optimization step (parameter update) at the parameter level.
+     *
+     *@Args:
+     * weights (array): parameter weights.
+     * gradient (array): gradient for updating weights.
+     * size: size of gradient.
+     * param_name: the name corresponding to the weights.
+     */
+    if (_params_size.count(param_name) == 0) {
+      _params_size[param_name] = size;
+    } else if (_params_size[param_name] != size) {
+      LOG(WARNING) << "[Warning] Update times: " << int(_update_times / _params_size.size())
+                   << ". Size of weights[" << param_name << "] is " << _params_size[param_name]
+                   << ", not " << size;
+      return false;
+    }
     ++_update_times;
     compute_step(gradient, size, param_name);
     for (int i = 0; i < size; ++i) {
       weights[i] -= _base_lr * gradient[i];
     }
-    return success;
+    return true;
   } // template function
 protected:
   virtual void compute_step(float* gradient, int size, std::string param_name="") = 0;
   float _base_lr;
   float _update_times;
+  std::map<std::string, int> _params_size;
 };
-class SGDOptimizer: public Optimizer {
-public:
-  SGDOptimizer(float base_lr, float momentum=0.0):Optimizer(base_lr), _momentum(momentum) {}
-protected:
-  void compute_step(float* gradient, int size, std::string param_name="") {
-  }
-private:
-  float _momentum;
-}; //class
-//class AdamOptimizer: public Optimizer {
-//public:
-//  AdamOptimizer(float base)
-//};
 }//namespace
 #endif
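To make the compute_step() contract concrete, here is a hedged sketch of a custom subclass (the class name PlainSGD is illustrative and not part of DeepES): compute_step() may precondition the gradient in place, and update() then applies weights[i] -= _base_lr * gradient[i].

```cpp
#include "optimizer.h"

namespace DeepES {

// Illustrative only: plain gradient descent with no optimizer state.
class PlainSGD : public Optimizer {
public:
    explicit PlainSGD(float base_lr) : Optimizer(base_lr) {}

protected:
    // Leave the gradient untouched; update() will scale it by _base_lr.
    void compute_step(float* gradient, int size, std::string param_name = "") {
        (void)gradient;
        (void)size;
        (void)param_name;
    }
};

}  // namespace DeepES
```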
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <map>
#include "optimizer.h"
#ifndef SGD_OPTIMIZER_H
#define SGD_OPTIMIZER_H
namespace DeepES{
/*@brief SGDOptimizer.
* Implements stochastic gradient descent (optionally with momentum).
*
*@Args:
* base_lr: learning rate (default: 1e-3).
* momentum: momentum factor (default: 0.9).
*/
class SGDOptimizer: public Optimizer {
public:
SGDOptimizer(float base_lr, float momentum=0.9):Optimizer(base_lr), _momentum(momentum) {}
~SGDOptimizer();
protected:
void compute_step(float* gradient, int size, std::string param_name);
private:
float _momentum;
std::map<std::string, float*> _velocity;
};
}
#endif
@@ -16,7 +16,9 @@
 #define TORCHPREDICTOR_H
 #include <memory>
 #include <string>
-#include "optimizer.h"
+#include <algorithm>
+#include "sgd_optimizer.h"
+#include "adam_optimizer.h"
 #include "utils.h"
 #include "gaussian_sampling.h"
 #include "deepes.pb.h"
@@ -40,7 +42,20 @@ public:
     load_proto_conf(config_path, *_config);
     _sampling_method = std::make_shared<GaussianSampling>();
     _sampling_method->load_config(*_config);
-    _optimizer = std::make_shared<SGDOptimizer>(_config->optimizer().base_lr());
+    std::string opt_type = _config->optimizer().type();
+    std::transform(opt_type.begin(), opt_type.end(), opt_type.begin(), ::tolower);
+    if (opt_type == "sgd") {
+      _optimizer = std::make_shared<SGDOptimizer>(_config->optimizer().base_lr(),
+                                                  _config->optimizer().momentum());
+    } else if (opt_type == "adam") {
+      _optimizer = std::make_shared<AdamOptimizer>(_config->optimizer().base_lr(),
+                                                   _config->optimizer().beta1(),
+                                                   _config->optimizer().beta2(),
+                                                   _config->optimizer().epsilon());
+    } else {
+      // TODO: NotImplementedError
+    }
     _param_size = 0;
     _sampled_model = model;
     param_size();
@@ -111,7 +126,7 @@ public:
     for (auto& param: params) {
       torch::Tensor tensor = param.value().view({-1});
       auto tensor_a = tensor.accessor<float,1>();
-      _optimizer->update(tensor_a, neg_gradients+counter, tensor.size(0));
+      _optimizer->update(tensor_a, neg_gradients+counter, tensor.size(0), param.key());
       counter += tensor.size(0);
     }
     delete[] noise;
......
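The lowercase normalization above is what makes the optimizer `type` field case-insensitive. A standalone sketch of just that step (the helper name normalize_type is illustrative):

```cpp
#include <algorithm>
#include <cassert>
#include <cctype>
#include <string>

// Mirrors the lowercase normalization done in the predictor constructor.
static std::string normalize_type(std::string s) {
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    return s;
}

int main() {
    assert(normalize_type("SGD") == "sgd");    // matches the "sgd" branch
    assert(normalize_type("aDaM") == "adam");  // any casing selects AdamOptimizer
    return 0;
}
```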
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <cstring>  // memset
#include "adam_optimizer.h"
namespace DeepES {
AdamOptimizer::~AdamOptimizer() {
for (auto iter = _momentum.begin(); iter != _momentum.end(); iter++) {
delete[] iter->second;
}
for (auto iter = _velocity.begin(); iter != _velocity.end(); iter++) {
delete[] iter->second;
}
_momentum.clear();
_velocity.clear();
}
void AdamOptimizer::compute_step(float* gradient, int size, std::string param_name="") {
if (_momentum.count(param_name) == 0) {
_momentum[param_name] = new float [size];
memset(_momentum[param_name], 0, size * sizeof(float));
}
if (_velocity.count(param_name) == 0) {
_velocity[param_name] = new float [size];
memset(_velocity[param_name], 0, size * sizeof(float));
}
// Bias-correct using the per-parameter update count rather than the global counter.
int true_update_times = int(_update_times / _velocity.size());
float alpha = std::sqrt(1 - std::pow(_beta2, true_update_times)) / (1 - std::pow(_beta1, true_update_times));
for (int i = 0; i < size; ++i) {
_momentum[param_name][i] = _beta1 * _momentum[param_name][i] + (1 - _beta1) * gradient[i];
_velocity[param_name][i] = _beta2 * _velocity[param_name][i] + (1 - _beta2) * gradient[i] * gradient[i];
gradient[i] = alpha * _momentum[param_name][i] / (std::sqrt(_velocity[param_name][i]) + _epsilon);
}
}
}//namespace
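A self-contained numerical sketch of the bias-corrected step above for a single scalar parameter (constants mirror the header defaults; the gradient value 0.37 is arbitrary). On the very first update the applied step is approximately base_lr in the direction of the gradient, a well-known property of Adam:

```cpp
#include <cmath>
#include <cstdio>

int main() {
    const float base_lr = 1e-3f, beta1 = 0.9f, beta2 = 0.999f, epsilon = 1e-8f;
    float w = 0.0f, m = 0.0f, v = 0.0f;
    const float g = 0.37f;  // arbitrary gradient value
    const int t = 1;        // first update of this parameter

    m = beta1 * m + (1 - beta1) * g;
    v = beta2 * v + (1 - beta2) * g * g;
    float alpha = std::sqrt(1 - std::pow(beta2, t)) / (1 - std::pow(beta1, t));
    float step = alpha * m / (std::sqrt(v) + epsilon);

    w -= base_lr * step;  // what Optimizer::update() does with the rescaled gradient
    std::printf("first Adam step: %.6f (close to -base_lr = %.6f)\n", w, -base_lr);
    return 0;
}
```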
@@ -31,8 +31,13 @@ message GaussianSamplingConfig {
 message OptimizerConfig{
   optional string type = 1 [default = "SGD"];
-  optional float base_lr = 2; // The base learning rate
-  optional float momentum = 3; // The momentum value.
+  optional float base_lr = 2 [default = 1e-3]; // The base learning rate.
+  optional float momentum = 3 [default = 0.9]; // The momentum value for SGD.
+  // ------------Adam Optimizer---------
+  optional float beta1 = 4 [default = 0.9];
+  optional float beta2 = 5 [default = 0.999];
+  optional float epsilon = 6 [default = 1e-8];
 }
 message SamplingKey{
......
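Because the new fields carry defaults, a text-format config can set only `type` and `base_lr` and still get beta1 = 0.9, beta2 = 0.999 and epsilon = 1e-8; for example (values illustrative, not taken from the commit):

```
optimizer {
    type: "Adam",
    base_lr: 1e-3,
}
```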
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <cstring>  // memset
#include "sgd_optimizer.h"
namespace DeepES {
SGDOptimizer::~SGDOptimizer() {
for (auto iter = _velocity.begin(); iter != _velocity.end(); iter++) {
delete[] iter->second;
}
_velocity.clear();
}
void SGDOptimizer::compute_step(float* gradient, int size, std::string param_name="") {
if (_velocity.count(param_name) == 0) {
_velocity[param_name] = new float [size];
memset(_velocity[param_name], 0, size * sizeof(float));
}
for (int i = 0; i < size; ++i) {
_velocity[param_name][i] = _momentum * _velocity[param_name][i] + (1 - _momentum) * gradient[i];
gradient[i] = _velocity[param_name][i];
}
}
}//namespace
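A short numerical sketch of the dampened momentum rule in compute_step() above (the step count of 50 is arbitrary): with a constant gradient g, the velocity v = momentum * v + (1 - momentum) * g converges to g itself, so the applied step approaches base_lr * g rather than being amplified by 1 / (1 - momentum):

```cpp
#include <cstdio>

int main() {
    const float momentum = 0.9f, g = 1.0f;
    float v = 0.0f;
    for (int t = 1; t <= 50; ++t) {
        v = momentum * v + (1 - momentum) * g;  // same recurrence as compute_step()
    }
    std::printf("velocity after 50 steps: %.4f (converges to g = %.1f)\n", v, g);
    return 0;
}
```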
@@ -98,7 +98,7 @@ simple_world_comm<br>
 + [paddlepaddle>=1.6.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + [multiagent-particle-envs](https://github.com/openai/multiagent-particle-envs)
-+ gym
++ gym==0.10.5
 ### Start Training:
 ```
......