From b1cabc2d08580b86acbe36fd4979d31bcc3e39c4 Mon Sep 17 00:00:00 2001
From: rical730
Date: Mon, 23 Mar 2020 21:21:21 +0800
Subject: [PATCH] add SGD and Adam Optimizer for DeepES (#222)

* add SGD and Adam Optimizer for DeepES

* update deepes readme

* add warning when input different size in the same param update()

* add error return in update(), add optimizer.cc

* separate SGD and Adam, optimizer type in config is not case sensitive

* delete optimizer.cc

* config optimizer in deepes.proto

* more readable

* update maddpg readme, fixed gym version
---
 deepes/README.md                         |  7 +--
 deepes/deepes_config.prototxt            | 10 +++--
 deepes/demo/cartpole_solver_parallel.cpp |  2 +-
 deepes/include/adam_optimizer.h          | 50 +++++++++++++++++++++
 deepes/include/optimizer.h               | 55 +++++++++++++++---------
 deepes/include/sgd_optimizer.h           | 44 +++++++++++++++++++
 deepes/include/torch_predictor.h         | 21 +++++++--
 deepes/src/adam_optimizer.cc             | 49 +++++++++++++++++++++
 deepes/src/proto/deepes.proto            |  9 +++-
 deepes/src/sgd_optimizer.cc              | 39 +++++++++++++++++
 examples/MADDPG/README.md                |  2 +-
 11 files changed, 254 insertions(+), 34 deletions(-)
 create mode 100644 deepes/include/adam_optimizer.h
 create mode 100644 deepes/include/sgd_optimizer.h
 create mode 100644 deepes/src/adam_optimizer.cc
 create mode 100644 deepes/src/sgd_optimizer.cc

diff --git a/deepes/README.md b/deepes/README.md
index ae14819..75b9eae 100644
--- a/deepes/README.md
+++ b/deepes/README.md
@@ -26,9 +26,10 @@ predictor->update(noisy_keys, noisy_rewards);
 - **裸写网络**:
 
 ## 相关依赖:
-- Protobuf >= 2.4.2
-- glog
-- gflag
+- Protobuf2
+- OpenMP
+- [glog](https://github.com/google/glog)
+- [gflag](https://github.com/gflags/gflags/blob/master/INSTALL.md)
 
 ## 额外依赖:
 
diff --git a/deepes/deepes_config.prototxt b/deepes/deepes_config.prototxt
index db2608f..07e337c 100644
--- a/deepes/deepes_config.prototxt
+++ b/deepes/deepes_config.prototxt
@@ -1,10 +1,14 @@
 seed : 1024
 
 gaussian_sampling {
-  std: 0.3
+  std: 0.5
 }
 
 optimizer {
-  type: "SGD",
-  base_lr: 1e-2
+  type: "Adam",
+  base_lr: 0.05,
+  momentum: 0.9,
+  beta1: 0.9,
+  beta2: 0.999,
+  epsilon: 1e-8,
 }
diff --git a/deepes/demo/cartpole_solver_parallel.cpp b/deepes/demo/cartpole_solver_parallel.cpp
index f6f39a6..960a8f7 100644
--- a/deepes/demo/cartpole_solver_parallel.cpp
+++ b/deepes/demo/cartpole_solver_parallel.cpp
@@ -60,7 +60,7 @@ int main(int argc, char* argv[]) {
     std::vector<float> noisy_rewards(ITER, 0.0f);
     noisy_keys.resize(ITER);
 
-    for (int epoch = 0; epoch < 10000; ++epoch) {
+    for (int epoch = 0; epoch < 1000; ++epoch) {
 #pragma omp parallel for schedule(dynamic, 1)
         for (int i = 0; i < ITER; ++i) {
             auto noisy_predictor = noisy_predictors[i];
diff --git a/deepes/include/adam_optimizer.h b/deepes/include/adam_optimizer.h
new file mode 100644
index 0000000..995fa00
--- /dev/null
+++ b/deepes/include/adam_optimizer.h
@@ -0,0 +1,50 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <map>
+#include "optimizer.h"
+
+#ifndef ADAM_OPTIMIZER_H
+#define ADAM_OPTIMIZER_H
+namespace DeepES{
+
+/*@brief AdamOptimizer.
+ * Implements Adam algorithm.
+ *
+ *@Args:
+ *     base_lr: learning rate (default: 1e-3).
+ *     beta1: coefficient used for computing the running average of the gradient (default: 0.9).
+ *     beta2: coefficient used for computing the running average of the gradient's square (default: 0.999).
+ *     epsilon: term added to the denominator to improve numerical stability (default: 1e-8).
+ */
+class AdamOptimizer: public Optimizer {
+public:
+    AdamOptimizer(float base_lr, float beta1=0.9, float beta2=0.999, float epsilon=1e-8):Optimizer(base_lr), \
+        _beta1(beta1), _beta2(beta2), _epsilon(epsilon) {}
+    ~AdamOptimizer();
+
+protected:
+    void compute_step(float* gradient, int size, std::string param_name);
+
+private:
+    float _beta1;
+    float _beta2;
+    float _epsilon;
+    std::map<std::string, float*> _momentum;
+    std::map<std::string, float*> _velocity;
+};
+
+}//namespace
+
+#endif
diff --git a/deepes/include/optimizer.h b/deepes/include/optimizer.h
index 6aca758..eb790c5 100644
--- a/deepes/include/optimizer.h
+++ b/deepes/include/optimizer.h
@@ -12,52 +12,65 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <map>
+#include <glog/logging.h>
+
 #ifndef OPTIMIZER_H
 #define OPTIMIZER_H
 namespace DeepES{
 
-/* Base class for optimizers. Subclsses are required to implement the following functions:
+/*@brief Optimizer. Base class for optimizers.
+ *
+ *@Args:
+ *     base_lr: learning rate (default: 1e-3).
+ *
+ * .. warning: update() works at the parameter level;
+ *     you need to call update() for each parameter.
+ *
+ * Subclasses are required to implement the following functions:
  * 1. compute_steps
  */
-
 class Optimizer {
 public:
     Optimizer() : _base_lr(1e-3), _update_times(0) {}
     Optimizer(float base_lr) : _base_lr(base_lr), _update_times(0) {}
+    virtual ~Optimizer() {
+        _params_size.clear();
+    }
+
     template<typename T>
     bool update(T weights, float* gradient, int size, std::string param_name="") {
-        bool success = true;
+        /*@ Performs a single optimization step (parameter update) at the parameter level.
+         *
+         *@Args:
+         *     weights (array): parameter weights.
+         *     gradient (array): gradient for updating weights.
+         *     size: size of gradient.
+         *     param_name: the name corresponding to the weights.
+         */
+        if (_params_size.count(param_name) == 0) {
+            _params_size[param_name] = size;
+        } else if (_params_size[param_name] != size) {
+            LOG(WARNING) << "[Warning] Update times: "<< int(_update_times / _params_size.size()) \
+                << ". Size of weights[" << param_name << "] is " << _params_size[param_name] << ", not " << size;
+            return false;
+        }
+        ++_update_times;
         compute_step(gradient, size, param_name);
         for (int i = 0; i < size; ++i) {
             weights[i] -= _base_lr * gradient[i];
         }
-        return success;
+        return true;
     } // template function
 
 protected:
     virtual void compute_step(float* graident, int size, std::string param_name="") = 0;
     float _base_lr;
     float _update_times;
+    std::map<std::string, int> _params_size;
 };
 
-class SGDOptimizer: public Optimizer {
-public:
-    SGDOptimizer(float base_lr, float momentum=0.0):Optimizer(base_lr), _momentum(momentum) {}
-
-protected:
-    void compute_step(float* gradient, int size, std::string param_name="") {
-    }
-
-private:
-    float _momentum;
-
-}; //class
-
-//class AdamOptimizer: public Optimizer {
-//public:
-//    AdamOptimizer(float base)
-//};
 
 }//namespace
 
 #endif
diff --git a/deepes/include/sgd_optimizer.h b/deepes/include/sgd_optimizer.h
new file mode 100644
index 0000000..6176902
--- /dev/null
+++ b/deepes/include/sgd_optimizer.h
@@ -0,0 +1,44 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <map>
+#include "optimizer.h"
+
+#ifndef SGD_OPTIMIZER_H
+#define SGD_OPTIMIZER_H
+namespace DeepES{
+
+/*@brief SGDOptimizer.
+ * Implements stochastic gradient descent (optionally with momentum).
+ *
+ *@Args:
+ *     base_lr: learning rate (default: 1e-3).
+ *     momentum: momentum factor (default: 0.9).
+ */
+class SGDOptimizer: public Optimizer {
+public:
+    SGDOptimizer(float base_lr, float momentum=0.9):Optimizer(base_lr), _momentum(momentum) {}
+    ~SGDOptimizer();
+
+protected:
+    void compute_step(float* gradient, int size, std::string param_name);
+
+private:
+    float _momentum;
+    std::map<std::string, float*> _velocity;
+};
+
+}
+
+#endif
diff --git a/deepes/include/torch_predictor.h b/deepes/include/torch_predictor.h
index f17f65f..8001240 100644
--- a/deepes/include/torch_predictor.h
+++ b/deepes/include/torch_predictor.h
@@ -16,7 +16,9 @@
 #define TORCHPREDICTOR_H
 #include <memory>
 #include <string>
-#include "optimizer.h"
+#include <algorithm>
+#include "sgd_optimizer.h"
+#include "adam_optimizer.h"
 #include "utils.h"
 #include "gaussian_sampling.h"
 #include "deepes.pb.h"
@@ -40,7 +42,20 @@ public:
         load_proto_conf(config_path, *_config);
         _sampling_method = std::make_shared<GaussianSampling>();
         _sampling_method->load_config(*_config);
-        _optimizer = std::make_shared<SGDOptimizer>(_config->optimizer().base_lr());
+
+        std::string opt_type = _config->optimizer().type();
+        std::transform(opt_type.begin(),opt_type.end(),opt_type.begin(),::tolower);
+        if (opt_type == "sgd") {
+            _optimizer = std::make_shared<SGDOptimizer>(_config->optimizer().base_lr(), \
+                    _config->optimizer().momentum());
+        }else if (opt_type == "adam") {
+            _optimizer = std::make_shared<AdamOptimizer>(_config->optimizer().base_lr(), \
+                    _config->optimizer().beta1(), \
+                    _config->optimizer().beta2(), \
+                    _config->optimizer().epsilon());
+        }else {
+            // TODO: NotImplementedError
+        }
         _param_size = 0;
         _sampled_model = model;
         param_size();
@@ -111,7 +126,7 @@ public:
         for (auto& param: params) {
             torch::Tensor tensor = param.value().view({-1});
             auto tensor_a = tensor.accessor<float,1>();
-            _optimizer->update(tensor_a, neg_gradients+counter, tensor.size(0));
+            _optimizer->update(tensor_a, neg_gradients+counter, tensor.size(0), param.key());
             counter += tensor.size(0);
         }
         delete[] noise;
diff --git a/deepes/src/adam_optimizer.cc b/deepes/src/adam_optimizer.cc
new file mode 100644
index 0000000..608f916
--- /dev/null
+++ b/deepes/src/adam_optimizer.cc
@@ -0,0 +1,49 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cmath>
+#include "adam_optimizer.h"
+
+namespace DeepES {
+
+AdamOptimizer::~AdamOptimizer() {
+    for (auto iter = _momentum.begin(); iter != _momentum.end(); iter++) {
+        delete[] iter->second;
+    }
+    for (auto iter = _velocity.begin(); iter != _velocity.end(); iter++) {
+        delete[] iter->second;
+    }
+    _momentum.clear();
+    _velocity.clear();
+}
+
+void AdamOptimizer::compute_step(float* gradient, int size, std::string param_name="") {
+    if (_momentum.count(param_name) == 0) {
+        _momentum[param_name] = new float [size];
+        memset(_momentum[param_name], 0, size * sizeof(float));
+    }
+    if (_velocity.count(param_name) == 0) {
+        _velocity[param_name] = new float [size];
+        memset(_velocity[param_name], 0, size * sizeof(float));
+    }
+    int true_update_times = int(_update_times / _velocity.size());
+    float alpha = std::sqrt(1 - std::pow(_beta2, true_update_times)) / (1 - std::pow(_beta1, true_update_times));
+    for (int i = 0; i < size; ++i) {
+        _momentum[param_name][i] = _beta1 * _momentum[param_name][i] + (1 - _beta1) * gradient[i];
+        _velocity[param_name][i] = _beta2 * _velocity[param_name][i] + (1 - _beta2) * gradient[i] * gradient[i];
+        gradient[i] = alpha * _momentum[param_name][i] / (std::sqrt(_velocity[param_name][i]) + _epsilon);
+    }
+}
+
+}//namespace
diff --git a/deepes/src/proto/deepes.proto b/deepes/src/proto/deepes.proto
index 26a97a5..38abee9 100644
--- a/deepes/src/proto/deepes.proto
+++ b/deepes/src/proto/deepes.proto
@@ -31,8 +31,13 @@ message GaussianSamplingConfig {
 
 message OptimizerConfig{
     optional string type = 1 [default = "SGD"];
-    optional float base_lr = 2; // The base learning rate
-    optional float momentum = 3; // The momentum value.
+    optional float base_lr = 2 [default = 1e-3]; // The base learning rate.
+    optional float momentum = 3 [default = 0.9]; // The momentum value for SGD.
+
+    // ------------Adam Optimizer---------
+    optional float beta1 = 4 [default = 0.9];
+    optional float beta2 = 5 [default = 0.999];
+    optional float epsilon = 6 [default = 1e-8];
 }
 
 message SamplingKey{
diff --git a/deepes/src/sgd_optimizer.cc b/deepes/src/sgd_optimizer.cc
new file mode 100644
index 0000000..06a65b6
--- /dev/null
+++ b/deepes/src/sgd_optimizer.cc
@@ -0,0 +1,39 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstring>
+#include "sgd_optimizer.h"
+
+namespace DeepES {
+
+SGDOptimizer::~SGDOptimizer() {
+    for (auto iter = _velocity.begin(); iter != _velocity.end(); iter++) {
+        delete[] iter->second;
+    }
+    _velocity.clear();
+}
+
+void SGDOptimizer::compute_step(float* gradient, int size, std::string param_name="") {
+    if (_velocity.count(param_name) == 0) {
+        _velocity[param_name] = new float [size];
+        memset(_velocity[param_name], 0, size * sizeof(float));
+    }
+    for (int i = 0; i < size; ++i) {
+        _velocity[param_name][i] = _momentum * _velocity[param_name][i] + (1 - _momentum) * gradient[i];
+        gradient[i] = _velocity[param_name][i];
+    }
+}
+
+
+}//namespace
diff --git a/examples/MADDPG/README.md b/examples/MADDPG/README.md
index 55d1914..0bf3a59 100644
--- a/examples/MADDPG/README.md
+++ b/examples/MADDPG/README.md
@@ -98,7 +98,7 @@ simple_world_comm
+ [paddlepaddle>=1.6.1](https://github.com/PaddlePaddle/Paddle) + [parl](https://github.com/PaddlePaddle/PARL) + [multiagent-particle-envs](https://github.com/openai/multiagent-particle-envs) -+ gym ++ gym==0.10.5 ### Start Training: ``` -- GitLab
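
To see the optimizer math added in this patch in isolation, the following standalone C++ sketch applies the same bias-corrected Adam step as AdamOptimizer::compute_step, followed by the weights[i] -= base_lr * gradient[i] step from Optimizer::update, to a small toy parameter vector. It is an illustration only: the toy gradient, the hyperparameter values, and the main() scaffold are not part of the patch or of the DeepES API.

// adam_sketch.cpp -- standalone illustration of the Adam update used in this patch.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    const float base_lr = 0.05f, beta1 = 0.9f, beta2 = 0.999f, epsilon = 1e-8f;
    std::vector<float> weights = {1.0f, -2.0f, 0.5f};
    std::vector<float> momentum(weights.size(), 0.0f);   // running average of the gradient
    std::vector<float> velocity(weights.size(), 0.0f);   // running average of the squared gradient

    for (int t = 1; t <= 3; ++t) {
        // Toy gradient: the current weights, as if minimizing 0.5 * w^2.
        std::vector<float> gradient = weights;

        // Bias-corrected step size, matching AdamOptimizer::compute_step.
        float alpha = std::sqrt(1 - std::pow(beta2, t)) / (1 - std::pow(beta1, t));
        for (size_t i = 0; i < weights.size(); ++i) {
            momentum[i] = beta1 * momentum[i] + (1 - beta1) * gradient[i];
            velocity[i] = beta2 * velocity[i] + (1 - beta2) * gradient[i] * gradient[i];
            gradient[i] = alpha * momentum[i] / (std::sqrt(velocity[i]) + epsilon);
            // Optimizer::update then applies the rescaled gradient to the weights.
            weights[i] -= base_lr * gradient[i];
        }
        std::printf("step %d: w = {%f, %f, %f}\n", t, weights[0], weights[1], weights[2]);
    }
    return 0;
}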