Unverified commit b1cabc2d, authored by rical730 and committed by GitHub

add SGD and Adam Optimizer for DeepES (#222)

* add SGD and Adam Optimizer for DeepES

* update deepes readme

* add warning when update() is called with a different size for the same param

* add error return in update(), add optimizer.cc

* separate SGD and Adam, optimizer type in config is not case sensitive

* delete optimizer.cc

* config optimizer in deepes.proto

* more readable

* update maddpg readme, fixed gym version
Parent 7b5c5241
@@ -26,9 +26,10 @@ predictor->update(noisy_keys, noisy_rewards);
 - **Hand-written networks**
 ## Dependencies:
-- Protobuf >= 2.4.2
-- glog
-- gflag
+- Protobuf2
+- OpenMP
+- [glog](https://github.com/google/glog)
+- [gflag](https://github.com/gflags/gflags/blob/master/INSTALL.md)
 ## Additional dependencies:
......
 seed : 1024
 gaussian_sampling {
-    std: 0.3
+    std: 0.5
 }
 optimizer {
-    type: "SGD",
-    base_lr: 1e-2
+    type: "Adam",
+    base_lr: 0.05,
+    momentum: 0.9,
+    beta1: 0.9,
+    beta2: 0.999,
+    epsilon: 1e-8,
 }
@@ -60,7 +60,7 @@ int main(int argc, char* argv[]) {
   std::vector<float> noisy_rewards(ITER, 0.0f);
   noisy_keys.resize(ITER);
-  for (int epoch = 0; epoch < 10000; ++epoch) {
+  for (int epoch = 0; epoch < 1000; ++epoch) {
 #pragma omp parallel for schedule(dynamic, 1)
     for (int i = 0; i < ITER; ++i) {
       auto noisy_predictor = noisy_predictors[i];
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <map>
#include "optimizer.h"
#ifndef ADAM_OPTIMIZER_H
#define ADAM_OPTIMIZER_H
namespace DeepES{
/*@brief AdamOptimizer.
* Implements Adam algorithm.
*
*@Args:
* base_lr: learning rate (default: 1e-3).
* beta1: coefficient used for computing the running average of the gradient (default: 0.9).
* beta2: coefficient used for computing the running average of the squared gradient (default: 0.999).
* epsilon: term added to the denominator to improve numerical stability (default: 1e-8).
*/
class AdamOptimizer: public Optimizer {
public:
AdamOptimizer(float base_lr, float beta1=0.9, float beta2=0.999, float epsilon=1e-8):Optimizer(base_lr), \
_beta1(beta1), _beta2(beta2), _epsilon(epsilon) {}
~AdamOptimizer();
protected:
void compute_step(float* gradient, int size, std::string param_name);
private:
float _beta1;
float _beta2;
float _epsilon;
std::map<std::string, float*> _momentum;
std::map<std::string, float*> _velocity;
};
}//namespace
#endif
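For orientation, here is a minimal usage sketch of this interface (not part of the commit; it assumes adam_optimizer.cc and glog are compiled and linked in, and the parameter name "fc1.weight" is purely illustrative):

```cpp
#include <vector>
#include "adam_optimizer.h"

int main() {
    // base_lr = 0.05; beta1, beta2 and epsilon fall back to the defaults above.
    DeepES::AdamOptimizer opt(0.05);

    std::vector<float> weights(10, 1.0f);   // one flattened parameter
    std::vector<float> gradient(10, 0.1f);  // its gradient (rewritten in place by compute_step)

    // update() is inherited from Optimizer and applies one Adam step to the
    // parameter registered under the given name.
    opt.update(weights.data(), gradient.data(), static_cast<int>(weights.size()), "fc1.weight");
    return 0;
}
```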
@@ -12,52 +12,65 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include <map>
+#include <glog/logging.h>
 #ifndef OPTIMIZER_H
 #define OPTIMIZER_H
 namespace DeepES{
-/* Base class for optimizers. Subclasses are required to implement the following functions:
+/*@brief Optimizer. Base class for optimizers.
+ *
+ *@Args:
+ * base_lr: learning rate (default: 1e-3).
+ *
+ * .. warning: update() works at the parameter level;
+ * call update() separately for each parameter.
+ *
+ * Subclasses are required to implement the following functions:
  * 1. compute_step
  */
 class Optimizer {
 public:
   Optimizer() : _base_lr(1e-3), _update_times(0) {}
   Optimizer(float base_lr) : _base_lr(base_lr), _update_times(0) {}
+  virtual ~Optimizer() {
+    _params_size.clear();
+  }
   template<typename T>
   bool update(T weights, float* gradient, int size, std::string param_name="") {
-    bool success = true;
+    /*@ Performs a single optimization step (parameter update) at the parameter level.
+     *
+     *@Args:
+     * weights (array): parameter weights.
+     * gradient (array): gradient for updating weights.
+     * size: size of gradient.
+     * param_name: the name corresponding to the weights.
+     */
+    if (_params_size.count(param_name) == 0) {
+      _params_size[param_name] = size;
+    } else if (_params_size[param_name] != size) {
+      LOG(WARNING) << "[Warning] Update times: " << int(_update_times / _params_size.size())
+                   << ". Size of weights[" << param_name << "] is " << _params_size[param_name]
+                   << ", not " << size;
+      return false;
+    }
     ++_update_times;
     compute_step(gradient, size, param_name);
     for (int i = 0; i < size; ++i) {
       weights[i] -= _base_lr * gradient[i];
     }
-    return success;
+    return true;
   } // template function
 protected:
   virtual void compute_step(float* gradient, int size, std::string param_name="") = 0;
   float _base_lr;
   float _update_times;
+  std::map<std::string, int> _params_size;
 };
-class SGDOptimizer: public Optimizer {
-public:
-  SGDOptimizer(float base_lr, float momentum=0.0):Optimizer(base_lr), _momentum(momentum) {}
-protected:
-  void compute_step(float* gradient, int size, std::string param_name="") {
-  }
-private:
-  float _momentum;
-}; //class
-//class AdamOptimizer: public Optimizer {
-//public:
-//  AdamOptimizer(float base)
-//};
 }//namespace
 #endif
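To make the compute_step() contract concrete, here is a hedged sketch of a custom subclass (the class name PlainSGD is illustrative and not part of DeepES): compute_step() may precondition the gradient in place, and update() then applies weights[i] -= _base_lr * gradient[i].

```cpp
#include "optimizer.h"

namespace DeepES {

// Illustrative only: plain gradient descent with no optimizer state.
class PlainSGD : public Optimizer {
public:
    explicit PlainSGD(float base_lr) : Optimizer(base_lr) {}

protected:
    // Leave the gradient untouched; update() will scale it by _base_lr.
    void compute_step(float* gradient, int size, std::string param_name = "") {
        (void)gradient;
        (void)size;
        (void)param_name;
    }
};

}  // namespace DeepES
```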
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <map>
#include "optimizer.h"
#ifndef SGD_OPTIMIZER_H
#define SGD_OPTIMIZER_H
namespace DeepES{
/*@brief SGDOptimizer.
* Implements stochastic gradient descent (optionally with momentum).
*
*@Args:
* base_lr: learning rate (default: 1e-3).
* momentum: momentum factor (default: 0.9).
*/
class SGDOptimizer: public Optimizer {
public:
SGDOptimizer(float base_lr, float momentum=0.9):Optimizer(base_lr), _momentum(momentum) {}
~SGDOptimizer();
protected:
void compute_step(float* gradient, int size, std::string param_name);
private:
float _momentum;
std::map<std::string, float*> _velocity;
};
}
#endif
@@ -16,7 +16,9 @@
 #define TORCHPREDICTOR_H
 #include <memory>
 #include <string>
-#include "optimizer.h"
+#include <algorithm>
+#include "sgd_optimizer.h"
+#include "adam_optimizer.h"
 #include "utils.h"
 #include "gaussian_sampling.h"
 #include "deepes.pb.h"
@@ -40,7 +42,20 @@ public:
     load_proto_conf(config_path, *_config);
     _sampling_method = std::make_shared<GaussianSampling>();
     _sampling_method->load_config(*_config);
-    _optimizer = std::make_shared<SGDOptimizer>(_config->optimizer().base_lr());
+    std::string opt_type = _config->optimizer().type();
+    std::transform(opt_type.begin(), opt_type.end(), opt_type.begin(), ::tolower);
+    if (opt_type == "sgd") {
+      _optimizer = std::make_shared<SGDOptimizer>(_config->optimizer().base_lr(),
+                                                  _config->optimizer().momentum());
+    } else if (opt_type == "adam") {
+      _optimizer = std::make_shared<AdamOptimizer>(_config->optimizer().base_lr(),
+                                                   _config->optimizer().beta1(),
+                                                   _config->optimizer().beta2(),
+                                                   _config->optimizer().epsilon());
+    } else {
+      // TODO: NotImplementedError
+    }
     _param_size = 0;
     _sampled_model = model;
     param_size();
@@ -111,7 +126,7 @@ public:
     for (auto& param: params) {
       torch::Tensor tensor = param.value().view({-1});
       auto tensor_a = tensor.accessor<float,1>();
-      _optimizer->update(tensor_a, neg_gradients+counter, tensor.size(0));
+      _optimizer->update(tensor_a, neg_gradients+counter, tensor.size(0), param.key());
       counter += tensor.size(0);
     }
     delete[] noise;
......
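The lowercase normalization above is what makes the optimizer `type` field case-insensitive. A standalone sketch of just that step (the helper name normalize_type is illustrative):

```cpp
#include <algorithm>
#include <cassert>
#include <cctype>
#include <string>

// Mirrors the lowercase normalization done in the predictor constructor.
static std::string normalize_type(std::string s) {
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    return s;
}

int main() {
    assert(normalize_type("SGD") == "sgd");    // matches the "sgd" branch
    assert(normalize_type("aDaM") == "adam");  // any casing selects AdamOptimizer
    return 0;
}
```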
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <cstring>  // memset
#include "adam_optimizer.h"
namespace DeepES {
AdamOptimizer::~AdamOptimizer() {
for (auto iter = _momentum.begin(); iter != _momentum.end(); iter++) {
delete[] iter->second;
}
for (auto iter = _velocity.begin(); iter != _velocity.end(); iter++) {
delete[] iter->second;
}
_momentum.clear();
_velocity.clear();
}
void AdamOptimizer::compute_step(float* gradient, int size, std::string param_name="") {
if (_momentum.count(param_name) == 0) {
_momentum[param_name] = new float [size];
memset(_momentum[param_name], 0, size * sizeof(float));
}
if (_velocity.count(param_name) == 0) {
_velocity[param_name] = new float [size];
memset(_velocity[param_name], 0, size * sizeof(float));
}
// Bias-correct using the per-parameter update count rather than the global counter.
int true_update_times = int(_update_times / _velocity.size());
float alpha = std::sqrt(1 - std::pow(_beta2, true_update_times)) / (1 - std::pow(_beta1, true_update_times));
for (int i = 0; i < size; ++i) {
_momentum[param_name][i] = _beta1 * _momentum[param_name][i] + (1 - _beta1) * gradient[i];
_velocity[param_name][i] = _beta2 * _velocity[param_name][i] + (1 - _beta2) * gradient[i] * gradient[i];
gradient[i] = alpha * _momentum[param_name][i] / (std::sqrt(_velocity[param_name][i]) + _epsilon);
}
}
}//namespace
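A self-contained numerical sketch of the bias-corrected step above for a single scalar parameter (constants mirror the header defaults; the gradient value 0.37 is arbitrary). On the very first update the applied step is approximately base_lr in the direction of the gradient, a well-known property of Adam:

```cpp
#include <cmath>
#include <cstdio>

int main() {
    const float base_lr = 1e-3f, beta1 = 0.9f, beta2 = 0.999f, epsilon = 1e-8f;
    float w = 0.0f, m = 0.0f, v = 0.0f;
    const float g = 0.37f;  // arbitrary gradient value
    const int t = 1;        // first update of this parameter

    m = beta1 * m + (1 - beta1) * g;
    v = beta2 * v + (1 - beta2) * g * g;
    float alpha = std::sqrt(1 - std::pow(beta2, t)) / (1 - std::pow(beta1, t));
    float step = alpha * m / (std::sqrt(v) + epsilon);

    w -= base_lr * step;  // what Optimizer::update() does with the rescaled gradient
    std::printf("first Adam step: %.6f (close to -base_lr = %.6f)\n", w, -base_lr);
    return 0;
}
```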
@@ -31,8 +31,13 @@ message GaussianSamplingConfig {
 message OptimizerConfig{
   optional string type = 1 [default = "SGD"];
-  optional float base_lr = 2; // The base learning rate
-  optional float momentum = 3; // The momentum value.
+  optional float base_lr = 2 [default = 1e-3]; // The base learning rate.
+  optional float momentum = 3 [default = 0.9]; // The momentum value for SGD.
+  // ------------Adam Optimizer---------
+  optional float beta1 = 4 [default = 0.9];
+  optional float beta2 = 5 [default = 0.999];
+  optional float epsilon = 6 [default = 1e-8];
 }
 message SamplingKey{
......
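Because the new fields carry defaults, a text-format config can set only `type` and `base_lr` and still get beta1 = 0.9, beta2 = 0.999 and epsilon = 1e-8; for example (values illustrative, not taken from the commit):

```
optimizer {
    type: "Adam",
    base_lr: 1e-3,
}
```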
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <cstring>  // memset
#include "sgd_optimizer.h"
namespace DeepES {
SGDOptimizer::~SGDOptimizer() {
for (auto iter = _velocity.begin(); iter != _velocity.end(); iter++) {
delete[] iter->second;
}
_velocity.clear();
}
void SGDOptimizer::compute_step(float* gradient, int size, std::string param_name="") {
if (_velocity.count(param_name) == 0) {
_velocity[param_name] = new float [size];
memset(_velocity[param_name], 0, size * sizeof(float));
}
for (int i = 0; i < size; ++i) {
_velocity[param_name][i] = _momentum * _velocity[param_name][i] + (1 - _momentum) * gradient[i];
gradient[i] = _velocity[param_name][i];
}
}
}//namespace
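A short numerical sketch of the dampened momentum rule in compute_step() above (the step count of 50 is arbitrary): with a constant gradient g, the velocity v = momentum * v + (1 - momentum) * g converges to g itself, so the applied step approaches base_lr * g rather than being amplified by 1 / (1 - momentum):

```cpp
#include <cstdio>

int main() {
    const float momentum = 0.9f, g = 1.0f;
    float v = 0.0f;
    for (int t = 1; t <= 50; ++t) {
        v = momentum * v + (1 - momentum) * g;  // same recurrence as compute_step()
    }
    std::printf("velocity after 50 steps: %.4f (converges to g = %.1f)\n", v, g);
    return 0;
}
```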
@@ -98,7 +98,7 @@ simple_world_comm<br>
 + [paddlepaddle>=1.6.1](https://github.com/PaddlePaddle/Paddle)
 + [parl](https://github.com/PaddlePaddle/PARL)
 + [multiagent-particle-envs](https://github.com/openai/multiagent-particle-envs)
-+ gym
++ gym==0.10.5
 ### Start Training:
 ```
......