未验证 提交 6d23261a 编写于 作者: B Bo Zhou 提交者: GitHub

update code format to meet icode requirement (#243)

* update code format to meet icode requirement

* update torch code
上级 53da94b8
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef ADAM_OPTIMIZER_H #ifndef ADAM_OPTIMIZER_H
#define ADAM_OPTIMIZER_H #define ADAM_OPTIMIZER_H
...@@ -20,7 +19,7 @@ ...@@ -20,7 +19,7 @@
#include <cmath> #include <cmath>
#include "optimizer.h" #include "optimizer.h"
namespace DeepES{ namespace deep_es {
/*@brief AdamOptimizer. /*@brief AdamOptimizer.
* Implements Adam algorithm. * Implements Adam algorithm.
...@@ -33,19 +32,20 @@ namespace DeepES{ ...@@ -33,19 +32,20 @@ namespace DeepES{
*/ */
class AdamOptimizer: public Optimizer { class AdamOptimizer: public Optimizer {
public: public:
AdamOptimizer(float base_lr, float beta1=0.9, float beta2=0.999, float epsilon=1e-8):Optimizer(base_lr), \ AdamOptimizer(float base_lr, float beta1 = 0.9, float beta2 = 0.999,
_beta1(beta1), _beta2(beta2), _epsilon(epsilon) {} float epsilon = 1e-8): Optimizer(base_lr), \
~AdamOptimizer(); _beta1(beta1), _beta2(beta2), _epsilon(epsilon) {}
~AdamOptimizer();
protected: protected:
void compute_step(float* gradient, int size, std::string param_name); void compute_step(float* gradient, int size, std::string param_name);
private: private:
float _beta1; float _beta1;
float _beta2; float _beta2;
float _epsilon; float _epsilon;
std::map<std::string, float*> _momentum; std::map<std::string, float*> _momentum;
std::map<std::string, float*> _velocity; std::map<std::string, float*> _velocity;
}; };
}//namespace }//namespace
......
...@@ -23,54 +23,54 @@ ...@@ -23,54 +23,54 @@
#include "utils.h" #include "utils.h"
#include <glog/logging.h> #include <glog/logging.h>
namespace DeepES{ namespace deep_es {
class CachedGaussianSampling: public SamplingMethod { class CachedGaussianSampling: public SamplingMethod {
public: public:
CachedGaussianSampling(); CachedGaussianSampling();
~CachedGaussianSampling(); ~CachedGaussianSampling();
/*Initialize the sampling algorithm given the config with the protobuf format. /*Initialize the sampling algorithm given the config with the protobuf format.
*DeepES library uses only one configuration file for all sampling algorithms. *DeepES library uses only one configuration file for all sampling algorithms.
A defalut configuration file can be found at: . // TODO: where? A defalut configuration file can be found at: . // TODO: where?
Usally you won't have to modify the configuration items of other algorithms Usally you won't have to modify the configuration items of other algorithms
if you are not using them. if you are not using them.
*/ */
bool load_config(const DeepESConfig& config); bool load_config(const DeepESConfig& config);
/*@brief generate Gaussian noise and the related key. /*@brief generate Gaussian noise and the related key.
* *
*@Args: *@Args:
* key: a unique key associated with the sampled noise. * key: a unique key associated with the sampled noise.
* noise: a pointer pointed to the memory that stores the noise * noise: a pointer pointed to the memory that stores the noise
* size: the number of float to be sampled. * size: the number of float to be sampled.
* *
*@return: *@return:
* success: generate Gaussian successfully or not. * success: generate Gaussian successfully or not.
*/ */
bool sampling(int* key, float* noise, int64_t size); bool sampling(int* key, float* noise, int64_t size);
/*@brief reconstruct the Gaussion noise given the key. /*@brief reconstruct the Gaussion noise given the key.
* This function is often used for updating the neuron network parameters in the offline environment. * This function is often used for updating the neuron network parameters in the offline environment.
* *
*@Args: *@Args:
* key: a unique key associated with the sampled noise. * key: a unique key associated with the sampled noise.
* noise: a pointer pointed to the memory that stores the noise * noise: a pointer pointed to the memory that stores the noise
* size: the number of float to be sampled. * size: the number of float to be sampled.
* *
*@return: *@return:
* success: reconstruct Gaussian successfully or not. * success: reconstruct Gaussian successfully or not.
*/ */
bool resampling(int key, float* noise, int64_t size); bool resampling(int key, float* noise, int64_t size);
private: private:
float _std; float _std;
int _cache_size; int _cache_size;
float* _noise_cache = nullptr; float* _noise_cache = nullptr;
bool _create_noise_cache(); bool _create_noise_cache();
}; };
} }
......
...@@ -22,50 +22,50 @@ ...@@ -22,50 +22,50 @@
#include "sampling_method.h" #include "sampling_method.h"
#include "utils.h" #include "utils.h"
namespace DeepES{ namespace deep_es {
class GaussianSampling: public SamplingMethod { class GaussianSampling: public SamplingMethod {
public: public:
GaussianSampling() {} GaussianSampling() {}
~GaussianSampling() {} ~GaussianSampling() {}
/*Initialize the sampling algorithm given the config with the protobuf format. /*Initialize the sampling algorithm given the config with the protobuf format.
*DeepES library uses only one configuration file for all sampling algorithms. *DeepES library uses only one configuration file for all sampling algorithms.
A defalut configuration file can be found at: . // TODO: where? A defalut configuration file can be found at: . // TODO: where?
Usally you won't have to modify the configuration items of other algorithms Usally you won't have to modify the configuration items of other algorithms
if you are not using them. if you are not using them.
*/ */
bool load_config(const DeepESConfig& config); bool load_config(const DeepESConfig& config);
/*@brief generate Gaussian noise and the related key. /*@brief generate Gaussian noise and the related key.
* *
*@Args: *@Args:
* key: a unique key associated with the sampled noise. * key: a unique key associated with the sampled noise.
* noise: a pointer pointed to the memory that stores the noise * noise: a pointer pointed to the memory that stores the noise
* size: the number of float to be sampled. * size: the number of float to be sampled.
* *
*@return: *@return:
* success: generate Gaussian successfully or not. * success: generate Gaussian successfully or not.
*/ */
bool sampling(int* key, float* noise, int64_t size); bool sampling(int* key, float* noise, int64_t size);
/*@brief reconstruct the Gaussion noise given the key. /*@brief reconstruct the Gaussion noise given the key.
* This function is often used for updating the neuron network parameters in the offline environment. * This function is often used for updating the neuron network parameters in the offline environment.
* *
*@Args: *@Args:
* key: a unique key associated with the sampled noise. * key: a unique key associated with the sampled noise.
* noise: a pointer pointed to the memory that stores the noise * noise: a pointer pointed to the memory that stores the noise
* size: the number of float to be sampled. * size: the number of float to be sampled.
* *
*@return: *@return:
* success: reconstruct Gaussian successfully or not. * success: reconstruct Gaussian successfully or not.
*/ */
bool resampling(int key, float* noise, int64_t size); bool resampling(int key, float* noise, int64_t size);
private: private:
float _std; float _std;
}; };
} }
......
...@@ -18,59 +18,60 @@ ...@@ -18,59 +18,60 @@
#include <map> #include <map>
#include <glog/logging.h> #include <glog/logging.h>
namespace deep_es {
namespace DeepES{ /*@brief Optimizer. Base class for optimizers.
*
/*@brief Optimizer. Base class for optimizers.
*
*@Args: *@Args:
* base_lr: learning rate (default: 1e-3). * base_lr: learning rate (default: 1e-3).
* *
* .. warning: update () is based on the parameter level, * .. warning: update () is based on the parameter level,
* you need to perform update () on each parameter. * you need to perform update () on each parameter.
* *
* Subclasses are required to implement the following functions: * Subclasses are required to implement the following functions:
* 1. compute_steps * 1. compute_steps
*/ */
class Optimizer { class Optimizer {
public: public:
Optimizer() : _base_lr(1e-3), _update_times(0) {} Optimizer() : _base_lr(1e-3), _update_times(0) {}
Optimizer(float base_lr) : _base_lr(base_lr), _update_times(0) {} Optimizer(float base_lr) : _base_lr(base_lr), _update_times(0) {}
virtual ~Optimizer() { virtual ~Optimizer() {
_params_size.clear(); _params_size.clear();
}
template<typename T>
bool update(T weights, float* gradient, int size, std::string param_name="") {
/*@ Performs a single optimization step (parameter update) at the parameter level.
*
*@Args:
* weights (array): parameter weights.
* gradient (array): gradient for updating weights.
* size: size of gradient.
* param_name: the name corresponding to the weights.
*/
if (_params_size.count(param_name) == 0) {
_params_size[param_name] = size;
} else if (_params_size[param_name] != size) {
LOG(WARNING) << "[Warning] Update times: "<< int(_update_times / _params_size.size()) \
<< ". Size of weights[" << param_name << "] is " << _params_size[param_name] << ", not " << size;
return false;
} }
++_update_times; template<typename T>
compute_step(gradient, size, param_name); bool update(T weights, float* gradient, int size, std::string param_name = "") {
for (int i = 0; i < size; ++i) { /*@ Performs a single optimization step (parameter update) at the parameter level.
weights[i] -= _base_lr * gradient[i]; *
} *@Args:
return true; * weights (array): parameter weights.
} // template function * gradient (array): gradient for updating weights.
* size: size of gradient.
* param_name: the name corresponding to the weights.
*/
if (_params_size.count(param_name) == 0) {
_params_size[param_name] = size;
} else if (_params_size[param_name] != size) {
LOG(WARNING) << "[Warning] Update times: " << int(_update_times / _params_size.size()) \
<< ". Size of weights[" << param_name << "] is " << _params_size[param_name] << ", not " << size;
return false;
}
++_update_times;
compute_step(gradient, size, param_name);
for (int i = 0; i < size; ++i) {
weights[i] -= _base_lr * gradient[i];
}
return true;
} // template function
protected: protected:
virtual void compute_step(float* graident, int size, std::string param_name="") = 0; virtual void compute_step(float* graident, int size, std::string param_name = "") = 0;
float _base_lr; float _base_lr;
float _update_times; float _update_times;
std::map<std::string, int> _params_size; std::map<std::string, int> _params_size;
}; };
......
...@@ -23,11 +23,11 @@ ...@@ -23,11 +23,11 @@
#include "deepes.pb.h" #include "deepes.pb.h"
#include <glog/logging.h> #include <glog/logging.h>
namespace DeepES{ namespace deep_es {
/* @brief: create an optimizer according to the configuration /* @brief: create an optimizer according to the configuration
* @args: * @args:
* config: configuration for the optimizer * config: configuration for the optimizer
* *
*/ */
std::shared_ptr<Optimizer> create_optimizer(const OptimizerConfig& optimizer_config); std::shared_ptr<Optimizer> create_optimizer(const OptimizerConfig& optimizer_config);
......
...@@ -23,11 +23,11 @@ ...@@ -23,11 +23,11 @@
#include "deepes.pb.h" #include "deepes.pb.h"
#include <glog/logging.h> #include <glog/logging.h>
namespace DeepES{ namespace deep_es {
/* @brief: create a sampling_method according to the configuration /* @brief: create a sampling_method according to the configuration
* @args: * @args:
* config: configuration for the DeepES * config: configuration for the DeepES
* *
*/ */
std::shared_ptr<SamplingMethod> create_sampling_method(const DeepESConfig& Config); std::shared_ptr<SamplingMethod> create_sampling_method(const DeepESConfig& Config);
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#include <random> #include <random>
#include "deepes.pb.h" #include "deepes.pb.h"
namespace DeepES{ namespace deep_es {
/*Base class for sampling algorithms. All algorithms are required to override the following functions: /*Base class for sampling algorithms. All algorithms are required to override the following functions:
* *
...@@ -30,59 +30,59 @@ namespace DeepES{ ...@@ -30,59 +30,59 @@ namespace DeepES{
* View a demonstrative algorithm in gaussian_sampling.h * View a demonstrative algorithm in gaussian_sampling.h
* */ * */
class SamplingMethod{ class SamplingMethod {
public: public:
SamplingMethod(): _seed(0) {} SamplingMethod(): _seed(0) {}
virtual ~SamplingMethod() {} virtual ~SamplingMethod() {}
/*Initialize the sampling algorithm given the config with the protobuf format. /*Initialize the sampling algorithm given the config with the protobuf format.
*DeepES library uses only one configuration file for all sampling algorithms. *DeepES library uses only one configuration file for all sampling algorithms.
A defalut configuration file can be found at: . // TODO: where? A defalut configuration file can be found at: . // TODO: where?
Usally you won't have to modify the configuration items of other algorithms Usally you won't have to modify the configuration items of other algorithms
if you are not using them. if you are not using them.
*/ */
virtual bool load_config(const DeepESConfig& config)=0; virtual bool load_config(const DeepESConfig& config) = 0;
/*@brief generate Gaussian noise and the related key. /*@brief generate Gaussian noise and the related key.
* *
*@Args: *@Args:
* key: a unique key associated with the sampled noise. * key: a unique key associated with the sampled noise.
* noise: a pointer pointed to the memory that stores the noise * noise: a pointer pointed to the memory that stores the noise
* size: the number of float to be sampled. * size: the number of float to be sampled.
* *
*@return: *@return:
* success: generate Gaussian successfully or not. * success: generate Gaussian successfully or not.
*/ */
virtual bool sampling(int* key, float* noise, int64_t size)=0; virtual bool sampling(int* key, float* noise, int64_t size) = 0;
/*@brief reconstruct the Gaussion noise given the key. /*@brief reconstruct the Gaussion noise given the key.
* This function is often used for updating the neuron network parameters in the offline environment. * This function is often used for updating the neuron network parameters in the offline environment.
* *
*@Args: *@Args:
* key: a unique key associated with the sampled noise. * key: a unique key associated with the sampled noise.
* noise: a pointer pointed to the memory that stores the noise * noise: a pointer pointed to the memory that stores the noise
* size: the number of float to be sampled. * size: the number of float to be sampled.
* *
*@return: *@return:
* success: reconstruct Gaussian successfully or not. * success: reconstruct Gaussian successfully or not.
*/ */
virtual bool resampling(int key, float* noise, int64_t size)=0; virtual bool resampling(int key, float* noise, int64_t size) = 0;
bool set_seed(int seed) { bool set_seed(int seed) {
_seed = seed; _seed = seed;
srand(_seed); srand(_seed);
return true; return true;
} }
int get_seed() { int get_seed() {
return _seed; return _seed;
} }
protected: protected:
int _seed; int _seed;
}; };
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#include <cmath> #include <cmath>
#include "optimizer.h" #include "optimizer.h"
namespace DeepES{ namespace deep_es {
/*@brief SGDOptimizer. /*@brief SGDOptimizer.
* Implements stochastic gradient descent (optionally with momentum). * Implements stochastic gradient descent (optionally with momentum).
...@@ -30,15 +30,15 @@ namespace DeepES{ ...@@ -30,15 +30,15 @@ namespace DeepES{
*/ */
class SGDOptimizer: public Optimizer { class SGDOptimizer: public Optimizer {
public: public:
SGDOptimizer(float base_lr, float momentum=0.9):Optimizer(base_lr), _momentum(momentum) {} SGDOptimizer(float base_lr, float momentum = 0.9): Optimizer(base_lr), _momentum(momentum) {}
~SGDOptimizer(); ~SGDOptimizer();
protected: protected:
void compute_step(float* gradient, int size, std::string param_name); void compute_step(float* gradient, int size, std::string param_name);
private: private:
float _momentum; float _momentum;
std::map<std::string, float*> _velocity; std::map<std::string, float*> _velocity;
}; };
} }
......
...@@ -23,13 +23,13 @@ ...@@ -23,13 +23,13 @@
#include <google/protobuf/text_format.h> #include <google/protobuf/text_format.h>
#include <fstream> #include <fstream>
namespace DeepES{ namespace deep_es {
/*Return ranks that are normalized to [-0.5, 0.5] with the rewards as input. /*Return ranks that are normalized to [-0.5, 0.5] with the rewards as input.
Args: Args:
reward: an array of rewards reward: an array of rewards
*/ */
bool compute_centered_ranks(std::vector<float> &reward); bool compute_centered_ranks(std::vector<float>& reward);
std::string read_file(const std::string& filename); std::string read_file(const std::string& filename);
...@@ -37,50 +37,58 @@ std::string read_file(const std::string& filename); ...@@ -37,50 +37,58 @@ std::string read_file(const std::string& filename);
* Args: * Args:
* config_file: file path. * config_file: file path.
* proto_config: protobuff message for configuration. * proto_config: protobuff message for configuration.
* return * return
*/ */
template<typename T> template<typename T>
bool load_proto_conf(const std::string& config_file, T& proto_config) { bool load_proto_conf(const std::string& config_file, T& proto_config) {
bool success = true; bool success = true;
std::ifstream fin(config_file); std::ifstream fin(config_file);
if (!fin || fin.fail()) {
LOG(ERROR) << "open prototxt config failed: " << config_file; if (!fin || fin.fail()) {
success = false; LOG(ERROR) << "open prototxt config failed: " << config_file;
} else { success = false;
fin.seekg(0, std::ios::end); } else {
size_t file_size = fin.tellg(); fin.seekg(0, std::ios::end);
fin.seekg(0, std::ios::beg); size_t file_size = fin.tellg();
fin.seekg(0, std::ios::beg);
char* file_content_buffer = new char[file_size];
fin.read(file_content_buffer, file_size); char* file_content_buffer = new char[file_size];
fin.read(file_content_buffer, file_size);
std::string proto_str(file_content_buffer, file_size);
if (!google::protobuf::TextFormat::ParseFromString(proto_str, &proto_config)) { std::string proto_str(file_content_buffer, file_size);
LOG(ERROR) << "Failed to load config: " << config_file;
success = false; if (!google::protobuf::TextFormat::ParseFromString(proto_str, &proto_config)) {
LOG(ERROR) << "Failed to load config: " << config_file;
success = false;
}
delete[] file_content_buffer;
fin.close();
} }
delete[] file_content_buffer;
fin.close(); return success;
}
return success;
} }
template<typename T> template<typename T>
bool save_proto_conf(const std::string& config_file, T&proto_config) { bool save_proto_conf(const std::string& config_file, T& proto_config) {
bool success = true; bool success = true;
std::ofstream ofs(config_file, std::ofstream::out); std::ofstream ofs(config_file, std::ofstream::out);
if (!ofs || ofs.fail()) {
LOG(ERROR) << "open prototxt config failed: " << config_file; if (!ofs || ofs.fail()) {
success = false; LOG(ERROR) << "open prototxt config failed: " << config_file;
} else { success = false;
std::string config_str; } else {
success = google::protobuf::TextFormat::PrintToString(proto_config, &config_str); std::string config_str;
if (!success) { success = google::protobuf::TextFormat::PrintToString(proto_config, &config_str);
return success;
if (!success) {
return success;
}
ofs << config_str;
} }
ofs << config_str;
} return success;
return success;
} }
std::vector<std::string> list_all_model_dirs(std::string path); std::vector<std::string> list_all_model_dirs(std::string path);
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
syntax = "proto2"; syntax = "proto2";
package DeepES; package deep_es;
message DeepESConfig { message DeepESConfig {
//sampling configuration //sampling configuration
......
...@@ -14,35 +14,42 @@ ...@@ -14,35 +14,42 @@
#include "adam_optimizer.h" #include "adam_optimizer.h"
namespace DeepES { namespace deep_es {
// Releases the per-parameter moment buffers allocated by compute_step().
AdamOptimizer::~AdamOptimizer() {
    for (auto& entry : _momentum) {
        delete[] entry.second;
    }

    _momentum.clear();

    for (auto& entry : _velocity) {
        delete[] entry.second;
    }

    _velocity.clear();
}
void AdamOptimizer::compute_step(float* gradient, int size, std::string param_name="") { void AdamOptimizer::compute_step(float* gradient, int size, std::string param_name = "") {
if (_momentum.count(param_name) == 0) { if (_momentum.count(param_name) == 0) {
_momentum[param_name] = new float [size]; _momentum[param_name] = new float [size];
memset(_momentum[param_name], 0, size * sizeof(float)); memset(_momentum[param_name], 0, size * sizeof(float));
} }
if (_velocity.count(param_name) == 0) {
_velocity[param_name] = new float [size]; if (_velocity.count(param_name) == 0) {
memset(_velocity[param_name], 0, size * sizeof(float)); _velocity[param_name] = new float [size];
} memset(_velocity[param_name], 0, size * sizeof(float));
int true_update_times = int(_update_times / _velocity.size()); }
float alpha = std::sqrt(1 - std::pow(_beta2, _update_times)) / (1 - std::pow(_beta1, _update_times));
for (int i = 0; i < size; ++i) { int true_update_times = int(_update_times / _velocity.size());
_momentum[param_name][i] = _beta1 * _momentum[param_name][i] + (1 - _beta1) * gradient[i]; float alpha = std::sqrt(1 - std::pow(_beta2, _update_times)) / (1 - std::pow(_beta1,
_velocity[param_name][i] = _beta2 * _velocity[param_name][i] + (1 - _beta2) * gradient[i] * gradient[i]; _update_times));
gradient[i] = alpha * _momentum[param_name][i] / (std::sqrt(_velocity[param_name][i]) + _epsilon);
} for (int i = 0; i < size; ++i) {
_momentum[param_name][i] = _beta1 * _momentum[param_name][i] + (1 - _beta1) * gradient[i];
_velocity[param_name][i] = _beta2 * _velocity[param_name][i] + (1 - _beta2) * gradient[i] *
gradient[i];
gradient[i] = alpha * _momentum[param_name][i] / (std::sqrt(_velocity[param_name][i]) + _epsilon);
}
} }
}//namespace }//namespace
...@@ -14,90 +14,107 @@ ...@@ -14,90 +14,107 @@
#include "cached_gaussian_sampling.h" #include "cached_gaussian_sampling.h"
namespace DeepES{ namespace deep_es {
// Nothing to set up here: the cache is built by load_config().
CachedGaussianSampling::CachedGaussianSampling() {}

// Releases the noise cache; deleting the initial nullptr is a no-op.
CachedGaussianSampling::~CachedGaussianSampling() {
    delete[] _noise_cache;
}
bool CachedGaussianSampling::load_config(const DeepESConfig& config) { bool CachedGaussianSampling::load_config(const DeepESConfig& config) {
bool success = true; bool success = true;
_std = config.gaussian_sampling().std(); _std = config.gaussian_sampling().std();
success = set_seed(config.seed()); success = set_seed(config.seed());
CHECK(success) << "[DeepES] Fail to set seed while load config."; CHECK(success) << "[DeepES] Fail to set seed while load config.";
_cache_size = config.gaussian_sampling().cache_size(); _cache_size = config.gaussian_sampling().cache_size();
_noise_cache = new float [_cache_size]; _noise_cache = new float [_cache_size];
memset(_noise_cache, 0, _cache_size * sizeof(float)); memset(_noise_cache, 0, _cache_size * sizeof(float));
success = _create_noise_cache(); success = _create_noise_cache();
CHECK(success) << "[DeepES] Fail to create noise_cache while load config."; CHECK(success) << "[DeepES] Fail to create noise_cache while load config.";
return success; return success;
} }
bool CachedGaussianSampling::sampling(int* key, float* noise, int64_t size) { bool CachedGaussianSampling::sampling(int* key, float* noise, int64_t size) {
bool success = true; bool success = true;
if (_noise_cache == nullptr) {
LOG(ERROR) << "[DeepES] Please use load_config() first."; if (_noise_cache == nullptr) {
success = false; LOG(ERROR) << "[DeepES] Please use load_config() first.";
return success; success = false;
} return success;
if (noise == nullptr) { }
LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr.";
success = false; if (noise == nullptr) {
return success; LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr.";
} success = false;
if ((size >= _cache_size) || (size < 0)) { return success;
LOG(ERROR) << "[DeepES] Input size " << size << " is out of bounds [0, " << _cache_size << "), cache_size: " << _cache_size; }
success = false;
if ((size >= _cache_size) || (size < 0)) {
LOG(ERROR) << "[DeepES] Input size " << size << " is out of bounds [0, " << _cache_size <<
"), cache_size: " << _cache_size;
success = false;
return success;
}
int rand_key = rand();
std::default_random_engine generator(rand_key);
std::uniform_int_distribution<unsigned int> uniform(0, _cache_size - size);
int index = uniform(generator);
*key = index;
for (int64_t i = 0; i < size; ++i) {
*(noise + i) = *(_noise_cache + index + i);
}
return success; return success;
}
int rand_key = rand();
std::default_random_engine generator(rand_key);
std::uniform_int_distribution<unsigned int> uniform(0, _cache_size - size);
int index = uniform(generator);
*key = index;
for (int64_t i = 0; i < size; ++i) {
*(noise + i) = *(_noise_cache + index + i);
}
return success;
} }
bool CachedGaussianSampling::resampling(int key, float* noise, int64_t size) { bool CachedGaussianSampling::resampling(int key, float* noise, int64_t size) {
bool success = true; bool success = true;
if (_noise_cache == nullptr) {
LOG(ERROR) << "[DeepES] Please use load_config() first."; if (_noise_cache == nullptr) {
success = false; LOG(ERROR) << "[DeepES] Please use load_config() first.";
return success; success = false;
} return success;
if (noise == nullptr) { }
LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr.";
success = false; if (noise == nullptr) {
return success; LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr.";
} success = false;
if ((size >= _cache_size) || (size < 0)) { return success;
LOG(ERROR) << "[DeepES] Input size " << size << " is out of bounds [0, " << _cache_size << "), cache_size: " << _cache_size; }
success = false;
return success; if ((size >= _cache_size) || (size < 0)) {
} LOG(ERROR) << "[DeepES] Input size " << size << " is out of bounds [0, " << _cache_size <<
if ((key > _cache_size - size) || (key < 0)) { "), cache_size: " << _cache_size;
LOG(ERROR) << "[DeepES] Resampling key " << key << " is out of bounds [0, " << _cache_size - size << "], cache_size: " << _cache_size << ", size: " << size; success = false;
success = false; return success;
}
if ((key > _cache_size - size) || (key < 0)) {
LOG(ERROR) << "[DeepES] Resampling key " << key << " is out of bounds [0, " << _cache_size - size <<
"], cache_size: " << _cache_size << ", size: " << size;
success = false;
return success;
}
for (int64_t i = 0; i < size; ++i) {
*(noise + i) = *(_noise_cache + key + i);
}
return success; return success;
}
for (int64_t i = 0; i < size; ++i) {
*(noise + i) = *(_noise_cache + key + i);
}
return success;
} }
bool CachedGaussianSampling::_create_noise_cache() { bool CachedGaussianSampling::_create_noise_cache() {
std::default_random_engine generator(_seed); std::default_random_engine generator(_seed);
std::normal_distribution<float> norm; std::normal_distribution<float> norm;
for (int64_t i = 0; i < _cache_size; ++i) {
*(_noise_cache + i) = norm(generator) * _std; for (int64_t i = 0; i < _cache_size; ++i) {
} *(_noise_cache + i) = norm(generator) * _std;
return true; }
return true;
} }
} }
...@@ -14,45 +14,52 @@ ...@@ -14,45 +14,52 @@
#include "gaussian_sampling.h" #include "gaussian_sampling.h"
namespace DeepES{ namespace deep_es {
bool GaussianSampling::load_config(const DeepESConfig& config) { bool GaussianSampling::load_config(const DeepESConfig& config) {
bool success = true; bool success = true;
_std = config.gaussian_sampling().std(); _std = config.gaussian_sampling().std();
success = set_seed(config.seed()); success = set_seed(config.seed());
return success; return success;
} }
bool GaussianSampling::sampling(int* key, float* noise, int64_t size) { bool GaussianSampling::sampling(int* key, float* noise, int64_t size) {
bool success = true; bool success = true;
if (noise == nullptr) {
LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr."; if (noise == nullptr) {
success = false; LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr.";
success = false;
return success;
}
int rand_key = rand();
*key = rand_key;
std::default_random_engine generator(rand_key);
std::normal_distribution<float> norm;
for (int64_t i = 0; i < size; ++i) {
*(noise + i) = norm(generator) * _std;
}
return success; return success;
}
int rand_key = rand();
*key = rand_key;
std::default_random_engine generator(rand_key);
std::normal_distribution<float> norm;
for (int64_t i = 0; i < size; ++i) {
*(noise + i) = norm(generator) * _std;
}
return success;
} }
bool GaussianSampling::resampling(int key, float* noise, int64_t size) { bool GaussianSampling::resampling(int key, float* noise, int64_t size) {
bool success = true; bool success = true;
if (noise == nullptr) {
LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr."; if (noise == nullptr) {
success = false; LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr.";
} else { success = false;
std::default_random_engine generator(key); } else {
std::normal_distribution<float> norm; std::default_random_engine generator(key);
for (int64_t i = 0; i < size; ++i) { std::normal_distribution<float> norm;
*(noise + i) = norm(generator) * _std;
for (int64_t i = 0; i < size; ++i) {
*(noise + i) = norm(generator) * _std;
}
} }
}
return success; return success;
} }
} }
...@@ -14,25 +14,26 @@ ...@@ -14,25 +14,26 @@
#include "optimizer_factory.h" #include "optimizer_factory.h"
namespace DeepES{ namespace deep_es {
std::shared_ptr<Optimizer> create_optimizer(const OptimizerConfig& optimizer_config) { std::shared_ptr<Optimizer> create_optimizer(const OptimizerConfig& optimizer_config) {
std::shared_ptr<Optimizer> optimizer; std::shared_ptr<Optimizer> optimizer;
std::string opt_type = optimizer_config.type(); std::string opt_type = optimizer_config.type();
std::transform(opt_type.begin(), opt_type.end(), opt_type.begin(), ::tolower); std::transform(opt_type.begin(), opt_type.end(), opt_type.begin(), ::tolower);
if (opt_type == "sgd") {
optimizer = std::make_shared<SGDOptimizer>(optimizer_config.base_lr(), \ if (opt_type == "sgd") {
optimizer_config.momentum()); optimizer = std::make_shared<SGDOptimizer>(optimizer_config.base_lr(), \
} else if (opt_type == "adam") { optimizer_config.momentum());
optimizer = std::make_shared<AdamOptimizer>(optimizer_config.base_lr(), \ } else if (opt_type == "adam") {
optimizer_config.beta1(), \ optimizer = std::make_shared<AdamOptimizer>(optimizer_config.base_lr(), \
optimizer_config.beta2(), \ optimizer_config.beta1(), \
optimizer_config.epsilon()); optimizer_config.beta2(), \
} else { optimizer_config.epsilon());
LOG(ERROR) << "type of OptimizerConfig must be SGD or Adam."; // NotImplementedError } else {
} LOG(ERROR) << "type of OptimizerConfig must be SGD or Adam."; // NotImplementedError
return optimizer; }
return optimizer;
} }
}//namespace }//namespace
...@@ -14,26 +14,28 @@ ...@@ -14,26 +14,28 @@
#include "sampling_factory.h" #include "sampling_factory.h"
namespace DeepES{ namespace deep_es {
std::shared_ptr<SamplingMethod> create_sampling_method(const DeepESConfig& config) { std::shared_ptr<SamplingMethod> create_sampling_method(const DeepESConfig& config) {
std::shared_ptr<SamplingMethod> sampling_method; std::shared_ptr<SamplingMethod> sampling_method;
bool cached = config.gaussian_sampling().cached(); bool cached = config.gaussian_sampling().cached();
if (cached) {
sampling_method = std::make_shared<CachedGaussianSampling>(); if (cached) {
} else { sampling_method = std::make_shared<CachedGaussianSampling>();
sampling_method = std::make_shared<GaussianSampling>(); } else {
} sampling_method = std::make_shared<GaussianSampling>();
}
bool success = sampling_method->load_config(config);
if(success) { bool success = sampling_method->load_config(config);
return sampling_method;
} else { if (success) {
LOG(ERROR) << "[DeepES] Fail to create sampling_method"; return sampling_method;
return nullptr; } else {
} LOG(ERROR) << "[DeepES] Fail to create sampling_method";
return nullptr;
}
} }
}//namespace }//namespace
...@@ -14,24 +14,26 @@ ...@@ -14,24 +14,26 @@
#include "sgd_optimizer.h" #include "sgd_optimizer.h"
namespace DeepES { namespace deep_es {
// Frees every velocity buffer held in _velocity, then empties the map.
SGDOptimizer::~SGDOptimizer() {
    for (auto& entry : _velocity) {
        delete[] entry.second;
    }

    _velocity.clear();
}
void SGDOptimizer::compute_step(float* gradient, int size, std::string param_name="") { void SGDOptimizer::compute_step(float* gradient, int size, std::string param_name = "") {
if (_velocity.count(param_name) == 0) { if (_velocity.count(param_name) == 0) {
_velocity[param_name] = new float [size]; _velocity[param_name] = new float [size];
memset(_velocity[param_name], 0, size * sizeof(float)); memset(_velocity[param_name], 0, size * sizeof(float));
} }
for (int i = 0; i < size; ++i) {
_velocity[param_name][i] = _momentum * _velocity[param_name][i] + (1 - _momentum) * gradient[i]; for (int i = 0; i < size; ++i) {
gradient[i] = _velocity[param_name][i]; _velocity[param_name][i] = _momentum * _velocity[param_name][i] + (1 - _momentum) * gradient[i];
} gradient[i] = _velocity[param_name][i];
}
} }
......
...@@ -15,54 +15,67 @@ ...@@ -15,54 +15,67 @@
#include "utils.h" #include "utils.h"
#include <dirent.h> #include <dirent.h>
namespace DeepES { namespace deep_es {
/**
 * @brief Replace every reward, in place, by its centered rank.
 *
 * The smallest reward becomes -0.5 and each following rank (ascending order)
 * is larger by 1 / (n - 1), so the largest reward becomes 0.5. Equal rewards
 * are ordered by their original index.
 *
 * @param reward rewards to transform; an empty vector is left untouched and a
 *               single reward becomes -0.5.
 * @return always true.
 */
bool compute_centered_ranks(std::vector<float>& reward) {
    const size_t count = reward.size();

    if (count == 0) {
        // Nothing to rank; also avoids the unsigned underflow of count - 1.
        return true;
    }

    std::vector<std::pair<float, int>> reward_index;
    reward_index.reserve(count);

    for (size_t i = 0; i < count; ++i) {
        reward_index.push_back(std::make_pair(reward[i], static_cast<int>(i)));
    }

    std::sort(reward_index.begin(), reward_index.end());

    // With a single element there is no gap to compute (would divide by zero).
    const float gap = count > 1 ? static_cast<float>(1.0 / (count - 1)) : 0.0f;
    float normalized_rank = -0.5;

    for (size_t i = 0; i < count; ++i) {
        reward[reward_index[i].second] = normalized_rank;
        normalized_rank += gap;
    }

    return true;
}
/**
 * @brief List the entries of `path` whose name contains "model_iter_id".
 *
 * @param path directory to scan.
 * @return "<path>/<entry>" for every matching entry, in readdir order; empty
 *         when the directory cannot be opened.
 */
std::vector<std::string> list_all_model_dirs(std::string path) {
    std::vector<std::string> model_dirs;
    DIR* dpdf = opendir(path.c_str());

    if (dpdf == nullptr) {
        // Nothing was opened, so closedir must not be called.
        return model_dirs;
    }

    struct dirent* epdf = nullptr;

    while ((epdf = readdir(dpdf)) != nullptr) {
        const std::string dir(epdf->d_name);

        if (dir.find("model_iter_id") != std::string::npos) {
            model_dirs.push_back(path + "/" + dir);
        }
    }

    closedir(dpdf);
    return model_dirs;
}
std::string read_file(const std::string& filename) { std::string read_file(const std::string& filename) {
std::ifstream ifile(filename.c_str()); std::ifstream ifile(filename.c_str());
if (!ifile.is_open()) {
LOG(ERROR) << "Open file: [" << filename << "] failed."; if (!ifile.is_open()) {
return ""; LOG(ERROR) << "Open file: [" << filename << "] failed.";
} return "";
std::ostringstream buf; }
char ch;
while (buf && ifile.get(ch)) buf.put(ch); std::ostringstream buf;
ifile.close(); char ch = '\n';
return buf.str();
while (buf && ifile.get(ch)) {
buf.put(ch);
}
ifile.close();
return buf.str();
} }
}//namespace }//namespace
seed: 1024 seed: 1024
gaussian_sampling { gaussian_sampling {
std: 0.5 std: 0.5
cached: true cached: true
cache_size : 100000 cache_size : 100000
} }
optimizer { optimizer {
type: "Adam" type: "Adam"
base_lr: 0.05 base_lr: 0.05
...@@ -12,6 +14,7 @@ optimizer { ...@@ -12,6 +14,7 @@ optimizer {
beta2: 0.999 beta2: 0.999
epsilon: 1e-08 epsilon: 1e-08
} }
async_es { async_es {
model_iter_id: 0 model_iter_id: 0
} }
...@@ -19,104 +19,116 @@ ...@@ -19,104 +19,116 @@
#include "async_es_agent.h" #include "async_es_agent.h"
#include "paddle_api.h" #include "paddle_api.h"
using namespace DeepES; using namespace deep_es;
using namespace paddle::lite_api; using namespace paddle::lite_api;
const int ITER = 10; const int ITER = 10;
// Use PaddlePredictor of CartPole model to predict the action. // Use PaddlePredictor of CartPole model to predict the action.
std::vector<float> forward(std::shared_ptr<PaddlePredictor> predictor, const float* obs) { std::vector<float> forward(std::shared_ptr<PaddlePredictor> predictor, const float* obs) {
std::unique_ptr<Tensor> input_tensor(std::move(predictor->GetInput(0))); std::unique_ptr<Tensor> input_tensor(std::move(predictor->GetInput(0)));
input_tensor->Resize({1, 4}); input_tensor->Resize({1, 4});
input_tensor->CopyFromCpu(obs); input_tensor->CopyFromCpu(obs);
predictor->Run(); predictor->Run();
std::vector<float> probs(2, 0.0); std::vector<float> probs(2, 0.0);
std::unique_ptr<const Tensor> output_tensor( std::unique_ptr<const Tensor> output_tensor(
std::move(predictor->GetOutput(0))); std::move(predictor->GetOutput(0)));
output_tensor->CopyToCpu(probs.data()); output_tensor->CopyToCpu(probs.data());
return probs; return probs;
} }
// Returns the index of the first largest element of vec (0 for an empty vector).
int arg_max(const std::vector<float>& vec) {
    const auto best = std::max_element(vec.begin(), vec.end());
    return static_cast<int>(std::distance(vec.begin(), best));
}
/**
 * @brief Play one episode, always taking the highest-scoring action.
 *
 * @param env   environment; reset at the start and stepped until it is done.
 * @param agent agent whose predictor chooses the actions.
 * @return the sum of the rewards collected during the episode.
 */
float evaluate(CartPole& env, std::shared_ptr<AsyncESAgent> agent) {
    float total_reward = 0.0;
    env.reset();
    const float* obs = env.getState();
    std::shared_ptr<PaddlePredictor> paddle_predictor = agent->get_predictor();

    for (;;) {
        const std::vector<float> probs = forward(paddle_predictor, obs);
        env.step(arg_max(probs));
        total_reward += env.getReward();

        if (env.isDone()) {
            return total_reward;
        }

        obs = env.getState();
    }
}
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
std::vector<CartPole> envs; std::vector<CartPole> envs;
for (int i = 0; i < ITER; ++i) {
envs.push_back(CartPole());
}
std::shared_ptr<AsyncESAgent> agent = std::make_shared<AsyncESAgent>("../demo/paddle/cartpole_init_model",
"../demo/cartpole_config.prototxt");
// Clone agents to sample (explore).
std::vector< std::shared_ptr<AsyncESAgent> > sampling_agents;
for (int i = 0; i < ITER; ++i) {
sampling_agents.push_back(agent->clone());
}
std::vector<SamplingInfo> noisy_info;
std::vector<SamplingInfo> last_noisy_info;
std::vector<float> noisy_rewards(ITER, 0.0f);
std::vector<float> last_noisy_rewards;
noisy_info.resize(ITER);
omp_set_num_threads(10);
for (int epoch = 0; epoch < 100; ++epoch) {
last_noisy_info.clear();
last_noisy_rewards.clear();
if (epoch != 0) {
for (int i = 0; i < ITER; ++i){
last_noisy_info.push_back(noisy_info[i]);
last_noisy_rewards.push_back(noisy_rewards[i]);
}
}
#pragma omp parallel for schedule(dynamic, 1)
for (int i = 0; i < ITER; ++i) {
std::shared_ptr<AsyncESAgent> sampling_agent = sampling_agents[i];
SamplingInfo info;
bool success = sampling_agent->add_noise(info);
float reward = evaluate(envs[i], sampling_agent);
noisy_info[i] = info; for (int i = 0; i < ITER; ++i) {
noisy_rewards[i] = reward; envs.push_back(CartPole());
} }
for (int i = 0; i < ITER; ++i){ std::shared_ptr<AsyncESAgent> agent =
last_noisy_info.push_back(noisy_info[i]); std::make_shared<AsyncESAgent>("./demo/paddle/cartpole_init_model",
last_noisy_rewards.push_back(noisy_rewards[i]); "./demo/cartpole_config.prototxt");
// Clone agents to sample (explore).
std::vector< std::shared_ptr<AsyncESAgent> > sampling_agents;
for (int i = 0; i < ITER; ++i) {
sampling_agents.push_back(agent->clone());
} }
// NOTE: all parameters of sampling_agents will be updated std::vector<SamplingInfo> noisy_info;
bool success = agent->update(last_noisy_info, last_noisy_rewards); std::vector<SamplingInfo> last_noisy_info;
std::vector<float> noisy_rewards(ITER, 0.0f);
int reward = evaluate(envs[0], agent); std::vector<float> last_noisy_rewards;
LOG(INFO) << "Epoch:" << epoch << " Reward: " << reward; noisy_info.resize(ITER);
}
omp_set_num_threads(10);
for (int epoch = 0; epoch < 100; ++epoch) {
last_noisy_info.clear();
last_noisy_rewards.clear();
if (epoch != 0) {
for (int i = 0; i < ITER; ++i) {
last_noisy_info.push_back(noisy_info[i]);
last_noisy_rewards.push_back(noisy_rewards[i]);
}
}
#pragma omp parallel for schedule(dynamic, 1)
for (int i = 0; i < ITER; ++i) {
std::shared_ptr<AsyncESAgent> sampling_agent = sampling_agents[i];
SamplingInfo info;
bool success = sampling_agent->add_noise(info);
float reward = evaluate(envs[i], sampling_agent);
noisy_info[i] = info;
noisy_rewards[i] = reward;
}
for (int i = 0; i < ITER; ++i) {
last_noisy_info.push_back(noisy_info[i]);
last_noisy_rewards.push_back(noisy_rewards[i]);
}
// NOTE: all parameters of sampling_agents will be updated
bool success = agent->update(last_noisy_info, last_noisy_rewards);
int reward = evaluate(envs[0], agent);
LOG(INFO) << "Epoch:" << epoch << " Reward: " << reward;
}
} }
...@@ -19,89 +19,97 @@ ...@@ -19,89 +19,97 @@
#include "es_agent.h" #include "es_agent.h"
#include "paddle_api.h" #include "paddle_api.h"
using namespace DeepES; using namespace deep_es;
using namespace paddle::lite_api; using namespace paddle::lite_api;
const int ITER = 10; const int ITER = 10;
// Use PaddlePredictor of CartPole model to predict the action. // Use PaddlePredictor of CartPole model to predict the action.
std::vector<float> forward(std::shared_ptr<PaddlePredictor> predictor, const float* obs) { std::vector<float> forward(std::shared_ptr<PaddlePredictor> predictor, const float* obs) {
std::unique_ptr<Tensor> input_tensor(std::move(predictor->GetInput(0))); std::unique_ptr<Tensor> input_tensor(std::move(predictor->GetInput(0)));
input_tensor->Resize({1, 4}); input_tensor->Resize({1, 4});
input_tensor->CopyFromCpu(obs); input_tensor->CopyFromCpu(obs);
predictor->Run(); predictor->Run();
std::vector<float> probs(2, 0.0); std::vector<float> probs(2, 0.0);
std::unique_ptr<const Tensor> output_tensor( std::unique_ptr<const Tensor> output_tensor(
std::move(predictor->GetOutput(0))); std::move(predictor->GetOutput(0)));
output_tensor->CopyToCpu(probs.data()); output_tensor->CopyToCpu(probs.data());
return probs; return probs;
} }
// Returns the index of the first largest element of vec (0 for an empty vector).
int arg_max(const std::vector<float>& vec) {
    const auto best = std::max_element(vec.begin(), vec.end());
    return static_cast<int>(std::distance(vec.begin(), best));
}
/**
 * @brief Play one episode, always taking the highest-scoring action.
 *
 * @param env   environment; reset at the start and stepped until it is done.
 * @param agent agent whose predictor chooses the actions.
 * @return the sum of the rewards collected during the episode.
 */
float evaluate(CartPole& env, std::shared_ptr<ESAgent> agent) {
    float total_reward = 0.0;
    env.reset();
    const float* obs = env.getState();
    std::shared_ptr<PaddlePredictor> paddle_predictor = agent->get_predictor();

    for (;;) {
        const std::vector<float> probs = forward(paddle_predictor, obs);
        env.step(arg_max(probs));
        total_reward += env.getReward();

        if (env.isDone()) {
            return total_reward;
        }

        obs = env.getState();
    }
}
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
std::vector<CartPole> envs; std::vector<CartPole> envs;
for (int i = 0; i < ITER; ++i) {
envs.push_back(CartPole());
}
std::shared_ptr<ESAgent> agent = std::make_shared<ESAgent>("../demo/paddle/cartpole_init_model",
"../demo/cartpole_config.prototxt");
// Clone agents to sample (explore).
std::vector< std::shared_ptr<ESAgent> > sampling_agents;
for (int i = 0; i < ITER; ++i) {
sampling_agents.push_back(agent->clone());
}
std::vector<SamplingInfo> noisy_keys;
std::vector<float> noisy_rewards(ITER, 0.0f);
noisy_keys.resize(ITER);
omp_set_num_threads(10);
for (int epoch = 0; epoch < 100; ++epoch) {
#pragma omp parallel for schedule(dynamic, 1)
for (int i = 0; i < ITER; ++i) { for (int i = 0; i < ITER; ++i) {
std::shared_ptr<ESAgent> sampling_agent = sampling_agents[i]; envs.push_back(CartPole());
SamplingInfo key; }
bool success = sampling_agent->add_noise(key);
float reward = evaluate(envs[i], sampling_agent); std::shared_ptr<ESAgent> agent = std::make_shared<ESAgent>("./demo/paddle/cartpole_init_model",
"./demo/cartpole_config.prototxt");
// Clone agents to sample (explore).
std::vector< std::shared_ptr<ESAgent> > sampling_agents;
noisy_keys[i] = key; for (int i = 0; i < ITER; ++i) {
noisy_rewards[i] = reward; sampling_agents.push_back(agent->clone());
} }
// NOTE: all parameters of sampling_agents will be updated std::vector<SamplingInfo> noisy_keys;
bool success = agent->update(noisy_keys, noisy_rewards); std::vector<float> noisy_rewards(ITER, 0.0f);
noisy_keys.resize(ITER);
int reward = evaluate(envs[0], agent);
LOG(INFO) << "Epoch:" << epoch << " Reward: " << reward; omp_set_num_threads(10);
}
for (int epoch = 0; epoch < 100; ++epoch) {
#pragma omp parallel for schedule(dynamic, 1)
for (int i = 0; i < ITER; ++i) {
std::shared_ptr<ESAgent> sampling_agent = sampling_agents[i];
SamplingInfo key;
bool success = sampling_agent->add_noise(key);
float reward = evaluate(envs[i], sampling_agent);
noisy_keys[i] = key;
noisy_rewards[i] = reward;
}
// NOTE: all parameters of sampling_agents will be updated
bool success = agent->update(noisy_keys, noisy_rewards);
int reward = evaluate(envs[0], agent);
LOG(INFO) << "Epoch:" << epoch << " Reward: " << reward;
}
} }
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#include <map> #include <map>
#include <stdlib.h> #include <stdlib.h>
namespace DeepES{ namespace deep_es {
/* DeepES agent with PaddleLite as backend. This agent supports asynchronous update. /* DeepES agent with PaddleLite as backend. This agent supports asynchronous update.
* Users mainly focus on the following functions: * Users mainly focus on the following functions:
* 1. clone: clone an agent for multi-thread evaluation * 1. clone: clone an agent for multi-thread evaluation
...@@ -27,10 +27,10 @@ namespace DeepES{ ...@@ -27,10 +27,10 @@ namespace DeepES{
* 3. update: update parameters given data collected during evaluation. * 3. update: update parameters given data collected during evaluation.
*/ */
class AsyncESAgent: public ESAgent { class AsyncESAgent: public ESAgent {
public: public:
AsyncESAgent() {} AsyncESAgent() {}
~AsyncESAgent(); ~AsyncESAgent();
/** /**
* @args: * @args:
...@@ -39,9 +39,9 @@ class AsyncESAgent: public ESAgent { ...@@ -39,9 +39,9 @@ class AsyncESAgent: public ESAgent {
* Note that AsyncESAgent will update the configuration file after calling the update function. * Note that AsyncESAgent will update the configuration file after calling the update function.
* Please use the up-to-date configuration. * Please use the up-to-date configuration.
*/ */
AsyncESAgent( AsyncESAgent(
const std::string& model_dir, const std::string& model_dir,
const std::string& config_path); const std::string& config_path);
/** /**
* @brief: Clone an agent for sampling. * @brief: Clone an agent for sampling.
...@@ -58,7 +58,7 @@ class AsyncESAgent: public ESAgent { ...@@ -58,7 +58,7 @@ class AsyncESAgent: public ESAgent {
std::vector<SamplingInfo>& noisy_info, std::vector<SamplingInfo>& noisy_info,
std::vector<float>& noisy_rewards); std::vector<float>& noisy_rewards);
private: private:
std::map<int, std::shared_ptr<PaddlePredictor>> _previous_predictors; std::map<int, std::shared_ptr<PaddlePredictor>> _previous_predictors;
std::map<int, float*> _param_delta; std::map<int, float*> _param_delta;
std::string _config_path; std::string _config_path;
...@@ -76,7 +76,7 @@ class AsyncESAgent: public ESAgent { ...@@ -76,7 +76,7 @@ class AsyncESAgent: public ESAgent {
/** /**
* @brief: remove expired models to avoid overuse of disk space. * @brief: remove expired models to avoid overuse of disk space.
* @args: * @args:
* max_to_keep: the maximum number of models to keep locally. * max_to_keep: the maximum number of models to keep locally.
*/ */
bool _remove_expired_model(int max_to_keep); bool _remove_expired_model(int max_to_keep);
......
...@@ -22,11 +22,13 @@ ...@@ -22,11 +22,13 @@
#include "deepes.pb.h" #include "deepes.pb.h"
#include <vector> #include <vector>
using namespace paddle::lite_api; namespace deep_es {
namespace DeepES { typedef paddle::lite_api::PaddlePredictor PaddlePredictor;
typedef paddle::lite_api::CxxConfig CxxConfig;
typedef paddle::lite_api::Tensor Tensor;
int64_t ShapeProduction(const shape_t& shape); int64_t ShapeProduction(const paddle::lite_api::shape_t& shape);
/** /**
* @brief DeepES agent with PaddleLite as backend. * @brief DeepES agent with PaddleLite as backend.
...@@ -37,65 +39,63 @@ int64_t ShapeProduction(const shape_t& shape); ...@@ -37,65 +39,63 @@ int64_t ShapeProduction(const shape_t& shape);
* *
*/ */
class ESAgent { class ESAgent {
public: public:
ESAgent() {} ESAgent() {}
~ESAgent(); ~ESAgent();
ESAgent(const std::string& model_dir, const std::string& config_path); ESAgent(const std::string& model_dir, const std::string& config_path);
/** /**
* @breif Clone a sampling agent * @breif Clone a sampling agent
* *
* Only cloned ESAgent can call `add_noise` function. * Only cloned ESAgent can call `add_noise` function.
* Each cloned ESAgent will have a copy of original parameters. * Each cloned ESAgent will have a copy of original parameters.
* (support sampling in multi-thread way) * (support sampling in multi-thread way)
*/ */
std::shared_ptr<ESAgent> clone(); std::shared_ptr<ESAgent> clone();
/** /**
* @brief Update parameters of predictor based on ES algorithm. * @brief Update parameters of predictor based on ES algorithm.
* *
* Only not cloned ESAgent can call `update` function. * Only not cloned ESAgent can call `update` function.
* Parameters of cloned agents will also be updated. * Parameters of cloned agents will also be updated.
*/ */
bool update( bool update(
std::vector<SamplingInfo>& noisy_info, std::vector<SamplingInfo>& noisy_info,
std::vector<float>& noisy_rewards); std::vector<float>& noisy_rewards);
// copied parameters = original parameters + noise // copied parameters = original parameters + noise
bool add_noise(SamplingInfo& sampling_info); bool add_noise(SamplingInfo& sampling_info);
/** /**
* @brief Get paddle predict * @brief Get paddle predict
* *
* if _is_sampling_agent is true, will return predictor with added noise; * if _is_sampling_agent is true, will return predictor with added noise;
* if _is_sampling_agent is false, will return predictor without added noise. * if _is_sampling_agent is false, will return predictor without added noise.
*/ */
std::shared_ptr<PaddlePredictor> get_predictor(); std::shared_ptr<PaddlePredictor> get_predictor();
// get param size of model // get param size of model
int64_t param_size() { int64_t param_size() {
return _param_size; return _param_size;
} }
protected:
int64_t _calculate_param_size();
protected:
int64_t _calculate_param_size(); std::shared_ptr<PaddlePredictor> _predictor;
std::shared_ptr<PaddlePredictor> _sampling_predictor;
std::shared_ptr<PaddlePredictor> _predictor; std::shared_ptr<SamplingMethod> _sampling_method;
std::shared_ptr<PaddlePredictor> _sampling_predictor; std::shared_ptr<Optimizer> _optimizer;
std::shared_ptr<SamplingMethod> _sampling_method; std::shared_ptr<DeepESConfig> _config;
std::shared_ptr<Optimizer> _optimizer; std::shared_ptr<CxxConfig> _cxx_config;
std::shared_ptr<DeepESConfig> _config; std::vector<std::string> _param_names;
std::shared_ptr<CxxConfig> _cxx_config; // malloc memory of noise and neg_gradients in advance.
std::vector<std::string> _param_names; float* _noise;
// malloc memory of noise and neg_gradients in advance. float* _neg_gradients;
float* _noise; int64_t _param_size;
float* _neg_gradients; bool _is_sampling_agent;
int64_t _param_size;
bool _is_sampling_agent;
}; };
} }
......
...@@ -13,241 +13,276 @@ ...@@ -13,241 +13,276 @@
// limitations under the License. // limitations under the License.
#include "async_es_agent.h" #include "async_es_agent.h"
namespace DeepES { namespace deep_es {
// Builds the base ESAgent from the model and configuration, and remembers the
// configuration path for later use.
AsyncESAgent::AsyncESAgent(
    const std::string& model_dir,
    const std::string& config_path)
    : ESAgent(model_dir, config_path), _config_path(config_path) {}
// Frees every parameter-delta buffer stored in _param_delta.
AsyncESAgent::~AsyncESAgent() {
    for (auto it = _param_delta.begin(); it != _param_delta.end(); ++it) {
        delete[] it->second;
    }
}
bool AsyncESAgent::_save() { bool AsyncESAgent::_save() {
bool success = true; using namespace paddle::lite_api;
if (_is_sampling_agent) { bool success = true;
LOG(ERROR) << "[DeepES] Cloned AsyncESAgent cannot call `save`.Please use original AsyncESAgent.";
success = false; if (_is_sampling_agent) {
return success; LOG(ERROR) << "[DeepES] Cloned AsyncESAgent cannot call `save`.Please use original AsyncESAgent.";
} success = false;
int model_iter_id = _config->async_es().model_iter_id() + 1; return success;
//current time }
time_t rawtime;
struct tm * timeinfo; int model_iter_id = _config->async_es().model_iter_id() + 1;
char buffer[80]; //current time
time_t rawtime;
time (&rawtime); struct tm* timeinfo;
timeinfo = localtime(&rawtime); char buffer[80];
std::string model_name = "model_iter_id-"+ std::to_string(model_iter_id); time(&rawtime);
std::string model_path = _config->async_es().model_warehouse() + "/" + model_name; timeinfo = localtime(&rawtime);
LOG(INFO) << "[save]model_path: " << model_path;
_predictor->SaveOptimizedModel(model_path, LiteModelType::kProtobuf); std::string model_name = "model_iter_id-" + std::to_string(model_iter_id);
// save config std::string model_path = _config->async_es().model_warehouse() + "/" + model_name;
auto async_es = _config->mutable_async_es(); LOG(INFO) << "[save]model_path: " << model_path;
async_es->set_model_iter_id(model_iter_id); _predictor->SaveOptimizedModel(model_path, LiteModelType::kProtobuf);
success = save_proto_conf(_config_path, *_config); // save config
if (!success) { auto async_es = _config->mutable_async_es();
LOG(ERROR) << "[]unable to save config for AsyncESAgent"; async_es->set_model_iter_id(model_iter_id);
success = false; success = save_proto_conf(_config_path, *_config);
if (!success) {
LOG(ERROR) << "[]unable to save config for AsyncESAgent";
success = false;
return success;
}
int max_to_keep = _config->async_es().max_to_keep();
success = _remove_expired_model(max_to_keep);
return success; return success;
}
int max_to_keep = _config->async_es().max_to_keep();
success = _remove_expired_model(max_to_keep);
return success;
} }
bool AsyncESAgent::_remove_expired_model(int max_to_keep) { bool AsyncESAgent::_remove_expired_model(int max_to_keep) {
bool success = true; bool success = true;
std::string model_path = _config->async_es().model_warehouse(); std::string model_path = _config->async_es().model_warehouse();
std::vector<std::string> model_dirs = list_all_model_dirs(model_path); std::vector<std::string> model_dirs = list_all_model_dirs(model_path);
int model_iter_id = _config->async_es().model_iter_id() + 1; int model_iter_id = _config->async_es().model_iter_id() + 1;
for (const auto& dir: model_dirs) {
int dir_model_iter_id = _parse_model_iter_id(dir); for (const auto& dir : model_dirs) {
if (model_iter_id - dir_model_iter_id >= max_to_keep) { int dir_model_iter_id = _parse_model_iter_id(dir);
std::string rm_command = std::string("rm -rf ") + dir;
int ret = system(rm_command.c_str()); if (model_iter_id - dir_model_iter_id >= max_to_keep) {
if (ret == 0) { std::string rm_command = std::string("rm -rf ") + dir;
LOG(INFO) << "[DeepES] remove expired Model: " << dir; int ret = system(rm_command.c_str());
} else {
LOG(ERROR) << "[DeepES] fail to remove expired Model: " << dir; if (ret == 0) {
success = false; LOG(INFO) << "[DeepES] remove expired Model: " << dir;
return success; } else {
} LOG(ERROR) << "[DeepES] fail to remove expired Model: " << dir;
success = false;
return success;
}
}
} }
}
return success; return success;
} }
bool AsyncESAgent::_compute_model_diff() { bool AsyncESAgent::_compute_model_diff() {
bool success = true; bool success = true;
for (const auto& kv: _previous_predictors) {
int model_iter_id = kv.first; for (const auto& kv : _previous_predictors) {
std::shared_ptr<PaddlePredictor> old_predictor = kv.second; int model_iter_id = kv.first;
float* diff = new float[_param_size]; std::shared_ptr<PaddlePredictor> old_predictor = kv.second;
memset(diff, 0, _param_size * sizeof(float)); float* diff = new float[_param_size];
int offset = 0; memset(diff, 0, _param_size * sizeof(float));
for (const std::string& param_name: _param_names) { int offset = 0;
auto des_tensor = old_predictor->GetTensor(param_name);
auto src_tensor = _predictor->GetTensor(param_name); for (const std::string& param_name : _param_names) {
const float* des_data = des_tensor->data<float>(); auto des_tensor = old_predictor->GetTensor(param_name);
const float* src_data = src_tensor->data<float>(); auto src_tensor = _predictor->GetTensor(param_name);
int64_t tensor_size = ShapeProduction(src_tensor->shape()); const float* des_data = des_tensor->data<float>();
for (int i = 0; i < tensor_size; ++i) { const float* src_data = src_tensor->data<float>();
diff[i + offset] = des_data[i] - src_data[i]; int64_t tensor_size = ShapeProduction(src_tensor->shape());
}
offset += tensor_size; for (int i = 0; i < tensor_size; ++i) {
diff[i + offset] = des_data[i] - src_data[i];
}
offset += tensor_size;
}
_param_delta[model_iter_id] = diff;
} }
_param_delta[model_iter_id] = diff;
} return success;
return success;
} }
bool AsyncESAgent::_load() { bool AsyncESAgent::_load() {
bool success = true; bool success = true;
std::string model_path = _config->async_es().model_warehouse(); std::string model_path = _config->async_es().model_warehouse();
std::vector<std::string> model_dirs = list_all_model_dirs(model_path); std::vector<std::string> model_dirs = list_all_model_dirs(model_path);
if (model_dirs.size() == 0) {
int model_iter_id = _config->async_es().model_iter_id(); if (model_dirs.size() == 0) {
success = model_iter_id == 0 ? true: false; int model_iter_id = _config->async_es().model_iter_id();
if (!success) { success = model_iter_id == 0 ? true : false;
LOG(WARNING) << "[DeepES] current_model_iter_id is nonzero, but no model is \
if (!success) {
LOG(WARNING) << "[DeepES] current_model_iter_id is nonzero, but no model is \
found at the dir: " << model_path; found at the dir: " << model_path;
}
return success;
} }
return success;
} for (auto& dir : model_dirs) {
for(auto &dir: model_dirs) { int model_iter_id = _parse_model_iter_id(dir);
int model_iter_id = _parse_model_iter_id(dir);
if (model_iter_id == -1) { if (model_iter_id == -1) {
LOG(WARNING) << "[DeepES] fail to parse model_iter_id: " << dir; LOG(WARNING) << "[DeepES] fail to parse model_iter_id: " << dir;
success = false; success = false;
return success; return success;
} }
std::shared_ptr<PaddlePredictor> predictor = _load_previous_model(dir);
if (predictor == nullptr) { std::shared_ptr<PaddlePredictor> predictor = _load_previous_model(dir);
success = false;
LOG(WARNING) << "[DeepES] fail to load model: " << dir; if (predictor == nullptr) {
return success; success = false;
LOG(WARNING) << "[DeepES] fail to load model: " << dir;
return success;
}
_previous_predictors[model_iter_id] = predictor;
} }
_previous_predictors[model_iter_id] = predictor;
} success = _compute_model_diff();
success = _compute_model_diff(); return success;
return success;
} }
std::shared_ptr<PaddlePredictor> AsyncESAgent::_load_previous_model(std::string model_dir) { std::shared_ptr<PaddlePredictor> AsyncESAgent::_load_previous_model(std::string model_dir) {
// 1. Create CxxConfig using namespace paddle::lite_api;
CxxConfig config; // 1. Create CxxConfig
config.set_model_file(model_dir + "/model"); CxxConfig config;
config.set_param_file(model_dir + "/params"); config.set_model_file(model_dir + "/model");
config.set_valid_places({ config.set_param_file(model_dir + "/params");
Place{TARGET(kX86), PRECISION(kFloat)}, config.set_valid_places({
Place{TARGET(kHost), PRECISION(kFloat)} Place{TARGET(kX86), PRECISION(kFloat)},
}); Place{TARGET(kHost), PRECISION(kFloat)}
});
// 2. Create PaddlePredictor by CxxConfig
std::shared_ptr<PaddlePredictor> predictor = CreatePaddlePredictor<CxxConfig>(config); // 2. Create PaddlePredictor by CxxConfig
return predictor; std::shared_ptr<PaddlePredictor> predictor = CreatePaddlePredictor<CxxConfig>(config);
return predictor;
} }
std::shared_ptr<AsyncESAgent> AsyncESAgent::clone() { std::shared_ptr<AsyncESAgent> AsyncESAgent::clone() {
std::shared_ptr<AsyncESAgent> new_agent = std::make_shared<AsyncESAgent>(); std::shared_ptr<AsyncESAgent> new_agent = std::make_shared<AsyncESAgent>();
float* noise = new float [_param_size]; float* noise = new float [_param_size];
new_agent->_predictor = _predictor; new_agent->_predictor = _predictor;
new_agent->_sampling_predictor = CreatePaddlePredictor<CxxConfig>(*_cxx_config); new_agent->_sampling_predictor = paddle::lite_api::CreatePaddlePredictor<CxxConfig>(*_cxx_config);
new_agent->_is_sampling_agent = true; new_agent->_is_sampling_agent = true;
new_agent->_sampling_method = _sampling_method; new_agent->_sampling_method = _sampling_method;
new_agent->_param_names = _param_names; new_agent->_param_names = _param_names;
new_agent->_param_size = _param_size; new_agent->_param_size = _param_size;
new_agent->_config = _config; new_agent->_config = _config;
new_agent->_noise = noise; new_agent->_noise = noise;
return new_agent; return new_agent;
} }
bool AsyncESAgent::update( bool AsyncESAgent::update(
std::vector<SamplingInfo>& noisy_info, std::vector<SamplingInfo>& noisy_info,
std::vector<float>& noisy_rewards) { std::vector<float>& noisy_rewards) {
CHECK(!_is_sampling_agent) << "[DeepES] Cloned ESAgent cannot call update function. \ CHECK(!_is_sampling_agent) << "[DeepES] Cloned ESAgent cannot call update function. \
Please use original ESAgent."; Please use original ESAgent.";
bool success = _load(); bool success = _load();
CHECK(success) << "[DeepES] fail to load previous models."; CHECK(success) << "[DeepES] fail to load previous models.";
int current_model_iter_id = _config->async_es().model_iter_id(); int current_model_iter_id = _config->async_es().model_iter_id();
// validate model_iter_id for each sample before the update
for (int i = 0; i < noisy_info.size(); ++i) { // validate model_iter_id for each sample before the update
int model_iter_id = noisy_info[i].model_iter_id(); for (int i = 0; i < noisy_info.size(); ++i) {
if (model_iter_id != current_model_iter_id int model_iter_id = noisy_info[i].model_iter_id();
&& _previous_predictors.count(model_iter_id) == 0) {
LOG(WARNING) << "[DeepES] The sample with model_dir_id: " << model_iter_id \ if (model_iter_id != current_model_iter_id
<< " cannot match any local model"; && _previous_predictors.count(model_iter_id) == 0) {
success = false; LOG(WARNING) << "[DeepES] The sample with model_dir_id: " << model_iter_id \
return success; << " cannot match any local model";
success = false;
return success;
}
}
compute_centered_ranks(noisy_rewards);
memset(_neg_gradients, 0, _param_size * sizeof(float));
for (int i = 0; i < noisy_info.size(); ++i) {
int key = noisy_info[i].key(0);
float reward = noisy_rewards[i];
int model_iter_id = noisy_info[i].model_iter_id();
bool success = _sampling_method->resampling(key, _noise, _param_size);
CHECK(success) << "[DeepES] resampling error occurs at sample: " << i;
float* delta = _param_delta[model_iter_id];
// compute neg_gradients
if (model_iter_id == current_model_iter_id) {
for (int64_t j = 0; j < _param_size; ++j) {
_neg_gradients[j] += _noise[j] * reward;
}
} else {
for (int64_t j = 0; j < _param_size; ++j) {
_neg_gradients[j] += (_noise[j] + delta[j]) * reward;
}
}
}
for (int64_t j = 0; j < _param_size; ++j) {
_neg_gradients[j] /= -1.0 * noisy_info.size();
} }
}
//update
compute_centered_ranks(noisy_rewards); int64_t counter = 0;
memset(_neg_gradients, 0, _param_size * sizeof(float));
for (std::string param_name : _param_names) {
for (int i = 0; i < noisy_info.size(); ++i) { std::unique_ptr<Tensor> tensor = _predictor->GetMutableTensor(param_name);
int key = noisy_info[i].key(0); float* tensor_data = tensor->mutable_data<float>();
float reward = noisy_rewards[i]; int64_t tensor_size = ShapeProduction(tensor->shape());
int model_iter_id = noisy_info[i].model_iter_id(); _optimizer->update(tensor_data, _neg_gradients + counter, tensor_size, param_name);
bool success = _sampling_method->resampling(key, _noise, _param_size); counter += tensor_size;
CHECK(success) << "[DeepES] resampling error occurs at sample: " << i;
float* delta = _param_delta[model_iter_id];
// compute neg_gradients
if (model_iter_id == current_model_iter_id) {
for (int64_t j = 0; j < _param_size; ++j) {
_neg_gradients[j] += _noise[j] * reward;
}
} else {
for (int64_t j = 0; j < _param_size; ++j) {
_neg_gradients[j] += (_noise[j] + delta[j]) * reward;
}
} }
}
for (int64_t j = 0; j < _param_size; ++j) { success = _save();
_neg_gradients[j] /= -1.0 * noisy_info.size(); CHECK(success) << "[DeepES] fail to save model.";
} return true;
//update
int64_t counter = 0;
for (std::string param_name: _param_names) {
std::unique_ptr<Tensor> tensor = _predictor->GetMutableTensor(param_name);
float* tensor_data = tensor->mutable_data<float>();
int64_t tensor_size = ShapeProduction(tensor->shape());
_optimizer->update(tensor_data, _neg_gradients + counter, tensor_size, param_name);
counter += tensor_size;
}
success = _save();
CHECK(success) << "[DeepES] fail to save model.";
return true;
} }
/**
 * @brief Extract the model iteration id from the end of a model path.
 *
 * The id is the decimal number formed by the run of digits at the very end
 * of `model_path`.
 *
 * @param model_path path whose trailing digits encode the iteration id.
 * @return the parsed id, or -1 when the path does not end with a digit or
 *         the number does not fit into an int.
 */
int AsyncESAgent::_parse_model_iter_id(const std::string& model_path) {
    // Find where the trailing run of digits starts.
    std::string::size_type first_digit = model_path.size();

    while (first_digit > 0
            && model_path[first_digit - 1] >= '0'
            && model_path[first_digit - 1] <= '9') {
        --first_digit;
    }

    if (first_digit == model_path.size()) {
        return -1;
    }

    // Accumulate left to right in a wider type; checking after every digit
    // keeps the accumulator far from its own limit and avoids signed overflow
    // on long digit runs.
    int64_t model_iter_id = 0;

    for (std::string::size_type i = first_digit; i < model_path.size(); ++i) {
        model_iter_id = model_iter_id * 10 + (model_path[i] - '0');

        if (model_iter_id != static_cast<int>(model_iter_id)) {
            return -1;
        }
    }

    return static_cast<int>(model_iter_id);
}
}//namespace }//namespace
...@@ -15,153 +15,171 @@ ...@@ -15,153 +15,171 @@
#include "es_agent.h" #include "es_agent.h"
#include <ctime> #include <ctime>
namespace DeepES { namespace deep_es {
/**
 * @brief Multiply all dimensions of a tensor shape.
 *
 * @param shape the dimensions of a tensor.
 * @return the product of the dimensions; 1 for an empty shape.
 */
int64_t ShapeProduction(const paddle::lite_api::shape_t& shape) {
    int64_t element_count = 1;

    for (auto dim = shape.begin(); dim != shape.end(); ++dim) {
        element_count *= *dim;
    }

    return element_count;
}
/**
 * @brief Release the raw buffers held by the agent.
 *
 * The noise buffer is always released. The gradient buffer is released only
 * for a non-sampling agent: clone() does not allocate one for the agents it
 * creates.
 */
ESAgent::~ESAgent() {
    const bool owns_gradients = !_is_sampling_agent;

    if (owns_gradients) {
        delete[] _neg_gradients;
    }

    delete[] _noise;
}
ESAgent::ESAgent(const std::string& model_dir, const std::string& config_path) { ESAgent::ESAgent(const std::string& model_dir, const std::string& config_path) {
// 1. Create CxxConfig using namespace paddle::lite_api;
_cxx_config = std::make_shared<CxxConfig>(); // 1. Create CxxConfig
std::string model_path = model_dir + "/model"; _cxx_config = std::make_shared<CxxConfig>();
std::string param_path = model_dir + "/param"; std::string model_path = model_dir + "/model";
std::string model_buffer = read_file(model_path); std::string param_path = model_dir + "/param";
std::string param_buffer = read_file(param_path); std::string model_buffer = read_file(model_path);
_cxx_config->set_model_buffer(model_buffer.c_str(), model_buffer.size(), std::string param_buffer = read_file(param_path);
param_buffer.c_str(), param_buffer.size()); _cxx_config->set_model_buffer(model_buffer.c_str(), model_buffer.size(),
_cxx_config->set_valid_places({ param_buffer.c_str(), param_buffer.size());
Place{TARGET(kX86), PRECISION(kFloat)}, _cxx_config->set_valid_places({
Place{TARGET(kHost), PRECISION(kFloat)} Place{TARGET(kX86), PRECISION(kFloat)},
}); Place{TARGET(kHost), PRECISION(kFloat)}
});
_predictor = CreatePaddlePredictor<CxxConfig>(*_cxx_config);
_predictor = CreatePaddlePredictor<CxxConfig>(*_cxx_config);
_is_sampling_agent = false;
// Original agent can't be used to sample, so keep it same with _predictor for evaluating. _is_sampling_agent = false;
_sampling_predictor = _predictor; // Original agent can't be used to sample, so keep it same with _predictor for evaluating.
_sampling_predictor = _predictor;
_config = std::make_shared<DeepESConfig>();
load_proto_conf(config_path, *_config); _config = std::make_shared<DeepESConfig>();
load_proto_conf(config_path, *_config);
_sampling_method = create_sampling_method(*_config);
_optimizer = create_optimizer(_config->optimizer());
_param_names = _predictor->GetParamNames();
_param_size = _calculate_param_size();
_noise = new float [_param_size];
_neg_gradients = new float [_param_size];
}
_sampling_method = create_sampling_method(*_config); std::shared_ptr<ESAgent> ESAgent::clone() {
if (_is_sampling_agent) {
LOG(ERROR) << "[DeepES] only original ESAgent can call `clone` function.";
return nullptr;
}
_optimizer = create_optimizer(_config->optimizer()); std::shared_ptr<ESAgent> new_agent = std::make_shared<ESAgent>();
_param_names = _predictor->GetParamNames(); float* noise = new float [_param_size];
_param_size = _calculate_param_size();
_noise = new float [_param_size]; new_agent->_sampling_predictor = paddle::lite_api::CreatePaddlePredictor<CxxConfig>(*_cxx_config);
_neg_gradients = new float [_param_size]; new_agent->_predictor = _predictor;
} new_agent->_cxx_config = _cxx_config;
new_agent->_is_sampling_agent = true;
new_agent->_sampling_method = _sampling_method;
new_agent->_param_names = _param_names;
new_agent->_config = _config;
new_agent->_param_size = _param_size;
new_agent->_noise = noise;
std::shared_ptr<ESAgent> ESAgent::clone() { return new_agent;
if (_is_sampling_agent) {
LOG(ERROR) << "[DeepES] only original ESAgent can call `clone` function.";
return nullptr;
}
std::shared_ptr<ESAgent> new_agent = std::make_shared<ESAgent>();
float* noise = new float [_param_size];
new_agent->_sampling_predictor = CreatePaddlePredictor<CxxConfig>(*_cxx_config);
new_agent->_predictor = _predictor;
new_agent->_cxx_config = _cxx_config;
new_agent->_is_sampling_agent = true;
new_agent->_sampling_method = _sampling_method;
new_agent->_param_names = _param_names;
new_agent->_config = _config;
new_agent->_param_size = _param_size;
new_agent->_noise = noise;
return new_agent;
} }
bool ESAgent::update( bool ESAgent::update(
std::vector<SamplingInfo>& noisy_info, std::vector<SamplingInfo>& noisy_info,
std::vector<float>& noisy_rewards) { std::vector<float>& noisy_rewards) {
if (_is_sampling_agent) { if (_is_sampling_agent) {
LOG(ERROR) << "[DeepES] Cloned ESAgent cannot call update function, please use original ESAgent."; LOG(ERROR) << "[DeepES] Cloned ESAgent cannot call update function, please use original ESAgent.";
return false; return false;
} }
compute_centered_ranks(noisy_rewards); compute_centered_ranks(noisy_rewards);
memset(_neg_gradients, 0, _param_size * sizeof(float)); memset(_neg_gradients, 0, _param_size * sizeof(float));
for (int i = 0; i < noisy_info.size(); ++i) {
int key = noisy_info[i].key(0); for (int i = 0; i < noisy_info.size(); ++i) {
float reward = noisy_rewards[i]; int key = noisy_info[i].key(0);
bool success = _sampling_method->resampling(key, _noise, _param_size); float reward = noisy_rewards[i];
CHECK(success) << "[DeepES] resampling error occurs at sample: " << i; bool success = _sampling_method->resampling(key, _noise, _param_size);
CHECK(success) << "[DeepES] resampling error occurs at sample: " << i;
for (int64_t j = 0; j < _param_size; ++j) {
_neg_gradients[j] += _noise[j] * reward;
}
}
for (int64_t j = 0; j < _param_size; ++j) { for (int64_t j = 0; j < _param_size; ++j) {
_neg_gradients[j] += _noise[j] * reward; _neg_gradients[j] /= -1.0 * noisy_info.size();
}
//update
int64_t counter = 0;
for (std::string param_name : _param_names) {
std::unique_ptr<Tensor> tensor = _predictor->GetMutableTensor(param_name);
float* tensor_data = tensor->mutable_data<float>();
int64_t tensor_size = ShapeProduction(tensor->shape());
_optimizer->update(tensor_data, _neg_gradients + counter, tensor_size, param_name);
counter += tensor_size;
} }
}
for (int64_t j = 0; j < _param_size; ++j) { return true;
_neg_gradients[j] /= -1.0 * noisy_info.size();
}
//update
int64_t counter = 0;
for (std::string param_name: _param_names) {
std::unique_ptr<Tensor> tensor = _predictor->GetMutableTensor(param_name);
float* tensor_data = tensor->mutable_data<float>();
int64_t tensor_size = ShapeProduction(tensor->shape());
_optimizer->update(tensor_data, _neg_gradients + counter, tensor_size, param_name);
counter += tensor_size;
}
return true;
} }
/**
 * @brief Sample a noise vector and write "parameters + noise" into the
 *        sampling predictor.
 *
 * The parameters are read from `_predictor` and the perturbed values are
 * written into `_sampling_predictor`. The key returned by the sampling
 * method and the current model_iter_id are recorded in `sampling_info`.
 *
 * @param sampling_info receives the noise key and the model_iter_id.
 * @return true on success; false when called on the original agent.
 */
bool ESAgent::add_noise(SamplingInfo& sampling_info) {
    if (!_is_sampling_agent) {
        LOG(ERROR) <<
                   "[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent.";
        return false;
    }

    int key = 0;
    const bool success = _sampling_method->sampling(&key, _noise, _param_size);
    CHECK(success) << "[DeepES] sampling error occurs while add_noise.";

    const int model_iter_id = _config->async_es().model_iter_id();
    sampling_info.add_key(key);
    sampling_info.set_model_iter_id(model_iter_id);

    // `_noise` is one flat buffer; `counter` is the offset of the current
    // parameter tensor inside it.
    int64_t counter = 0;

    for (const std::string& param_name : _param_names) {
        std::unique_ptr<Tensor> sample_tensor = _sampling_predictor->GetMutableTensor(param_name);
        std::unique_ptr<const Tensor> tensor = _predictor->GetTensor(param_name);
        const int64_t tensor_size = ShapeProduction(tensor->shape());

        // Fetch the data pointers once per tensor instead of once per element.
        float* sample_data = sample_tensor->mutable_data<float>();
        const float* param_data = tensor->data<float>();

        for (int64_t j = 0; j < tensor_size; ++j) {
            sample_data[j] = param_data[j] + _noise[counter + j];
        }

        counter += tensor_size;
    }

    return success;
}
std::shared_ptr<PaddlePredictor> ESAgent::get_predictor() { std::shared_ptr<PaddlePredictor> ESAgent::get_predictor() {
return _sampling_predictor; return _sampling_predictor;
} }
int64_t ESAgent::_calculate_param_size() { int64_t ESAgent::_calculate_param_size() {
int64_t param_size = 0; int64_t param_size = 0;
for (std::string param_name: _param_names) {
std::unique_ptr<const Tensor> tensor = _predictor->GetTensor(param_name); for (std::string param_name : _param_names) {
param_size += ShapeProduction(tensor->shape()); std::unique_ptr<const Tensor> tensor = _predictor->GetTensor(param_name);
} param_size += ShapeProduction(tensor->shape());
return param_size; }
return param_size;
} }
}//namespace }//namespace
...@@ -36,7 +36,7 @@ else ...@@ -36,7 +36,7 @@ else
fi fi
#----------------protobuf-------------# #----------------protobuf-------------#
cp ./core/src/proto/deepes.proto ./ cp ./core/proto/deepes.proto ./
protoc deepes.proto --cpp_out ./ protoc deepes.proto --cpp_out ./
mv deepes.pb.h core/include mv deepes.pb.h core/include
mv deepes.pb.cc core/src mv deepes.pb.cc core/src
...@@ -49,6 +49,7 @@ mkdir build ...@@ -49,6 +49,7 @@ mkdir build
cd build cd build
cmake ../ ${FLAGS} cmake ../ ${FLAGS}
make -j10 make -j10
cd -
#-----------------run----------------# #-----------------run----------------#
./parallel_main ./build/parallel_main
...@@ -12,7 +12,7 @@ echo "Cannot find the torch library: ../libtorch" ...@@ -12,7 +12,7 @@ echo "Cannot find the torch library: ../libtorch"
fi fi
#----------------protobuf-------------# #----------------protobuf-------------#
cp ./core/src/proto/deepes.proto ./ cp ./core/proto/deepes.proto ./
protoc deepes.proto --cpp_out ./ protoc deepes.proto --cpp_out ./
mv deepes.pb.h core/include mv deepes.pb.h core/include
mv deepes.pb.cc core/src mv deepes.pb.cc core/src
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
#include "optimizer_factory.h" #include "optimizer_factory.h"
#include <memory> #include <memory>
namespace DeepES { namespace deep_es {
TEST(SGDOptimizersTest, Method_update) { TEST(SGDOptimizersTest, Method_update) {
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#include "cached_gaussian_sampling.h" #include "cached_gaussian_sampling.h"
#include <memory> #include <memory>
namespace DeepES { namespace deep_es {
class SamplingTest : public ::testing::Test { class SamplingTest : public ::testing::Test {
......
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#include <random> #include <random>
#include <math.h> #include <math.h>
namespace DeepES { namespace deep_es {
// The fixture for testing class Foo. // The fixture for testing class Foo.
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
#include <vector> #include <vector>
#include "utils.h" #include "utils.h"
namespace DeepES { namespace deep_es {
// Tests that the Utils::compute_centered_rank() method. // Tests that the Utils::compute_centered_rank() method.
TEST(UtilsTest, Method_compute_centered_ranks) { TEST(UtilsTest, Method_compute_centered_ranks) {
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#include "utils.h" #include "utils.h"
#include "deepes.pb.h" #include "deepes.pb.h"
namespace DeepES{ namespace deep_es{
/** /**
* @brief DeepES agent for Torch. * @brief DeepES agent for Torch.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册