Unverified · Commit 31ee4fa5 · Authored by rical730 · Committed by GitHub

add gaussian sampling method with noise table (#241)

* add gaussian sampling method with noise table

* add table sampling test and agent param_size test

* add model param_size test

* get param size of paddle demo

* rename gaussian_table_sampling to cached_gaussian_sampling

* add unittest to sampling method and uniform indentation to 2 spaces
Co-authored-by: TomorrowIsAnOtherDay <2466956298@qq.com>
Parent: 65554cd6
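The core idea of the commit: instead of seeding a fresh RNG for every perturbation, the cached sampler pre-generates one large table of Gaussian noise and hands out a window offset as the key, so a noise vector can be reconstructed from a single int (see cached_gaussian_sampling.cc below). A minimal standalone sketch of the same trick, with illustrative names rather than the library's API:

#include <algorithm>
#include <cstdlib>
#include <random>
#include <vector>

// Sketch of a noise table: a "key" is just the start offset of a window.
struct NoiseTable {
  std::vector<float> cache;
  NoiseTable(int cache_size, float std, int seed) : cache(cache_size) {
    std::default_random_engine gen(seed);
    std::normal_distribution<float> norm;
    for (auto& v : cache) v = norm(gen) * std;  // fill once, reuse for every sample
  }
  // Sampling side: pick a random window start and copy the window out.
  int sample(float* noise, int size) {
    int key = std::rand() % (static_cast<int>(cache.size()) - size + 1);
    std::copy(cache.begin() + key, cache.begin() + key + size, noise);
    return key;
  }
  // Update side: the key alone reproduces the exact same noise.
  void resample(int key, float* noise, int size) const {
    std::copy(cache.begin() + key, cache.begin() + key + size, noise);
  }
};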
@@ -9,38 +9,38 @@ const double kPi = 3.1415926535898;
class CartPole {
public:
  double gravity = 9.8;
  double masscart = 1.0;
  double masspole = 0.1;
  double total_mass = (masspole + masscart);
  double length = 0.5; // actually half the pole's length
  double polemass_length = (masspole * length);
  double force_mag = 10.0;
  double tau = 0.02; // seconds between state updates

  // Angle at which to fail the episode
  double theta_threshold_radians = 12 * 2 * kPi / 360;
  double x_threshold = 2.4;
  int steps_beyond_done = -1;

  std::vector<float> state = {0, 0, 0, 0};
  double reward;
  bool done;
  int step_ = 0;

  const float* getState() {
    return state.data();
  }

  double getReward() {
    return reward;
  }

  double isDone() {
    return done;
  }

  void reset() {
    std::random_device rd;
    std::default_random_engine generator(rd());
    std::uniform_real_distribution<float> distribution(-0.05, 0.05);
@@ -48,53 +48,51 @@ public:
      state[i] = distribution(generator);
    }
    steps_beyond_done = -1;
    step_ = 0;
  }

  CartPole() {
    reset();
  }

  void step(int action) {
    float x = state[0];
    float x_dot = state[1];
    float theta = state[2];
    float theta_dot = state[3];

    auto force = (action == 1) ? force_mag : -force_mag;
    auto costheta = std::cos(theta);
    auto sintheta = std::sin(theta);
    auto temp = (force + polemass_length * theta_dot * theta_dot * sintheta) /
        total_mass;
    auto thetaacc = (gravity * sintheta - costheta * temp) /
        (length * (4.0 / 3.0 - masspole * costheta * costheta / total_mass));
    auto xacc = temp - polemass_length * thetaacc * costheta / total_mass;

    x = x + tau * x_dot;
    x_dot = x_dot + tau * xacc;
    theta = theta + tau * theta_dot;
    theta_dot = theta_dot + tau * thetaacc;
    state = {x, x_dot, theta, theta_dot};

    done = x < -x_threshold || x > x_threshold ||
        theta < -theta_threshold_radians || theta > theta_threshold_radians ||
        step_ > 200;

    if (!done) {
      reward = 1.0;
    } else if (steps_beyond_done == -1) {
      // Pole just fell!
      steps_beyond_done = 0;
      reward = 0;
    } else {
      if (steps_beyond_done == 0) {
        assert(false); // Can't do this
      }
    }
    step_++;
  }
};
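For orientation, a random-action rollout against this environment could look like the sketch below (assumes this header is available; episodes cap at 200 steps via the `done` condition above):

#include <cstdlib>
#include <iostream>

int main() {
  CartPole env;  // the constructor calls reset()
  double total_reward = 0.0;
  for (int t = 0; t < 200; ++t) {
    env.step(std::rand() % 2);        // random left/right push
    total_reward += env.getReward();  // 1.0 per surviving step
    if (env.isDone()) break;
  }
  std::cout << "episode reward: " << total_reward << std::endl;
  return 0;
}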
seed: 1024

gaussian_sampling {
  std: 0.5
  cached: true
  cache_size: 100000
}

optimizer {
  type: "Adam"
......
@@ -12,11 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ADAM_OPTIMIZER_H
#define ADAM_OPTIMIZER_H

#include <map>
#include <cmath>
#include "optimizer.h"

namespace DeepES{

/*@brief AdamOptimizer.
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef CACHED_GAUSSIAN_SAMPLING_H
#define CACHED_GAUSSIAN_SAMPLING_H
#include <random>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "sampling_method.h"
#include "utils.h"
#include <glog/logging.h>
namespace DeepES{

class CachedGaussianSampling: public SamplingMethod {

public:
  CachedGaussianSampling();

  ~CachedGaussianSampling();

  /*Initialize the sampling algorithm given the config with the protobuf format.
   *DeepES library uses only one configuration file for all sampling algorithms.
   A default configuration file can be found at: . // TODO: where?
   Usually you won't have to modify the configuration items of other algorithms
   if you are not using them.
   */
  bool load_config(const DeepESConfig& config);

  /*@brief generate Gaussian noise and the related key.
   *
   *@Args:
   *     key: a unique key associated with the sampled noise.
   *     noise: a pointer to the memory that stores the noise.
   *     size: the number of floats to be sampled.
   *
   *@return:
   *     success: whether the Gaussian noise was generated successfully.
   */
  bool sampling(int* key, float* noise, int64_t size);

  /*@brief reconstruct the Gaussian noise given the key.
   * This function is often used for updating the neural network parameters in an offline environment.
   *
   *@Args:
   *     key: a unique key associated with the sampled noise.
   *     noise: a pointer to the memory that stores the noise.
   *     size: the number of floats to be sampled.
   *
   *@return:
   *     success: whether the noise was reconstructed successfully.
   */
  bool resampling(int key, float* noise, int64_t size);

private:
  float _std;
  int _cache_size;
  float* _noise_cache = nullptr;

  bool _create_noise_cache();
};

}

#endif
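A usage sketch for this class, assuming a populated `DeepESConfig` (field names as in the prototxt examples in this commit):

DeepESConfig config;  // assume seed and gaussian_sampling.{std, cached, cache_size} are set
CachedGaussianSampling sampler;
CHECK(sampler.load_config(config));  // builds the noise cache

std::vector<float> noise(param_size);  // param_size: number of model parameters, assumed known
int key = 0;
CHECK(sampler.sampling(&key, noise.data(), noise.size()));   // sampling side: noise plus key
CHECK(sampler.resampling(key, noise.data(), noise.size()));  // update side: same noise from key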
@@ -12,49 +12,60 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//

#ifndef GAUSSIAN_SAMPLING_H
#define GAUSSIAN_SAMPLING_H

#include <random>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "sampling_method.h"
#include "utils.h"

namespace DeepES{

class GaussianSampling: public SamplingMethod {

public:
  GaussianSampling() {}

  ~GaussianSampling() {}

  /*Initialize the sampling algorithm given the config with the protobuf format.
   *DeepES library uses only one configuration file for all sampling algorithms.
   A default configuration file can be found at: . // TODO: where?
   Usually you won't have to modify the configuration items of other algorithms
   if you are not using them.
   */
  bool load_config(const DeepESConfig& config);

  /*@brief generate Gaussian noise and the related key.
   *
   *@Args:
   *     key: a unique key associated with the sampled noise.
   *     noise: a pointer to the memory that stores the noise.
   *     size: the number of floats to be sampled.
   *
   *@return:
   *     success: whether the Gaussian noise was generated successfully.
   */
  bool sampling(int* key, float* noise, int64_t size);

  /*@brief reconstruct the Gaussian noise given the key.
   * This function is often used for updating the neural network parameters in an offline environment.
   *
   *@Args:
   *     key: a unique key associated with the sampled noise.
   *     noise: a pointer to the memory that stores the noise.
   *     size: the number of floats to be sampled.
   *
   *@return:
   *     success: whether the noise was reconstructed successfully.
   */
  bool resampling(int key, float* noise, int64_t size);

private:
  float _std;
};

}
......
@@ -12,11 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef OPTIMIZER_H
#define OPTIMIZER_H

#include <map>
#include <glog/logging.h>

namespace DeepES{

/*@brief Optimizer. Base class for optimizers.
......
@@ -21,6 +21,7 @@
#include "sgd_optimizer.h"
#include "adam_optimizer.h"
#include "deepes.pb.h"
#include <glog/logging.h>

namespace DeepES{

/* @brief: create an optimizer according to the configuration.
......
@@ -17,8 +17,8 @@
#include "paddle_api.h"
#include "optimizer_factory.h"
#include "sampling_factory.h"
#include "utils.h"
#include "deepes.pb.h"
#include <vector>
@@ -73,6 +73,11 @@ class ESAgent {
   * if _is_sampling_agent is false, will return predictor without added noise.
   */
  std::shared_ptr<PaddlePredictor> get_predictor();

  // get param size of model
  int64_t param_size() {
    return _param_size;
  }
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SAMPLING_FACTORY_H
#define SAMPLING_FACTORY_H
#include <algorithm>
#include <memory>
#include "sampling_method.h"
#include "gaussian_sampling.h"
#include "cached_gaussian_sampling.h"
#include "deepes.pb.h"
#include <glog/logging.h>
namespace DeepES{
/* @brief: create a sampling_method according to the configuration.
 * @args:
 *     config: configuration for the DeepES
 */
std::shared_ptr<SamplingMethod> create_sampling_method(const DeepESConfig& config);

}//namespace

#endif
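How the factory is consumed, mirroring the ESAgent constructor changes later in this diff:

DeepESConfig config;
load_proto_conf(config_path, config);  // config_path: path to a prototxt like the ones below
std::shared_ptr<SamplingMethod> sampling_method = create_sampling_method(config);
CHECK(sampling_method != nullptr) << "[DeepES] invalid sampling configuration";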
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef SAMPLING_METHOD_H
#define SAMPLING_METHOD_H

#include <string>
#include <random>
@@ -34,51 +34,55 @@ class SamplingMethod{

public:
  SamplingMethod(): _seed(0) {}

  virtual ~SamplingMethod() {}

  /*Initialize the sampling algorithm given the config with the protobuf format.
   *DeepES library uses only one configuration file for all sampling algorithms.
   A default configuration file can be found at: . // TODO: where?
   Usually you won't have to modify the configuration items of other algorithms
   if you are not using them.
   */
  virtual bool load_config(const DeepESConfig& config)=0;

  /*@brief generate Gaussian noise and the related key.
   *
   *@Args:
   *     key: a unique key associated with the sampled noise.
   *     noise: a pointer to the memory that stores the noise.
   *     size: the number of floats to be sampled.
   *
   *@return:
   *     success: whether the Gaussian noise was generated successfully.
   */
  virtual bool sampling(int* key, float* noise, int64_t size)=0;

  /*@brief reconstruct the Gaussian noise given the key.
   * This function is often used for updating the neural network parameters in an offline environment.
   *
   *@Args:
   *     key: a unique key associated with the sampled noise.
   *     noise: a pointer to the memory that stores the noise.
   *     size: the number of floats to be sampled.
   *
   *@return:
   *     success: whether the noise was reconstructed successfully.
   */
  virtual bool resampling(int key, float* noise, int64_t size)=0;

  bool set_seed(int seed) {
    _seed = seed;
    srand(_seed);
    return true;
  }

  int get_seed() {
    return _seed;
  }

protected:
  int _seed;
};
......
@@ -12,11 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef SGD_OPTIMIZER_H
#define SGD_OPTIMIZER_H

#include <map>
#include <cmath>
#include "optimizer.h"

namespace DeepES{

/*@brief SGDOptimizer.
......
@@ -18,8 +18,8 @@
#include <memory>
#include <string>
#include "optimizer_factory.h"
#include "sampling_factory.h"
#include "utils.h"
#include "deepes.pb.h"

namespace DeepES{
@@ -47,8 +47,7 @@ public:
    _is_sampling_agent = false;
    _config = std::make_shared<DeepESConfig>();
    load_proto_conf(config_path, *_config);
    _sampling_method = create_sampling_method(*_config);
    _optimizer = create_optimizer(_config->optimizer());
    // Origin agent can't be used to sample, so keep it the same as _model for evaluation.
    _sampling_model = model;
@@ -111,6 +110,7 @@ public:
      int key = noisy_info[i].key(0);
      float reward = noisy_rewards[i];
      bool success = _sampling_method->resampling(key, _noise, _param_size);
      CHECK(success) << "[DeepES] resampling error occurs at sample: " << i;
      for (int64_t j = 0; j < _param_size; ++j) {
        _neg_gradients[j] += _noise[j] * reward;
      }
@@ -134,14 +134,18 @@ public:
  // copied parameters = original parameters + noise
  bool add_noise(SamplingInfo& sampling_info) {
    bool success = true;
    if (!_is_sampling_agent) {
      LOG(ERROR) << "[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent.";
      success = false;
      return success;
    }
    auto sampling_params = _sampling_model->named_parameters();
    auto params = _model->named_parameters();
    int key = 0;
    success = _sampling_method->sampling(&key, _noise, _param_size);
    CHECK(success) << "[DeepES] sampling error occurs while add_noise.";
    sampling_info.add_key(key);
    int64_t counter = 0;
    for (auto& param: sampling_params) {
@@ -155,10 +159,14 @@ public:
      }
      counter += tensor.size(0);
    }
    return success;
  }

  // get param size of model
  int64_t param_size() {
    return _param_size;
  }

private:
  int64_t _calculate_param_size() {
......
@@ -14,6 +14,7 @@
#ifndef UTILS_H
#define UTILS_H

#include <string>
#include <fstream>
#include <algorithm>
......
@@ -12,38 +12,37 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "adam_optimizer.h"

namespace DeepES {

AdamOptimizer::~AdamOptimizer() {
  for (auto iter = _momentum.begin(); iter != _momentum.end(); iter++) {
    delete[] iter->second;
  }
  for (auto iter = _velocity.begin(); iter != _velocity.end(); iter++) {
    delete[] iter->second;
  }
  _momentum.clear();
  _velocity.clear();
}

void AdamOptimizer::compute_step(float* gradient, int size, std::string param_name="") {
  if (_momentum.count(param_name) == 0) {
    _momentum[param_name] = new float [size];
    memset(_momentum[param_name], 0, size * sizeof(float));
  }
  if (_velocity.count(param_name) == 0) {
    _velocity[param_name] = new float [size];
    memset(_velocity[param_name], 0, size * sizeof(float));
  }
  int true_update_times = int(_update_times / _velocity.size());
  float alpha = std::sqrt(1 - std::pow(_beta2, _update_times)) / (1 - std::pow(_beta1, _update_times));
  for (int i = 0; i < size; ++i) {
    _momentum[param_name][i] = _beta1 * _momentum[param_name][i] + (1 - _beta1) * gradient[i];
    _velocity[param_name][i] = _beta2 * _velocity[param_name][i] + (1 - _beta2) * gradient[i] * gradient[i];
    gradient[i] = alpha * _momentum[param_name][i] / (std::sqrt(_velocity[param_name][i]) + _epsilon);
  }
}

}//namespace
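For reference, the loop above is the standard bias-corrected Adam step. Writing $g_t$ for the incoming gradient and $t$ for `_update_times`, it rewrites the gradient buffer in place as

$$m_t = \beta_1 m_{t-1} + (1-\beta_1)\,g_t,\qquad v_t = \beta_2 v_{t-1} + (1-\beta_2)\,g_t^2,\qquad g_t \leftarrow \frac{\sqrt{1-\beta_2^{\,t}}}{1-\beta_1^{\,t}}\cdot\frac{m_t}{\sqrt{v_t}+\epsilon}$$

with the learning rate presumably applied afterwards in the base class's `update()` (not shown here); note that `true_update_times` is computed but unused in this excerpt.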
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cached_gaussian_sampling.h"
namespace DeepES{

CachedGaussianSampling::CachedGaussianSampling() {}

CachedGaussianSampling::~CachedGaussianSampling() {
  delete[] _noise_cache;
}

bool CachedGaussianSampling::load_config(const DeepESConfig& config) {
  bool success = true;
  _std = config.gaussian_sampling().std();
  success = set_seed(config.seed());
  CHECK(success) << "[DeepES] Fail to set seed while load config.";
  _cache_size = config.gaussian_sampling().cache_size();
  _noise_cache = new float [_cache_size];
  memset(_noise_cache, 0, _cache_size * sizeof(float));
  success = _create_noise_cache();
  CHECK(success) << "[DeepES] Fail to create noise_cache while load config.";
  return success;
}

bool CachedGaussianSampling::sampling(int* key, float* noise, int64_t size) {
  bool success = true;
  if (_noise_cache == nullptr) {
    LOG(ERROR) << "[DeepES] Please use load_config() first.";
    success = false;
    return success;
  }
  if (noise == nullptr) {
    LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr.";
    success = false;
    return success;
  }
  if ((size >= _cache_size) || (size < 0)) {
    LOG(ERROR) << "[DeepES] Input size " << size << " is out of bounds [0, " << _cache_size << "), cache_size: " << _cache_size;
    success = false;
    return success;
  }
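  // Sample a random window of length `size` from the pre-generated table;
  // the window's start index doubles as the reproducible key.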
  int rand_key = rand();
  std::default_random_engine generator(rand_key);
  std::uniform_int_distribution<unsigned int> uniform(0, _cache_size - size);
  int index = uniform(generator);
  *key = index;
  for (int64_t i = 0; i < size; ++i) {
    *(noise + i) = *(_noise_cache + index + i);
  }
  return success;
}

bool CachedGaussianSampling::resampling(int key, float* noise, int64_t size) {
  bool success = true;
  if (_noise_cache == nullptr) {
    LOG(ERROR) << "[DeepES] Please use load_config() first.";
    success = false;
    return success;
  }
  if (noise == nullptr) {
    LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr.";
    success = false;
    return success;
  }
  if ((size >= _cache_size) || (size < 0)) {
    LOG(ERROR) << "[DeepES] Input size " << size << " is out of bounds [0, " << _cache_size << "), cache_size: " << _cache_size;
    success = false;
    return success;
  }
  if ((key > _cache_size - size) || (key < 0)) {
    LOG(ERROR) << "[DeepES] Resampling key " << key << " is out of bounds [0, " << _cache_size - size << "], cache_size: " << _cache_size << ", size: " << size;
    success = false;
    return success;
  }
  for (int64_t i = 0; i < size; ++i) {
    *(noise + i) = *(_noise_cache + key + i);
  }
  return success;
}

bool CachedGaussianSampling::_create_noise_cache() {
  std::default_random_engine generator(_seed);
  std::normal_distribution<float> norm;
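  // Fill the table once with draws from N(0, _std^2); seeding with the shared
  // config seed means every process builds an identical table, so an integer
  // key is enough to reconstruct the same noise elsewhere.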
  for (int64_t i = 0; i < _cache_size; ++i) {
    *(_noise_cache + i) = norm(generator) * _std;
  }
  return true;
}

}
@@ -12,43 +12,47 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "gaussian_sampling.h"

namespace DeepES{

bool GaussianSampling::load_config(const DeepESConfig& config) {
  bool success = true;
  _std = config.gaussian_sampling().std();
  success = set_seed(config.seed());
  return success;
}

bool GaussianSampling::sampling(int* key, float* noise, int64_t size) {
  bool success = true;
  if (noise == nullptr) {
    LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr.";
    success = false;
    return success;
  }
  int rand_key = rand();
  *key = rand_key;
  std::default_random_engine generator(rand_key);
  std::normal_distribution<float> norm;
  for (int64_t i = 0; i < size; ++i) {
    *(noise + i) = norm(generator) * _std;
  }
  return success;
}

bool GaussianSampling::resampling(int key, float* noise, int64_t size) {
  bool success = true;
  if (noise == nullptr) {
    LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr.";
    success = false;
  } else {
    std::default_random_engine generator(key);
    std::normal_distribution<float> norm;
    for (int64_t i = 0; i < size; ++i) {
      *(noise + i) = norm(generator) * _std;
    }
  }
  return success;
}

}
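The invariant both samplers share: `resampling(key, ...)` must regenerate exactly the noise that `sampling(&key, ...)` produced. Here the key seeds the RNG directly; in the cached variant it indexes the table. A quick check sketch:

GaussianSampling sampler;
sampler.load_config(config);  // config as in the prototxt examples

std::vector<float> a(10), b(10);
int key = 0;
sampler.sampling(&key, a.data(), 10);
sampler.resampling(key, b.data(), 10);
// a and b now match element for element: the key fully determines the noise.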
@@ -24,13 +24,13 @@ std::shared_ptr<Optimizer> create_optimizer(const OptimizerConfig& optimizer_config) {
  if (opt_type == "sgd") {
    optimizer = std::make_shared<SGDOptimizer>(optimizer_config.base_lr(), \
        optimizer_config.momentum());
  } else if (opt_type == "adam") {
    optimizer = std::make_shared<AdamOptimizer>(optimizer_config.base_lr(), \
        optimizer_config.beta1(), \
        optimizer_config.beta2(), \
        optimizer_config.epsilon());
  } else {
    LOG(ERROR) << "type of OptimizerConfig must be SGD or Adam."; // NotImplementedError
  }
  return optimizer;
}
......
@@ -52,8 +52,7 @@ ESAgent::ESAgent(const std::string& model_dir, const std::string& config_path) {
  _config = std::make_shared<DeepESConfig>();
  load_proto_conf(config_path, *_config);
  _sampling_method = create_sampling_method(*_config);
  _optimizer = create_optimizer(_config->optimizer());
@@ -101,6 +100,7 @@ bool ESAgent::update(
    int key = noisy_info[i].key(0);
    float reward = noisy_rewards[i];
    bool success = _sampling_method->resampling(key, _noise, _param_size);
    CHECK(success) << "[DeepES] resampling error occurs at sample: " << i;
    for (int64_t j = 0; j < _param_size; ++j) {
      _neg_gradients[j] += _noise[j] * reward;
    }
@@ -123,12 +123,16 @@ bool ESAgent::update(
}

bool ESAgent::add_noise(SamplingInfo& sampling_info) {
  bool success = true;
  if (!_is_sampling_agent) {
    LOG(ERROR) << "[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent.";
    success = false;
    return success;
  }
  int key = 0;
  success = _sampling_method->sampling(&key, _noise, _param_size);
  CHECK(success) << "[DeepES] sampling error occurs while add_noise.";
  int model_iter_id = _config->async_es().model_iter_id();
  sampling_info.add_key(key);
  sampling_info.set_model_iter_id(model_iter_id);
@@ -144,7 +148,7 @@ bool ESAgent::add_noise(SamplingInfo& sampling_info) {
    counter += tensor_size;
  }
  return success;
}

std::shared_ptr<PaddlePredictor> ESAgent::get_predictor() {
......
@@ -29,6 +29,8 @@ message DeepESConfig {
message GaussianSamplingConfig {
  optional float std = 1 [default = 1.0];
  optional bool cached = 2 [default = false];
  optional int32 cache_size = 3 [default = 100000];
}

message OptimizerConfig{
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "sampling_factory.h"
namespace DeepES{

std::shared_ptr<SamplingMethod> create_sampling_method(const DeepESConfig& config) {
  std::shared_ptr<SamplingMethod> sampling_method;
  bool cached = config.gaussian_sampling().cached();
  if (cached) {
    sampling_method = std::make_shared<CachedGaussianSampling>();
  } else {
    sampling_method = std::make_shared<GaussianSampling>();
  }
  bool success = sampling_method->load_config(config);
  if (success) {
    return sampling_method;
  } else {
    LOG(ERROR) << "[DeepES] Fail to create sampling_method";
    return nullptr;
  }
}

}//namespace
@@ -12,27 +12,26 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "sgd_optimizer.h"

namespace DeepES {

SGDOptimizer::~SGDOptimizer() {
  for (auto iter = _velocity.begin(); iter != _velocity.end(); iter++) {
    delete[] iter->second;
  }
  _velocity.clear();
}

void SGDOptimizer::compute_step(float* gradient, int size, std::string param_name="") {
  if (_velocity.count(param_name) == 0) {
    _velocity[param_name] = new float [size];
    memset(_velocity[param_name], 0, size * sizeof(float));
  }
  for (int i = 0; i < size; ++i) {
    _velocity[param_name][i] = _momentum * _velocity[param_name][i] + (1 - _momentum) * gradient[i];
    gradient[i] = _velocity[param_name][i];
  }
}
......
@@ -10,9 +10,9 @@ set(CMAKE_CXX_EXTENSIONS OFF)
find_package(GTest REQUIRED)
find_package(OpenMP)
if (OPENMP_FOUND)
  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
  set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif()

# Torch lib
......
@@ -21,15 +21,14 @@ struct Model : public torch::nn::Module{
  Model() = delete;

  Model(const int obs_dim, const int act_dim, const int h1_size, const int h2_size) {
    _obs_dim = obs_dim;
    _act_dim = act_dim;
    _h1_size = h1_size;
    _h2_size = h2_size;
    fc1 = register_module("fc1", torch::nn::Linear(obs_dim, h1_size));
    fc2 = register_module("fc2", torch::nn::Linear(h1_size, h2_size));
    fc3 = register_module("fc3", torch::nn::Linear(h2_size, act_dim));
  }

  torch::Tensor forward(torch::Tensor x) {
@@ -41,7 +40,7 @@ struct Model : public torch::nn::Module{
  }

  std::shared_ptr<Model> clone() {
    std::shared_ptr<Model> model = std::make_shared<Model>(_obs_dim, _act_dim, _h1_size, _h2_size);
    std::vector<torch::Tensor> parameters1 = parameters();
    std::vector<torch::Tensor> parameters2 = model->parameters();
    for (int i = 0; i < parameters1.size(); ++i) {
@@ -58,6 +57,8 @@ struct Model : public torch::nn::Module{
  int _act_dim;
  int _obs_dim;
  int _h1_size;
  int _h2_size;

  torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr};
};
......
seed : 1024
gaussian_sampling {
std: 0.005
cached: true
cache_size : 100000
}
optimizer {
type: "Adam",
base_lr: 0.005,
momentum: 0.9,
beta1: 0.9,
beta2: 0.999,
epsilon: 1e-8,
}
@@ -2,6 +2,7 @@ seed : 1024
gaussian_sampling {
  std: 0.005
  cached: false
}
optimizer {
......
@@ -4,11 +4,11 @@ export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
#---------------libtorch-------------#
if [ ! -d "./libtorch" ];then
  echo "Cannot find the torch library: ../libtorch"
  echo "Downloading Torch library"
  wget -q https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip
  unzip -q libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip
  rm -rf libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip
  echo "Torch library Downloaded"
fi

#----------------protobuf-------------#
......
@@ -22,38 +22,38 @@ namespace DeepES {

TEST(SGDOptimizersTest, Method_update) {
  std::shared_ptr<DeepESConfig> config = std::make_shared<DeepESConfig>();
  auto optimizer_config = config->mutable_optimizer();
  optimizer_config->set_base_lr(1.0);
  optimizer_config->set_type("sgd");
  std::shared_ptr<Optimizer> optimizer = create_optimizer(config->optimizer());

  float sgd_wei[10]  = { 0.0, 0.0, 0.04216444, 0.0511456, 0.04231584, 0.01089015, 0.06569759, 0.00127421, -0.00092832, 0.01128081};
  float sgd_grad[10] = {-0.11992419, -0.0, 0.07681337, -0.06616384, 0.00249889, 0.01158612, -0.3067452, 0.36048946, -0.15820622, -0.20014143};
  float sgd_new[10]  = { 0.01199242, 0.0, 0.0344831, 0.05776198, 0.04206595, 0.00973154, 0.09637211, -0.03477474, 0.014892306, 0.03129495};

  EXPECT_TRUE(optimizer->update(sgd_wei, sgd_grad, 10, "fc1"));
  for (int i = 0; i < 10; ++i) {
    EXPECT_FLOAT_EQ(sgd_new[i], sgd_wei[i]) << " i: " << i;
  }
  EXPECT_TRUE(optimizer->update(sgd_wei, sgd_grad, 10, "fc1"));
  EXPECT_FALSE(optimizer->update(sgd_wei, sgd_grad, 9, "fc1"));
}

TEST(AdamOptimizersTest, Method_update) {
  std::shared_ptr<DeepESConfig> config = std::make_shared<DeepESConfig>();
  auto optimizer_config = config->mutable_optimizer();
  optimizer_config->set_base_lr(1.0);
  optimizer_config->set_type("adam");
  std::shared_ptr<Optimizer> optimizer = create_optimizer(config->optimizer());

  float adam_wei[10]  = { 0.0, 0.0, 0.04216444, 0.0511456, 0.04231584, 0.01089015, 0.06569759, 0.00127421, -0.00092832, 0.01128081};
  float adam_grad[10] = {-0.11992419, -0.0, 0.07681337, -0.06616384, 0.00249889, 0.01158612, -0.3067452, 0.36048946, -0.15820622, -0.20014143};
  float adam_new[10]  = { 0.99999736, 0.0, -0.95783144, 1.05114082, -0.95755763, -0.98908256, 1.06569656, -0.99872491, 0.99906968, 1.01127923};

  EXPECT_TRUE(optimizer->update(adam_wei, adam_grad, 10, "fc1"));
  for (int i = 0; i < 10; ++i) {
    EXPECT_FLOAT_EQ(adam_new[i], adam_wei[i]) << " i: " << i;
  }
  EXPECT_TRUE(optimizer->update(adam_wei, adam_grad, 10, "fc1"));
  EXPECT_FALSE(optimizer->update(adam_wei, adam_grad, 9, "fc1"));
}

} // namespace
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include <vector>
#include "sampling_method.h"
#include "gaussian_sampling.h"
#include "cached_gaussian_sampling.h"
#include <memory>
namespace DeepES {

class SamplingTest : public ::testing::Test {
protected:
  void init_sampling_method(bool cached) {
    config = std::make_shared<DeepESConfig>();
    config->set_seed(1024);
    auto sampling_config = config->mutable_gaussian_sampling();
    sampling_config->set_std(1.0);
    sampling_config->set_cached(cached);
    sampling_config->set_cache_size(cache_size);
    if (cached) {
      sampler = std::make_shared<CachedGaussianSampling>();
    } else {
      sampler = std::make_shared<GaussianSampling>();
    }
  }

  std::shared_ptr<SamplingMethod> sampler;
  std::shared_ptr<DeepESConfig> config;
  float array[3] = {1.0, 2.0, 3.0};
  int cache_size = 100; // default cache_size 100
  int key = 0;
};
TEST_F(SamplingTest, GaussianSampling_load_config) {
  init_sampling_method(false);
  EXPECT_TRUE(sampler->load_config(*config));
}

TEST_F(SamplingTest, GaussianSampling_sampling) {
  init_sampling_method(false);
  sampler->load_config(*config);
  EXPECT_FALSE(sampler->sampling(&key, nullptr, 0));
  EXPECT_TRUE(sampler->sampling(&key, array, 3));
}

TEST_F(SamplingTest, GaussianSampling_resampling) {
  init_sampling_method(false);
  sampler->load_config(*config);
  EXPECT_FALSE(sampler->resampling(0, nullptr, 0));
  EXPECT_TRUE(sampler->resampling(0, array, 3));
}

TEST_F(SamplingTest, CachedGaussianSampling_load_config) {
  init_sampling_method(true);
  EXPECT_TRUE(sampler->load_config(*config));
}

TEST_F(SamplingTest, CachedGaussianSampling_sampling) {
  init_sampling_method(true);
  EXPECT_FALSE(sampler->sampling(&key, array, 0));
  sampler->load_config(*config);
  EXPECT_FALSE(sampler->sampling(&key, nullptr, 0));
  EXPECT_FALSE(sampler->sampling(&key, array, -1));
  EXPECT_FALSE(sampler->sampling(&key, array, cache_size));
  EXPECT_TRUE(sampler->sampling(&key, array, 0));
  EXPECT_TRUE(sampler->sampling(&key, array, 3));
}

TEST_F(SamplingTest, CachedGaussianSampling_resampling) {
  init_sampling_method(true);
  EXPECT_FALSE(sampler->resampling(0, array, 0));
  sampler->load_config(*config);
  EXPECT_FALSE(sampler->resampling(0, nullptr, 0));
  EXPECT_FALSE(sampler->resampling(0, array, -1));
  EXPECT_FALSE(sampler->resampling(0, array, cache_size));
  EXPECT_TRUE(sampler->resampling(0, array, 0));
  EXPECT_TRUE(sampler->resampling(0, array, 1));
  EXPECT_TRUE(sampler->resampling(0, array, 2));
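  // Valid keys satisfy 0 <= key <= cache_size - size; everything outside must fail.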
  EXPECT_FALSE(sampler->resampling(-1, array, 3));
  EXPECT_TRUE(sampler->resampling(0, array, 3));
  EXPECT_TRUE(sampler->resampling(1, array, 3));
  EXPECT_TRUE(sampler->resampling(2, array, 3));
  EXPECT_TRUE(sampler->resampling(cache_size - 3, array, 3));
  EXPECT_FALSE(sampler->resampling(cache_size - 2, array, 3));
  EXPECT_FALSE(sampler->resampling(cache_size - 1, array, 3));
  EXPECT_FALSE(sampler->resampling(cache_size, array, 3));
  EXPECT_FALSE(sampler->resampling(cache_size - 3, array, cache_size - 1));
}

} // namespace
@@ -32,105 +32,126 @@ namespace DeepES {

// Test fixture for the torch sin-fitting demo.
class TorchDemoTest : public ::testing::Test {
protected:
  float evaluate(std::vector<float>& x_list, std::vector<float>& y_list, int size, std::shared_ptr<ESAgent<Model>> agent) {
    float total_loss = 0.0;
    for (int i = 0; i < size; ++i) {
      torch::Tensor x_input = torch::tensor(x_list[i], torch::dtype(torch::kFloat32));
      torch::Tensor predict_y = agent->predict(x_input);
      auto pred_y = predict_y.accessor<float,2>();
      float loss = pow((pred_y[0][0] - y_list[i]), 2);
      total_loss += loss;
    }
    return -total_loss / float(size);
  }

  float train_loss() {
    return -1.0 * evaluate(x_list, y_list, train_data_size, agent);
  }

  float test_loss() {
    return -1.0 * evaluate(test_x_list, test_y_list, test_data_size, agent);
  }

  float train_test_gap() {
    float train_lo = train_loss();
    float test_lo = test_loss();
    if (train_lo > test_lo) {
      return train_lo - test_lo;
    } else {
      return test_lo - train_lo;
    }
  }

  void init_agent(const int in_dim, const int out_dim, const int h1_size, const int h2_size) {
    std::shared_ptr<Model> model = std::make_shared<Model>(in_dim, out_dim, h1_size, h2_size);
    agent = std::make_shared<ESAgent<Model>>(model, "../test/prototxt/torch_sin_config.prototxt");
  }

  void train_agent(std::string config_path) {
    std::default_random_engine generator(0); // fix seed
    std::uniform_real_distribution<float> uniform(-3.0, 9.0);
    std::normal_distribution<float> norm;
    for (int i = 0; i < train_data_size; ++i) {
      float x_i = uniform(generator); // generate data between [-3, 9]
      float y_i = sin(x_i) + norm(generator) * 0.05; // label noise std 0.05
      x_list.push_back(x_i);
      y_list.push_back(y_i);
    }
    for (int i = 0; i < test_data_size; ++i) {
      float x_i = uniform(generator);
      float y_i = sin(x_i);
      test_x_list.push_back(x_i);
      test_y_list.push_back(y_i);
    }

    std::shared_ptr<Model> model = std::make_shared<Model>(1, 1, 10, 5);
    agent = std::make_shared<ESAgent<Model>>(model, config_path);

    // Clone agents to sample (explore).
    std::vector<std::shared_ptr<ESAgent<Model>>> sampling_agents;
    for (int i = 0; i < iter; ++i) {
      sampling_agents.push_back(agent->clone());
    }

    std::vector<SamplingInfo> noisy_keys;
    std::vector<float> noisy_rewards(iter, 0.0f);
    noisy_keys.resize(iter);

    LOG(INFO) << "start training...";
    for (int epoch = 0; epoch < 1001; ++epoch) {
#pragma omp parallel for schedule(dynamic, 1)
      for (int i = 0; i < iter; ++i) {
        auto sampling_agent = sampling_agents[i];
        SamplingInfo key;
        bool success = sampling_agent->add_noise(key);
        float reward = evaluate(x_list, y_list, train_data_size, sampling_agent);
        noisy_keys[i] = key;
        noisy_rewards[i] = reward;
      }
      bool success = agent->update(noisy_keys, noisy_rewards);
      if (epoch % 100 == 0) {
        float reward = evaluate(test_x_list, test_y_list, test_data_size, agent);
        float train_reward = evaluate(x_list, y_list, train_data_size, agent);
        LOG(INFO) << "Epoch:" << epoch << " Loss: " << -reward << ", Train loss: " << -train_reward;
      }
    }
  }

  // Class members declared here can be used by all tests in the test suite
  int train_data_size = 300;
  int test_data_size = 100;
  int iter = 10;
  std::vector<float> x_list;
  std::vector<float> y_list;
  std::vector<float> test_x_list;
  std::vector<float> test_y_list;
  std::shared_ptr<ESAgent<Model>> agent;
};

TEST_F(TorchDemoTest, TrainingEffectUseNormalSampling) {
  train_agent("../test/prototxt/torch_sin_config.prototxt");
  EXPECT_LT(train_loss(), 0.05);
  EXPECT_LT(test_loss(), 0.05);
  EXPECT_LT(train_test_gap(), 0.03);
}

TEST_F(TorchDemoTest, TrainingEffectTestUseTableSampling) {
  train_agent("../test/prototxt/torch_sin_cached_config.prototxt");
  EXPECT_LT(train_loss(), 0.05);
  EXPECT_LT(test_loss(), 0.05);
  EXPECT_LT(train_test_gap(), 0.03);
}

TEST_F(TorchDemoTest, ParamSizeTest) {
  init_agent(1, 1, 10, 5);
  EXPECT_EQ(agent->param_size(), 81);
  init_agent(2, 3, 10, 5);
  EXPECT_EQ(agent->param_size(), 103);
  init_agent(1, 1, 1, 1);
  EXPECT_EQ(agent->param_size(), 6);
  init_agent(100, 2, 256, 64);
  EXPECT_EQ(agent->param_size(), 42434);
}

} // namespace
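As a sanity check on the expected values in ParamSizeTest: each torch::nn::Linear(in, out) contributes in * out weights plus out biases, so a hypothetical helper mirroring the fixture's fc1 -> fc2 -> fc3 layout would be:

// Hypothetical helper, not part of the library: parameter count of the
// three-layer model used by the fixture above.
int64_t expected_param_size(int in_dim, int out_dim, int h1, int h2) {
  return int64_t(in_dim) * h1 + h1          // fc1
       + int64_t(h1) * h2 + h2              // fc2
       + int64_t(h2) * out_dim + out_dim;   // fc3
}
// expected_param_size(1, 1, 10, 5) == 81 and
// expected_param_size(100, 2, 256, 64) == 42434, matching the EXPECT_EQ values.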
@@ -20,9 +20,9 @@ namespace DeepES {

// Tests the Utils::compute_centered_ranks() method.
TEST(UtilsTest, Method_compute_centered_ranks) {
  float a[5] = {9.0, 8.0, 7.0, 6.0, 5.0};
  std::vector<float> reward_vec(a, a + 5);
  EXPECT_EQ(compute_centered_ranks(reward_vec), true);
}
......