From 31ee4fa56c3627413107cfeaef01e0826ae2e78b Mon Sep 17 00:00:00 2001 From: rical730 Date: Tue, 7 Apr 2020 20:43:27 +0800 Subject: [PATCH] add gaussian sampling method with noise table (#241) * add gaussian sampling method with noise table * add table sampling test and agent param_size test * add model param_size test * get param size of paddle demo * rename gaussian_table_sampling to cached_gaussian_sampling * add unittest to sampling method and uniform indentation to 2 spaces Co-authored-by: TomorrowIsAnOtherDay <2466956298@qq.com> --- deepes/benchmark/cartpole.h | 132 ++++++------ deepes/demo/cartpole_config.prototxt | 2 + deepes/include/adam_optimizer.h | 7 +- deepes/include/cached_gaussian_sampling.h | 78 +++++++ deepes/include/gaussian_sampling.h | 79 ++++--- deepes/include/optimizer.h | 6 +- deepes/include/optimizer_factory.h | 1 + deepes/include/paddle/es_agent.h | 7 +- deepes/include/sampling_factory.h | 36 ++++ deepes/include/sampling_method.h | 84 ++++---- deepes/include/sgd_optimizer.h | 6 +- deepes/include/torch/es_agent.h | 22 +- deepes/include/utils.h | 1 + deepes/src/adam_optimizer.cc | 47 +++-- deepes/src/cached_gaussian_sampling.cc | 103 +++++++++ deepes/src/gaussian_sampling.cc | 58 +++--- deepes/src/optimizer_factory.cc | 12 +- deepes/src/paddle/es_agent.cc | 14 +- deepes/src/proto/deepes.proto | 2 + deepes/src/sampling_factory.cc | 39 ++++ deepes/src/sgd_optimizer.cc | 25 ++- deepes/test/CMakeLists.txt | 6 +- deepes/test/include/torch_demo_model.h | 17 +- .../prototxt/torch_sin_cached_config.prototxt | 16 ++ .../{ => prototxt}/torch_sin_config.prototxt | 1 + deepes/test/run_test.sh | 10 +- deepes/test/src/optimizers_test.cc | 56 ++--- deepes/test/src/sampling_test.cc | 117 +++++++++++ deepes/test/src/torch_agent_test.cc | 195 ++++++++++-------- deepes/test/src/utils_test.cc | 6 +- 30 files changed, 821 insertions(+), 364 deletions(-) create mode 100644 deepes/include/cached_gaussian_sampling.h create mode 100644 deepes/include/sampling_factory.h create mode 100644 deepes/src/cached_gaussian_sampling.cc create mode 100644 deepes/src/sampling_factory.cc create mode 100644 deepes/test/prototxt/torch_sin_cached_config.prototxt rename deepes/test/{ => prototxt}/torch_sin_config.prototxt (90%) create mode 100644 deepes/test/src/sampling_test.cc diff --git a/deepes/benchmark/cartpole.h b/deepes/benchmark/cartpole.h index 48d2e08..f289715 100644 --- a/deepes/benchmark/cartpole.h +++ b/deepes/benchmark/cartpole.h @@ -9,38 +9,38 @@ const double kPi = 3.1415926535898; class CartPole { public: - double gravity = 9.8; - double masscart = 1.0; - double masspole = 0.1; - double total_mass = (masspole + masscart); - double length = 0.5; // actually half the pole's length; - double polemass_length = (masspole * length); - double force_mag = 10.0; - double tau = 0.02; // seconds between state updates; + double gravity = 9.8; + double masscart = 1.0; + double masspole = 0.1; + double total_mass = (masspole + masscart); + double length = 0.5; // actually half the pole's length; + double polemass_length = (masspole * length); + double force_mag = 10.0; + double tau = 0.02; // seconds between state updates; - // Angle at which to fail the episode - double theta_threshold_radians = 12 * 2 * kPi / 360; - double x_threshold = 2.4; - int steps_beyond_done = -1; + // Angle at which to fail the episode + double theta_threshold_radians = 12 * 2 * kPi / 360; + double x_threshold = 2.4; + int steps_beyond_done = -1; std::vector state = {0, 0, 0, 0}; - double reward; - bool done; - int step_ = 0; + double reward; + bool done; + int step_ = 0; - const float* getState() { - return state.data(); - } + const float* getState() { + return state.data(); + } - double getReward() { - return reward; - } + double getReward() { + return reward; + } - double isDone() { - return done; - } + double isDone() { + return done; + } - void reset() { + void reset() { std::random_device rd; std::default_random_engine generator(rd()); std::uniform_real_distribution distribution(-0.05, 0.05); @@ -48,53 +48,51 @@ public: state[i] = distribution(generator); } - steps_beyond_done = -1; - step_ = 0; - } + steps_beyond_done = -1; + step_ = 0; + } - CartPole() { - reset(); - } + CartPole() { + reset(); + } - void step(int action) { - float x = state[0]; - float x_dot = state[1]; - float theta = state[2]; - float theta_dot = state[3]; + void step(int action) { + float x = state[0]; + float x_dot = state[1]; + float theta = state[2]; + float theta_dot = state[3]; - auto force = (action == 1) ? force_mag : -force_mag; - auto costheta = std::cos(theta); - auto sintheta = std::sin(theta); - auto temp = (force + polemass_length * theta_dot * theta_dot * sintheta) / - total_mass; - auto thetaacc = (gravity * sintheta - costheta * temp) / - (length * (4.0 / 3.0 - masspole * costheta * costheta / total_mass)); - auto xacc = temp - polemass_length * thetaacc * costheta / total_mass; + auto force = (action == 1) ? force_mag : -force_mag; + auto costheta = std::cos(theta); + auto sintheta = std::sin(theta); + auto temp = (force + polemass_length * theta_dot * theta_dot * sintheta) / + total_mass; + auto thetaacc = (gravity * sintheta - costheta * temp) / + (length * (4.0 / 3.0 - masspole * costheta * costheta / total_mass)); + auto xacc = temp - polemass_length * thetaacc * costheta / total_mass; - x = x + tau * x_dot; - x_dot = x_dot + tau * xacc; - theta = theta + tau * theta_dot; - theta_dot = theta_dot + tau * thetaacc; + x = x + tau * x_dot; + x_dot = x_dot + tau * xacc; + theta = theta + tau * theta_dot; + theta_dot = theta_dot + tau * thetaacc; - state = {x, x_dot, theta, theta_dot}; + state = {x, x_dot, theta, theta_dot}; - done = x < -x_threshold || x > x_threshold || - theta < -theta_threshold_radians || theta > theta_threshold_radians || - step_ > 200; + done = x < -x_threshold || x > x_threshold || + theta < -theta_threshold_radians || theta > theta_threshold_radians || + step_ > 200; - if (!done) { - reward = 1.0; - } - else if (steps_beyond_done == -1) { - // Pole just fell! - steps_beyond_done = 0; - reward = 0; - } - else { - if (steps_beyond_done == 0) { - assert(false); // Can't do this - } - } - step_++; - } + if (!done) { + reward = 1.0; + } else if (steps_beyond_done == -1) { + // Pole just fell! + steps_beyond_done = 0; + reward = 0; + } else { + if (steps_beyond_done == 0) { + assert(false); // Can't do this + } + } + step_++; + } }; diff --git a/deepes/demo/cartpole_config.prototxt b/deepes/demo/cartpole_config.prototxt index 03cc5fb..a1f9948 100644 --- a/deepes/demo/cartpole_config.prototxt +++ b/deepes/demo/cartpole_config.prototxt @@ -1,6 +1,8 @@ seed: 1024 gaussian_sampling { std: 0.5 + cached: true + cache_size : 100000 } optimizer { type: "Adam" diff --git a/deepes/include/adam_optimizer.h b/deepes/include/adam_optimizer.h index 995fa00..ab00557 100644 --- a/deepes/include/adam_optimizer.h +++ b/deepes/include/adam_optimizer.h @@ -12,11 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include "optimizer.h" #ifndef ADAM_OPTIMIZER_H #define ADAM_OPTIMIZER_H + +#include +#include +#include "optimizer.h" + namespace DeepES{ /*@brief AdamOptimizer. diff --git a/deepes/include/cached_gaussian_sampling.h b/deepes/include/cached_gaussian_sampling.h new file mode 100644 index 0000000..af0a6ee --- /dev/null +++ b/deepes/include/cached_gaussian_sampling.h @@ -0,0 +1,78 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef CACHED_GAUSSIAN_SAMPLING_H +#define CACHED_GAUSSIAN_SAMPLING_H + +#include +#include +#include +#include +#include "sampling_method.h" +#include "utils.h" +#include + +namespace DeepES{ + +class CachedGaussianSampling: public SamplingMethod { + +public: + CachedGaussianSampling(); + + ~CachedGaussianSampling(); + + /*Initialize the sampling algorithm given the config with the protobuf format. + *DeepES library uses only one configuration file for all sampling algorithms. + A defalut configuration file can be found at: . // TODO: where? + Usally you won't have to modify the configuration items of other algorithms + if you are not using them. + */ + bool load_config(const DeepESConfig& config); + + /*@brief generate Gaussian noise and the related key. + * + *@Args: + * key: a unique key associated with the sampled noise. + * noise: a pointer pointed to the memory that stores the noise + * size: the number of float to be sampled. + * + *@return: + * success: generate Gaussian successfully or not. + */ + bool sampling(int* key, float* noise, int64_t size); + + /*@brief reconstruct the Gaussion noise given the key. + * This function is often used for updating the neuron network parameters in the offline environment. + * + *@Args: + * key: a unique key associated with the sampled noise. + * noise: a pointer pointed to the memory that stores the noise + * size: the number of float to be sampled. + * + *@return: + * success: reconstruct Gaussian successfully or not. + */ + bool resampling(int key, float* noise, int64_t size); + +private: + float _std; + int _cache_size; + float* _noise_cache = nullptr; + + bool _create_noise_cache(); +}; + +} + +#endif diff --git a/deepes/include/gaussian_sampling.h b/deepes/include/gaussian_sampling.h index 82c58e5..9dae3a5 100644 --- a/deepes/include/gaussian_sampling.h +++ b/deepes/include/gaussian_sampling.h @@ -12,49 +12,60 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _GAUSSIAN_SAMPLING_H -#define _GAUSSIAN_SAMPLING_H +#ifndef GAUSSIAN_SAMPLING_H +#define GAUSSIAN_SAMPLING_H + +#include +#include +#include +#include #include "sampling_method.h" +#include "utils.h" namespace DeepES{ class GaussianSampling: public SamplingMethod { public: - GaussianSampling() {} - - ~GaussianSampling() {} - /*Initialize the sampling algorithm given the config with the protobuf format. - *DeepES library uses only one configuration file for all sampling algorithms. A defalut - configuration file can be found at: . Usally you won't have to modify the configuration items of other algorithms - if you are not using them. - */ - void load_config(const DeepESConfig& config); - - /*@brief add Gaussian noise to the parameter. - * - *@Args: - * param: a pointer pointed to the memory of the parameter. - * size: the number of floats of the parameter. - * noisy_param: The pointer pointed to updated parameter. - * - *@return: - * success: load configuration successfully or not. - */ - int sampling(float* noise, int64_t size); - - /*@brief reconstruct the Gaussion noise given the key. - * This function is often used for updating the neuron network parameters in the offline environment. - * - *@Args: - * key: a unique key associated with the sampled noise. - * noise: a pointer pointed to the memory that stores the noise - * size: the number of float to be sampled. - */ - bool resampling(int key, float* noise, int64_t size); + GaussianSampling() {} + + ~GaussianSampling() {} + + /*Initialize the sampling algorithm given the config with the protobuf format. + *DeepES library uses only one configuration file for all sampling algorithms. + A defalut configuration file can be found at: . // TODO: where? + Usally you won't have to modify the configuration items of other algorithms + if you are not using them. + */ + bool load_config(const DeepESConfig& config); + + /*@brief generate Gaussian noise and the related key. + * + *@Args: + * key: a unique key associated with the sampled noise. + * noise: a pointer pointed to the memory that stores the noise + * size: the number of float to be sampled. + * + *@return: + * success: generate Gaussian successfully or not. + */ + bool sampling(int* key, float* noise, int64_t size); + + /*@brief reconstruct the Gaussion noise given the key. + * This function is often used for updating the neuron network parameters in the offline environment. + * + *@Args: + * key: a unique key associated with the sampled noise. + * noise: a pointer pointed to the memory that stores the noise + * size: the number of float to be sampled. + * + *@return: + * success: reconstruct Gaussian successfully or not. + */ + bool resampling(int key, float* noise, int64_t size); private: - float _std; + float _std; }; } diff --git a/deepes/include/optimizer.h b/deepes/include/optimizer.h index eb790c5..05e4309 100644 --- a/deepes/include/optimizer.h +++ b/deepes/include/optimizer.h @@ -12,11 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifndef OPTIMIZER_H +#define OPTIMIZER_H + #include #include -#ifndef OPTIMIZER_H -#define OPTIMIZER_H + namespace DeepES{ /*@brief Optimizer. Base class for optimizers. diff --git a/deepes/include/optimizer_factory.h b/deepes/include/optimizer_factory.h index cf8851f..06e0dd7 100644 --- a/deepes/include/optimizer_factory.h +++ b/deepes/include/optimizer_factory.h @@ -21,6 +21,7 @@ #include "sgd_optimizer.h" #include "adam_optimizer.h" #include "deepes.pb.h" +#include namespace DeepES{ /* @brief: create an optimizer according to the configuration" diff --git a/deepes/include/paddle/es_agent.h b/deepes/include/paddle/es_agent.h index ffe27fb..efda231 100644 --- a/deepes/include/paddle/es_agent.h +++ b/deepes/include/paddle/es_agent.h @@ -17,8 +17,8 @@ #include "paddle_api.h" #include "optimizer_factory.h" +#include "sampling_factory.h" #include "utils.h" -#include "gaussian_sampling.h" #include "deepes.pb.h" #include @@ -73,6 +73,11 @@ class ESAgent { * if _is_sampling_agent is false, will return predictor without added noise. */ std::shared_ptr get_predictor(); + + // get param size of model + int64_t param_size() { + return _param_size; + } diff --git a/deepes/include/sampling_factory.h b/deepes/include/sampling_factory.h new file mode 100644 index 0000000..c071001 --- /dev/null +++ b/deepes/include/sampling_factory.h @@ -0,0 +1,36 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SAMPLING_FACTORY_H +#define SAMPLING_FACTORY_H + +#include +#include +#include "sampling_method.h" +#include "gaussian_sampling.h" +#include "cached_gaussian_sampling.h" +#include "deepes.pb.h" +#include + +namespace DeepES{ +/* @brief: create an sampling_method according to the configuration" + * @args: + * config: configuration for the DeepES + * + */ +std::shared_ptr create_sampling_method(const DeepESConfig& Config); + +}//namespace + +#endif diff --git a/deepes/include/sampling_method.h b/deepes/include/sampling_method.h index 835c8d7..8f359ac 100644 --- a/deepes/include/sampling_method.h +++ b/deepes/include/sampling_method.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef _SAMPLING_METHOD_H -#define _SAMPLING_METHOD_H +#ifndef SAMPLING_METHOD_H +#define SAMPLING_METHOD_H #include #include @@ -34,51 +34,55 @@ class SamplingMethod{ public: - SamplingMethod(): _seed(0) {} + SamplingMethod(): _seed(0) {} - virtual ~SamplingMethod() {} + virtual ~SamplingMethod() {} - /*Initialize the sampling algorithm given the config with the protobuf format. - *DeepES library uses only one configuration file for all sampling algorithms. A defalut - configuration file can be found at: . Usally you won't have to modify the configuration items of other algorithms - if you are not using them. - */ - virtual void load_config(const DeepESConfig& config)=0; + /*Initialize the sampling algorithm given the config with the protobuf format. + *DeepES library uses only one configuration file for all sampling algorithms. + A defalut configuration file can be found at: . // TODO: where? + Usally you won't have to modify the configuration items of other algorithms + if you are not using them. + */ + virtual bool load_config(const DeepESConfig& config)=0; - /*@brief add Gaussian noise to the parameter. - * - *@Args: - * param: a pointer pointed to the memory of the parameter. - * size: the number of floats of the parameter. - * noisy_param: The pointer pointed to updated parameter. - * - *@return: - * success: load configuration successfully or not. - */ - virtual int sampling(float* noise, int64_t size)=0; + /*@brief generate Gaussian noise and the related key. + * + *@Args: + * key: a unique key associated with the sampled noise. + * noise: a pointer pointed to the memory that stores the noise + * size: the number of float to be sampled. + * + *@return: + * success: generate Gaussian successfully or not. + */ + virtual bool sampling(int* key, float* noise, int64_t size)=0; - /*@brief reconstruct the Gaussion noise given the key. - * This function is often used for updating the neuron network parameters in the offline environment. - * - *@Args: - * key: a unique key associated with the sampled noise. - * noise: a pointer pointed to the memory that stores the noise - * size: the number of float to be sampled. - */ - virtual bool resampling(int key, float* noise, int64_t size)=0; - - bool set_seed(int seed) { - _seed = seed; - srand(_seed); - return true; - } + /*@brief reconstruct the Gaussion noise given the key. + * This function is often used for updating the neuron network parameters in the offline environment. + * + *@Args: + * key: a unique key associated with the sampled noise. + * noise: a pointer pointed to the memory that stores the noise + * size: the number of float to be sampled. + * + *@return: + * success: reconstruct Gaussian successfully or not. + */ + virtual bool resampling(int key, float* noise, int64_t size)=0; + + bool set_seed(int seed) { + _seed = seed; + srand(_seed); + return true; + } - int get_seed() { - return _seed; - } + int get_seed() { + return _seed; + } protected: - int _seed; + int _seed; }; diff --git a/deepes/include/sgd_optimizer.h b/deepes/include/sgd_optimizer.h index 6176902..8e98983 100644 --- a/deepes/include/sgd_optimizer.h +++ b/deepes/include/sgd_optimizer.h @@ -12,11 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifndef SGD_OPTIMIZER_H +#define SGD_OPTIMIZER_H + #include +#include #include "optimizer.h" -#ifndef SGD_OPTIMIZER_H -#define SGD_OPTIMIZER_H namespace DeepES{ /*@brief SGDOptimizer. diff --git a/deepes/include/torch/es_agent.h b/deepes/include/torch/es_agent.h index c4fc821..f0c4365 100644 --- a/deepes/include/torch/es_agent.h +++ b/deepes/include/torch/es_agent.h @@ -18,8 +18,8 @@ #include #include #include "optimizer_factory.h" +#include "sampling_factory.h" #include "utils.h" -#include "gaussian_sampling.h" #include "deepes.pb.h" namespace DeepES{ @@ -47,8 +47,7 @@ public: _is_sampling_agent = false; _config = std::make_shared(); load_proto_conf(config_path, *_config); - _sampling_method = std::make_shared(); - _sampling_method->load_config(*_config); + _sampling_method = create_sampling_method(*_config); _optimizer = create_optimizer(_config->optimizer()); // Origin agent can't be used to sample, so keep it same with _model for evaluating. _sampling_model = model; @@ -111,6 +110,7 @@ public: int key = noisy_info[i].key(0); float reward = noisy_rewards[i]; bool success = _sampling_method->resampling(key, _noise, _param_size); + CHECK(success) << "[DeepES] resampling error occurs at sample: " << i; for (int64_t j = 0; j < _param_size; ++j) { _neg_gradients[j] += _noise[j] * reward; } @@ -134,14 +134,18 @@ public: // copied parameters = original parameters + noise bool add_noise(SamplingInfo& sampling_info) { + bool success = true; if (!_is_sampling_agent) { LOG(ERROR) << "[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent."; - return false; + success = false; + return success; } auto sampling_params = _sampling_model->named_parameters(); auto params = _model->named_parameters(); - int key = _sampling_method->sampling(_noise, _param_size); + int key = 0; + success = _sampling_method->sampling(&key, _noise, _param_size); + CHECK(success) << "[DeepES] sampling error occurs while add_noise."; sampling_info.add_key(key); int64_t counter = 0; for (auto& param: sampling_params) { @@ -155,10 +159,14 @@ public: } counter += tensor.size(0); } - return true; + return success; + } + + // get param size of model + int64_t param_size() { + return _param_size; } - private: int64_t _calculate_param_size() { diff --git a/deepes/include/utils.h b/deepes/include/utils.h index 76ba45b..481bd2c 100644 --- a/deepes/include/utils.h +++ b/deepes/include/utils.h @@ -14,6 +14,7 @@ #ifndef UTILS_H #define UTILS_H + #include #include #include diff --git a/deepes/src/adam_optimizer.cc b/deepes/src/adam_optimizer.cc index 608f916..f5eed5e 100644 --- a/deepes/src/adam_optimizer.cc +++ b/deepes/src/adam_optimizer.cc @@ -12,38 +12,37 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include "adam_optimizer.h" namespace DeepES { AdamOptimizer::~AdamOptimizer() { - for (auto iter = _momentum.begin(); iter != _momentum.end(); iter++) { - delete[] iter->second; - } - for (auto iter = _velocity.begin(); iter != _velocity.end(); iter++) { - delete[] iter->second; - } - _momentum.clear(); - _velocity.clear(); + for (auto iter = _momentum.begin(); iter != _momentum.end(); iter++) { + delete[] iter->second; + } + for (auto iter = _velocity.begin(); iter != _velocity.end(); iter++) { + delete[] iter->second; + } + _momentum.clear(); + _velocity.clear(); } void AdamOptimizer::compute_step(float* gradient, int size, std::string param_name="") { - if (_momentum.count(param_name) == 0) { - _momentum[param_name] = new float [size]; - memset(_momentum[param_name], 0, size * sizeof(float)); - } - if (_velocity.count(param_name) == 0) { - _velocity[param_name] = new float [size]; - memset(_velocity[param_name], 0, size * sizeof(float)); - } - int true_update_times = int(_update_times / _velocity.size()); - float alpha = std::sqrt(1 - std::pow(_beta2, _update_times)) / (1 - std::pow(_beta1, _update_times)); - for (int i = 0; i < size; ++i) { - _momentum[param_name][i] = _beta1 * _momentum[param_name][i] + (1 - _beta1) * gradient[i]; - _velocity[param_name][i] = _beta2 * _velocity[param_name][i] + (1 - _beta2) * gradient[i] * gradient[i]; - gradient[i] = alpha * _momentum[param_name][i] / (std::sqrt(_velocity[param_name][i]) + _epsilon); - } + if (_momentum.count(param_name) == 0) { + _momentum[param_name] = new float [size]; + memset(_momentum[param_name], 0, size * sizeof(float)); + } + if (_velocity.count(param_name) == 0) { + _velocity[param_name] = new float [size]; + memset(_velocity[param_name], 0, size * sizeof(float)); + } + int true_update_times = int(_update_times / _velocity.size()); + float alpha = std::sqrt(1 - std::pow(_beta2, _update_times)) / (1 - std::pow(_beta1, _update_times)); + for (int i = 0; i < size; ++i) { + _momentum[param_name][i] = _beta1 * _momentum[param_name][i] + (1 - _beta1) * gradient[i]; + _velocity[param_name][i] = _beta2 * _velocity[param_name][i] + (1 - _beta2) * gradient[i] * gradient[i]; + gradient[i] = alpha * _momentum[param_name][i] / (std::sqrt(_velocity[param_name][i]) + _epsilon); + } } }//namespace diff --git a/deepes/src/cached_gaussian_sampling.cc b/deepes/src/cached_gaussian_sampling.cc new file mode 100644 index 0000000..84667af --- /dev/null +++ b/deepes/src/cached_gaussian_sampling.cc @@ -0,0 +1,103 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cached_gaussian_sampling.h" + +namespace DeepES{ + +CachedGaussianSampling::CachedGaussianSampling() {} + +CachedGaussianSampling::~CachedGaussianSampling() { + delete[] _noise_cache; +} + +bool CachedGaussianSampling::load_config(const DeepESConfig& config) { + bool success = true; + _std = config.gaussian_sampling().std(); + success = set_seed(config.seed()); + CHECK(success) << "[DeepES] Fail to set seed while load config."; + _cache_size = config.gaussian_sampling().cache_size(); + _noise_cache = new float [_cache_size]; + memset(_noise_cache, 0, _cache_size * sizeof(float)); + success = _create_noise_cache(); + CHECK(success) << "[DeepES] Fail to create noise_cache while load config."; + return success; +} + +bool CachedGaussianSampling::sampling(int* key, float* noise, int64_t size) { + bool success = true; + if (_noise_cache == nullptr) { + LOG(ERROR) << "[DeepES] Please use load_config() first."; + success = false; + return success; + } + if (noise == nullptr) { + LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr."; + success = false; + return success; + } + if ((size >= _cache_size) || (size < 0)) { + LOG(ERROR) << "[DeepES] Input size " << size << " is out of bounds [0, " << _cache_size << "), cache_size: " << _cache_size; + success = false; + return success; + } + int rand_key = rand(); + std::default_random_engine generator(rand_key); + std::uniform_int_distribution uniform(0, _cache_size - size); + int index = uniform(generator); + *key = index; + for (int64_t i = 0; i < size; ++i) { + *(noise + i) = *(_noise_cache + index + i); + } + return success; +} + +bool CachedGaussianSampling::resampling(int key, float* noise, int64_t size) { + bool success = true; + if (_noise_cache == nullptr) { + LOG(ERROR) << "[DeepES] Please use load_config() first."; + success = false; + return success; + } + if (noise == nullptr) { + LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr."; + success = false; + return success; + } + if ((size >= _cache_size) || (size < 0)) { + LOG(ERROR) << "[DeepES] Input size " << size << " is out of bounds [0, " << _cache_size << "), cache_size: " << _cache_size; + success = false; + return success; + } + if ((key > _cache_size - size) || (key < 0)) { + LOG(ERROR) << "[DeepES] Resampling key " << key << " is out of bounds [0, " << _cache_size - size << "], cache_size: " << _cache_size << ", size: " << size; + success = false; + return success; + } + for (int64_t i = 0; i < size; ++i) { + *(noise + i) = *(_noise_cache + key + i); + } + return success; +} + +bool CachedGaussianSampling::_create_noise_cache() { + std::default_random_engine generator(_seed); + std::normal_distribution norm; + for (int64_t i = 0; i < _cache_size; ++i) { + *(_noise_cache + i) = norm(generator) * _std; + } + return true; +} + +} diff --git a/deepes/src/gaussian_sampling.cc b/deepes/src/gaussian_sampling.cc index f44dd5a..e0e8542 100644 --- a/deepes/src/gaussian_sampling.cc +++ b/deepes/src/gaussian_sampling.cc @@ -12,43 +12,47 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include -#include -#include #include "gaussian_sampling.h" -#include "utils.h" namespace DeepES{ -void GaussianSampling::load_config(const DeepESConfig& config) { - _std = config.gaussian_sampling().std(); - set_seed(config.seed()); +bool GaussianSampling::load_config(const DeepESConfig& config) { + bool success = true; + _std = config.gaussian_sampling().std(); + success = set_seed(config.seed()); + return success; } -int GaussianSampling::sampling(float* noise, int64_t size) { - int key = rand(); - std::default_random_engine generator(key); - std::normal_distribution norm; - for (int64_t i = 0; i < size; ++i) { - *(noise + i) = norm(generator) * _std; - } - return key; +bool GaussianSampling::sampling(int* key, float* noise, int64_t size) { + bool success = true; + if (noise == nullptr) { + LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr."; + success = false; + return success; + } + int rand_key = rand(); + *key = rand_key; + std::default_random_engine generator(rand_key); + std::normal_distribution norm; + for (int64_t i = 0; i < size; ++i) { + *(noise + i) = norm(generator) * _std; + } + return success; } bool GaussianSampling::resampling(int key, float* noise, int64_t size) { - bool success = true; - if (noise == nullptr) { - success = false; - } - else { - std::default_random_engine generator(key); - std::normal_distribution norm; - for (int64_t i = 0; i < size; ++i) { - *(noise + i) = norm(generator) * _std; - } + bool success = true; + if (noise == nullptr) { + LOG(ERROR) << "[DeepES] Input noise array cannot be nullptr."; + success = false; + } else { + std::default_random_engine generator(key); + std::normal_distribution norm; + for (int64_t i = 0; i < size; ++i) { + *(noise + i) = norm(generator) * _std; } - return success; + } + return success; } } diff --git a/deepes/src/optimizer_factory.cc b/deepes/src/optimizer_factory.cc index 0841904..c873eee 100644 --- a/deepes/src/optimizer_factory.cc +++ b/deepes/src/optimizer_factory.cc @@ -24,13 +24,13 @@ std::shared_ptr create_optimizer(const OptimizerConfig& optimizer_con if (opt_type == "sgd") { optimizer = std::make_shared(optimizer_config.base_lr(), \ optimizer_config.momentum()); - }else if (opt_type == "adam") { + } else if (opt_type == "adam") { optimizer = std::make_shared(optimizer_config.base_lr(), \ - optimizer_config.beta1(), \ - optimizer_config.beta2(), \ - optimizer_config.epsilon()); - }else { - // TODO: NotImplementedError + optimizer_config.beta1(), \ + optimizer_config.beta2(), \ + optimizer_config.epsilon()); + } else { + LOG(ERROR) << "type of OptimizerConfig must be SGD or Adam."; // NotImplementedError } return optimizer; } diff --git a/deepes/src/paddle/es_agent.cc b/deepes/src/paddle/es_agent.cc index 2593472..8262191 100644 --- a/deepes/src/paddle/es_agent.cc +++ b/deepes/src/paddle/es_agent.cc @@ -52,8 +52,7 @@ ESAgent::ESAgent(const std::string& model_dir, const std::string& config_path) { _config = std::make_shared(); load_proto_conf(config_path, *_config); - _sampling_method = std::make_shared(); - _sampling_method->load_config(*_config); + _sampling_method = create_sampling_method(*_config); _optimizer = create_optimizer(_config->optimizer()); @@ -101,6 +100,7 @@ bool ESAgent::update( int key = noisy_info[i].key(0); float reward = noisy_rewards[i]; bool success = _sampling_method->resampling(key, _noise, _param_size); + CHECK(success) << "[DeepES] resampling error occurs at sample: " << i; for (int64_t j = 0; j < _param_size; ++j) { _neg_gradients[j] += _noise[j] * reward; } @@ -123,12 +123,16 @@ bool ESAgent::update( } bool ESAgent::add_noise(SamplingInfo& sampling_info) { + bool success = true; if (!_is_sampling_agent) { LOG(ERROR) << "[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent."; - return false; + success = false; + return success; } - int key = _sampling_method->sampling(_noise, _param_size); + int key = 0; + success = _sampling_method->sampling(&key, _noise, _param_size); + CHECK(success) << "[DeepES] sampling error occurs while add_noise."; int model_iter_id = _config->async_es().model_iter_id(); sampling_info.add_key(key); sampling_info.set_model_iter_id(model_iter_id); @@ -144,7 +148,7 @@ bool ESAgent::add_noise(SamplingInfo& sampling_info) { counter += tensor_size; } - return true; + return success; } std::shared_ptr ESAgent::get_predictor() { diff --git a/deepes/src/proto/deepes.proto b/deepes/src/proto/deepes.proto index b839ef2..c6c2519 100644 --- a/deepes/src/proto/deepes.proto +++ b/deepes/src/proto/deepes.proto @@ -29,6 +29,8 @@ message DeepESConfig { message GaussianSamplingConfig { optional float std = 1 [default = 1.0]; + optional bool cached = 2 [default = false]; + optional int32 cache_size = 3 [default = 100000]; } message OptimizerConfig{ diff --git a/deepes/src/sampling_factory.cc b/deepes/src/sampling_factory.cc new file mode 100644 index 0000000..44ff84b --- /dev/null +++ b/deepes/src/sampling_factory.cc @@ -0,0 +1,39 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "sampling_factory.h" + +namespace DeepES{ + + +std::shared_ptr create_sampling_method(const DeepESConfig& config) { + std::shared_ptr sampling_method; + bool cached = config.gaussian_sampling().cached(); + if (cached) { + sampling_method = std::make_shared(); + } else { + sampling_method = std::make_shared(); + } + + bool success = sampling_method->load_config(config); + if(success) { + return sampling_method; + } else { + LOG(ERROR) << "[DeepES] Fail to create sampling_method"; + return nullptr; + } + +} + +}//namespace diff --git a/deepes/src/sgd_optimizer.cc b/deepes/src/sgd_optimizer.cc index 06a65b6..2dbf272 100644 --- a/deepes/src/sgd_optimizer.cc +++ b/deepes/src/sgd_optimizer.cc @@ -12,27 +12,26 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include "sgd_optimizer.h" namespace DeepES { SGDOptimizer::~SGDOptimizer() { - for (auto iter = _velocity.begin(); iter != _velocity.end(); iter++) { - delete[] iter->second; - } - _velocity.clear(); + for (auto iter = _velocity.begin(); iter != _velocity.end(); iter++) { + delete[] iter->second; + } + _velocity.clear(); } void SGDOptimizer::compute_step(float* gradient, int size, std::string param_name="") { - if (_velocity.count(param_name) == 0) { - _velocity[param_name] = new float [size]; - memset(_velocity[param_name], 0, size * sizeof(float)); - } - for (int i = 0; i < size; ++i) { - _velocity[param_name][i] = _momentum * _velocity[param_name][i] + (1 - _momentum) * gradient[i]; - gradient[i] = _velocity[param_name][i]; - } + if (_velocity.count(param_name) == 0) { + _velocity[param_name] = new float [size]; + memset(_velocity[param_name], 0, size * sizeof(float)); + } + for (int i = 0; i < size; ++i) { + _velocity[param_name][i] = _momentum * _velocity[param_name][i] + (1 - _momentum) * gradient[i]; + gradient[i] = _velocity[param_name][i]; + } } diff --git a/deepes/test/CMakeLists.txt b/deepes/test/CMakeLists.txt index defcb66..ebd2907 100644 --- a/deepes/test/CMakeLists.txt +++ b/deepes/test/CMakeLists.txt @@ -10,9 +10,9 @@ set(CMAKE_CXX_EXTENSIONS OFF) find_package(GTest REQUIRED) find_package(OpenMP) if (OPENMP_FOUND) - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") endif() # Torch lib diff --git a/deepes/test/include/torch_demo_model.h b/deepes/test/include/torch_demo_model.h index 709e28d..cf9d340 100644 --- a/deepes/test/include/torch_demo_model.h +++ b/deepes/test/include/torch_demo_model.h @@ -21,15 +21,14 @@ struct Model : public torch::nn::Module{ Model() = delete; - Model(const int obs_dim, const int act_dim) { - + Model(const int obs_dim, const int act_dim, const int h1_size, const int h2_size) { _obs_dim = obs_dim; _act_dim = act_dim; - int hid1_size = 30; - int hid2_size = 15; - fc1 = register_module("fc1", torch::nn::Linear(obs_dim, hid1_size)); - fc2 = register_module("fc2", torch::nn::Linear(hid1_size, hid2_size)); - fc3 = register_module("fc3", torch::nn::Linear(hid2_size, act_dim)); + _h1_size = h1_size; + _h2_size = h2_size; + fc1 = register_module("fc1", torch::nn::Linear(obs_dim, h1_size)); + fc2 = register_module("fc2", torch::nn::Linear(h1_size, h2_size)); + fc3 = register_module("fc3", torch::nn::Linear(h2_size, act_dim)); } torch::Tensor forward(torch::Tensor x) { @@ -41,7 +40,7 @@ struct Model : public torch::nn::Module{ } std::shared_ptr clone() { - std::shared_ptr model = std::make_shared(_obs_dim, _act_dim); + std::shared_ptr model = std::make_shared(_obs_dim, _act_dim, _h1_size, _h2_size); std::vector parameters1 = parameters(); std::vector parameters2 = model->parameters(); for (int i = 0; i < parameters1.size(); ++i) { @@ -58,6 +57,8 @@ struct Model : public torch::nn::Module{ int _act_dim; int _obs_dim; + int _h1_size; + int _h2_size; torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr}; }; diff --git a/deepes/test/prototxt/torch_sin_cached_config.prototxt b/deepes/test/prototxt/torch_sin_cached_config.prototxt new file mode 100644 index 0000000..6fe80b1 --- /dev/null +++ b/deepes/test/prototxt/torch_sin_cached_config.prototxt @@ -0,0 +1,16 @@ +seed : 1024 + +gaussian_sampling { + std: 0.005 + cached: true + cache_size : 100000 +} + +optimizer { + type: "Adam", + base_lr: 0.005, + momentum: 0.9, + beta1: 0.9, + beta2: 0.999, + epsilon: 1e-8, +} diff --git a/deepes/test/torch_sin_config.prototxt b/deepes/test/prototxt/torch_sin_config.prototxt similarity index 90% rename from deepes/test/torch_sin_config.prototxt rename to deepes/test/prototxt/torch_sin_config.prototxt index 6f4776b..3704d64 100644 --- a/deepes/test/torch_sin_config.prototxt +++ b/deepes/test/prototxt/torch_sin_config.prototxt @@ -2,6 +2,7 @@ seed : 1024 gaussian_sampling { std: 0.005 + cached: false } optimizer { diff --git a/deepes/test/run_test.sh b/deepes/test/run_test.sh index eb85170..34e6acb 100644 --- a/deepes/test/run_test.sh +++ b/deepes/test/run_test.sh @@ -4,11 +4,11 @@ export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH #---------------libtorch-------------# if [ ! -d "./libtorch" ];then echo "Cannot find the torch library: ../libtorch" - echo "Downloading Torch library" - wget -q https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip - unzip -q libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip - rm -rf libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip - echo "Torch library Downloaded" + echo "Downloading Torch library" + wget -q https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip + unzip -q libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip + rm -rf libtorch-cxx11-abi-shared-with-deps-1.4.0+cpu.zip + echo "Torch library Downloaded" fi #----------------protobuf-------------# diff --git a/deepes/test/src/optimizers_test.cc b/deepes/test/src/optimizers_test.cc index e589f47..b3372fd 100644 --- a/deepes/test/src/optimizers_test.cc +++ b/deepes/test/src/optimizers_test.cc @@ -22,38 +22,38 @@ namespace DeepES { TEST(SGDOptimizersTest, Method_update) { std::shared_ptr config = std::make_shared(); - auto optimizer_config = config->mutable_optimizer(); - optimizer_config->set_base_lr(1.0); - optimizer_config->set_type("sgd"); - std::shared_ptr optimizer = create_optimizer(config->optimizer()); - float sgd_wei[10] = { 0.0 , 0.0 , 0.04216444, 0.0511456 , 0.04231584, 0.01089015, 0.06569759, 0.00127421,-0.00092832, 0.01128081}; - float sgd_grad[10] = {-0.11992419,-0.0 , 0.07681337,-0.06616384, 0.00249889, 0.01158612,-0.3067452 , 0.36048946,-0.15820622,-0.20014143}; - float sgd_new[10] = { 0.01199242, 0.0 , 0.0344831 , 0.05776198, 0.04206595, 0.00973154, 0.09637211,-0.03477474, 0.014892306, 0.03129495}; - - EXPECT_TRUE(optimizer->update(sgd_wei, sgd_grad, 10, "test")); - for (int i = 0; i < 10; ++i) { - EXPECT_FLOAT_EQ(sgd_new[i], sgd_wei[i]) << " i: " << i ; - } - EXPECT_TRUE(optimizer->update(sgd_wei, sgd_grad, 10, "test")); - EXPECT_FALSE(optimizer->update(sgd_wei, sgd_grad, 9, "test")); + auto optimizer_config = config->mutable_optimizer(); + optimizer_config->set_base_lr(1.0); + optimizer_config->set_type("sgd"); + std::shared_ptr optimizer = create_optimizer(config->optimizer()); + float sgd_wei[10] = { 0.0 , 0.0 , 0.04216444, 0.0511456 , 0.04231584, 0.01089015, 0.06569759, 0.00127421,-0.00092832, 0.01128081}; + float sgd_grad[10] = {-0.11992419,-0.0 , 0.07681337,-0.06616384, 0.00249889, 0.01158612,-0.3067452 , 0.36048946,-0.15820622,-0.20014143}; + float sgd_new[10] = { 0.01199242, 0.0 , 0.0344831 , 0.05776198, 0.04206595, 0.00973154, 0.09637211,-0.03477474, 0.014892306, 0.03129495}; + + EXPECT_TRUE(optimizer->update(sgd_wei, sgd_grad, 10, "fc1")); + for (int i = 0; i < 10; ++i) { + EXPECT_FLOAT_EQ(sgd_new[i], sgd_wei[i]) << " i: " << i ; + } + EXPECT_TRUE(optimizer->update(sgd_wei, sgd_grad, 10, "fc1")); + EXPECT_FALSE(optimizer->update(sgd_wei, sgd_grad, 9, "fc1")); } TEST(AdamOptimizersTest, Method_update) { std::shared_ptr config = std::make_shared(); - auto optimizer_config = config->mutable_optimizer(); - optimizer_config->set_base_lr(1.0); - optimizer_config->set_type("adam"); - std::shared_ptr optimizer = create_optimizer(config->optimizer()); - float adam_wei[10] = { 0.0 , 0.0 , 0.04216444, 0.0511456 , 0.04231584, 0.01089015, 0.06569759, 0.00127421,-0.00092832, 0.01128081}; - float adam_grad[10] = {-0.11992419,-0.0 , 0.07681337,-0.06616384, 0.00249889, 0.01158612,-0.3067452 , 0.36048946,-0.15820622,-0.20014143}; - float adam_new[10] = { 0.99999736, 0. ,-0.95783144, 1.05114082,-0.95755763,-0.98908256, 1.06569656,-0.99872491, 0.99906968, 1.01127923}; - - EXPECT_TRUE(optimizer->update(adam_wei, adam_grad, 10, "test")); - for (int i = 0; i < 10; ++i) { - EXPECT_FLOAT_EQ(adam_new[i], adam_wei[i]) << " i: " << i ; - } - EXPECT_TRUE(optimizer->update(adam_wei, adam_grad, 10, "test")); - EXPECT_FALSE(optimizer->update(adam_wei, adam_grad, 9, "test")); + auto optimizer_config = config->mutable_optimizer(); + optimizer_config->set_base_lr(1.0); + optimizer_config->set_type("adam"); + std::shared_ptr optimizer = create_optimizer(config->optimizer()); + float adam_wei[10] = { 0.0 , 0.0 , 0.04216444, 0.0511456 , 0.04231584, 0.01089015, 0.06569759, 0.00127421,-0.00092832, 0.01128081}; + float adam_grad[10] = {-0.11992419,-0.0 , 0.07681337,-0.06616384, 0.00249889, 0.01158612,-0.3067452 , 0.36048946,-0.15820622,-0.20014143}; + float adam_new[10] = { 0.99999736, 0. ,-0.95783144, 1.05114082,-0.95755763,-0.98908256, 1.06569656,-0.99872491, 0.99906968, 1.01127923}; + + EXPECT_TRUE(optimizer->update(adam_wei, adam_grad, 10, "fc1")); + for (int i = 0; i < 10; ++i) { + EXPECT_FLOAT_EQ(adam_new[i], adam_wei[i]) << " i: " << i ; + } + EXPECT_TRUE(optimizer->update(adam_wei, adam_grad, 10, "fc1")); + EXPECT_FALSE(optimizer->update(adam_wei, adam_grad, 9, "fc1")); } } // namespace diff --git a/deepes/test/src/sampling_test.cc b/deepes/test/src/sampling_test.cc new file mode 100644 index 0000000..91d318d --- /dev/null +++ b/deepes/test/src/sampling_test.cc @@ -0,0 +1,117 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gtest/gtest.h" +#include +#include "sampling_method.h" +#include "gaussian_sampling.h" +#include "cached_gaussian_sampling.h" +#include + +namespace DeepES { + + +class SamplingTest : public ::testing::Test { + protected: + void init_sampling_method(bool cached) { + config = std::make_shared(); + config->set_seed(1024); + auto sampling_config = config->mutable_gaussian_sampling(); + sampling_config->set_std(1.0); + sampling_config->set_cached(cached); + sampling_config->set_cache_size(cache_size); + if (cached) { + sampler = std::make_shared(); + } else { + sampler = std::make_shared(); + } + } + + std::shared_ptr sampler; + std::shared_ptr config; + float array[3] = {1.0, 2.0, 3.0}; + int cache_size = 100; // default cache_size 100 + int key = 0; +}; + + +TEST_F(SamplingTest, GaussianSampling_load_config) { + init_sampling_method(false); + EXPECT_TRUE(sampler->load_config(*config)); +} + +TEST_F(SamplingTest, GaussianSampling_sampling) { + init_sampling_method(false); + sampler->load_config(*config); + + EXPECT_FALSE(sampler->sampling(&key, nullptr, 0)); + EXPECT_TRUE(sampler->sampling(&key, array, 3)); +} + +TEST_F(SamplingTest, GaussianSampling_resampling) { + init_sampling_method(false); + sampler->load_config(*config); + + EXPECT_FALSE(sampler->resampling(0, nullptr, 0)); + EXPECT_TRUE(sampler->resampling(0, array, 3)); +} + + +TEST_F(SamplingTest, CachedGaussianSampling_load_config) { + init_sampling_method(true); + EXPECT_TRUE(sampler->load_config(*config)); +} + +TEST_F(SamplingTest, CachedGaussianSampling_sampling) { + init_sampling_method(true); + EXPECT_FALSE(sampler->sampling(&key, array, 0)); + + sampler->load_config(*config); + + EXPECT_FALSE(sampler->sampling(&key, nullptr, 0)); + EXPECT_FALSE(sampler->sampling(&key, array, -1)); + EXPECT_FALSE(sampler->sampling(&key, array, cache_size)); + + EXPECT_TRUE(sampler->sampling(&key, array, 0)); + EXPECT_TRUE(sampler->sampling(&key, array, 3)); +} + +TEST_F(SamplingTest, CachedGaussianSampling_resampling) { + init_sampling_method(true); + EXPECT_FALSE(sampler->resampling(0, array, 0)); + + sampler->load_config(*config); + + EXPECT_FALSE(sampler->resampling(0, nullptr, 0)); + EXPECT_FALSE(sampler->resampling(0, array, -1)); + EXPECT_FALSE(sampler->resampling(0, array, cache_size)); + + EXPECT_TRUE(sampler->resampling(0, array, 0)); + EXPECT_TRUE(sampler->resampling(0, array, 1)); + EXPECT_TRUE(sampler->resampling(0, array, 2)); + + EXPECT_FALSE(sampler->resampling(-1, array, 3)); + EXPECT_TRUE(sampler->resampling(0, array, 3)); + EXPECT_TRUE(sampler->resampling(1, array, 3)); + EXPECT_TRUE(sampler->resampling(2, array, 3)); + EXPECT_TRUE(sampler->resampling(cache_size-3, array, 3)); + EXPECT_FALSE(sampler->resampling(cache_size-2, array, 3)); + EXPECT_FALSE(sampler->resampling(cache_size-1, array, 3)); + EXPECT_FALSE(sampler->resampling(cache_size, array, 3)); + EXPECT_FALSE(sampler->resampling(cache_size-3, array, cache_size-1)); +} + + +} // namespace + diff --git a/deepes/test/src/torch_agent_test.cc b/deepes/test/src/torch_agent_test.cc index a0aabf2..21ed301 100644 --- a/deepes/test/src/torch_agent_test.cc +++ b/deepes/test/src/torch_agent_test.cc @@ -32,105 +32,126 @@ namespace DeepES { // The fixture for testing class Foo. class TorchDemoTest : public ::testing::Test { protected: - float evaluate(std::vector& x_list, std::vector& y_list, int size, std::shared_ptr> agent) { - float total_loss = 0.0; - for (int i = 0; i < size; ++i) { - torch::Tensor x_input = torch::tensor(x_list[i], torch::dtype(torch::kFloat32)); - torch::Tensor predict_y = agent->predict(x_input); - auto pred_y = predict_y.accessor(); - float loss = pow((pred_y[0][0] - y_list[i]), 2); - total_loss += loss; - } - return -total_loss / float(size); + float evaluate(std::vector& x_list, std::vector& y_list, int size, std::shared_ptr> agent) { + float total_loss = 0.0; + for (int i = 0; i < size; ++i) { + torch::Tensor x_input = torch::tensor(x_list[i], torch::dtype(torch::kFloat32)); + torch::Tensor predict_y = agent->predict(x_input); + auto pred_y = predict_y.accessor(); + float loss = pow((pred_y[0][0] - y_list[i]), 2); + total_loss += loss; } - - float train_loss() { - return -1.0 * evaluate(x_list, y_list, train_data_size, agent); + return -total_loss / float(size); + } + + float train_loss() { + return -1.0 * evaluate(x_list, y_list, train_data_size, agent); + } + + float test_loss() { + return -1.0 * evaluate(test_x_list, test_y_list, test_data_size, agent); + } + + float train_test_gap() { + float train_lo = train_loss(); + float test_lo = test_loss(); + if ( train_lo > test_lo) { + return train_lo - test_lo; + } else { + return test_lo - train_lo; } - - float test_loss() { - return -1.0 * evaluate(test_x_list, test_y_list, test_data_size, agent); + } + + void init_agent(const int in_dim, const int out_dim, const int h1_size, const int h2_size) { + std::shared_ptr model = std::make_shared(in_dim, out_dim, h1_size, h2_size); + agent = std::make_shared>(model, "../test/prototxt/torch_sin_config.prototxt"); + } + + void train_agent(std::string config_path) { + std::default_random_engine generator(0); // fix seed + std::uniform_real_distribution uniform(-3.0, 9.0); + std::normal_distribution norm; + for (int i = 0; i < train_data_size; ++i) { + float x_i = uniform(generator); // generate data between [-3, 9] + float y_i = sin(x_i) + norm(generator) * 0.05; // label noise std 0.05 + x_list.push_back(x_i); + y_list.push_back(y_i); } + for (int i= 0; i < test_data_size; ++i) { + float x_i = uniform(generator); + float y_i = sin(x_i); + test_x_list.push_back(x_i); + test_y_list.push_back(y_i); + } + + std::shared_ptr model = std::make_shared(1, 1, 10, 5); + agent = std::make_shared>(model, config_path); - float train_test_gap() { - float train_lo = train_loss(); - float test_lo = test_loss(); - if ( train_lo > test_lo) { - return train_lo - test_lo; - } - else { - return test_lo - train_lo; - } + // Clone agents to sample (explore). + std::vector>> sampling_agents; + for (int i = 0; i < iter; ++i) { + sampling_agents.push_back(agent->clone()); } - void SetUp() override { - std::default_random_engine generator(0); // fix seed - std::uniform_real_distribution uniform(-3.0, 9.0); - std::normal_distribution norm; - for (int i = 0; i < train_data_size; ++i) { - float x_i = uniform(generator); // generate data between [-3, 9] - float y_i = sin(x_i) + norm(generator)*0.05; // noise std 0.05 - x_list.push_back(x_i); - y_list.push_back(y_i); - } - for (int i= 0; i < test_data_size; ++i) { - float x_i = uniform(generator); - float y_i = sin(x_i); - test_x_list.push_back(x_i); - test_y_list.push_back(y_i); - } - - std::shared_ptr model = std::make_shared(1, 1); - agent = std::make_shared>(model, "../test/torch_sin_config.prototxt"); - - // Clone agents to sample (explore). - std::vector>> sampling_agents; - for (int i = 0; i < iter; ++i) { - sampling_agents.push_back(agent->clone()); - } - - std::vector noisy_keys; - std::vector noisy_rewards(iter, 0.0f); - noisy_keys.resize(iter); - - LOG(INFO) << "start training..."; - for (int epoch = 0; epoch < 1001; ++epoch) { + std::vector noisy_keys; + std::vector noisy_rewards(iter, 0.0f); + noisy_keys.resize(iter); + + LOG(INFO) << "start training..."; + for (int epoch = 0; epoch < 1001; ++epoch) { #pragma omp parallel for schedule(dynamic, 1) - for (int i = 0; i < iter; ++i) { - auto sampling_agent = sampling_agents[i]; - SamplingInfo key; - bool success = sampling_agent->add_noise(key); - float reward = evaluate(x_list, y_list, train_data_size, sampling_agent); - noisy_keys[i] = key; - noisy_rewards[i] = reward; - } - bool success = agent->update(noisy_keys, noisy_rewards); - - if (epoch % 100 == 0) { - float reward = evaluate(test_x_list, test_y_list, test_data_size, agent); - float train_reward = evaluate(x_list, y_list, train_data_size, agent); - LOG(INFO) << "Epoch:" << epoch << " Loss: " << -reward << ", Train loss" << -train_reward; - } - } + for (int i = 0; i < iter; ++i) { + auto sampling_agent = sampling_agents[i]; + SamplingInfo key; + bool success = sampling_agent->add_noise(key); + float reward = evaluate(x_list, y_list, train_data_size, sampling_agent); + noisy_keys[i] = key; + noisy_rewards[i] = reward; + } + bool success = agent->update(noisy_keys, noisy_rewards); + + if (epoch % 100 == 0) { + float reward = evaluate(test_x_list, test_y_list, test_data_size, agent); + float train_reward = evaluate(x_list, y_list, train_data_size, agent); + LOG(INFO) << "Epoch:" << epoch << " Loss: " << -reward << ", Train loss" << -train_reward; + } } - - // Class members declared here can be used by all tests in the test suite - int train_data_size = 300; - int test_data_size = 100; - int iter = 10; - std::vector x_list; - std::vector y_list; - std::vector test_x_list; - std::vector test_y_list; - std::shared_ptr> agent; + } + + // Class members declared here can be used by all tests in the test suite + int train_data_size = 300; + int test_data_size = 100; + int iter = 10; + std::vector x_list; + std::vector y_list; + std::vector test_x_list; + std::vector test_y_list; + std::shared_ptr> agent; }; +TEST_F(TorchDemoTest, TrainingEffectUseNormalSampling) { + train_agent("../test/prototxt/torch_sin_config.prototxt"); + EXPECT_LT(train_loss(), 0.05); + EXPECT_LT(test_loss(), 0.05); + EXPECT_LT(train_test_gap(), 0.03); +} -TEST_F(TorchDemoTest, TrainingEffectTest) { - EXPECT_LT(train_loss(), 0.05); - EXPECT_LT(test_loss(), 0.05); - EXPECT_LT(train_test_gap(), 0.03); +TEST_F(TorchDemoTest, TrainingEffectTestUseTableSampling) { + train_agent("../test/prototxt/torch_sin_cached_config.prototxt"); + EXPECT_LT(train_loss(), 0.05); + EXPECT_LT(test_loss(), 0.05); + EXPECT_LT(train_test_gap(), 0.03); } +TEST_F(TorchDemoTest,ParamSizeTest) { + init_agent(1, 1, 10, 5); + EXPECT_EQ(agent->param_size(), 81); + init_agent(2, 3, 10, 5); + EXPECT_EQ(agent->param_size(), 103); + init_agent(1, 1, 1, 1); + EXPECT_EQ(agent->param_size(), 6); + init_agent(100, 2, 256, 64); + EXPECT_EQ(agent->param_size(), 42434); +} } // namespace diff --git a/deepes/test/src/utils_test.cc b/deepes/test/src/utils_test.cc index e6455dc..eb35d1d 100644 --- a/deepes/test/src/utils_test.cc +++ b/deepes/test/src/utils_test.cc @@ -20,9 +20,9 @@ namespace DeepES { // Tests that the Utils::compute_centered_rank() method. TEST(UtilsTest, Method_compute_centered_ranks) { - float a[5] = {9.0, 8.0, 7.0, 6.0, 5.0}; - std::vector reward_vec(a, a+5); - EXPECT_EQ(compute_centered_ranks(reward_vec), true); + float a[5] = {9.0, 8.0, 7.0, 6.0, 5.0}; + std::vector reward_vec(a, a+5); + EXPECT_EQ(compute_centered_ranks(reward_vec), true); } -- GitLab