diff --git a/deepes/CMakeLists.txt b/deepes/CMakeLists.txt index f4c66fbaae4244edceb55f89dcd6d7d32bdec09f..a9b120579e474aa374610678c563297cfa14a2e4 100644 --- a/deepes/CMakeLists.txt +++ b/deepes/CMakeLists.txt @@ -1,5 +1,25 @@ cmake_minimum_required (VERSION 2.6) project (DeepES) +set(TARGET parallel_main) + +########## options ########## +option(WITH_PADDLE "Compile DeepES with PaddleLite framework." OFF) +option(WITH_TORCH "Compile DeepES with Torch framework." OFF) + +message("WITH_PADDLE: "${WITH_PADDLE}) +message("WITH_TORCH: "${WITH_TORCH}) + +if (NOT (WITH_PADDLE OR WITH_TORCH)) + message(FATAL_ERROR "You should choose at least one framework to compile DeepES.") + return() +elseif(WITH_PADDLE AND WITH_TORCH) + message(FATAL_ERROR "You cannot choose more than one framework to compile DeepES.") + return() +endif() + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) find_package(OpenMP) if (OPENMP_FOUND) @@ -8,19 +28,47 @@ if (OPENMP_FOUND) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") endif() -set(CMAKE_CXX_STANDARD 11) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) -find_package(Torch REQUIRED ON) - -file(GLOB demo_src "demo/*.cpp") -file(GLOB core_src "src/*.cpp") -file(GLOB pb_src "src/*.cc") +file(GLOB src "src/*.cc") include_directories("include") -include_directories("demo") include_directories("benchmark") -link_directories("/usr/lib/x86_64-linux-gnu/") -add_executable(parallel_main "./demo/cartpole_solver_parallel.cpp" ${core_src} ${pb_src} ${benchmark_src}) -target_link_libraries(parallel_main gflags protobuf pthread glog "${TORCH_LIBRARIES}") +########## PaddleLite config ########## +if (WITH_PADDLE) + add_definitions(-g -O3 -pthread) + + include_directories("include/paddle") + include_directories("${PROJECT_SOURCE_DIR}/inference_lite_lib/cxx/include" + "${PROJECT_SOURCE_DIR}/inference_lite_lib/third_party/mklml/include") +
link_directories("${PROJECT_SOURCE_DIR}/inference_lite_lib/cxx/lib" + "${PROJECT_SOURCE_DIR}/inference_lite_lib/third_party/mklml/lib") + + file(GLOB framework_src "src/paddle/*.cc") + set(demo "${PROJECT_SOURCE_DIR}/demo/paddle/cartpole_solver_parallel.cc") +########## Torch config ########## +elseif (WITH_TORCH) + list(APPEND CMAKE_PREFIX_PATH "./libtorch") + find_package(Torch REQUIRED ON) + + include_directories("include/torch") + include_directories("demo/torch") + + file(GLOB framework_src "src/torch/*.cc") + set(demo "${PROJECT_SOURCE_DIR}/demo/torch/cartpole_solver_parallel.cc") +else () + message("ERROR: You should choose at least one framework to compile DeepES.") +endif() + +add_executable(${TARGET} ${demo} ${src} ${framework_src}) + +target_link_libraries(${TARGET} gflags protobuf pthread glog) + +########## PaddleLite libraries ########## +if (WITH_PADDLE) + target_link_libraries(${TARGET} -lpaddle_full_api_shared) + target_link_libraries(${TARGET} -lmklml_intel) + target_link_libraries(${TARGET} -ldl) +########## Torch libraries ########## +elseif (WITH_TORCH) + target_link_libraries(${TARGET} "${TORCH_LIBRARIES}") +endif() diff --git a/deepes/README.md b/deepes/README.md index 75b9eae8dec93fe9de8e6cf15d412a1af3cbf501..48dbee2c1b30cc6024b84fed66e7f72b3e534694 100644 --- a/deepes/README.md +++ b/deepes/README.md @@ -7,22 +7,23 @@ DeepES是一个支持**快速验证**ES效果、**兼容多个框架**的C++库 ## 使用示范 ```c++ //实例化一个预测,根据配置文件加载模型,采样方式(Gaussian\CMA sampling..)、更新方式(SGD\Adam)等 -auto predictor = Predicotr(config); +auto agent = ESAgent(config); -for (int i = 0; i < 100; ++i) { - auto noisy_predictor = predictor->clone(); // copy 一份参数 - int key = noisy_predictor->add_noise(); // 参数扰动,同时保存随机种子 - int reward = evaluate(env, noisiy_predictor); //评估参数 +for (int i = 0; i < 10; ++i) { + auto sampling_agent = agent->clone(); // clone出一个sampling agent + SamplingKey key; + sampling_agent->add_noise(key); // 参数扰动,同时保存随机种子到key中 + int reward = evaluate(env, sampling_agent); //评估参数
noisy_keys.push_back(key); // 记录随机噪声对应种子 noisy_rewards.push_back(reward); // 记录评估结果 } //根据评估结果、随机种子更新参数,然后重复以上过程,直到收敛。 -predictor->update(noisy_keys, noisy_rewards); +agent->update(noisy_keys, noisy_rewards); ``` ## 一键运行demo列表 -- **Torch**: sh [./scripts/build.sh](./scripts/build.sh) -- **Paddle**: +- **PaddleLite**: sh ./scripts/build.sh paddle +- **Torch**: sh ./scripts/build.sh torch - **裸写网络**: ## 相关依赖: @@ -33,5 +34,8 @@ predictor->update(noisy_keys, noisy_rewards); ## 额外依赖: +### 使用PaddleLite +下载PaddleLite的X86预编译库,或者编译PaddleLite源码,得到inference_lite_lib文件夹,放在当前目录中。(可参考:[PaddleLite使用X86预测部署](https://paddle-lite.readthedocs.io/zh/latest/demo_guides/x86.html)) + ### 使用torch 下载[libtorch](https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.4.0%2Bcpu.zip)或者编译torch源码,得到libtorch文件夹,放在当前目录中。 diff --git a/deepes/benchmark/cartpole.h b/deepes/benchmark/cartpole.h index 6935f8ddb3a058444945c3dab08be088a0152454..48d2e08515ed3e105e6a700caf7dfa889fbce0b3 100644 --- a/deepes/benchmark/cartpole.h +++ b/deepes/benchmark/cartpole.h @@ -1,7 +1,9 @@ // Third party code // This code is copied or modified from openai/gym's cartpole.py - -#include +#include +#include +#include +#include const double kPi = 3.1415926535898; @@ -21,13 +23,13 @@ public: double x_threshold = 2.4; int steps_beyond_done = -1; - torch::Tensor state; + std::vector state = {0, 0, 0, 0}; double reward; bool done; int step_ = 0; - torch::Tensor getState() { - return state; + const float* getState() { + return state.data(); } double getReward() { @@ -39,7 +41,13 @@ public: } void reset() { - state = torch::empty({ 4 }).uniform_(-0.05, 0.05); + std::random_device rd; + std::default_random_engine generator(rd()); + std::uniform_real_distribution distribution(-0.05, 0.05); + for (int i = 0; i < 4; ++i) { + state[i] = distribution(generator); + } + steps_beyond_done = -1; step_ = 0; } @@ -49,10 +57,10 @@ public: } void step(int action) { - auto x = state[0].item(); - auto x_dot = 
state[1].item(); - auto theta = state[2].item(); - auto theta_dot = state[3].item(); + float x = state[0]; + float x_dot = state[1]; + float theta = state[2]; + float theta_dot = state[3]; auto force = (action == 1) ? force_mag : -force_mag; auto costheta = std::cos(theta); @@ -67,7 +75,8 @@ public: x_dot = x_dot + tau * xacc; theta = theta + tau * theta_dot; theta_dot = theta_dot + tau * thetaacc; - state = torch::tensor({ x, x_dot, theta, theta_dot }); + + state = {x, x_dot, theta, theta_dot}; done = x < -x_threshold || x > x_threshold || theta < -theta_threshold_radians || theta > theta_threshold_radians || @@ -83,7 +92,7 @@ public: } else { if (steps_beyond_done == 0) { - AT_ASSERT(false); // Can't do this + assert(false); // Can't do this } } step_++; diff --git a/deepes/deepes_config.prototxt b/deepes/benchmark/cartpole_config.prototxt similarity index 100% rename from deepes/deepes_config.prototxt rename to deepes/benchmark/cartpole_config.prototxt diff --git a/deepes/demo/paddle/cartpole_init_model.zip b/deepes/demo/paddle/cartpole_init_model.zip new file mode 100644 index 0000000000000000000000000000000000000000..04d21fb870a13f149f9ed6d05a4618fa4cefcd4a Binary files /dev/null and b/deepes/demo/paddle/cartpole_init_model.zip differ diff --git a/deepes/demo/paddle/cartpole_solver_parallel.cc b/deepes/demo/paddle/cartpole_solver_parallel.cc new file mode 100644 index 0000000000000000000000000000000000000000..b169624597d4589e46ccc8c1e8fbe9937ce9cb2f --- /dev/null +++ b/deepes/demo/paddle/cartpole_solver_parallel.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "cartpole.h" +#include "gaussian_sampling.h" +#include "es_agent.h" +#include "paddle_api.h" + +using namespace DeepES; +using namespace paddle::lite_api; + +const int ITER = 10; + +std::shared_ptr create_paddle_predictor(const std::string& model_dir) { + // 1. Create CxxConfig + CxxConfig config; + config.set_model_dir(model_dir); + config.set_valid_places({ + Place{TARGET(kX86), PRECISION(kFloat)}, + Place{TARGET(kHost), PRECISION(kFloat)} + }); + + // 2. Create PaddlePredictor by CxxConfig + std::shared_ptr predictor = CreatePaddlePredictor(config); + return predictor; +} + +// Use PaddlePredictor of CartPole model to predict the action. 
+std::vector forward(std::shared_ptr predictor, const float* obs) { + std::unique_ptr input_tensor(std::move(predictor->GetInput(0))); + input_tensor->Resize({1, 4}); + input_tensor->CopyFromCpu(obs); + + predictor->Run(); + + std::vector probs(2, 0.0); + std::unique_ptr output_tensor( + std::move(predictor->GetOutput(0))); + output_tensor->CopyToCpu(probs.data()); + return probs; +} + +int arg_max(const std::vector& vec) { + return static_cast(std::distance(vec.begin(), std::max_element(vec.begin(), vec.end()))); +} + + +float evaluate(CartPole& env, std::shared_ptr agent) { + float total_reward = 0.0; + env.reset(); + const float* obs = env.getState(); + + std::shared_ptr paddle_predictor; + paddle_predictor = agent->get_predictor(); + + while (true) { + std::vector probs = forward(paddle_predictor, obs); + int act = arg_max(probs); + env.step(act); + float reward = env.getReward(); + bool done = env.isDone(); + total_reward += reward; + if (done) break; + obs = env.getState(); + } + return total_reward; +} + + +int main(int argc, char* argv[]) { + std::vector envs; + for (int i = 0; i < ITER; ++i) { + envs.push_back(CartPole()); + } + + std::shared_ptr paddle_predictor = create_paddle_predictor("../demo/paddle/cartpole_init_model"); + std::shared_ptr agent = std::make_shared(paddle_predictor, "../benchmark/cartpole_config.prototxt"); + + // Clone agents to sample (explore). 
+ std::vector< std::shared_ptr > sampling_agents; + for (int i = 0; i < ITER; ++i) { + sampling_agents.push_back(agent->clone()); + } + + std::vector noisy_keys; + std::vector noisy_rewards(ITER, 0.0f); + noisy_keys.resize(ITER); + + omp_set_num_threads(10); + for (int epoch = 0; epoch < 10000; ++epoch) { +#pragma omp parallel for schedule(dynamic, 1) + for (int i = 0; i < ITER; ++i) { + std::shared_ptr sampling_agent = sampling_agents[i]; + SamplingKey key; + bool success = sampling_agent->add_noise(key); + float reward = evaluate(envs[i], sampling_agent); + + noisy_keys[i] = key; + noisy_rewards[i] = reward; + } + + // NOTE: all parameters of sampling_agents will be updated + bool success = agent->update(noisy_keys, noisy_rewards); + + int reward = evaluate(envs[0], agent); + LOG(INFO) << "Epoch:" << epoch << " Reward: " << reward; + } +} diff --git a/deepes/demo/paddle/gen_cartpole_init_model.py b/deepes/demo/paddle/gen_cartpole_init_model.py new file mode 100644 index 0000000000000000000000000000000000000000..66b841aaf4ac428ca2232324a35fa66bd683c572 --- /dev/null +++ b/deepes/demo/paddle/gen_cartpole_init_model.py @@ -0,0 +1,39 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddle import fluid + + +def net(obs, act_dim): + hid1_size = act_dim * 10 + hid1 = fluid.layers.fc(obs, size=hid1_size) + prob = fluid.layers.fc(hid1, size=act_dim, act='softmax') + return prob + + +if __name__ == '__main__': + obs_dim = 4 + act_dim = 2 + + obs = fluid.layers.data(name="obs", shape=[obs_dim], dtype='float32') + + prob = net(obs, act_dim) + + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(fluid.default_startup_program()) + fluid.io.save_inference_model( + dirname='cartpole_init_model', + feeded_var_names=['obs'], + target_vars=[prob], + executor=exe) diff --git a/deepes/demo/cartpole_solver_parallel.cpp b/deepes/demo/torch/cartpole_solver_parallel.cc similarity index 64% rename from deepes/demo/cartpole_solver_parallel.cpp rename to deepes/demo/torch/cartpole_solver_parallel.cc index 960a8f7fcfc4522ef0c8c848bd1e2e1796b568a4..3311ed6185a8360e92258ccf9e3b42323e07ab76 100644 --- a/deepes/demo/cartpole_solver_parallel.cpp +++ b/deepes/demo/torch/cartpole_solver_parallel.cc @@ -20,17 +20,18 @@ #include "cartpole.h" #include "gaussian_sampling.h" #include "model.h" -#include "torch_predictor.h" +#include "es_agent.h" using namespace DeepES; -const int ITER = 100; +const int ITER = 10; -float evaluate(CartPole& env, std::shared_ptr> predictor) { +float evaluate(CartPole& env, std::shared_ptr> agent) { float total_reward = 0.0; env.reset(); - auto obs = env.getState(); + const float* obs = env.getState(); while (true) { - torch::Tensor action = predictor->predict(obs); + torch::Tensor obs_tensor = torch::tensor({obs[0], obs[1], obs[2], obs[3]}); + torch::Tensor action = agent->predict(obs_tensor); int act = std::get<1>(action.max(-1)).item(); env.step(act); float reward = env.getReward(); @@ -50,10 +51,12 @@ int main(int argc, char* argv[]) { } auto model = std::make_shared(4, 2); - std::shared_ptr> predictor = std::make_shared>(model, "../deepes_config.prototxt"); - std::vector>> noisy_predictors; + std::shared_ptr> agent = 
std::make_shared>(model, "../benchmark/cartpole_config.prototxt"); + + // Clone agents to sample (explore). + std::vector>> sampling_agents; for (int i = 0; i < ITER; ++i) { - noisy_predictors.push_back(predictor->clone()); + sampling_agents.push_back(agent->clone()); } std::vector noisy_keys; @@ -63,16 +66,19 @@ int main(int argc, char* argv[]) { for (int epoch = 0; epoch < 1000; ++epoch) { #pragma omp parallel for schedule(dynamic, 1) for (int i = 0; i < ITER; ++i) { - auto noisy_predictor = noisy_predictors[i]; - SamplingKey key = noisy_predictor->add_noise(); - float reward = evaluate(envs[i], noisy_predictor); + auto sampling_agent = sampling_agents[i]; + SamplingKey key; + bool success = sampling_agent->add_noise(key); + float reward = evaluate(envs[i], sampling_agent); noisy_keys[i] = key; noisy_rewards[i] = reward; } - - predictor->update(noisy_keys, noisy_rewards); - - int reward = evaluate(envs[0], predictor); + + // Will also update parameters of sampling_agents + bool success = agent->update(noisy_keys, noisy_rewards); + + // Use original agent to evalute (without noise). + int reward = evaluate(envs[0], agent); LOG(INFO) << "Epoch:" << epoch << " Reward: " << reward; } } diff --git a/deepes/demo/model.h b/deepes/demo/torch/model.h similarity index 100% rename from deepes/demo/model.h rename to deepes/demo/torch/model.h diff --git a/deepes/include/gaussian_sampling.h b/deepes/include/gaussian_sampling.h index 59c753e279d8575c3dda85ca855a099f0eabe398..82c58e50a1078faec011dba94ef66079479ab289 100644 --- a/deepes/include/gaussian_sampling.h +++ b/deepes/include/gaussian_sampling.h @@ -41,7 +41,7 @@ public: *@return: * success: load configuration successfully or not. */ - int sampling(float* noise, int size); + int sampling(float* noise, int64_t size); /*@brief reconstruct the Gaussion noise given the key. * This function is often used for updating the neuron network parameters in the offline environment. 
@@ -51,7 +51,7 @@ public: * noise: a pointer pointed to the memory that stores the noise * size: the number of float to be sampled. */ - bool resampling(int key, float* noise, int size); + bool resampling(int key, float* noise, int64_t size); private: float _std; diff --git a/deepes/include/paddle/es_agent.h b/deepes/include/paddle/es_agent.h new file mode 100644 index 0000000000000000000000000000000000000000..6bba4367bcea0087d4e2a5ef8cc2beb5db638c1d --- /dev/null +++ b/deepes/include/paddle/es_agent.h @@ -0,0 +1,98 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef DEEPES_PADDLE_ES_AGENT_H_ +#define DEEPES_PADDLE_ES_AGENT_H_ + +#include "paddle_api.h" +#include "optimizer.h" +#include "utils.h" +#include "gaussian_sampling.h" +#include "deepes.pb.h" +#include + + +namespace DeepES { + +typedef paddle::lite_api::PaddlePredictor PaddlePredictor; + +/** + * @brief DeepES agent for PaddleLite. + * + * Users use `clone` fucntion to clone a sampling agent, which can call `add_noise` + * function to add noise to copied parameters and call `get_predictor` fucntion to + * get a paddle predictor with added noise. + * + * Then can use `update` function to update parameters based on ES algorithm. + * Note: parameters of cloned agents will also be updated. 
+ */ +class ESAgent { + public: + ESAgent(); + + ~ESAgent(); + + ESAgent( + std::shared_ptr predictor, + std::string config_path); + + /** + * @brief Clone a sampling agent + * + * Only cloned ESAgent can call `add_noise` function. + * Each cloned ESAgent will have a copy of original parameters. + * (support sampling in multi-thread way) + */ + std::shared_ptr clone(); + + /** + * @brief Update parameters of predictor based on ES algorithm. + * + * Only not cloned ESAgent can call `update` function. + * Parameters of cloned agents will also be updated. + */ + bool update( + std::vector& noisy_keys, + std::vector& noisy_rewards); + + // copied parameters = original parameters + noise + bool add_noise(SamplingKey& sampling_key); + + /** + * @brief Get paddle predictor + * + * if _is_sampling_agent is true, will return predictor with added noise; + * if _is_sampling_agent is false, will return predictor without added noise. + */ + std::shared_ptr get_predictor(); + + private: + int64_t _calculate_param_size(); + + std::shared_ptr _predictor; + std::shared_ptr _sampling_predictor; + bool _is_sampling_agent = false; + std::shared_ptr _sampling_method; + std::shared_ptr _optimizer; + std::shared_ptr _config; + int64_t _param_size = 0; + std::vector _param_names; + // Noise / gradient buffers, allocated in advance; nullptr keeps the destructor's delete[] safe. + float* _noise = nullptr; + float* _neg_gradients = nullptr; +}; + +} + +#endif /* DEEPES_PADDLE_ES_AGENT_H_ */ diff --git a/deepes/include/sampling_method.h b/deepes/include/sampling_method.h index a23273273decccb988449783176dbc501824bc39..835c8d77294de1befe6ebaf27601eebce4bcfa9f 100644 --- a/deepes/include/sampling_method.h +++ b/deepes/include/sampling_method.h @@ -55,7 +55,7 @@ public: *@return: * success: load configuration successfully or not. */ - virtual int sampling(float* noise, int size)=0; + virtual int sampling(float* noise, int64_t size)=0; /*@brief reconstruct the Gaussion noise given the key.
* This function is often used for updating the neuron network parameters in the offline environment. @@ -65,7 +65,7 @@ public: * noise: a pointer pointed to the memory that stores the noise * size: the number of float to be sampled. */ - virtual bool resampling(int key, float* noise, int size)=0; + virtual bool resampling(int key, float* noise, int64_t size)=0; bool set_seed(int seed) { _seed = seed; diff --git a/deepes/include/torch/es_agent.h b/deepes/include/torch/es_agent.h new file mode 100644 index 0000000000000000000000000000000000000000..486b74e207ac939c27339dd60ef9eb23ce2285b2 --- /dev/null +++ b/deepes/include/torch/es_agent.h @@ -0,0 +1,187 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TORCH_ESAGENT_H +#define TORCH_ESAGENT_H + +#include +#include +#include "optimizer.h" +#include "utils.h" +#include "gaussian_sampling.h" +#include "deepes.pb.h" + +namespace DeepES{ + +/** + * @brief DeepES agent for Torch. + * + * Our implemtation is flexible to support any model that subclass torch::nn::Module. + * That is, we can instantiate an agent by: es_agent = ESAgent(model); + * After that, users can clone an agent for multi-thread processing, add parametric noise for exploration, + * and update the parameteres, according to the evaluation resutls of noisy parameters. 
+ */ +template +class ESAgent{ +public: + ESAgent() {} + + ~ESAgent() { + delete[] _noise; + if (!_is_sampling_agent) + delete[] _neg_gradients; + } + + ESAgent(std::shared_ptr model, std::string config_path): _model(model) { + _is_sampling_agent = false; + _config = std::make_shared(); + load_proto_conf(config_path, *_config); + _sampling_method = std::make_shared(); + _sampling_method->load_config(*_config); + _optimizer = std::make_shared(_config->optimizer().base_lr()); + // Origin agent can't be used to sample, so keep it same with _model for evaluating. + _sampling_model = model; + _param_size = _calculate_param_size(); + + _noise = new float [_param_size]; + _neg_gradients = new float [_param_size]; + } + + /** + * @breif Clone a sampling agent + * + * Only cloned ESAgent can call `add_noise` function. + * Each cloned ESAgent will have a copy of original parameters. + * (support sampling in multi-thread way) + */ + std::shared_ptr clone() { + std::shared_ptr new_agent = std::make_shared(); + + new_agent->_model = _model; + std::shared_ptr new_model = _model->clone(); + new_agent->_sampling_model = new_model; + + new_agent->_is_sampling_agent = true; + new_agent->_sampling_method = _sampling_method; + new_agent->_param_size = _param_size; + + float* new_noise = new float [_param_size]; + new_agent->_noise = new_noise; + + return new_agent; + } + + /** + * @brief Use the model to predict. + * + * if _is_sampling_agent is true, will use the sampling model with added noise; + * if _is_sampling_agent is false, will use the original model without added noise. + */ + torch::Tensor predict(const torch::Tensor& x) { + return _sampling_model->forward(x); + } + + /** + * @brief Update parameters of model based on ES algorithm. + * + * Only not cloned ESAgent can call `update` function. + * Parameters of cloned agents will also be updated. 
+ */ + bool update(std::vector& noisy_keys, std::vector& noisy_rewards) { + if (_is_sampling_agent) { + LOG(ERROR) << "[DeepES] Cloned ESAgent cannot call update function, please use original ESAgent."; + return false; + } + + compute_centered_ranks(noisy_rewards); + + memset(_neg_gradients, 0, _param_size * sizeof(float)); + for (int i = 0; i < noisy_keys.size(); ++i) { + int key = noisy_keys[i].key(0); + float reward = noisy_rewards[i]; + bool success = _sampling_method->resampling(key, _noise, _param_size); + for (int64_t j = 0; j < _param_size; ++j) { + _neg_gradients[j] += _noise[j] * reward; + } + } + for (int64_t j = 0; j < _param_size; ++j) { + _neg_gradients[j] /= -1.0 * noisy_keys.size(); + } + + //update + auto params = _model->named_parameters(); + int64_t counter = 0; + for (auto& param: params) { + torch::Tensor tensor = param.value().view({-1}); + auto tensor_a = tensor.accessor(); + _optimizer->update(tensor_a, _neg_gradients+counter, tensor.size(0)); + counter += tensor.size(0); + } + + return true; + } + + // copied parameters = original parameters + noise + bool add_noise(SamplingKey& sampling_key) { + if (!_is_sampling_agent) { + LOG(ERROR) << "[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent."; + return false; + } + + auto sampling_params = _sampling_model->named_parameters(); + auto params = _model->named_parameters(); + int key = _sampling_method->sampling(_noise, _param_size); + sampling_key.add_key(key); + int64_t counter = 0; + for (auto& param: sampling_params) { + torch::Tensor sampling_tensor = param.value().view({-1}); + std::string param_name = param.key(); + torch::Tensor tensor = params.find(param_name)->view({-1}); + auto sampling_tensor_a = sampling_tensor.accessor(); + auto tensor_a = tensor.accessor(); + for (int64_t j = 0; j < tensor.size(0); ++j) { + sampling_tensor_a[j] = tensor_a[j] + _noise[counter + j]; + } + counter += tensor.size(0); + } + return true; + } + + + +private: + int64_t 
_calculate_param_size() { + int64_t param_size = 0; + auto params = _model->named_parameters(); + for (auto& param: params) { + param_size += param.value().view({-1}).size(0); + } + return param_size; + } + + std::shared_ptr _model; + std::shared_ptr _sampling_model; + bool _is_sampling_agent = false; + std::shared_ptr _sampling_method; + std::shared_ptr _optimizer; + std::shared_ptr _config; + int64_t _param_size = 0; + // Noise / gradient buffers, allocated in advance; nullptr keeps the destructor's delete[] safe. + float* _noise = nullptr; + float* _neg_gradients = nullptr; +}; + +} + +#endif /* TORCH_ESAGENT_H */ diff --git a/deepes/include/torch_predictor.h b/deepes/include/torch_predictor.h deleted file mode 100644 index 800124074950ee60a9de7f4be0317382172c18f9..0000000000000000000000000000000000000000 --- a/deepes/include/torch_predictor.h +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TORCHPREDICTOR_H -#define TORCHPREDICTOR_H -#include -#include -#include -#include "sgd_optimizer.h" -#include "adam_optimizer.h" -#include "utils.h" -#include "gaussian_sampling.h" -#include "deepes.pb.h" - -namespace DeepES{ - -/* DeepES predictor for Torch. - * Our implemtation is flexible to support any model that subclass torch::nn::Module.
- * That is, we can instantiate a preditor by: predictor = Predcitor(model); - * After that, users can clone a predictor for multi-thread processing, add parametric noise for exploration, - * and update the parameteres, according to the evaluation resutls of noisy parameters. - * - */ -template -class Predictor{ -public: - Predictor(): _param_size(0){} - - Predictor(std::shared_ptr model, std::string config_path): _model(model) { - _config = std::make_shared(); - load_proto_conf(config_path, *_config); - _sampling_method = std::make_shared(); - _sampling_method->load_config(*_config); - - std::string opt_type = _config->optimizer().type(); - std::transform(opt_type.begin(),opt_type.end(),opt_type.begin(),::tolower); - if (opt_type == "sgd") { - _optimizer = std::make_shared(_config->optimizer().base_lr(), \ - _config->optimizer().momentum()); - }else if (opt_type == "adam") { - _optimizer = std::make_shared(_config->optimizer().base_lr(), \ - _config->optimizer().beta1(), \ - _config->optimizer().beta2(), \ - _config->optimizer().epsilon()); - }else { - // TODO: NotImplementedError - } - _param_size = 0; - _sampled_model = model; - param_size(); - } - - std::shared_ptr clone() { - std::shared_ptr new_model = _model->clone(); - std::shared_ptr new_predictor = std::make_shared(); - new_predictor->set_model(new_model, _model); - new_predictor->set_sampling_method(_sampling_method); - new_predictor->set_param_size(_param_size); - return new_predictor; - } - - void set_config(std::shared_ptr config) { - _config = config; - } - - void set_sampling_method(std::shared_ptr sampling_method) { - _sampling_method = sampling_method; - } - - void set_model(std::shared_ptr sampled_model, std::shared_ptr model) { - _sampled_model = sampled_model; - _model = model; - } - - std::shared_ptr get_sampling_method() { - return _sampling_method; - } - - std::shared_ptr get_optimizer() { - return _optimizer; - } - - void set_optimizer(std::shared_ptr optimizer) { - _optimizer = optimizer; 
- } - - void set_param_size(int param_size) { - _param_size = param_size; - } - - torch::Tensor predict(const torch::Tensor& x) { - return _sampled_model->forward(x); - } - - bool update(std::vector& noisy_keys, std::vector& noisy_rewards) { - compute_centered_ranks(noisy_rewards); - float* noise = new float [_param_size]; - float* neg_gradients = new float [_param_size]; - memset(neg_gradients, 0, _param_size * sizeof(float)); - for (int i = 0; i < noisy_keys.size(); ++i) { - int key = noisy_keys[i].key(0); - float reward = noisy_rewards[i]; - bool success = _sampling_method->resampling(key, noise, _param_size); - for (int j = 0; j < _param_size; ++j) { - neg_gradients[j] += noise[j] * reward; - } - } - for (int j = 0; j < _param_size; ++j) { - neg_gradients[j] /= -1.0 * noisy_keys.size(); - } - - //update - auto params = _model->named_parameters(); - int counter = 0; - for (auto& param: params) { - torch::Tensor tensor = param.value().view({-1}); - auto tensor_a = tensor.accessor(); - _optimizer->update(tensor_a, neg_gradients+counter, tensor.size(0), param.key()); - counter += tensor.size(0); - } - delete[] noise; - delete[] neg_gradients; - } - - SamplingKey add_noise() { - SamplingKey sampling_key; - auto sampled_params = _sampled_model->named_parameters(); - auto params = _model->named_parameters(); - float* noise = new float [_param_size]; - int key = _sampling_method->sampling(noise, _param_size); - sampling_key.add_key(key); - int counter = 0; - for (auto& param: sampled_params) { - torch::Tensor sampled_tensor = param.value().view({-1}); - std::string param_name = param.key(); - torch::Tensor tensor = params.find(param_name)->view({-1}); - auto sampled_tensor_a = sampled_tensor.accessor(); - auto tensor_a = tensor.accessor(); - for (int j = 0; j < tensor.size(0); ++j) { - sampled_tensor_a[j] = tensor_a[j] + noise[counter + j]; - } - counter += tensor.size(0); - } - delete[] noise; - return sampling_key; - } - - int param_size() { - if (_param_size == 0) { 
- auto params = _model->named_parameters(); - for (auto& param: params) { - torch::Tensor tensor = param.value().view({-1}); - _param_size += tensor.size(0); - } - } - return _param_size; - } - -private: - std::shared_ptr _sampled_model; - std::shared_ptr _model; - std::shared_ptr _sampling_method; - std::shared_ptr _optimizer; - std::shared_ptr _config; - int _param_size; -}; - -} -#endif diff --git a/deepes/scripts/build.sh b/deepes/scripts/build.sh index 70017d2487db13f0c11634eb16a5256e21722e21..fe70f3c8dd605688bbb081d9688b306cc8b261c9 100644 --- a/deepes/scripts/build.sh +++ b/deepes/scripts/build.sh @@ -1,23 +1,53 @@ #!/bin/bash -export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH + +if [ $# != 1 ]; then + echo "You must choose one framework (paddle/torch) to compile DeepES." + exit 1 +fi + +if [ "$1" = "paddle" ]; then + #---------------paddlelite-------------# + if [ ! -d "./inference_lite_lib" ];then + echo "Cannot find the PaddleLite library: ./inference_lite_lib" + echo "Please put the PaddleLite libraray to current folder according the instruction in README" + exit 1 + fi + + # Unpack the initial model if it has not been extracted yet + if [ ! -d "./demo/paddle/cartpole_init_model" ]; then + unzip ./demo/paddle/cartpole_init_model.zip -d ./demo/paddle/ + fi + + FLAGS=" -DWITH_PADDLE=ON" +elif [ "$1" = "torch" ]; then + #---------------libtorch-------------# + if [ ! -d "./libtorch" ];then + echo "Cannot find the torch library: ./libtorch" + echo "Please put the torch libraray to current folder according the instruction in README" + exit 1 + fi + FLAGS=" -DWITH_TORCH=ON" +else + echo "Invalid arguments. [paddle/torch]" + exit 1 +fi + +#export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH #----------------protobuf-------------# cp ./src/proto/deepes.proto ./ protoc deepes.proto --cpp_out ./ mv deepes.pb.h ./include mv deepes.pb.cc ./src - -#---------------libtorch-------------# -if [ !
-d "./libtorch" ];then - echo "Cannot find the torch library: ./libtorch" - echo "Please put the torch libraray to current folder according the instruction in README" - exit 1 -fi +rm deepes.proto #----------------build---------------# +echo ${FLAGS} rm -rf build mkdir build cd build -cmake -DCMAKE_PREFIX_PATH=./libtorch ../ +cmake ../ ${FLAGS} make -j10 + +#-----------------run----------------# ./parallel_main diff --git a/deepes/src/gaussian_sampling.cpp b/deepes/src/gaussian_sampling.cc similarity index 86% rename from deepes/src/gaussian_sampling.cpp rename to deepes/src/gaussian_sampling.cc index 4ad6cf3021ea31cb371ffa59d42378971fae3016..f44dd5abecaa4c45a9c829952a38c2c4c26cf4aa 100644 --- a/deepes/src/gaussian_sampling.cpp +++ b/deepes/src/gaussian_sampling.cc @@ -26,17 +26,17 @@ void GaussianSampling::load_config(const DeepESConfig& config) { set_seed(config.seed()); } -int GaussianSampling::sampling(float* noise, int size) { +int GaussianSampling::sampling(float* noise, int64_t size) { int key = rand(); std::default_random_engine generator(key); std::normal_distribution norm; - for (int i = 0; i < size; ++i) { + for (int64_t i = 0; i < size; ++i) { *(noise + i) = norm(generator) * _std; } return key; } -bool GaussianSampling::resampling(int key, float* noise, int size) { +bool GaussianSampling::resampling(int key, float* noise, int64_t size) { bool success = true; if (noise == nullptr) { success = false; @@ -44,7 +44,7 @@ bool GaussianSampling::resampling(int key, float* noise, int size) { else { std::default_random_engine generator(key); std::normal_distribution norm; - for (int i = 0; i < size; ++i) { + for (int64_t i = 0; i < size; ++i) { *(noise + i) = norm(generator) * _std; } } diff --git a/deepes/src/paddle/es_agent.cc b/deepes/src/paddle/es_agent.cc new file mode 100644 index 0000000000000000000000000000000000000000..1437fa98a0a207b2a2358516ebe15d90f9e985e9 --- /dev/null +++ b/deepes/src/paddle/es_agent.cc @@ -0,0 +1,157 @@ +// Copyright (c) 2020 
PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "es_agent.h" + +namespace DeepES { + +typedef paddle::lite_api::PaddlePredictor PaddlePredictor; +typedef paddle::lite_api::Tensor Tensor; +typedef paddle::lite_api::shape_t shape_t; + +inline int64_t ShapeProduction(const shape_t& shape) { + int64_t res = 1; + for (auto i : shape) res *= i; + return res; +} + +ESAgent::ESAgent() {} + +ESAgent::~ESAgent() { + delete[] _noise; + if (!_is_sampling_agent) + delete[] _neg_gradients; +} + +ESAgent::ESAgent( + std::shared_ptr predictor, + std::string config_path) { + + _is_sampling_agent = false; + _predictor = predictor; + // Original agent can't be used to sample, so keep it same with _predictor for evaluating. 
+ _sampling_predictor = predictor; + + _config = std::make_shared(); + load_proto_conf(config_path, *_config); + + _sampling_method = std::make_shared(); + _sampling_method->load_config(*_config); + + _optimizer = std::make_shared(_config->optimizer().base_lr()); + + _param_names = _predictor->GetParamNames(); + _param_size = _calculate_param_size(); + + _noise = new float [_param_size]; + _neg_gradients = new float [_param_size]; +} + +std::shared_ptr ESAgent::clone() { + std::shared_ptr new_sampling_predictor = _predictor->Clone(); + + std::shared_ptr new_agent = std::make_shared(); + + float* noise = new float [_param_size]; + + new_agent->_predictor = _predictor; + new_agent->_sampling_predictor = new_sampling_predictor; + + new_agent->_is_sampling_agent = true; + new_agent->_sampling_method = _sampling_method; + new_agent->_param_names = _param_names; + new_agent->_param_size = _param_size; + new_agent->_noise = noise; + + return new_agent; +} + +bool ESAgent::update( + std::vector& noisy_keys, + std::vector& noisy_rewards) { + if (_is_sampling_agent) { + LOG(ERROR) << "[DeepES] Cloned ESAgent cannot call update function, please use original ESAgent."; + return false; + } + + compute_centered_ranks(noisy_rewards); + + memset(_neg_gradients, 0, _param_size * sizeof(float)); + for (int i = 0; i < noisy_keys.size(); ++i) { + int key = noisy_keys[i].key(0); + float reward = noisy_rewards[i]; + bool success = _sampling_method->resampling(key, _noise, _param_size); + for (int64_t j = 0; j < _param_size; ++j) { + _neg_gradients[j] += _noise[j] * reward; + } + } + for (int64_t j = 0; j < _param_size; ++j) { + _neg_gradients[j] /= -1.0 * noisy_keys.size(); + } + + //update + int64_t counter = 0; + + for (std::string param_name: _param_names) { + std::unique_ptr tensor = _predictor->GetMutableTensor(param_name); + float* tensor_data = tensor->mutable_data(); + int64_t tensor_size = ShapeProduction(tensor->shape()); + _optimizer->update(tensor_data, _neg_gradients + 
counter, tensor_size); + counter += tensor_size; + } + return true; + +} + +bool ESAgent::add_noise(SamplingKey& sampling_key) { + if (!_is_sampling_agent) { + LOG(ERROR) << "[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent."; + return false; + } + + int key = _sampling_method->sampling(_noise, _param_size); + sampling_key.add_key(key); + int64_t counter = 0; + + for (std::string param_name: _param_names) { + std::unique_ptr sample_tensor = _sampling_predictor->GetMutableTensor(param_name); + std::unique_ptr tensor = _predictor->GetTensor(param_name); + int64_t tensor_size = ShapeProduction(tensor->shape()); + for (int64_t j = 0; j < tensor_size; ++j) { + sample_tensor->mutable_data()[j] = tensor->data()[j] + _noise[counter + j]; + } + counter += tensor_size; + } + + return true; +} + + +std::shared_ptr ESAgent::get_predictor() { + return _sampling_predictor; +} + +int64_t ESAgent::_calculate_param_size() { + int64_t param_size = 0; + for (std::string param_name: _param_names) { + std::unique_ptr tensor = _predictor->GetTensor(param_name); + param_size += ShapeProduction(tensor->shape()); + } + return param_size; +} + + +} + diff --git a/deepes/src/utils.cpp b/deepes/src/utils.cc similarity index 100% rename from deepes/src/utils.cpp rename to deepes/src/utils.cc