diff --git a/deepes/README.md b/deepes/README.md index 48dbee2c1b30cc6024b84fed66e7f72b3e534694..c2e6ef88108cff5e05273605ea0ae0b93899b365 100644 --- a/deepes/README.md +++ b/deepes/README.md @@ -11,14 +11,14 @@ auto agent = ESAgent(config); for (int i = 0; i < 10; ++i) { - auto sampling_agnet = agent->clone(); // clone出一个sampling agent + auto sampling_agent = agent->clone(); // clone出一个sampling agent - SamplingKey key; - agent->add_noise(key); // 参数扰动,同时保存随机种子到key中 + SamplingInfo info; + sampling_agent->add_noise(info); // 参数扰动,同时保存随机种子到info中 int reward = evaluate(env, sampling_agent); //评估参数 - noisy_keys.push_back(key); // 记录随机噪声对应种子 + noisy_info.push_back(info); // 记录随机噪声对应种子 noisy_rewards.push_back(reward); // 记录评估结果 } //根据评估结果、随机种子更新参数,然后重复以上过程,直到收敛。 -agent->update(noisy_keys, noisy_rewards); +agent->update(noisy_info, noisy_rewards); ``` ## 一键运行demo列表 diff --git a/deepes/demo/paddle/cartpole_solver_parallel.cc b/deepes/demo/paddle/cartpole_solver_parallel.cc index 5fe4ae0c81d1550aa081b5bad86f0e643de260da..704b35e29574e799ff8ca9ba3403477b841e8f36 100644 --- a/deepes/demo/paddle/cartpole_solver_parallel.cc +++ b/deepes/demo/paddle/cartpole_solver_parallel.cc @@ -95,25 +95,25 @@ int main(int argc, char* argv[]) { sampling_agents.push_back(agent->clone()); } - std::vector noisy_keys; + std::vector noisy_info; std::vector noisy_rewards(ITER, 0.0f); - noisy_keys.resize(ITER); + noisy_info.resize(ITER); omp_set_num_threads(10); - for (int epoch = 0; epoch < 1000; ++epoch) { + for (int epoch = 0; epoch < 300; ++epoch) { #pragma omp parallel for schedule(dynamic, 1) for (int i = 0; i < ITER; ++i) { std::shared_ptr sampling_agent = sampling_agents[i]; - SamplingKey key; - bool success = sampling_agent->add_noise(key); + SamplingInfo info; + bool success = sampling_agent->add_noise(info); float reward = evaluate(envs[i], sampling_agent); - noisy_keys[i] = key; + noisy_info[i] = info; noisy_rewards[i] = reward; } // NOTE: all parameters of sampling_agents will be updated - bool success = agent->update(noisy_keys, noisy_rewards); + bool success =
agent->update(noisy_info, noisy_rewards); int reward = evaluate(envs[0], agent); LOG(INFO) << "Epoch:" << epoch << " Reward: " << reward; diff --git a/deepes/demo/torch/cartpole_solver_parallel.cc b/deepes/demo/torch/cartpole_solver_parallel.cc index 3311ed6185a8360e92258ccf9e3b42323e07ab76..8261466ea662d16ea6379f25b574e9df76d6b4d8 100644 --- a/deepes/demo/torch/cartpole_solver_parallel.cc +++ b/deepes/demo/torch/cartpole_solver_parallel.cc @@ -59,23 +59,23 @@ int main(int argc, char* argv[]) { sampling_agents.push_back(agent->clone()); } - std::vector noisy_keys; + std::vector noisy_info; std::vector noisy_rewards(ITER, 0.0f); - noisy_keys.resize(ITER); + noisy_info.resize(ITER); for (int epoch = 0; epoch < 1000; ++epoch) { #pragma omp parallel for schedule(dynamic, 1) for (int i = 0; i < ITER; ++i) { auto sampling_agent = sampling_agents[i]; - SamplingKey key; - bool success = sampling_agent->add_noise(key); + SamplingInfo info; + bool success = sampling_agent->add_noise(info); float reward = evaluate(envs[i], sampling_agent); - noisy_keys[i] = key; + noisy_info[i] = info; noisy_rewards[i] = reward; } // Will also update parameters of sampling_agents - bool success = agent->update(noisy_keys, noisy_rewards); + bool success = agent->update(noisy_info, noisy_rewards); // Use original agent to evalute (without noise). int reward = evaluate(envs[0], agent); diff --git a/deepes/include/paddle/es_agent.h b/deepes/include/paddle/es_agent.h index ceeaf053877bd837d59ba963132ed0077bc9da84..219c58f4abaa4a1c04d16f549ff66f76a05f9db7 100644 --- a/deepes/include/paddle/es_agent.h +++ b/deepes/include/paddle/es_agent.h @@ -63,11 +63,11 @@ class ESAgent { * Parameters of cloned agents will also be updated. 
*/ bool update( - std::vector& noisy_keys, + std::vector& noisy_info, std::vector& noisy_rewards); // copied parameters = original parameters + noise - bool add_noise(SamplingKey& sampling_key); + bool add_noise(SamplingInfo& sampling_info); /** * @brief Get paddle predict diff --git a/deepes/include/torch/es_agent.h b/deepes/include/torch/es_agent.h index 50d6f4c2d9fb454ac9450f74b39766b013c480a2..50434b5e9e08a4915c83c277b3b9e0396e52a016 100644 --- a/deepes/include/torch/es_agent.h +++ b/deepes/include/torch/es_agent.h @@ -98,7 +98,7 @@ public: * Only not cloned ESAgent can call `update` function. * Parameters of cloned agents will also be updated. */ - bool update(std::vector& noisy_keys, std::vector& noisy_rewards) { + bool update(std::vector& noisy_info, std::vector& noisy_rewards) { if (_is_sampling_agent) { LOG(ERROR) << "[DeepES] Cloned ESAgent cannot call update function, please use original ESAgent."; return false; @@ -107,8 +107,8 @@ public: compute_centered_ranks(noisy_rewards); memset(_neg_gradients, 0, _param_size * sizeof(float)); - for (int i = 0; i < noisy_keys.size(); ++i) { - int key = noisy_keys[i].key(0); + for (int i = 0; i < noisy_info.size(); ++i) { + int key = noisy_info[i].key(0); float reward = noisy_rewards[i]; bool success = _sampling_method->resampling(key, _noise, _param_size); for (int64_t j = 0; j < _param_size; ++j) { @@ -116,7 +116,7 @@ public: } } for (int64_t j = 0; j < _param_size; ++j) { - _neg_gradients[j] /= -1.0 * noisy_keys.size(); + _neg_gradients[j] /= -1.0 * noisy_info.size(); } //update @@ -125,7 +125,7 @@ public: for (auto& param: params) { torch::Tensor tensor = param.value().view({-1}); auto tensor_a = tensor.accessor(); - _optimizer->update(tensor_a, _neg_gradients+counter, tensor.size(0), param.key()); + _optimizer->update(tensor_a, _neg_gradients+counter, tensor.size(0), param.key()); counter += tensor.size(0); } @@ -133,7 +133,7 @@ public: } // copied parameters = original parameters + noise - bool
add_noise(SamplingKey& sampling_key) { + bool add_noise(SamplingInfo& sampling_info) { if (!_is_sampling_agent) { LOG(ERROR) << "[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent."; return false; @@ -142,11 +142,11 @@ public: auto sampling_params = _sampling_model->named_parameters(); auto params = _model->named_parameters(); int key = _sampling_method->sampling(_noise, _param_size); - sampling_key.add_key(key); + sampling_info.add_key(key); int64_t counter = 0; for (auto& param: sampling_params) { torch::Tensor sampling_tensor = param.value().view({-1}); - std::string param_name = param.key(); + std::string param_name = param.key(); torch::Tensor tensor = params.find(param_name)->view({-1}); auto sampling_tensor_a = sampling_tensor.accessor(); auto tensor_a = tensor.accessor(); diff --git a/deepes/src/paddle/es_agent.cc b/deepes/src/paddle/es_agent.cc index a27a0d73bc7c05308b6665bfb349c90ad6d85b5e..ed2b1b9332ce5bbb305aa5eac2cf61e28b923dc0 100644 --- a/deepes/src/paddle/es_agent.cc +++ b/deepes/src/paddle/es_agent.cc @@ -78,7 +78,7 @@ std::shared_ptr ESAgent::clone() { } bool ESAgent::update( - std::vector& noisy_keys, + std::vector& noisy_info, std::vector& noisy_rewards) { if (_is_sampling_agent) { LOG(ERROR) << "[DeepES] Cloned ESAgent cannot call update function, please use original ESAgent."; @@ -88,8 +88,8 @@ bool ESAgent::update( compute_centered_ranks(noisy_rewards); memset(_neg_gradients, 0, _param_size * sizeof(float)); - for (int i = 0; i < noisy_keys.size(); ++i) { - int key = noisy_keys[i].key(0); + for (int i = 0; i < noisy_info.size(); ++i) { + int key = noisy_info[i].key(0); float reward = noisy_rewards[i]; bool success = _sampling_method->resampling(key, _noise, _param_size); for (int64_t j = 0; j < _param_size; ++j) { @@ -97,7 +97,7 @@ bool ESAgent::update( } } for (int64_t j = 0; j < _param_size; ++j) { - _neg_gradients[j] /= -1.0 * noisy_keys.size(); + _neg_gradients[j] /= -1.0 * noisy_info.size(); } //update
@@ -114,14 +114,14 @@ bool ESAgent::update( } -bool ESAgent::add_noise(SamplingKey& sampling_key) { +bool ESAgent::add_noise(SamplingInfo& sampling_info) { if (!_is_sampling_agent) { LOG(ERROR) << "[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent."; return false; } int key = _sampling_method->sampling(_noise, _param_size); - sampling_key.add_key(key); + sampling_info.add_key(key); int64_t counter = 0; for (std::string param_name: _param_names) { diff --git a/deepes/src/proto/deepes.proto b/deepes/src/proto/deepes.proto index 38abee970140fc7fecfb6f5c18854a43dabd06ff..c6c1c9cd95dfadd23d2b45aebc07dd5856a92d00 100644 --- a/deepes/src/proto/deepes.proto +++ b/deepes/src/proto/deepes.proto @@ -23,6 +23,8 @@ message DeepESConfig { optional GaussianSamplingConfig gaussian_sampling = 3; // Optimizer Configuration optional OptimizerConfig optimizer = 4; + // AsyncESAgent Configuration + optional AsyncESConfig async_es = 5; } message GaussianSamplingConfig { @@ -40,6 +42,13 @@ message OptimizerConfig{ optional float epsilon = 6 [default = 1e-8]; } -message SamplingKey{ +message SamplingInfo{ repeated int32 key = 1; + optional int32 model_iter_id = 2; +} + +message AsyncESConfig{ + optional string model_warehouse = 1 [default = "./model_warehouse"]; + repeated string model_md5 = 2; + optional int32 max_to_keep = 3 [default = 5]; }