Commit 1e2746fa, authored by zenghsh3

add _is_sampling_agent flag in original agent and cloned agent

Parent: 86d0fedb
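
This commit reworks the sampling API: add_noise now fills a SamplingKey passed by reference and returns a success flag, clone marks the returned agent with _is_sampling_agent = true, and a single get_predictor replaces the separate get_sample_predictor / get_evaluate_predictor getters. Below is a minimal sketch of the resulting Paddle workflow, assuming the demo's surrounding setup (paddle_predictor, ITER, envs, noisy_keys, noisy_rewards as defined in the files in this diff); it only illustrates the changed calls and is not part of the commit:

std::shared_ptr<ESAgent> agent = std::make_shared<ESAgent>(
    paddle_predictor, "../benchmark/cartpole_config.prototxt");

// Only cloned agents are sampling agents; the original agent stays noise-free.
std::vector<std::shared_ptr<ESAgent>> sampling_agents;
for (int i = 0; i < ITER; ++i) {
    sampling_agents.push_back(agent->clone());
}

for (int i = 0; i < ITER; ++i) {
    SamplingKey key;
    sampling_agents[i]->add_noise(key);            // fills key; returns false if called on the original agent
    noisy_keys[i] = key;
    noisy_rewards[i] = evaluate(envs[i], sampling_agents[i]);
}

// update is only valid on the original agent; cloned agents share its _predictor,
// so their parameters are updated as well.
agent->update(noisy_keys, noisy_rewards);
float reward = evaluate(envs[0], agent);           // noise-free evaluation through get_predictor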
......@@ -59,16 +59,13 @@ int arg_max(const std::vector<float>& vec) {
}
float evaluate(CartPole& env, std::shared_ptr<ESAgent> agent, bool is_eval=false) {
float evaluate(CartPole& env, std::shared_ptr<ESAgent> agent) {
float total_reward = 0.0;
env.reset();
const float* obs = env.getState();
std::shared_ptr<PaddlePredictor> paddle_predictor;
if (is_eval)
paddle_predictor = agent->get_evaluate_predictor(); // For evaluate
else
paddle_predictor = agent->get_sample_predictor(); // For sampling (ES exploring)
paddle_predictor = agent->get_predictor();
while (true) {
std::vector<float> probs = forward(paddle_predictor, obs);
......@@ -93,8 +90,9 @@ int main(int argc, char* argv[]) {
std::shared_ptr<PaddlePredictor> paddle_predictor = create_paddle_predictor("../demo/paddle/cartpole_init_model");
std::shared_ptr<ESAgent> agent = std::make_shared<ESAgent>(paddle_predictor, "../benchmark/cartpole_config.prototxt");
std::vector< std::shared_ptr<ESAgent> > sampling_agents{ agent };
for (int i = 0; i < (ITER - 1); ++i) {
// Clone agents to sample (explore).
std::vector< std::shared_ptr<ESAgent> > sampling_agents;
for (int i = 0; i < ITER; ++i) {
sampling_agents.push_back(agent->clone());
}
......@@ -107,7 +105,8 @@ int main(int argc, char* argv[]) {
#pragma omp parallel for schedule(dynamic, 1)
for (int i = 0; i < ITER; ++i) {
std::shared_ptr<ESAgent> sampling_agent = sampling_agents[i];
SamplingKey key = sampling_agent->add_noise();
SamplingKey key;
bool success = sampling_agent->add_noise(key);
float reward = evaluate(envs[i], sampling_agent);
noisy_keys[i] = key;
......@@ -115,9 +114,9 @@ int main(int argc, char* argv[]) {
}
// NOTE: all parameters of sampling_agents will be updated
agent->update(noisy_keys, noisy_rewards);
bool success = agent->update(noisy_keys, noisy_rewards);
int reward = evaluate(envs[0], agent, true);
int reward = evaluate(envs[0], agent);
LOG(INFO) << "Epoch:" << epoch << " Reward: " << reward;
}
}
......@@ -25,13 +25,13 @@
using namespace DeepES;
const int ITER = 10;
float evaluate(CartPole& env, std::shared_ptr<ESAgent<Model>> agent, bool is_eval=false) {
float evaluate(CartPole& env, std::shared_ptr<ESAgent<Model>> agent) {
float total_reward = 0.0;
env.reset();
const float* obs = env.getState();
while (true) {
torch::Tensor obs_tensor = torch::tensor({obs[0], obs[1], obs[2], obs[3]});
torch::Tensor action = agent->predict(obs_tensor, is_eval);
torch::Tensor action = agent->predict(obs_tensor);
int act = std::get<1>(action.max(-1)).item<long>();
env.step(act);
float reward = env.getReward();
......@@ -52,9 +52,10 @@ int main(int argc, char* argv[]) {
auto model = std::make_shared<Model>(4, 2);
std::shared_ptr<ESAgent<Model>> agent = std::make_shared<ESAgent<Model>>(model, "../benchmark/cartpole_config.prototxt");
std::vector<std::shared_ptr<ESAgent<Model>>> sampling_agents = {agent};
for (int i = 0; i < ITER - 1; ++i) {
// Clone agents to sample (explore).
std::vector<std::shared_ptr<ESAgent<Model>>> sampling_agents;
for (int i = 0; i < ITER; ++i) {
sampling_agents.push_back(agent->clone());
}
......@@ -66,15 +67,18 @@ int main(int argc, char* argv[]) {
#pragma omp parallel for schedule(dynamic, 1)
for (int i = 0; i < ITER; ++i) {
auto sampling_agent = sampling_agents[i];
SamplingKey key = sampling_agent->add_noise();
SamplingKey key;
bool success = sampling_agent->add_noise(key);
float reward = evaluate(envs[i], sampling_agent);
noisy_keys[i] = key;
noisy_rewards[i] = reward;
}
agent->update(noisy_keys, noisy_rewards);
int reward = evaluate(envs[0], agent, true);
// Will also update parameters of sampling_agents
bool success = agent->update(noisy_keys, noisy_rewards);
// Use the original agent to evaluate (without noise).
int reward = evaluate(envs[0], agent);
LOG(INFO) << "Epoch:" << epoch << " Reward: " << reward;
}
}
......@@ -46,8 +46,8 @@ class ESAgent {
// Return a cloned ESAgent whose _predictor is the same as this->_predictor,
// but whose _sample_predictor points to a newly created object.
// This function mainly used to clone a new ESAgent to do sampling in multi-thread way.
// NOTE: when calling `update` function of current object or cloned one, both of their
// This function is used to clone a new ESAgent for sampling in a multi-thread way.
// NOTE: when calling the `update` function of the current object, both of their
// parameters will be updated, because their _predictor points to the same object.
std::shared_ptr<ESAgent> clone();
......@@ -57,34 +57,17 @@ class ESAgent {
std::vector<float>& noisy_rewards);
// parameters of _sample_predictor = parameters of _predictor + noise
SamplingKey add_noise();
bool add_noise(SamplingKey& sampling_key);
std::shared_ptr<SamplingMethod> get_sampling_method();
std::shared_ptr<Optimizer> get_optimizer();
std::shared_ptr<DeepESConfig> get_config();
int64_t get_param_size();
std::vector<std::string> get_param_names();
// Return paddle predict _sample_predictor (with added noise)
std::shared_ptr<PaddlePredictor> get_sample_predictor();
// Return paddle predict _predictor (without added noise)
std::shared_ptr<PaddlePredictor> get_evaluate_predictor();
void set_config(std::shared_ptr<DeepESConfig> config);
void set_sampling_method(std::shared_ptr<SamplingMethod> sampling_method);
void set_optimizer(std::shared_ptr<Optimizer> optimizer);
void set_param_size(int64_t param_size);
void set_param_names(std::vector<std::string> param_names);
void set_noise(float* noise);
void set_neg_gradients(float* neg_gradients);
void set_predictor(
std::shared_ptr<PaddlePredictor> predictor,
std::shared_ptr<PaddlePredictor> sample_predictor);
// Return the PaddlePredictor _sample_predictor:
// if _is_sampling_agent is true, it returns the predictor with added noise;
// if _is_sampling_agent is false, it returns the predictor without added noise.
std::shared_ptr<PaddlePredictor> get_predictor();
private:
std::shared_ptr<PaddlePredictor> _predictor;
std::shared_ptr<PaddlePredictor> _sample_predictor;
bool _is_sampling_agent;
std::shared_ptr<SamplingMethod> _sampling_method;
std::shared_ptr<Optimizer> _optimizer;
std::shared_ptr<DeepESConfig> _config;
......
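For callers of the Paddle ESAgent, the two predictor getters above collapse into one; whether the returned predictor carries noise now depends on which agent is asked. A hedged migration sketch (the old getter names are the ones removed in this file):

// Before this commit:
//   auto p = is_eval ? agent->get_evaluate_predictor() : agent->get_sample_predictor();
// After this commit: a single getter. The original agent returns its noise-free predictor,
// while a cloned (sampling) agent returns its noise-perturbed _sample_predictor.
std::shared_ptr<PaddlePredictor> p = agent->get_predictor();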
......@@ -26,101 +26,64 @@ namespace DeepES{
/* DeepES agent for Torch.
* Our implementation is flexible enough to support any model that subclasses torch::nn::Module.
* That is, we can instantiate a agent by: es_agent = ESAgent<Model>(model);
* After that, users can clone a agent for multi-thread processing, add parametric noise for exploration,
* That is, we can instantiate an agent by: es_agent = ESAgent<Model>(model);
* After that, users can clone an agent for multi-thread processing, add parametric noise for exploration,
* and update the parameters according to the evaluation results of the noisy parameters.
*
*/
template <class T>
class ESAgent{
public:
ESAgent(): _param_size(0){}
ESAgent() {}
~ESAgent() {
delete[] _noise;
delete[] _neg_gradients;
if (!_is_sampling_agent)
delete[] _neg_gradients;
}
ESAgent(std::shared_ptr<T> model, std::string config_path): _model(model) {
_is_sampling_agent = false;
_config = std::make_shared<DeepESConfig>();
load_proto_conf(config_path, *_config);
_sampling_method = std::make_shared<GaussianSampling>();
_sampling_method->load_config(*_config);
_optimizer = std::make_shared<SGDOptimizer>(_config->optimizer().base_lr());
_param_size = 0;
_sampled_model = model->clone();
param_size();
// The original agent can't be used to sample, so keep _sampled_model the same as _model for evaluating.
_sampled_model = model;
_param_size = _calculate_param_size();
_noise = new float [_param_size];
_neg_gradients = new float [_param_size];
}
std::shared_ptr<ESAgent> clone() {
std::shared_ptr<T> new_model = _model->clone();
std::shared_ptr<ESAgent> new_agent = std::make_shared<ESAgent>();
new_agent->set_model(_model, new_model);
new_agent->set_sampling_method(_sampling_method);
new_agent->set_optimizer(_optimizer);
new_agent->set_config(_config);
new_agent->set_param_size(_param_size);
float* new_noise = new float [_param_size];
float* new_neg_gradients = new float [_param_size];
new_agent->set_noise(new_noise);
new_agent->set_neg_gradients(new_neg_gradients);
return new_agent;
}
void set_config(std::shared_ptr<DeepESConfig> config) {
_config = config;
}
void set_sampling_method(std::shared_ptr<SamplingMethod> sampling_method) {
_sampling_method = sampling_method;
}
new_agent->_model = _model;
std::shared_ptr<T> new_model = _model->clone();
new_agent->_sampled_model = new_model;
void set_model(std::shared_ptr<T> model, std::shared_ptr<T> sampled_model) {
_model = model;
_sampled_model = sampled_model;
}
new_agent->_is_sampling_agent = true;
new_agent->_sampling_method = _sampling_method;
new_agent->_param_size = _param_size;
std::shared_ptr<SamplingMethod> get_sampling_method() {
return _sampling_method;
}
std::shared_ptr<Optimizer> get_optimizer() {
return _optimizer;
}
void set_optimizer(std::shared_ptr<Optimizer> optimizer) {
_optimizer = optimizer;
}
void set_param_size(int64_t param_size) {
_param_size = param_size;
}
float* new_noise = new float [_param_size];
new_agent->_noise = new_noise;
void set_noise(float* noise) {
_noise = noise;
return new_agent;
}
void set_neg_gradients(float* neg_gradients) {
_neg_gradients = neg_gradients;
torch::Tensor predict(const torch::Tensor& x) {
return _sampled_model->forward(x);
}
torch::Tensor predict(const torch::Tensor& x, bool is_eval=false) {
if (is_eval) {
// predict with _model (without adding noise)
return _model->forward(x);
}
else {
// predict with _sampled_model (with added noise)
return _sampled_model->forward(x);
bool update(std::vector<SamplingKey>& noisy_keys, std::vector<float>& noisy_rewards) {
if (_is_sampling_agent) {
LOG(ERROR) << "[DeepES] Cloned ESAgent cannot call update function, please use original ESAgent.";
return false;
}
}
bool update(std::vector<SamplingKey>& noisy_keys, std::vector<float>& noisy_rewards) {
compute_centered_ranks(noisy_rewards);
memset(_neg_gradients, 0, _param_size * sizeof(float));
......@@ -145,10 +108,16 @@ public:
_optimizer->update(tensor_a, _neg_gradients+counter, tensor.size(0));
counter += tensor.size(0);
}
return true;
}
SamplingKey add_noise() {
SamplingKey sampling_key;
bool add_noise(SamplingKey& sampling_key) {
if (!_is_sampling_agent) {
LOG(ERROR) << "[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent.";
return false;
}
auto sampled_params = _sampled_model->named_parameters();
auto params = _model->named_parameters();
int key = _sampling_method->sampling(_noise, _param_size);
......@@ -165,23 +134,15 @@ public:
}
counter += tensor.size(0);
}
return sampling_key;
return true;
}
int64_t param_size() {
if (_param_size == 0) {
auto params = _model->named_parameters();
for (auto& param: params) {
torch::Tensor tensor = param.value().view({-1});
_param_size += tensor.size(0);
}
}
return _param_size;
}
private:
std::shared_ptr<T> _sampled_model;
std::shared_ptr<T> _model;
bool _is_sampling_agent;
std::shared_ptr<SamplingMethod> _sampling_method;
std::shared_ptr<Optimizer> _optimizer;
std::shared_ptr<DeepESConfig> _config;
......@@ -189,6 +150,15 @@ private:
// Allocate memory for noise and neg_gradients in advance.
float* _noise;
float* _neg_gradients;
int64_t _calculate_param_size() {
int64_t param_size = 0;
auto params = _model->named_parameters();
for (auto& param: params) {
torch::Tensor tensor = param.value().view({-1});
param_size += tensor.size(0);
}
_param_size = param_size;
return _param_size;
}
};
}
......
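The torch-side ESAgent follows the same split between the original agent and its sampling clones. A minimal usage sketch under the new API, assuming the Model(4, 2) network and an observation tensor obs_tensor as in the demo above; illustration only, not part of the diff:

auto model = std::make_shared<Model>(4, 2);
auto agent = std::make_shared<ESAgent<Model>>(model, "../benchmark/cartpole_config.prototxt");
auto sampler = agent->clone();                 // sampling agent with its own cloned _sampled_model

SamplingKey key;
if (sampler->add_noise(key)) {                 // perturbs only the cloned model's parameters
    torch::Tensor noisy_action = sampler->predict(obs_tensor);
}
torch::Tensor greedy_action = agent->predict(obs_tensor);   // the original agent forwards without noise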
......@@ -12,6 +12,9 @@ if [ $1 = "paddle" ]; then
echo "Please put the PaddleLite library in the current folder according to the instructions in the README"
exit 1
fi
# Unzip the initial model
unzip ./demo/paddle/cartpole_init_model.zip -d ./demo/paddle/
FLAGS=" -DWITH_PADDLE=ON"
elif [ $1 = "torch" ]; then
......
......@@ -28,7 +28,7 @@ typedef paddle::lite_api::PaddlePredictor PaddlePredictor;
typedef paddle::lite_api::Tensor Tensor;
typedef paddle::lite_api::shape_t shape_t;
int64_t ShapeProduction(const shape_t& shape) {
inline int64_t ShapeProduction(const shape_t& shape) {
int64_t res = 1;
for (auto i : shape) res *= i;
return res;
......@@ -38,15 +38,18 @@ ESAgent::ESAgent() {}
ESAgent::~ESAgent() {
delete[] _noise;
delete[] _neg_gradients;
if (!_is_sampling_agent)
delete[] _neg_gradients;
}
ESAgent::ESAgent(
std::shared_ptr<PaddlePredictor> predictor,
std::string config_path) {
_is_sampling_agent = false;
_predictor = predictor;
_sample_predictor = predictor->Clone();
// The original agent can't be used to sample, so keep _sample_predictor the same as _predictor for evaluating.
_sample_predictor = predictor;
_config = std::make_shared<DeepESConfig>();
load_proto_conf(config_path, *_config);
......@@ -69,22 +72,27 @@ std::shared_ptr<ESAgent> ESAgent::clone() {
std::shared_ptr<ESAgent> new_agent = std::make_shared<ESAgent>();
float* new_noise = new float [_param_size];
float* new_neg_gradients = new float [_param_size];
new_agent->set_predictor(_predictor, new_sample_predictor);
new_agent->set_sampling_method(_sampling_method);
new_agent->set_optimizer(_optimizer);
new_agent->set_config(_config);
new_agent->set_param_size(_param_size);
new_agent->set_param_names(_param_names);
new_agent->set_noise(new_noise);
new_agent->set_neg_gradients(new_neg_gradients);
new_agent->_predictor = _predictor;
new_agent->_sample_predictor = new_sample_predictor;
new_agent->_is_sampling_agent = true;
new_agent->_sampling_method = _sampling_method;
new_agent->_param_names = _param_names;
new_agent->_param_size = _param_size;
new_agent->_noise = new_noise;
return new_agent;
}
bool ESAgent::update(
std::vector<SamplingKey>& noisy_keys,
std::vector<float>& noisy_rewards) {
if (_is_sampling_agent) {
LOG(ERROR) << "[DeepES] Cloned ESAgent cannot call update function, please use original ESAgent.";
return false;
}
compute_centered_ranks(noisy_rewards);
memset(_neg_gradients, 0, _param_size * sizeof(float));
......@@ -110,11 +118,16 @@ bool ESAgent::update(
_optimizer->update(tensor_data, _neg_gradients + counter, tensor_size);
counter += tensor_size;
}
return true;
}
SamplingKey ESAgent::add_noise() {
SamplingKey sampling_key;
bool ESAgent::add_noise(SamplingKey& sampling_key) {
if (!_is_sampling_agent) {
LOG(ERROR) << "[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent.";
return false;
}
int key = _sampling_method->sampling(_noise, _param_size);
sampling_key.add_key(key);
int64_t counter = 0;
......@@ -129,76 +142,14 @@ SamplingKey ESAgent::add_noise() {
counter += tensor_size;
}
return sampling_key;
}
std::shared_ptr<SamplingMethod> ESAgent::get_sampling_method() {
return _sampling_method;
}
std::shared_ptr<Optimizer> ESAgent::get_optimizer() {
return _optimizer;
}
std::shared_ptr<DeepESConfig> ESAgent::get_config() {
return _config;
}
int64_t ESAgent::get_param_size() {
return _param_size;
}
std::vector<std::string> ESAgent::get_param_names() {
return _param_names;
return true;
}
std::shared_ptr<PaddlePredictor> ESAgent::get_sample_predictor() {
std::shared_ptr<PaddlePredictor> ESAgent::get_predictor() {
return _sample_predictor;
}
std::shared_ptr<PaddlePredictor> ESAgent::get_evaluate_predictor() {
return _predictor;
}
void ESAgent::set_predictor(
std::shared_ptr<PaddlePredictor> predictor,
std::shared_ptr<PaddlePredictor> sample_predictor) {
_predictor = predictor;
_sample_predictor = sample_predictor;
}
void ESAgent::set_sampling_method(std::shared_ptr<SamplingMethod> sampling_method) {
_sampling_method = sampling_method;
}
void ESAgent::set_optimizer(std::shared_ptr<Optimizer> optimizer) {
_optimizer = optimizer;
}
void ESAgent::set_config(std::shared_ptr<DeepESConfig> config) {
_config = config;
}
void ESAgent::set_param_size(int64_t param_size) {
_param_size = param_size;
}
void ESAgent::set_param_names(std::vector<std::string> param_names) {
_param_names = param_names;
}
void ESAgent::set_noise(float* noise) {
_noise = noise;
}
void ESAgent::set_neg_gradients(float* neg_gradients) {
_neg_gradients = neg_gradients;
}
int64_t ESAgent::_calculate_param_size() {
int64_t param_size = 0;
for (std::string param_name: _param_names) {
......