es_agent.cc 4.6 KB
Newer Older
Z
zenghsh3 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <vector>
#include "es_agent.h"

namespace DeepES {

// Short local names for the Paddle-Lite API types used throughout this file.
using PaddlePredictor = paddle::lite_api::PaddlePredictor;
using Tensor = paddle::lite_api::Tensor;
using shape_t = paddle::lite_api::shape_t;

24
inline int64_t ShapeProduction(const shape_t& shape) {
Z
zenghsh3 已提交
25 26 27 28 29 30 31 32 33
  int64_t res = 1;
  for (auto i : shape) res *= i;
  return res;
}

// Default constructor (used by clone() through std::make_shared<ESAgent>()).
// Puts the raw members into a known state so that the destructor never calls
// delete[] on an indeterminate pointer when the agent is destroyed before its
// buffers have been assigned. delete[] on a null pointer is a no-op.
ESAgent::ESAgent() {
  _is_sampling_agent = false;
  _noise = nullptr;
  _neg_gradients = nullptr;
  _param_size = 0;
}

// Releases the raw buffers held by this agent.
ESAgent::~ESAgent() {
  // clone() never assigns _neg_gradients on a sampling agent, so the gradient
  // buffer is only released when this is the original agent.
  if (!_is_sampling_agent) {
    delete[] _neg_gradients;
  }
  delete[] _noise;
}

// Builds the original (non-sampling) agent.
//
// predictor:   predictor whose parameters update() will modify.
// config_path: path handed to load_proto_conf() to fill the DeepESConfig.
ESAgent::ESAgent(
    std::shared_ptr<PaddlePredictor> predictor,
    std::string config_path) {
  _is_sampling_agent = false;

  // The original agent cannot be used to sample (add_noise() rejects it), so
  // its sampling predictor is simply the same object as _predictor; this lets
  // get_predictor() be used for evaluation.
  _predictor = predictor;
  _sampling_predictor = predictor;

  // Load the configuration first: both the sampling method and the optimizer
  // are set up from it.
  _config = std::make_shared<DeepESConfig>();
  load_proto_conf(config_path, *_config);

  _sampling_method = std::make_shared<GaussianSampling>();
  _sampling_method->load_config(*_config);

  const auto base_lr = _config->optimizer().base_lr();
  _optimizer = std::make_shared<SGDOptimizer>(base_lr);

  // _calculate_param_size() iterates over _param_names, so the names must be
  // fetched before the size is computed.
  _param_names = _predictor->GetParamNames();
  _param_size = _calculate_param_size();

  // One float per model parameter for the sampled noise and for the
  // accumulated negative gradient.
  _noise = new float[_param_size];
  _neg_gradients = new float[_param_size];
}

std::shared_ptr<ESAgent> ESAgent::clone() {
Z
zenghsh3 已提交
63
  std::shared_ptr<PaddlePredictor> new_sampling_predictor = _predictor->Clone();
Z
zenghsh3 已提交
64 65 66

  std::shared_ptr<ESAgent> new_agent = std::make_shared<ESAgent>();

Z
zenghsh3 已提交
67
  float* noise = new float [_param_size];
68 69

  new_agent->_predictor = _predictor;
Z
zenghsh3 已提交
70
  new_agent->_sampling_predictor = new_sampling_predictor;
71 72 73 74 75

  new_agent->_is_sampling_agent = true;
  new_agent->_sampling_method = _sampling_method;
  new_agent->_param_names = _param_names;
  new_agent->_param_size = _param_size;
Z
zenghsh3 已提交
76
  new_agent->_noise = noise;
77

Z
zenghsh3 已提交
78 79 80 81 82 83
  return new_agent;
}

bool ESAgent::update(
    std::vector<SamplingKey>& noisy_keys,
    std::vector<float>& noisy_rewards) {
84 85 86 87 88
  if (_is_sampling_agent) {
    LOG(ERROR) << "[DeepES] Cloned ESAgent cannot call update function, please use original ESAgent.";
    return false;
  }

Z
zenghsh3 已提交
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
  compute_centered_ranks(noisy_rewards);
  
  memset(_neg_gradients, 0, _param_size * sizeof(float));
  for (int i = 0; i < noisy_keys.size(); ++i) {
    int key = noisy_keys[i].key(0);
    float reward = noisy_rewards[i];
    bool success = _sampling_method->resampling(key, _noise, _param_size);
    for (int64_t j = 0; j < _param_size; ++j) {
      _neg_gradients[j] += _noise[j] * reward;
    }
  }
  for (int64_t j = 0; j < _param_size; ++j) {
    _neg_gradients[j] /= -1.0 * noisy_keys.size();
  }

  //update
  int64_t counter = 0;

  for (std::string param_name: _param_names) {
    std::unique_ptr<Tensor> tensor = _predictor->GetMutableTensor(param_name);
    float* tensor_data = tensor->mutable_data<float>();
    int64_t tensor_size = ShapeProduction(tensor->shape());
    _optimizer->update(tensor_data, _neg_gradients + counter, tensor_size);
    counter += tensor_size;
  }
114
  return true;
Z
zenghsh3 已提交
115 116 117
  
}

118 119 120 121 122 123
// Samples a fresh noise vector and writes "original parameter + noise" into
// the tensors of _sampling_predictor. Only cloned (sampling) agents may call
// this.
//
// sampling_key: receives, via add_key(), the key returned by the sampling
//               method for this noise vector.
//
// Returns true on success, false when called on the original agent.
bool ESAgent::add_noise(SamplingKey& sampling_key) {
  if (!_is_sampling_agent) {
    LOG(ERROR) << "[DeepES] Original ESAgent cannot call add_noise function, please use cloned ESAgent.";
    return false;
  }

  int key = _sampling_method->sampling(_noise, _param_size);
  sampling_key.add_key(key);

  // Offset of the current tensor inside the flat noise buffer.
  int64_t counter = 0;

  for (const std::string& param_name : _param_names) {
    std::unique_ptr<Tensor> sample_tensor = _sampling_predictor->GetMutableTensor(param_name);
    std::unique_ptr<const Tensor> tensor = _predictor->GetTensor(param_name);
    const int64_t tensor_size = ShapeProduction(tensor->shape());

    // Fetch the data pointers once per tensor rather than once per element.
    float* sample_data = sample_tensor->mutable_data<float>();
    const float* base_data = tensor->data<float>();
    const float* noise = _noise + counter;

    for (int64_t j = 0; j < tensor_size; ++j) {
      sample_data[j] = base_data[j] + noise[j];
    }
    counter += tensor_size;
  }

  return true;
}


142
std::shared_ptr<PaddlePredictor> ESAgent::get_predictor() {
Z
zenghsh3 已提交
143
  return _sampling_predictor;
Z
zenghsh3 已提交
144 145 146 147 148 149 150 151 152 153 154 155 156 157
}

// Returns the total number of scalar values across all tensors listed in
// _param_names, obtained by summing ShapeProduction() of each tensor's shape.
// _param_names must already be populated when this is called.
int64_t ESAgent::_calculate_param_size() {
  int64_t param_size = 0;
  // Iterate by const reference: the name is only read, so copying the string
  // on every iteration is unnecessary.
  for (const std::string& param_name : _param_names) {
    std::unique_ptr<const Tensor> tensor = _predictor->GetTensor(param_name);
    param_size += ShapeProduction(tensor->shape());
  }
  return param_size;
}


}