// adadelta_optimizer.cc
#include "adadelta_optimizer.h"
#include <algorithm>
#include <cmath>

namespace paddle {
namespace optimizer {

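// Allocate the per-parameter state Adadelta needs: the running average of
// squared gradients, the running average of squared updates, and a scratch
// buffer holding the current step's update.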
void AdadeltaOptimizer::set_weight(Tensor* p) {
  // Keep a handle to the parameter tensor; update() reads and writes it
  // through parameter_.
  parameter_ = p;
  size_t size = p->size();
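  // The raw buffers below are wrapped in Tensor objects; note that
  // new real[size] does not zero them, so the accumulators are assumed to be
  // cleared before the first update.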
  real* gptr = new real[size];
  accum_gradient = new Tensor(gptr, size);
  real* dptr = new real[size];
  accum_delta = new Tensor(dptr, size);
  real* dptr_current = new real[size];
  update_delta = new Tensor(dptr_current, size);
}

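// One Adadelta step (Zeiler, 2012): per element, update the squared-gradient
// average, form the update from the ratio of the two RMS estimates, update the
// squared-update average, then apply the step together with weight decay.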
void AdadeltaOptimizer::update(const Tensor* gradient) {
  num_sample_passed += 1;
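  // The learning-rate policy is queried with the number of samples seen so far.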
  double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
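  // References to the parameter, the incoming gradient, and the optimizer's
  // internal state tensors.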
  Tensor& param = *parameter_;
  const Tensor& grad = *gradient;
  Tensor& accum_g = *accum_gradient;
  Tensor& accum_d = *accum_delta;
  Tensor& update_d = *update_delta;
  for (size_t i = 0; i < param.size(); ++i) {
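    // Accumulate the squared gradient:
    // E[g^2]_t = rho * E[g^2]_{t-1} + (1 - rho) * g_t^2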
    accum_g[i] = rho * accum_g[i] + (1.0 - rho) * grad[i] * grad[i];

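    // Scale the gradient by the ratio of RMS estimates; epsilon keeps both
    // square roots well-defined: delta_t = RMS[delta]_{t-1} / RMS[g]_t * g_t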
    update_d[i] = std::sqrt(accum_d[i] + epsilon) /
                  std::sqrt(accum_g[i] + epsilon) * grad[i];

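    // Accumulate the squared update:
    // E[delta^2]_t = rho * E[delta^2]_{t-1} + (1 - rho) * delta_t^2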
    accum_d[i] = rho * accum_d[i] + (1.0 - rho) * update_d[i] * update_d[i];

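    // Apply the step plus L2 weight decay: w -= lr * (delta_t + decay * w)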
    param[i] -= learning_rate * update_d[i] + learning_rate * decay * param[i];
  }
}
}  // namespace optimizer
}  // namespace paddle