From 26e9c4e26fb4322fb4f4fd626bc52530d698d95a Mon Sep 17 00:00:00 2001
From: dzhwinter
Date: Mon, 5 Jun 2017 23:21:09 +0800
Subject: [PATCH] "add vector alias to make name clear"

---
 paddle/optimizer/CMakeLists.txt        | 14 ++++++-------
 paddle/optimizer/adadelta_optimizer.cc | 29 ++++++++++++++------------
 paddle/optimizer/adadelta_optimizer.h  |  2 +-
 paddle/optimizer/adagrad_optimizer.cc  | 26 +++++++++++------------
 paddle/optimizer/adagrad_optimizer.h   |  2 +-
 paddle/optimizer/adam_optimizer.cc     | 25 ++++++++++++----------
 paddle/optimizer/adam_optimizer.h      |  2 +-
 paddle/optimizer/parameter_optimizer.h |  2 +-
 paddle/optimizer/sgd_optimizer.h       |  2 +-
 paddle/optimizer/sgd_optmizer.cc       | 19 +++++++++--------
 10 files changed, 64 insertions(+), 59 deletions(-)

diff --git a/paddle/optimizer/CMakeLists.txt b/paddle/optimizer/CMakeLists.txt
index 95d7ad720f3..192d0756202 100644
--- a/paddle/optimizer/CMakeLists.txt
+++ b/paddle/optimizer/CMakeLists.txt
@@ -1,23 +1,21 @@
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
 
 set(OPITMIZER_SRCS
-    # adadelta_optimizer.cc
-    # adagrad_optimizer.cc
-    # adam_optimizer.cc
+    adadelta_optimizer.cc
+    adagrad_optimizer.cc
+    adam_optimizer.cc
     optimizer.cc
     parameter_optimizer.cc
     sgd_optmizer.cc
-    regularizer.cc
     )
 
 set(OPITMIZER_Headers
-    # adadelta_optimizer.h
-    # adagrad_optimizer.h
-    # adam_optimizer.h
+    adadelta_optimizer.h
+    adagrad_optimizer.h
+    adam_optimizer.h
     lr_policy.h
     optimizer.h
     parameter_optimizer.h
-    regularizer.h
     sgd_optimizer.h
     Tensor.h
     )
diff --git a/paddle/optimizer/adadelta_optimizer.cc b/paddle/optimizer/adadelta_optimizer.cc
index c5537bde853..f10ee1bcd4b 100644
--- a/paddle/optimizer/adadelta_optimizer.cc
+++ b/paddle/optimizer/adadelta_optimizer.cc
@@ -1,5 +1,6 @@
 #include "adadelta_optimizer.h"
 #include <algorithm>
+#include <cmath>
 
 namespace paddle {
 namespace optimizer {
@@ -7,28 +8,30 @@
 void AdadeltaOptimizer::set_weight(Tensor* p) {
   size_t size = p->size();
   real* gptr = new real[size];
-  accum_gradient = Tensor(gptr, size);
+  accum_gradient = new Tensor(gptr, size);
   real* dptr = new real[size];
-  accum_delta = Tensor(dptr, size);
+  accum_delta = new Tensor(dptr, size);
   real* dptr_current = new real[size];
-  update_delta = Tensor(dptr_current, size);
+  update_delta = new Tensor(dptr_current, size);
 }
 
-void AdadeltaOptimizer::update(const Tensor& gradient) {
+void AdadeltaOptimizer::update(const Tensor* gradient) {
   num_sample_passed += 1;
   double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
-  for (size_t i = 0; i < parameter_->size(); ++i) {
-    accum_gradient[i] =
-        rho * accum_gradient[i] + (1.0 - rho) * gradient[i] * gradient[i];
+  Tensor& param = *parameter_;
+  const Tensor& grad = *gradient;
+  Tensor& accum_g = *accum_gradient;
+  Tensor& accum_d = *accum_delta;
+  Tensor& update_d = *update_delta;
+  for (size_t i = 0; i < param.size(); ++i) {
+    accum_g[i] = rho * accum_g[i] + (1.0 - rho) * grad[i] * grad[i];
 
-    update_delta[i] = std::sqrt(accum_delta[i] + epsilon) /
-                      std::sqrt(accum_gradient[i] + epsilon) * gradient[i];
+    update_d[i] = std::sqrt(accum_d[i] + epsilon) /
+                  std::sqrt(accum_g[i] + epsilon) * grad[i];
 
-    accum_delta[i] =
-        rho * accum_delta[i] + (1.0 - rho) * update_delta[i] * update_delta[i];
+    accum_d[i] = rho * accum_d[i] + (1.0 - rho) * update_d[i] * update_d[i];
 
-    parameter_[i] -=
-        learning_rate * update_delta[i] + learning_rate * decay * parameter_[i];
+    param[i] -= learning_rate * update_d[i] + learning_rate * decay * param[i];
   }
 }
 } // namespace optimizer
diff --git a/paddle/optimizer/adadelta_optimizer.h b/paddle/optimizer/adadelta_optimizer.h
index d9db5d09c22..1d8bd5a654c 100644
--- a/paddle/optimizer/adadelta_optimizer.h
+++ b/paddle/optimizer/adadelta_optimizer.h
@@ -19,7 +19,7 @@ public:
     if (accum_delta) delete accum_delta;
     if (update_delta) delete update_delta;
   }
-  void update(const Tensor &gradient);
+  void update(const Tensor *gradient);
   void set_weight(Tensor *p);
   real *get_weight() const;
 
diff --git a/paddle/optimizer/adagrad_optimizer.cc b/paddle/optimizer/adagrad_optimizer.cc
index 3d47e35896c..437bd4682d5 100644
--- a/paddle/optimizer/adagrad_optimizer.cc
+++ b/paddle/optimizer/adagrad_optimizer.cc
@@ -1,26 +1,26 @@
+#include <cmath>
+
 #include "adagrad_optimizer.h"
 
 namespace paddle {
 namespace optimizer {
 
 void AdagradOptimizer::set_weight(Tensor* p) {
-  size_t size = p->width();
+  size_t size = p->size();
   real* gptr = new real[size];
-  accum_gradient = Tensor(gptr, size);
-  real* dptr = new real[size];
-  accum_delta = Tensor(dtpr, size);
-  real* dptr_current = new real[size];
-  update_delta = Tensor(dptr_current, size);
+  accum_gradient = new Tensor(gptr, size);
 }
 
-void AdagradOptimizer::update(const Tensor& gradient) {
+void AdagradOptimizer::update(const Tensor* gradient) {
   num_sample_passed += 1;
-  double learning_rate = lr_policy->get_learning_rate();
-  for (size_t i = 0; i < parameter_.size(); ++i) {
-    accum_gradient[i] += gradient[i] * gradient[i];
-    parameter_[i] +=
-        learning_rate * (gradient[i] / std::sqrt(accum_gradient[i] + epsilon) +
-                         decay * parameter_[i]);
+  double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
+  Tensor& param = *parameter_;
+  const Tensor& grad = *gradient;
+  Tensor& accum_g = *accum_gradient;
+  for (size_t i = 0; i < param.size(); ++i) {
+    accum_g[i] += grad[i] * grad[i];
+    param[i] += learning_rate * grad[i] / std::sqrt(accum_g[i] + epsilon) +
+                learning_rate * decay * param[i];
   }
 }
 
diff --git a/paddle/optimizer/adagrad_optimizer.h b/paddle/optimizer/adagrad_optimizer.h
index 0f6ce06f35a..aa5f74ffcdf 100644
--- a/paddle/optimizer/adagrad_optimizer.h
+++ b/paddle/optimizer/adagrad_optimizer.h
@@ -13,7 +13,7 @@ public:
   ~AdagradOptimizer() {
     if (accum_gradient) delete accum_gradient;
   }
-  void update(const Tensor &gradient);
+  void update(const Tensor *gradient);
   void set_weight(Tensor *p);
   real *get_weight() const;
 
diff --git a/paddle/optimizer/adam_optimizer.cc b/paddle/optimizer/adam_optimizer.cc
index d9cc3344d59..6b3f275bf06 100644
--- a/paddle/optimizer/adam_optimizer.cc
+++ b/paddle/optimizer/adam_optimizer.cc
@@ -1,29 +1,32 @@
 #include "adam_optimizer.h"
+#include <cmath>
 
 namespace paddle {
 namespace optimizer {
 
 void AdamOptimizer::set_weight(Tensor *p) {
-  size_t size = p->width();
+  size_t size = p->size();
   real *mptr = new real[size];
-  momentums_ = Tensor(mptr, size);
+  momentums_ = new Tensor(mptr, size);
   real *vptr = new real[size];
-  velocitys_ = Tensor(vtpr, size);
+  velocitys_ = new Tensor(vptr, size);
 }
 
-void AdamOptimizer::update(const Tensor &gradient) {
+void AdamOptimizer::update(const Tensor *gradient) {
   num_sample_passed += 1;
   double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
   double coef1 = 1.0 - std::pow(beta_1, num_sample_passed);
   double coef2 = 1.0 - std::pow(beta_2, num_sample_passed);
   learning_rate *= std::sqrt(coef2) / coef1;
-  for (size_t i = 0; i < parameter_->size(); ++i) {
-    momentums_[i] = beta_1 * momentums_[i] + (1.0 - beta_1) * gradient[i];
-    velocitys_[i] =
-        beta_2 * velocitys_[i] + (1.0 - beta_2) * gradient[i] * gradient[i];
-    parameter_[i] -=
-        learning_rate * (momentums_[i] / std::sqrt(velocitys_[i] + epsilon) +
-                         decay * parameter_[i]);
+  Tensor &param = *parameter_;
+  const Tensor &grad = *gradient;
+  Tensor &m = *momentums_;
+  Tensor &v = *velocitys_;
+  for (size_t i = 0; i < param.size(); ++i) {
+    m[i] = beta_1 * m[i] + (1.0 - beta_1) * grad[i];
+    v[i] = beta_2 * v[i] + (1.0 - beta_2) * grad[i] * grad[i];
+    param[i] -=
+        learning_rate * (m[i] / std::sqrt(v[i] + epsilon) + decay * param[i]);
   }
 }
 } // namespace optimizer
diff --git a/paddle/optimizer/adam_optimizer.h b/paddle/optimizer/adam_optimizer.h
index 68e2aa0223e..55a44b032df 100644
--- a/paddle/optimizer/adam_optimizer.h
+++ b/paddle/optimizer/adam_optimizer.h
@@ -19,7 +19,7 @@ public:
     if (momentums_) delete momentums_;
     if (velocitys_) delete velocitys_;
   }
-  void update(const Tensor &gradient);
+  void update(const Tensor *gradient);
   void set_weight(Tensor *p);
   real *get_weight() const;
 
diff --git a/paddle/optimizer/parameter_optimizer.h b/paddle/optimizer/parameter_optimizer.h
index a4f39836baf..0124cfdc191 100644
--- a/paddle/optimizer/parameter_optimizer.h
+++ b/paddle/optimizer/parameter_optimizer.h
@@ -24,7 +24,7 @@ public:
   virtual ~ParameterOptimizer() { delete parameter_; };
 
   static ParameterOptimizer *create(const ::std::string &config_proto);
-  virtual void update(const Tensor &gradient) = 0;
+  virtual void update(const Tensor *gradient) = 0;
   virtual real *get_weight() const;
   virtual void set_weight(Tensor *parameter);
 
diff --git a/paddle/optimizer/sgd_optimizer.h b/paddle/optimizer/sgd_optimizer.h
index 375c99b30b8..4eb483c0fbd 100644
--- a/paddle/optimizer/sgd_optimizer.h
+++ b/paddle/optimizer/sgd_optimizer.h
@@ -15,7 +15,7 @@ public:
   SGDOptimizer(double m, double d, bool n, BaseLr* lr)
       : ParameterOptimizer(lr), momentum(m), decay(d), nesterov(n) {}
   virtual ~SGDOptimizer() { delete momentums_; }
-  void update(const Tensor& gradient);
+  void update(const Tensor* gradient);
   void set_weight(Tensor* p);
   real* get_weight() const;
 
diff --git a/paddle/optimizer/sgd_optmizer.cc b/paddle/optimizer/sgd_optmizer.cc
index 020867b93d5..5fdfc89c1f8 100644
--- a/paddle/optimizer/sgd_optmizer.cc
+++ b/paddle/optimizer/sgd_optmizer.cc
@@ -13,24 +13,25 @@ void SGDOptimizer::set_weight(Tensor *p) {
   }
 }
 
-void SGDOptimizer::update(const Tensor &gradient) {
+void SGDOptimizer::update(const Tensor *gradient) {
   num_sample_passed += 1;
   double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
   real velocity = 0.0;
   Tensor &param = *parameter_;
-  for (size_t i = 0; i < parameter_->size(); ++i) {
+  const Tensor &grad = *gradient;
+  Tensor &m = *momentums_;
+  for (size_t i = 0; i < param.size(); ++i) {
     if (momentum == 0.0) {
-      velocity =
-          -learning_rate * gradient[i] - learning_rate * decay * parameter_[i];
+      velocity = -learning_rate * grad[i] - learning_rate * decay * param[i];
     } else {
-      momentums_[i] = momentum * momentums_[i] - learning_rate * gradient[i] -
-                      learning_rate * decay * parameter_[i];
-      velocity = momentums_[i];
+      m[i] = momentum * m[i] - learning_rate * grad[i] -
+             learning_rate * decay * param[i];
+      velocity = m[i];
     }
     if (nesterov) {
-      parameter_[i] += momentum * velocity - learning_rate * gradient[i];
+      param[i] += momentum * velocity - learning_rate * grad[i];
     } else {
-      parameter_[i] += velocity;
+      param[i] += velocity;
    }
   }
 }
--
GitLab
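
For context, the hunks above mostly do two things: the optimizer state tensors become heap-allocated pointers updated through "vector aliases" (local references such as param, grad, accum_g), and update() now takes const Tensor* instead of const Tensor&. Below is a minimal standalone sketch of that aliasing pattern using the Adadelta rule from the patch. It is not part of the patch or the PaddlePaddle API: the toy Tensor struct and the free function adadelta_update are illustrative assumptions that stand in for the real paddle::optimizer types.

#include <cmath>
#include <cstddef>
#include <vector>

using real = float;

// Toy stand-in for the optimizer Tensor, exposing only the members the
// update loops rely on: operator[] and size().
struct Tensor {
  std::vector<real> data;
  explicit Tensor(size_t n) : data(n, 0.0f) {}
  real &operator[](size_t i) { return data[i]; }
  real operator[](size_t i) const { return data[i]; }
  size_t size() const { return data.size(); }
};

// Adadelta step written the same way as in adadelta_optimizer.cc above:
// dereference each pointer once into a named reference, then index the
// references inside the loop instead of the raw pointers.
void adadelta_update(Tensor *parameter, Tensor *accum_gradient,
                     Tensor *accum_delta, Tensor *update_delta,
                     const Tensor *gradient, double learning_rate, double rho,
                     double epsilon, double decay) {
  Tensor &param = *parameter;
  const Tensor &grad = *gradient;
  Tensor &accum_g = *accum_gradient;
  Tensor &accum_d = *accum_delta;
  Tensor &update_d = *update_delta;
  for (size_t i = 0; i < param.size(); ++i) {
    // Accumulate the squared gradient, compute the scaled step, then
    // accumulate the squared step and apply it with weight decay.
    accum_g[i] = rho * accum_g[i] + (1.0 - rho) * grad[i] * grad[i];
    update_d[i] = std::sqrt(accum_d[i] + epsilon) /
                  std::sqrt(accum_g[i] + epsilon) * grad[i];
    accum_d[i] = rho * accum_d[i] + (1.0 - rho) * update_d[i] * update_d[i];
    param[i] -= learning_rate * update_d[i] + learning_rate * decay * param[i];
  }
}

The aliases keep the inner loop close to the mathematical update rule and avoid indexing through a raw pointer (which would otherwise require (*accum_gradient)[i] everywhere), which is the readability point the commit subject refers to.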