Commit fd8c5107 authored by: D dzhwinter

"format name with google style"

Parent 5ab958ba
@@ -9,7 +9,7 @@ set(OPITMIZER_SRCS
     sgd_optmizer.cc
 )
-set(OPITMIZER_Headers
+set(OPITMIZER_HEADERS
     adadelta_optimizer.h
     adagrad_optimizer.h
     adam_optimizer.h
@@ -17,12 +17,12 @@ set(OPITMIZER_Headers
     optimizer.h
     parameter_optimizer.h
     sgd_optimizer.h
-    Tensor.h
+    tensor.h
 )
 add_library(optimizer STATIC ${OPITMIZER_SRCS})
 add_dependencies(optimizer gen_proto_cpp)
-add_simple_unittest(Tensor_test)
+add_simple_unittest(tensor_test)
 add_simple_unittest(parameter_optimizer_test)
 add_dependencies(parameter_optimizer_test optimizer)
@@ -32,7 +32,7 @@ public:
     return data_[idx];
   }
   // TODO: replace with tensorshape
-  size_t size() const { return this->width_; }
+  size_t size() const { return this->width_ * this->height_; }
 protected:
   size_t height_;
......
#include "Tensor.h"
#include <iostream> #include <iostream>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "tensor.h"
using namespace paddle; using namespace paddle;
using namespace paddle::optimizer; using namespace paddle::optimizer;
...@@ -13,6 +13,7 @@ TEST(Tensor, indexer) { ...@@ -13,6 +13,7 @@ TEST(Tensor, indexer) {
} }
ASSERT_EQ(t[2], 2); ASSERT_EQ(t[2], 2);
ASSERT_EQ(t[1], 1); ASSERT_EQ(t[1], 1);
delete ptr;
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
......
@@ -6,32 +6,33 @@ namespace paddle {
 namespace optimizer {
 void AdadeltaOptimizer::set_weight(Tensor* p) {
+  parameter_ = p;
   size_t size = p->size();
   real* gptr = new real[size];
-  accum_gradient = new Tensor(gptr, size);
+  accum_gradient_ = new Tensor(gptr, size);
   real* dptr = new real[size];
-  accum_delta = new Tensor(dptr, size);
+  accum_delta_ = new Tensor(dptr, size);
   real* dptr_current = new real[size];
-  update_delta = new Tensor(dptr_current, size);
+  update_delta_ = new Tensor(dptr_current, size);
 }
-void AdadeltaOptimizer::update(const Tensor* gradient) {
-  num_sample_passed += 1;
-  double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
+void AdadeltaOptimizer::Update(const Tensor* gradient) {
+  num_sample_passed_ += 1;
+  double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
   Tensor& param = *parameter_;
   const Tensor& grad = *gradient;
-  Tensor& accum_g = *accum_gradient;
-  Tensor& accum_d = *accum_delta;
-  Tensor& update_d = *update_delta;
+  Tensor& accum_g = *accum_gradient_;
+  Tensor& accum_d = *accum_delta_;
+  Tensor& update_d = *update_delta_;
   for (size_t i = 0; i < param.size(); ++i) {
-    accum_g[i] = rho * accum_g[i] + (1.0 - rho) * grad[i] * grad[i];
-    update_d[i] = std::sqrt(accum_d[i] + epsilon) /
-                  std::sqrt(accum_g[i] + epsilon) * grad[i];
-    accum_d[i] = rho * accum_d[i] + (1.0 - rho) * update_d[i] * update_d[i];
-    param[i] -= learning_rate * update_d[i] + learning_rate * decay * param[i];
+    accum_g[i] = rho_ * accum_g[i] + (1.0 - rho_) * grad[i] * grad[i];
+    update_d[i] = std::sqrt(accum_d[i] + epsilon_) /
+                  std::sqrt(accum_g[i] + epsilon_) * grad[i];
+    accum_d[i] = rho_ * accum_d[i] + (1.0 - rho_) * update_d[i] * update_d[i];
+    param[i] -= learning_rate * update_d[i] + learning_rate * decay_ * param[i];
   }
 }
 }  // namespace optimizer
......
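For reference, the loop in AdadeltaOptimizer::Update above is, element-wise, the AdaDelta rule with an explicit learning rate and a weight-decay term. In the notation below (chosen only to mirror the code), accum_g is E[g^2], accum_d is E[\Delta x^2], update_d is \Delta x, \rho = rho_, \epsilon = epsilon_, \eta is the value returned by the LrPolicy, and \lambda = decay_:

    E[g^2]_t        = \rho\, E[g^2]_{t-1} + (1-\rho)\, g_t^2
    \Delta x_t      = \frac{\sqrt{E[\Delta x^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}}\, g_t
    E[\Delta x^2]_t = \rho\, E[\Delta x^2]_{t-1} + (1-\rho)\, \Delta x_t^2
    \theta_{t+1}    = \theta_t - \eta\, \Delta x_t - \eta\, \lambda\, \theta_t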
@@ -8,29 +8,25 @@ namespace optimizer {
 class AdadeltaOptimizer : public ParameterOptimizer {
 public:
-  using ParameterOptimizer::parameter_;
-  using ParameterOptimizer::num_sample_passed;
-  using ParameterOptimizer::lr_policy;
-  AdadeltaOptimizer(double rho, double epsilon, double decay, BaseLr *lr)
-      : ParameterOptimizer(lr), rho(rho), epsilon(epsilon), decay(decay) {}
+  AdadeltaOptimizer(double rho, double epsilon, double decay, LrPolicy *lr)
+      : ParameterOptimizer(lr), rho_(rho), epsilon_(epsilon), decay_(decay) {}
   ~AdadeltaOptimizer() {
-    if (accum_gradient) delete accum_gradient;
-    if (accum_delta) delete accum_delta;
-    if (update_delta) delete update_delta;
+    if (accum_gradient_) delete accum_gradient_;
+    if (accum_delta_) delete accum_delta_;
+    if (update_delta_) delete update_delta_;
   }
-  void update(const Tensor *gradient);
+  void Update(const Tensor *gradient);
   void set_weight(Tensor *p);
   real *get_weight() const;
 private:
-  Tensor *accum_gradient;
-  Tensor *accum_delta;
-  Tensor *update_delta;
-  double rho;
-  double epsilon;
-  double decay;
+  Tensor *accum_gradient_;
+  Tensor *accum_delta_;
+  Tensor *update_delta_;
+  double rho_;
+  double epsilon_;
+  double decay_;
 };
 }  // namespace optimizer
......
@@ -6,21 +6,22 @@ namespace paddle {
 namespace optimizer {
 void AdagradOptimizer::set_weight(Tensor* p) {
+  parameter_ = p;
   size_t size = p->size();
   real* gptr = new real[size];
-  accum_gradient = new Tensor(gptr, size);
+  accum_gradient_ = new Tensor(gptr, size);
 }
-void AdagradOptimizer::update(const Tensor* gradient) {
-  num_sample_passed += 1;
-  double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
+void AdagradOptimizer::Update(const Tensor* gradient) {
+  num_sample_passed_ += 1;
+  double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
   Tensor& param = *parameter_;
+  Tensor& accum_g = *accum_gradient_;
   const Tensor& grad = *gradient;
-  Tensor& accum_g = *accum_gradient;
   for (size_t i = 0; i < param.size(); ++i) {
     accum_g[i] += grad[i] * grad[i];
-    param[i] += learning_rate * grad[i] / std::sqrt(accum_g[i] + epsilon) +
-                learning_rate * decay * param[i];
+    param[i] += learning_rate * grad[i] / std::sqrt(accum_g[i] + epsilon_) +
+                learning_rate * decay_ * param[i];
   }
 }
......
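Similarly, AdagradOptimizer::Update accumulates squared gradients and scales the step by their square root; with G = accum_g, \eta the LrPolicy rate and \lambda = decay_, the loop computes

    G_t          = G_{t-1} + g_t^2
    \theta_{t+1} = \theta_t + \eta\, \frac{g_t}{\sqrt{G_t + \epsilon}} + \eta\, \lambda\, \theta_t

(for comparison, textbook AdaGrad subtracts the gradient step: \theta_{t+1} = \theta_t - \eta\, g_t / \sqrt{G_t + \epsilon}).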
@@ -8,19 +8,19 @@ namespace optimizer {
 class AdagradOptimizer : public ParameterOptimizer {
 public:
-  AdagradOptimizer(double epsilon, double decay, BaseLr *lr)
-      : ParameterOptimizer(lr), epsilon(epsilon), decay(decay) {}
+  AdagradOptimizer(double epsilon, double decay, LrPolicy *lr)
+      : ParameterOptimizer(lr), epsilon_(epsilon), decay_(decay) {}
   ~AdagradOptimizer() {
-    if (accum_gradient) delete accum_gradient;
+    if (accum_gradient_) delete accum_gradient_;
   }
-  void update(const Tensor *gradient);
+  void Update(const Tensor *gradient);
   void set_weight(Tensor *p);
   real *get_weight() const;
 private:
-  Tensor *accum_gradient;
-  double epsilon;
-  double decay;
+  Tensor *accum_gradient_;
+  double epsilon_;
+  double decay_;
 };
 }  // namespace optimizer
......
@@ -5,6 +5,7 @@ namespace paddle {
 namespace optimizer {
 void AdamOptimizer::set_weight(Tensor *p) {
+  parameter_ = p;
   size_t size = p->size();
   real *mptr = new real[size];
   momentums_ = new Tensor(mptr, size);
@@ -12,21 +13,21 @@ void AdamOptimizer::set_weight(Tensor *p) {
   velocitys_ = new Tensor(vptr, size);
 }
-void AdamOptimizer::update(const Tensor *gradient) {
-  num_sample_passed += 1;
-  double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
-  double coef1 = 1.0 - std::pow(beta_1, num_sample_passed);
-  double coef2 = 1.0 - std::pow(beta_2, num_sample_passed);
+void AdamOptimizer::Update(const Tensor *gradient) {
+  num_sample_passed_ += 1;
+  double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
+  double coef1 = 1.0 - std::pow(beta_1_, num_sample_passed_);
+  double coef2 = 1.0 - std::pow(beta_2_, num_sample_passed_);
   learning_rate *= std::sqrt(coef2) / coef1;
   Tensor &param = *parameter_;
   const Tensor &grad = *gradient;
   Tensor &m = *momentums_;
   Tensor &v = *velocitys_;
   for (size_t i = 0; i < param.size(); ++i) {
-    m[i] = beta_1 * m[i] + (1.0 - beta_1) * grad[i];
-    v[i] = beta_2 * v[i] + (1.0 - beta_2) * grad[i] * grad[i];
+    m[i] = beta_1_ * m[i] + (1.0 - beta_1_) * grad[i];
+    v[i] = beta_2_ * v[i] + (1.0 - beta_2_) * grad[i] * grad[i];
     param[i] -=
-        learning_rate * (m[i] / std::sqrt(v[i] + epsilon) + decay * param[i]);
+        learning_rate * (m[i] / std::sqrt(v[i] + epsilon_) + decay_ * param[i]);
   }
 }
 }  // namespace optimizer
......
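The Adam loop folds the bias corrections into the step size before the element-wise update; with m = momentums_, v = velocitys_, \beta_1 = beta_1_, \beta_2 = beta_2_, t = num_sample_passed_, \eta the LrPolicy rate and \lambda = decay_, it computes

    \hat{\eta}_t  = \eta\, \frac{\sqrt{1 - \beta_2^t}}{1 - \beta_1^t}
    m_t           = \beta_1\, m_{t-1} + (1 - \beta_1)\, g_t
    v_t           = \beta_2\, v_{t-1} + (1 - \beta_2)\, g_t^2
    \theta_{t+1}  = \theta_t - \hat{\eta}_t \left( \frac{m_t}{\sqrt{v_t + \epsilon}} + \lambda\, \theta_t \right)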
@@ -9,27 +9,27 @@ namespace optimizer {
 class AdamOptimizer : public ParameterOptimizer {
 public:
   AdamOptimizer(
-      double beta_1, double beta_2, double epsilon, double decay, BaseLr *lr)
+      double beta_1, double beta_2, double epsilon, double decay, LrPolicy *lr)
       : ParameterOptimizer(lr),
-        beta_1(beta_1),
-        beta_2(beta_2),
-        epsilon(epsilon),
-        decay(decay) {}
+        beta_1_(beta_1),
+        beta_2_(beta_2),
+        epsilon_(epsilon),
+        decay_(decay) {}
   ~AdamOptimizer() {
     if (momentums_) delete momentums_;
     if (velocitys_) delete velocitys_;
   }
-  void update(const Tensor *gradient);
+  void Update(const Tensor *gradient);
   void set_weight(Tensor *p);
   real *get_weight() const;
 private:
   Tensor *momentums_;
   Tensor *velocitys_;
-  double beta_1;
-  double beta_2;
-  double epsilon;
-  double decay;
+  double beta_1_;
+  double beta_2_;
+  double epsilon_;
+  double decay_;
 };
 }  // namespace optimizer
......
@@ -7,34 +7,34 @@
 namespace paddle {
 namespace optimizer {
-class BaseLr {
+class LrPolicy {
 public:
-  BaseLr(double lr) : learning_rate(lr) {}
-  virtual ~BaseLr() {}
-  virtual double get_learning_rate(const uint64_t num_sample_passed) = 0;
-protected:
-  double learning_rate;
+  virtual ~LrPolicy() {}
+  virtual double LearningRate(const uint64_t num_sample_passed) = 0;
 };
 // constant learning rate policy
-class ConstLr final : public BaseLr {
+class ConstLr final : public LrPolicy {
 public:
-  ConstLr(double lr) : BaseLr(lr){};
-  double get_learning_rate(const uint64_t num_sample_passed) {
+  ConstLr(double lr) : learning_rate(lr){};
+  double LearningRate(const uint64_t num_sample_passed) {
     return learning_rate;
   }
+protected:
+  double learning_rate;
 };
-class LinearLr final : public BaseLr {
+class LinearLr final : public LrPolicy {
 public:
   LinearLr(double lr, double lr_decay_a, double lr_decay_b)
-      : BaseLr(lr), lr_decay_a(lr_decay_a), lr_decay_b(lr_decay_b) {}
-  double get_learning_rate(const uint64_t num_sample_passed) {
+      : learning_rate(lr), lr_decay_a(lr_decay_a), lr_decay_b(lr_decay_b) {}
+  double LearningRate(const uint64_t num_sample_passed) {
     return std::max(learning_rate - lr_decay_a * num_sample_passed, lr_decay_b);
   }
 private:
+  double learning_rate;
   double lr_decay_a;
   double lr_decay_b;
 };
......
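A minimal caller-side sketch of the renamed interface (hypothetical illustration, not part of this commit; NextStepSize is an invented helper name): optimizers hold an LrPolicy* and ask it for the step size once per update, passing the running sample counter.

    #include <cstdint>
    #include "lr_policy.h"

    // Hypothetical helper: return the step size for the next update, given how
    // many samples the optimizer has already consumed.
    double NextStepSize(paddle::optimizer::LrPolicy* policy,
                        uint64_t num_sample_passed) {
      // ConstLr ignores the counter; LinearLr decays linearly and is floored at
      // lr_decay_b (see the std::max in LinearLr::LearningRate above).
      return policy->LearningRate(num_sample_passed);
    }

    // Usage sketch:
    //   paddle::optimizer::ConstLr const_lr(0.1);
    //   double step = NextStepSize(&const_lr, 100);  // always 0.1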
@@ -37,7 +37,7 @@ paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
                                           int config_proto_len) {
   paddle_optimizer* optimizer = new paddle_optimizer;
   std::string config(config_proto, config_proto + config_proto_len);
-  optimizer->impl = ParameterOptimizer::create(config);
+  optimizer->impl = ParameterOptimizer::Create(config);
   return optimizer;
 }
@@ -53,7 +53,7 @@ int paddle_update_parameter(paddle_optimizer* o,
   // TOOD(zhihong): datatype not work. need to add the runtime datatype
   auto grad_type = reinterpret_cast<const real*>(grad_buffer);
   Tensor* gradient = new Tensor(const_cast<real*>(grad_type), num_bytes);
-  o->impl->update(gradient);
+  o->impl->Update(gradient);
   return PADDLE_SUCCESS;
 }
......
#include "optimizer.h"
#include "gtest/gtest.h"
template <class T>
class Opitmizer_C_Test : public testing::Test {
private:
Tensor<T> parameter;
Tensor<T> gradient;
};
void applyGradientDescent_TEST() {}
@@ -10,41 +10,40 @@
 namespace paddle {
 namespace optimizer {
-ParameterOptimizer *ParameterOptimizer::create(
-    const ::std::string &config_proto) {
+ParameterOptimizer *ParameterOptimizer::Create(
+    const std::string &config_proto) {
   paddle::OptimizerConfig config;
   CHECK(config.ParseFromString(config_proto) == 0)
-      << "error : optimizer config";
-  auto select_lr_policy = [=](const OptimizerConfig &config) -> BaseLr * {
-    std::string s(config.lr_policy());
-    if (s == "ConstLr") return new ConstLr(config.const_lr().learning_rate());
-    if (s == "LinearLr")
+      << "failed parse optimizer config";
+  auto select_lr_policy = [=](const OptimizerConfig &config) -> LrPolicy * {
+    if (config.lr_policy() == OptimizerConfig::ConstLr)
+      return new ConstLr(config.const_lr().learning_rate());
+    if (config.lr_policy() == OptimizerConfig::LinearLr)
       return new LinearLr(config.linear_lr().learning_rate(),
                           config.linear_lr().lr_decay_a(),
                           config.linear_lr().lr_decay_b());
     // default
     return nullptr;
   };
-  BaseLr *lr = select_lr_policy(config);
+  LrPolicy *lr = select_lr_policy(config);
   auto select_optimizer =
       [=](const OptimizerConfig &config) -> ParameterOptimizer * {
-    std::string s(config.optimizer_name());
-    if (s == "SGD") {
+    if (config.optimizer() == OptimizerConfig::SGD) {
      return new SGDOptimizer(config.sgd().momentum(),
                              config.sgd().decay(),
                              config.sgd().nesterov(),
                              lr);
    }
-    if (s == "Adadelta") {
+    if (config.optimizer() == OptimizerConfig::Adadelta) {
      return new AdagradOptimizer(
          config.adagrad().epsilon(), config.adagrad().decay(), lr);
    }
-    if (s == "Adagrad") {
+    if (config.optimizer() == OptimizerConfig::Adagrad) {
      return new AdagradOptimizer(
          config.adagrad().epsilon(), config.adagrad().decay(), lr);
    }
-    if (s == "Adam") {
+    if (config.optimizer() == OptimizerConfig::Adam) {
      return new AdadeltaOptimizer(config.adadelta().rho(),
                                   config.adadelta().epsilon(),
                                   config.adadelta().decay(),
@@ -5,8 +5,8 @@
 #include <functional>
 #include <string>
 #include "OptimizerConfig.pb.h"
-#include "Tensor.h"
 #include "lr_policy.h"
+#include "tensor.h"
 namespace paddle {
 namespace optimizer {
@@ -17,21 +17,21 @@ public:
   * @brief update hook for algorithm need to traverse parameter more than
   * once.
   */
-  ParameterOptimizer(BaseLr *lr) : lr_policy(lr), num_sample_passed(0) {}
+  ParameterOptimizer(LrPolicy *lr) : lr_policy_(lr), num_sample_passed_(0) {}
   virtual ~ParameterOptimizer() { delete parameter_; };
-  static ParameterOptimizer *create(const ::std::string &config_proto);
-  virtual void update(const Tensor *gradient) = 0;
+  static ParameterOptimizer *Create(const std::string &config_proto);
+  virtual void Update(const Tensor *gradient) = 0;
   virtual real *get_weight() const;
   virtual void set_weight(Tensor *parameter);
-public:
+protected:
   OptimizerConfig config_;
   Tensor *parameter_;
   // learning rate policy
-  BaseLr *lr_policy;
-  uint64_t num_sample_passed;
+  LrPolicy *lr_policy_;
+  uint64_t num_sample_passed_;
 };
 }  // namespace optimizer
......
@@ -42,28 +42,28 @@ public:
   virtual void TearDown() {}
   void create_sgd() {
-    config.set_optimizer_name("SGD");
+    config.set_optimizer(OptimizerConfig::SGD);
     config.mutable_sgd()->set_momentum(0.0);
     config.mutable_sgd()->set_decay(0.0);
     config.mutable_sgd()->set_nesterov(false);
-    config.set_lr_policy("ConstLr");
+    config.set_lr_policy(OptimizerConfig::ConstLr);
     config.mutable_const_lr()->set_learning_rate(0.1);
     ParameterOptimizer* opt =
-        ParameterOptimizer::create(config.SerializeAsString());
+        ParameterOptimizer::Create(config.SerializeAsString());
     opts.push_back(opt);
   }
   void create_adam() {
-    config.set_optimizer_name("Adam");
+    config.set_optimizer(OptimizerConfig::Adam);
     config.mutable_adam()->set_beta_1(0.9);
     config.mutable_adam()->set_beta_2(0.1);
     config.mutable_adam()->set_epsilon(1e-3);
     config.mutable_adam()->set_decay(0.0);
-    config.set_lr_policy("ConstLr");
+    config.set_lr_policy(OptimizerConfig::ConstLr);
     config.mutable_const_lr()->set_learning_rate(0.1);
     ParameterOptimizer* opt =
-        ParameterOptimizer::create(config.SerializeAsString());
+        ParameterOptimizer::Create(config.SerializeAsString());
     opts.push_back(opt);
   }
   void test_set_weight() {
@@ -88,7 +88,7 @@ public:
   void test_update() {
     Tensor* g = fix_n_Tensor(size);
     for (size_t i = 0; i < opts.size(); ++i) {
-      opts[i]->update(g);
+      opts[i]->Update(g);
     }
   }
......
@@ -8,23 +8,19 @@ namespace optimizer {
 class SGDOptimizer : public ParameterOptimizer {
 public:
-  using ParameterOptimizer::parameter_;
-  using ParameterOptimizer::num_sample_passed;
-  using ParameterOptimizer::lr_policy;
-  SGDOptimizer(double m, double d, bool n, BaseLr* lr)
-      : ParameterOptimizer(lr), momentum(m), decay(d), nesterov(n) {}
+  SGDOptimizer(double m, double d, bool n, LrPolicy* lr)
+      : ParameterOptimizer(lr), momentum_(m), decay_(d), nesterov_(n) {}
   virtual ~SGDOptimizer() { delete momentums_; }
-  void update(const Tensor* gradient);
+  void Update(const Tensor* gradient);
   void set_weight(Tensor* p);
   real* get_weight() const;
 private:
   Tensor* momentums_;
-  double momentum;
-  double decay;
-  bool nesterov;
+  double momentum_;
+  double decay_;
+  bool nesterov_;
 };
 }  // namespace optimizer
......
@@ -5,31 +5,32 @@ namespace optimizer {
 void SGDOptimizer::set_weight(Tensor *p) {
   // ParameterOptimizer::set_weight(p);
+  parameter_ = p;
   size_t size = p->size();
   // TODO: fix it with align aware allocator bind to Tensor
-  if (momentum != 0.0) {
+  if (momentum_ != 0.0) {
     real *ptr = new real[size];
     momentums_ = new Tensor(ptr, size);
   }
 }
-void SGDOptimizer::update(const Tensor *gradient) {
-  num_sample_passed += 1;
-  double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
+void SGDOptimizer::Update(const Tensor *gradient) {
+  num_sample_passed_ += 1;
+  double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
   real velocity = 0.0;
   Tensor &param = *parameter_;
   const Tensor &grad = *gradient;
   Tensor &m = *momentums_;
   for (size_t i = 0; i < param.size(); ++i) {
-    if (momentum == 0.0) {
-      velocity = -learning_rate * grad[i] - learning_rate * decay * param[i];
+    if (momentum_ == 0.0) {
+      velocity = -learning_rate * grad[i] - learning_rate * decay_ * param[i];
     } else {
-      m[i] = momentum * m[i] - learning_rate * grad[i] -
-             learning_rate * decay * param[i];
+      m[i] = momentum_ * m[i] - learning_rate * grad[i] -
+             learning_rate * decay_ * param[i];
       velocity = m[i];
     }
-    if (nesterov) {
-      param[i] += momentum * velocity - learning_rate * grad[i];
+    if (nesterov_) {
+      param[i] += momentum_ * velocity - learning_rate * grad[i];
     } else {
       param[i] += velocity;
     }
......
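Spelled out, SGDOptimizer::Update implements momentum SGD with weight decay and an optional Nesterov correction; with v = m[i], \mu = momentum_, \lambda = decay_ and \eta the LrPolicy rate:

    v_t          = \mu\, v_{t-1} - \eta\, g_t - \eta\, \lambda\, \theta_t   \quad (v_t = -\eta\, g_t - \eta\, \lambda\, \theta_t \text{ when } \mu = 0)
    \theta_{t+1} = \theta_t + v_t                                           \quad \text{(nesterov_ == false)}
    \theta_{t+1} = \theta_t + \mu\, v_t - \eta\, g_t                        \quad \text{(nesterov_ == true)}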
@@ -54,38 +54,40 @@ message AdamConfig {
 message ConstLr {
   // learninRate Policy
-  required double learning_rate = 40 [default = 1.0];
+  required double learning_rate = 1 [default = 1.0];
 }
 message LinearLr {
   // learninRate Policy
-  required double learning_rate = 40 [default = 1.0];
-  optional double lr_decay_a = 25;
-  optional double lr_decay_b = 26;
+  required double learning_rate = 1 [default = 1.0];
+  optional double lr_decay_a = 2;
+  optional double lr_decay_b = 3;
 }
 message OptimizerConfig {
   // common config of optimizer
   // algorithm config, type : string
-  // SGD = 1;
-  // Adadelta = 2;
-  // Adagrad = 3;
-  // Adam = 4;
-  required string optimizer_name = 1;
+  enum Optimizer {
+    SGD = 1;
+    Adadelta = 2;
+    Adagrad = 3;
+    Adam = 4;
+  }
+  required Optimizer optimizer = 1;
   optional SGDConfig sgd = 3;
   optional AdadeltaConfig adadelta = 4;
   optional AdagradConfig adagrad = 5;
   optional AdamConfig adam = 6;
   // learning rate runtime policy config
-  // lr_policy , type : string
-  // ConstLr = 0;
-  // LinearLr = 1;
-  required string lr_policy = 11;
+  enum LrPolicy {
+    ConstLr = 0;
+    LinearLr = 1;
+  }
+  required LrPolicy lr_policy = 11;
   optional ConstLr const_lr = 12;
-  optional LinearLr linear_lr = 15;
-  optional uint64 num_sample_passed = 13 [default = 0];
+  optional LinearLr linear_lr = 13;
   // common config of optimizer
   optional double clipnorm = 101;
......
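Tying the schema change to the factory above, here is a hedged end-to-end sketch (illustrative only; CreateSgdOptimizer and the example hyperparameter values are invented, while the proto setters and ParameterOptimizer::Create mirror the unit test earlier in this diff):

    #include <string>
    #include "OptimizerConfig.pb.h"
    #include "parameter_optimizer.h"

    // Sketch: fill the new enum fields, then hand the serialized proto to the
    // factory, mirroring create_sgd() in the test fixture above.
    paddle::optimizer::ParameterOptimizer* CreateSgdOptimizer() {
      paddle::OptimizerConfig config;
      config.set_optimizer(paddle::OptimizerConfig::SGD);
      config.mutable_sgd()->set_momentum(0.9);   // example values only
      config.mutable_sgd()->set_decay(1e-4);
      config.mutable_sgd()->set_nesterov(false);
      config.set_lr_policy(paddle::OptimizerConfig::ConstLr);
      config.mutable_const_lr()->set_learning_rate(0.1);
      return paddle::optimizer::ParameterOptimizer::Create(
          config.SerializeAsString());
    }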