Commit fd8c5107 authored by D dzhwinter

"format name with google style"

Parent 5ab958ba
......@@ -9,7 +9,7 @@ set(OPITMIZER_SRCS
sgd_optmizer.cc
)
set(OPITMIZER_Headers
set(OPITMIZER_HEADERS
adadelta_optimizer.h
adagrad_optimizer.h
adam_optimizer.h
......@@ -17,12 +17,12 @@ set(OPITMIZER_Headers
optimizer.h
parameter_optimizer.h
sgd_optimizer.h
Tensor.h
tensor.h
)
add_library(optimizer STATIC ${OPITMIZER_SRCS})
add_dependencies(optimizer gen_proto_cpp)
add_simple_unittest(Tensor_test)
add_simple_unittest(tensor_test)
add_simple_unittest(parameter_optimizer_test)
add_dependencies(parameter_optimizer_test optimizer)
......@@ -32,7 +32,7 @@ public:
return data_[idx];
}
// TODO: replace with tensorshape
size_t size() const { return this->width_; }
size_t size() const { return this->width_ * this->height_; }
protected:
size_t height_;
......
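For orientation, here is a minimal sketch of the Tensor type the optimizers below rely on, pieced together from what this diff shows (bracket indexing into data_, the corrected size() returning width_ * height_, and a (real*, size_t) constructor used later in the commit); the member layout beyond that is an assumption, not the actual header.

```cpp
#include <cstddef>

typedef float real;  // assumption: "real" is a floating-point typedef defined elsewhere in the tree

class Tensor {
public:
  // assumed constructor shape, matching the "new Tensor(ptr, size)" calls in this commit
  Tensor(real* data, size_t size) : height_(1), width_(size), data_(data) {}
  real& operator[](size_t idx) { return data_[idx]; }
  const real& operator[](size_t idx) const { return data_[idx]; }
  // after this commit size() covers both dimensions instead of only the width
  size_t size() const { return this->width_ * this->height_; }

protected:
  size_t height_;
  size_t width_;
  real* data_;
};
```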
#include "Tensor.h"
#include <iostream>
#include "gtest/gtest.h"
#include "tensor.h"
using namespace paddle;
using namespace paddle::optimizer;
......@@ -13,6 +13,7 @@ TEST(Tensor, indexer) {
}
ASSERT_EQ(t[2], 2);
ASSERT_EQ(t[1], 1);
delete ptr;
}
int main(int argc, char** argv) {
......
......@@ -6,32 +6,33 @@ namespace paddle {
namespace optimizer {
void AdadeltaOptimizer::set_weight(Tensor* p) {
parameter_ = p;
size_t size = p->size();
real* gptr = new real[size];
accum_gradient = new Tensor(gptr, size);
accum_gradient_ = new Tensor(gptr, size);
real* dptr = new real[size];
accum_delta = new Tensor(dptr, size);
accum_delta_ = new Tensor(dptr, size);
real* dptr_current = new real[size];
update_delta = new Tensor(dptr_current, size);
update_delta_ = new Tensor(dptr_current, size);
}
void AdadeltaOptimizer::update(const Tensor* gradient) {
num_sample_passed += 1;
double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
void AdadeltaOptimizer::Update(const Tensor* gradient) {
num_sample_passed_ += 1;
double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
Tensor& param = *parameter_;
const Tensor& grad = *gradient;
Tensor& accum_g = *accum_gradient;
Tensor& accum_d = *accum_delta;
Tensor& update_d = *update_delta;
Tensor& accum_g = *accum_gradient_;
Tensor& accum_d = *accum_delta_;
Tensor& update_d = *update_delta_;
for (size_t i = 0; i < param.size(); ++i) {
accum_g[i] = rho * accum_g[i] + (1.0 - rho) * grad[i] * grad[i];
accum_g[i] = rho_ * accum_g[i] + (1.0 - rho_) * grad[i] * grad[i];
update_d[i] = std::sqrt(accum_d[i] + epsilon) /
std::sqrt(accum_g[i] + epsilon) * grad[i];
update_d[i] = std::sqrt(accum_d[i] + epsilon_) /
std::sqrt(accum_g[i] + epsilon_) * grad[i];
accum_d[i] = rho * accum_d[i] + (1.0 - rho) * update_d[i] * update_d[i];
accum_d[i] = rho_ * accum_d[i] + (1.0 - rho_) * update_d[i] * update_d[i];
param[i] -= learning_rate * update_d[i] + learning_rate * decay * param[i];
param[i] -= learning_rate * update_d[i] + learning_rate * decay_ * param[i];
}
}
} // namespace optimizer
......
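Written out as equations (notation mine, not from the source), the Update loop above is the Adadelta rule with an extra weight-decay term folded into the parameter step; accum_gradient_, accum_delta_, and update_delta_ hold $E[g^2]$, $E[\Delta\theta^2]$, and $\Delta\theta$ respectively, $\eta$ is the rate returned by lr_policy_, and $\lambda$ is decay_:

$$
\begin{aligned}
E[g^2]_t &= \rho\,E[g^2]_{t-1} + (1-\rho)\,g_t^2\\
\Delta\theta_t &= \frac{\sqrt{E[\Delta\theta^2]_{t-1}+\epsilon}}{\sqrt{E[g^2]_t+\epsilon}}\,g_t\\
E[\Delta\theta^2]_t &= \rho\,E[\Delta\theta^2]_{t-1} + (1-\rho)\,\Delta\theta_t^2\\
\theta_{t+1} &= \theta_t - \eta\,\Delta\theta_t - \eta\lambda\,\theta_t
\end{aligned}
$$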
......@@ -8,29 +8,25 @@ namespace optimizer {
class AdadeltaOptimizer : public ParameterOptimizer {
public:
using ParameterOptimizer::parameter_;
using ParameterOptimizer::num_sample_passed;
using ParameterOptimizer::lr_policy;
AdadeltaOptimizer(double rho, double epsilon, double decay, BaseLr *lr)
: ParameterOptimizer(lr), rho(rho), epsilon(epsilon), decay(decay) {}
AdadeltaOptimizer(double rho, double epsilon, double decay, LrPolicy *lr)
: ParameterOptimizer(lr), rho_(rho), epsilon_(epsilon), decay_(decay) {}
~AdadeltaOptimizer() {
if (accum_gradient) delete accum_gradient;
if (accum_delta) delete accum_delta;
if (update_delta) delete update_delta;
if (accum_gradient_) delete accum_gradient_;
if (accum_delta_) delete accum_delta_;
if (update_delta_) delete update_delta_;
}
void update(const Tensor *gradient);
void Update(const Tensor *gradient);
void set_weight(Tensor *p);
real *get_weight() const;
private:
Tensor *accum_gradient;
Tensor *accum_delta;
Tensor *update_delta;
Tensor *accum_gradient_;
Tensor *accum_delta_;
Tensor *update_delta_;
double rho;
double epsilon;
double decay;
double rho_;
double epsilon_;
double decay_;
};
} // namespace optimizer
......
......@@ -6,21 +6,22 @@ namespace paddle {
namespace optimizer {
void AdagradOptimizer::set_weight(Tensor* p) {
parameter_ = p;
size_t size = p->size();
real* gptr = new real[size];
accum_gradient = new Tensor(gptr, size);
accum_gradient_ = new Tensor(gptr, size);
}
void AdagradOptimizer::update(const Tensor* gradient) {
num_sample_passed += 1;
double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
void AdagradOptimizer::Update(const Tensor* gradient) {
num_sample_passed_ += 1;
double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
Tensor& param = *parameter_;
Tensor& accum_g = *accum_gradient_;
const Tensor& grad = *gradient;
Tensor& accum_g = *accum_gradient;
for (size_t i = 0; i < param.size(); ++i) {
accum_g[i] += grad[i] * grad[i];
param[i] += learning_rate * grad[i] / std::sqrt(accum_g[i] + epsilon) +
learning_rate * decay * param[i];
param[i] += learning_rate * grad[i] / std::sqrt(accum_g[i] + epsilon_) +
learning_rate * decay_ * param[i];
}
}
......
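For comparison, the textbook Adagrad rule that the loop above implements, with accum_gradient_ playing the role of the running sum $G_t$ and $\lambda$ = decay_; note the code applies its step with `+=`, whereas under the usual sign convention the gradient term is subtracted as written here:

$$
G_t = G_{t-1} + g_t^2,\qquad
\theta_{t+1} = \theta_t - \frac{\eta}{\sqrt{G_t+\epsilon}}\,g_t - \eta\lambda\,\theta_t
$$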
......@@ -8,19 +8,19 @@ namespace optimizer {
class AdagradOptimizer : public ParameterOptimizer {
public:
AdagradOptimizer(double epsilon, double decay, BaseLr *lr)
: ParameterOptimizer(lr), epsilon(epsilon), decay(decay) {}
AdagradOptimizer(double epsilon, double decay, LrPolicy *lr)
: ParameterOptimizer(lr), epsilon_(epsilon), decay_(decay) {}
~AdagradOptimizer() {
if (accum_gradient) delete accum_gradient;
if (accum_gradient_) delete accum_gradient_;
}
void update(const Tensor *gradient);
void Update(const Tensor *gradient);
void set_weight(Tensor *p);
real *get_weight() const;
private:
Tensor *accum_gradient;
double epsilon;
double decay;
Tensor *accum_gradient_;
double epsilon_;
double decay_;
};
} // namespace optimizer
......
......@@ -5,6 +5,7 @@ namespace paddle {
namespace optimizer {
void AdamOptimizer::set_weight(Tensor *p) {
parameter_ = p;
size_t size = p->size();
real *mptr = new real[size];
momentums_ = new Tensor(mptr, size);
......@@ -12,21 +13,21 @@ void AdamOptimizer::set_weight(Tensor *p) {
velocitys_ = new Tensor(vptr, size);
}
void AdamOptimizer::update(const Tensor *gradient) {
num_sample_passed += 1;
double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
double coef1 = 1.0 - std::pow(beta_1, num_sample_passed);
double coef2 = 1.0 - std::pow(beta_2, num_sample_passed);
void AdamOptimizer::Update(const Tensor *gradient) {
num_sample_passed_ += 1;
double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
double coef1 = 1.0 - std::pow(beta_1_, num_sample_passed_);
double coef2 = 1.0 - std::pow(beta_2_, num_sample_passed_);
learning_rate *= std::sqrt(coef2) / coef1;
Tensor &param = *parameter_;
const Tensor &grad = *gradient;
Tensor &m = *momentums_;
Tensor &v = *velocitys_;
for (size_t i = 0; i < param.size(); ++i) {
m[i] = beta_1 * m[i] + (1.0 - beta_1) * grad[i];
v[i] = beta_2 * v[i] + (1.0 - beta_2) * grad[i] * grad[i];
m[i] = beta_1_ * m[i] + (1.0 - beta_1_) * grad[i];
v[i] = beta_2_ * v[i] + (1.0 - beta_2_) * grad[i] * grad[i];
param[i] -=
learning_rate * (m[i] / std::sqrt(v[i] + epsilon) + decay * param[i]);
learning_rate * (m[i] / std::sqrt(v[i] + epsilon_) + decay_ * param[i]);
}
}
} // namespace optimizer
......
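In equation form (notation mine), the loop above is Adam with the bias correction folded into the step size: with $t$ = num_sample_passed_, $\lambda$ = decay_, and momentums_/velocitys_ holding $m$ and $v$,

$$
\begin{aligned}
m_t &= \beta_1 m_{t-1} + (1-\beta_1)\,g_t\\
v_t &= \beta_2 v_{t-1} + (1-\beta_2)\,g_t^2\\
\hat\eta_t &= \eta\,\frac{\sqrt{1-\beta_2^{\,t}}}{1-\beta_1^{\,t}}\\
\theta_{t+1} &= \theta_t - \hat\eta_t\left(\frac{m_t}{\sqrt{v_t+\epsilon}} + \lambda\,\theta_t\right)
\end{aligned}
$$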
......@@ -9,27 +9,27 @@ namespace optimizer {
class AdamOptimizer : public ParameterOptimizer {
public:
AdamOptimizer(
double beta_1, double beta_2, double epsilon, double decay, BaseLr *lr)
double beta_1, double beta_2, double epsilon, double decay, LrPolicy *lr)
: ParameterOptimizer(lr),
beta_1(beta_1),
beta_2(beta_2),
epsilon(epsilon),
decay(decay) {}
beta_1_(beta_1),
beta_2_(beta_2),
epsilon_(epsilon),
decay_(decay) {}
~AdamOptimizer() {
if (momentums_) delete momentums_;
if (velocitys_) delete velocitys_;
}
void update(const Tensor *gradient);
void Update(const Tensor *gradient);
void set_weight(Tensor *p);
real *get_weight() const;
private:
Tensor *momentums_;
Tensor *velocitys_;
double beta_1;
double beta_2;
double epsilon;
double decay;
double beta_1_;
double beta_2_;
double epsilon_;
double decay_;
};
} // namespace optimizer
......
......@@ -7,34 +7,34 @@
namespace paddle {
namespace optimizer {
class BaseLr {
class LrPolicy {
public:
BaseLr(double lr) : learning_rate(lr) {}
virtual ~BaseLr() {}
virtual double get_learning_rate(const uint64_t num_sample_passed) = 0;
protected:
double learning_rate;
virtual ~LrPolicy() {}
virtual double LearningRate(const uint64_t num_sample_passed) = 0;
};
// constant learning rate policy
class ConstLr final : public BaseLr {
class ConstLr final : public LrPolicy {
public:
ConstLr(double lr) : BaseLr(lr){};
double get_learning_rate(const uint64_t num_sample_passed) {
ConstLr(double lr) : learning_rate(lr){};
double LearningRate(const uint64_t num_sample_passed) {
return learning_rate;
}
protected:
double learning_rate;
};
class LinearLr final : public BaseLr {
class LinearLr final : public LrPolicy {
public:
LinearLr(double lr, double lr_decay_a, double lr_decay_b)
: BaseLr(lr), lr_decay_a(lr_decay_a), lr_decay_b(lr_decay_b) {}
double get_learning_rate(const uint64_t num_sample_passed) {
: learning_rate(lr), lr_decay_a(lr_decay_a), lr_decay_b(lr_decay_b) {}
double LearningRate(const uint64_t num_sample_passed) {
return std::max(learning_rate - lr_decay_a * num_sample_passed, lr_decay_b);
}
private:
double learning_rate;
double lr_decay_a;
double lr_decay_b;
};
......
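After this rename, the schedule hierarchy reduces to the single pure-virtual LearningRate(num_sample_passed) hook, so adding a schedule means subclassing LrPolicy. A hypothetical exponential-decay policy, sketched against the interface shown above (not part of this commit; decay_rate and decay_steps are illustrative names, not Paddle API):

```cpp
#include <cmath>
#include <cstdint>
// assumes the lr_policy.h from this commit is on the include path

namespace paddle {
namespace optimizer {

class ExpDecayLr final : public LrPolicy {
public:
  ExpDecayLr(double lr, double decay_rate, uint64_t decay_steps)
      : learning_rate(lr), decay_rate(decay_rate), decay_steps(decay_steps) {}
  double LearningRate(const uint64_t num_sample_passed) {
    // geometric decay: multiply by decay_rate once per decay_steps samples
    return learning_rate *
           std::pow(decay_rate,
                    static_cast<double>(num_sample_passed) / decay_steps);
  }

private:
  double learning_rate;
  double decay_rate;
  uint64_t decay_steps;
};

}  // namespace optimizer
}  // namespace paddle
```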
......@@ -37,7 +37,7 @@ paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
int config_proto_len) {
paddle_optimizer* optimizer = new paddle_optimizer;
std::string config(config_proto, config_proto + config_proto_len);
optimizer->impl = ParameterOptimizer::create(config);
optimizer->impl = ParameterOptimizer::Create(config);
return optimizer;
}
......@@ -53,7 +53,7 @@ int paddle_update_parameter(paddle_optimizer* o,
// TODO(zhihong): datatype handling does not work yet; need to add the runtime datatype
auto grad_type = reinterpret_cast<const real*>(grad_buffer);
Tensor* gradient = new Tensor(const_cast<real*>(grad_type), num_bytes);
o->impl->update(gradient);
o->impl->Update(gradient);
return PADDLE_SUCCESS;
}
......
#include "optimizer.h"
#include "gtest/gtest.h"
template <class T>
class Opitmizer_C_Test : public testing::Test {
private:
Tensor<T> parameter;
Tensor<T> gradient;
};
void applyGradientDescent_TEST() {}
......@@ -10,41 +10,40 @@
namespace paddle {
namespace optimizer {
ParameterOptimizer *ParameterOptimizer::create(
const ::std::string &config_proto) {
ParameterOptimizer *ParameterOptimizer::Create(
const std::string &config_proto) {
paddle::OptimizerConfig config;
CHECK(config.ParseFromString(config_proto) == 0)
<< "error : optimizer config";
<< "failed parse optimizer config";
auto select_lr_policy = [=](const OptimizerConfig &config) -> BaseLr * {
std::string s(config.lr_policy());
if (s == "ConstLr") return new ConstLr(config.const_lr().learning_rate());
if (s == "LinearLr")
auto select_lr_policy = [=](const OptimizerConfig &config) -> LrPolicy * {
if (config.lr_policy() == OptimizerConfig::ConstLr)
return new ConstLr(config.const_lr().learning_rate());
if (config.lr_policy() == OptimizerConfig::LinearLr)
return new LinearLr(config.linear_lr().learning_rate(),
config.linear_lr().lr_decay_a(),
config.linear_lr().lr_decay_b());
// default
return nullptr;
};
BaseLr *lr = select_lr_policy(config);
LrPolicy *lr = select_lr_policy(config);
auto select_optimizer =
[=](const OptimizerConfig &config) -> ParameterOptimizer * {
std::string s(config.optimizer_name());
if (s == "SGD") {
if (config.optimizer() == OptimizerConfig::SGD) {
return new SGDOptimizer(config.sgd().momentum(),
config.sgd().decay(),
config.sgd().nesterov(),
lr);
}
if (s == "Adadelta") {
if (config.optimizer() == OptimizerConfig::Adadelta) {
return new AdagradOptimizer(
config.adagrad().epsilon(), config.adagrad().decay(), lr);
}
if (s == "Adagrad") {
if (config.optimizer() == OptimizerConfig::Adagrad) {
return new AdagradOptimizer(
config.adagrad().epsilon(), config.adagrad().decay(), lr);
}
if (s == "Adam") {
if (config.optimizer() == OptimizerConfig::Adam) {
return new AdadeltaOptimizer(config.adadelta().rho(),
config.adadelta().epsilon(),
config.adadelta().decay(),
......
......@@ -5,8 +5,8 @@
#include <functional>
#include <string>
#include "OptimizerConfig.pb.h"
#include "Tensor.h"
#include "lr_policy.h"
#include "tensor.h"
namespace paddle {
namespace optimizer {
......@@ -17,21 +17,21 @@ public:
* @brief update hook for algorithm need to traverse parameter more than
* once.
*/
ParameterOptimizer(BaseLr *lr) : lr_policy(lr), num_sample_passed(0) {}
ParameterOptimizer(LrPolicy *lr) : lr_policy_(lr), num_sample_passed_(0) {}
virtual ~ParameterOptimizer() { delete parameter_; };
static ParameterOptimizer *create(const ::std::string &config_proto);
virtual void update(const Tensor *gradient) = 0;
static ParameterOptimizer *Create(const std::string &config_proto);
virtual void Update(const Tensor *gradient) = 0;
virtual real *get_weight() const;
virtual void set_weight(Tensor *parameter);
public:
protected:
OptimizerConfig config_;
Tensor *parameter_;
// learning rate policy
BaseLr *lr_policy;
uint64_t num_sample_passed;
LrPolicy *lr_policy_;
uint64_t num_sample_passed_;
};
} // namespace optimizer
......
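To illustrate what the renamed interface asks of a concrete optimizer, a hypothetical plain gradient-descent subclass (not in this commit) that implements Update and set_weight and pulls its step size from lr_policy_:

```cpp
#include "parameter_optimizer.h"  // the header shown above

namespace paddle {
namespace optimizer {

class VanillaGDOptimizer : public ParameterOptimizer {
public:
  explicit VanillaGDOptimizer(LrPolicy *lr) : ParameterOptimizer(lr) {}
  void Update(const Tensor *gradient) {
    num_sample_passed_ += 1;
    double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
    Tensor &param = *parameter_;
    const Tensor &grad = *gradient;
    for (size_t i = 0; i < param.size(); ++i) {
      param[i] -= learning_rate * grad[i];
    }
  }
  void set_weight(Tensor *p) { parameter_ = p; }
  real *get_weight() const { return nullptr; }  // raw-buffer accessor omitted in this sketch
};

}  // namespace optimizer
}  // namespace paddle
```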
......@@ -42,28 +42,28 @@ public:
virtual void TearDown() {}
void create_sgd() {
config.set_optimizer_name("SGD");
config.set_optimizer(OptimizerConfig::SGD);
config.mutable_sgd()->set_momentum(0.0);
config.mutable_sgd()->set_decay(0.0);
config.mutable_sgd()->set_nesterov(false);
config.set_lr_policy("ConstLr");
config.set_lr_policy(OptimizerConfig::ConstLr);
config.mutable_const_lr()->set_learning_rate(0.1);
ParameterOptimizer* opt =
ParameterOptimizer::create(config.SerializeAsString());
ParameterOptimizer::Create(config.SerializeAsString());
opts.push_back(opt);
}
void create_adam() {
config.set_optimizer_name("Adam");
config.set_optimizer(OptimizerConfig::Adam);
config.mutable_adam()->set_beta_1(0.9);
config.mutable_adam()->set_beta_2(0.1);
config.mutable_adam()->set_epsilon(1e-3);
config.mutable_adam()->set_decay(0.0);
config.set_lr_policy("ConstLr");
config.set_lr_policy(OptimizerConfig::ConstLr);
config.mutable_const_lr()->set_learning_rate(0.1);
ParameterOptimizer* opt =
ParameterOptimizer::create(config.SerializeAsString());
ParameterOptimizer::Create(config.SerializeAsString());
opts.push_back(opt);
}
void test_set_weight() {
......@@ -88,7 +88,7 @@ public:
void test_update() {
Tensor* g = fix_n_Tensor(size);
for (size_t i = 0; i < opts.size(); ++i) {
opts[i]->update(g);
opts[i]->Update(g);
}
}
......
......@@ -8,23 +8,19 @@ namespace optimizer {
class SGDOptimizer : public ParameterOptimizer {
public:
using ParameterOptimizer::parameter_;
using ParameterOptimizer::num_sample_passed;
using ParameterOptimizer::lr_policy;
SGDOptimizer(double m, double d, bool n, BaseLr* lr)
: ParameterOptimizer(lr), momentum(m), decay(d), nesterov(n) {}
SGDOptimizer(double m, double d, bool n, LrPolicy* lr)
: ParameterOptimizer(lr), momentum_(m), decay_(d), nesterov_(n) {}
virtual ~SGDOptimizer() { delete momentums_; }
void update(const Tensor* gradient);
void Update(const Tensor* gradient);
void set_weight(Tensor* p);
real* get_weight() const;
private:
Tensor* momentums_;
double momentum;
double decay;
bool nesterov;
double momentum_;
double decay_;
bool nesterov_;
};
} // namespace optimizer
......
......@@ -5,31 +5,32 @@ namespace optimizer {
void SGDOptimizer::set_weight(Tensor *p) {
// ParameterOptimizer::set_weight(p);
parameter_ = p;
size_t size = p->size();
// TODO: fix this with an alignment-aware allocator bound to Tensor
if (momentum != 0.0) {
if (momentum_ != 0.0) {
real *ptr = new real[size];
momentums_ = new Tensor(ptr, size);
}
}
void SGDOptimizer::update(const Tensor *gradient) {
num_sample_passed += 1;
double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
void SGDOptimizer::Update(const Tensor *gradient) {
num_sample_passed_ += 1;
double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
real velocity = 0.0;
Tensor &param = *parameter_;
const Tensor &grad = *gradient;
Tensor &m = *momentums_;
for (size_t i = 0; i < param.size(); ++i) {
if (momentum == 0.0) {
velocity = -learning_rate * grad[i] - learning_rate * decay * param[i];
if (momentum_ == 0.0) {
velocity = -learning_rate * grad[i] - learning_rate * decay_ * param[i];
} else {
m[i] = momentum * m[i] - learning_rate * grad[i] -
learning_rate * decay * param[i];
m[i] = momentum_ * m[i] - learning_rate * grad[i] -
learning_rate * decay_ * param[i];
velocity = m[i];
}
if (nesterov) {
param[i] += momentum * velocity - learning_rate * grad[i];
if (nesterov_) {
param[i] += momentum_ * velocity - learning_rate * grad[i];
} else {
param[i] += velocity;
}
......
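As equations (notation mine), the SGD loop above computes, with $\mu$ = momentum_ and $\lambda$ = decay_ (when $\mu = 0$ the velocity is recomputed on the fly rather than kept in momentums_):

$$
\begin{aligned}
v_t &= \mu\,v_{t-1} - \eta\,g_t - \eta\lambda\,\theta_t\\
\theta_{t+1} &=
\begin{cases}
\theta_t + \mu\,v_t - \eta\,g_t & \text{Nesterov}\\
\theta_t + v_t & \text{otherwise}
\end{cases}
\end{aligned}
$$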
......@@ -54,38 +54,40 @@ message AdamConfig {
message ConstLr {
// learning rate policy
required double learning_rate = 40 [default = 1.0];
required double learning_rate = 1 [default = 1.0];
}
message LinearLr {
// learning rate policy
required double learning_rate = 40 [default = 1.0];
optional double lr_decay_a = 25;
optional double lr_decay_b = 26;
required double learning_rate = 1 [default = 1.0];
optional double lr_decay_a = 2;
optional double lr_decay_b = 3;
}
message OptimizerConfig {
// common config of optimizer
// algorithm config, type : string
// SGD = 1;
// Adadelta = 2;
// Adagrad = 3;
// Adam = 4;
required string optimizer_name = 1;
enum Optimizer {
SGD = 1;
Adadelta = 2;
Adagrad = 3;
Adam = 4;
}
required Optimizer optimizer = 1;
optional SGDConfig sgd = 3;
optional AdadeltaConfig adadelta = 4;
optional AdagradConfig adagrad = 5;
optional AdamConfig adam = 6;
// learning rate runtime policy config
// lr_policy , type : string
// ConstLr = 0;
// LinearLr = 1;
required string lr_policy = 11;
enum LrPolicy {
ConstLr = 0;
LinearLr = 1;
}
required LrPolicy lr_policy = 11;
optional ConstLr const_lr = 12;
optional LinearLr linear_lr = 15;
optional uint64 num_sample_passed = 13 [default = 0];
optional LinearLr linear_lr = 13;
// common config of optimizer
optional double clipnorm = 101;
......
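With the string fields replaced by enums, a configuration is now built through the generated enum setters and serialized before being handed to ParameterOptimizer::Create (or paddle_create_optimizer). A sketch mirroring the updated unit test above; the numeric values are arbitrary:

```cpp
#include <string>
#include "OptimizerConfig.pb.h"

std::string MakeSgdConfig() {
  paddle::OptimizerConfig config;
  config.set_optimizer(paddle::OptimizerConfig::SGD);
  config.mutable_sgd()->set_momentum(0.9);
  config.mutable_sgd()->set_decay(1e-4);
  config.mutable_sgd()->set_nesterov(false);
  config.set_lr_policy(paddle::OptimizerConfig::ConstLr);
  config.mutable_const_lr()->set_learning_rate(0.1);
  return config.SerializeAsString();
}
```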