Commit bc26df79 authored by dzhwinter

"polish code style and update based review comment"

Parent 5a1e678b
@@ -46,7 +46,7 @@ protected:
 };
 // TODO(zhihong): design problem of dynamic datatype, need to fix it
-typedef TensorT<real> Tensor;
+typedef TensorT<float> Tensor;
 }  // namespace optimizer
 }  // namespace paddle
@@ -8,7 +8,13 @@ namespace optimizer {
 class AdadeltaOptimizer : public ParameterOptimizer {
 public:
   AdadeltaOptimizer(double rho, double epsilon, double decay, LrPolicy *lr)
-      : ParameterOptimizer(lr), rho_(rho), epsilon_(epsilon), decay_(decay) {}
+      : ParameterOptimizer(lr),
+        accum_gradient_(nullptr),
+        accum_delta_(nullptr),
+        update_delta_(nullptr),
+        rho_(rho),
+        epsilon_(epsilon),
+        decay_(decay) {}
   ~AdadeltaOptimizer() {
     if (accum_gradient_) delete accum_gradient_;
     if (accum_delta_) delete accum_delta_;
@@ -16,13 +22,11 @@ public:
   }
   void Update(const Tensor *gradient);
   void set_weight(Tensor *p);
-  real *get_weight() const;
 
 private:
   Tensor *accum_gradient_;
   Tensor *accum_delta_;
   Tensor *update_delta_;
   double rho_;
   double epsilon_;
   double decay_;
......
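A note on the constructor change above (the same pattern recurs in AdagradOptimizer, AdamOptimizer, and SGDOptimizer below): the destructors delete these Tensor members, so initializing them to nullptr in the init list makes destroying an optimizer whose set_weight was never called well-defined. A minimal sketch of the pattern, with simplified names that are not the actual Paddle classes:

    #include <cstddef>

    // Sketch: pointer members start as nullptr in the init list so the
    // destructor is safe even if the buffers were never allocated.
    class AccumHolder {
    public:
      explicit AccumHolder(double rho) : accum_(nullptr), rho_(rho) {}
      ~AccumHolder() { delete[] accum_; }  // deleting nullptr is a no-op

      void Allocate(size_t n) {
        if (accum_ == nullptr) accum_ = new float[n]();
      }

    private:
      float* accum_;
      double rho_;
    };

Since deleting a null pointer is a no-op in C++, the `if (accum_gradient_) delete accum_gradient_;` guards in the destructor above are redundant but harmless.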
@@ -8,13 +8,15 @@ namespace optimizer {
 class AdagradOptimizer : public ParameterOptimizer {
 public:
   AdagradOptimizer(double epsilon, double decay, LrPolicy *lr)
-      : ParameterOptimizer(lr), epsilon_(epsilon), decay_(decay) {}
+      : ParameterOptimizer(lr),
+        accum_gradient_(nullptr),
+        epsilon_(epsilon),
+        decay_(decay) {}
   ~AdagradOptimizer() {
     if (accum_gradient_) delete accum_gradient_;
   }
   void Update(const Tensor *gradient);
   void set_weight(Tensor *p);
-  real *get_weight() const;
 
 private:
   Tensor *accum_gradient_;
......
@@ -10,6 +10,8 @@ public:
   AdamOptimizer(
       double beta_1, double beta_2, double epsilon, double decay, LrPolicy *lr)
       : ParameterOptimizer(lr),
+        momentums_(nullptr),
+        velocitys_(nullptr),
         beta_1_(beta_1),
         beta_2_(beta_2),
         epsilon_(epsilon),
@@ -20,7 +22,6 @@ public:
   }
   void Update(const Tensor *gradient);
   void set_weight(Tensor *p);
-  real *get_weight() const;
 
 private:
   Tensor *momentums_;
......
@@ -20,7 +20,7 @@ public:
     return learning_rate;
   }
 
-protected:
+private:
   double learning_rate;
 };
......
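For context on the protected-to-private change: nothing subclasses ConstLr, so the field needs no wider access. A minimal sketch of how such a policy plugs into the interface, assuming LrPolicy exposes a virtual LearningRate(num_sample_passed) as called in SGDOptimizer::Update below; this is an illustration, not the verbatim header:

    #include <cstdint>

    // Hypothetical minimal version of the interface used by
    // lr_policy_->LearningRate(num_sample_passed_) elsewhere in this commit.
    class LrPolicy {
    public:
      virtual ~LrPolicy() {}
      virtual double LearningRate(const uint64_t num_sample_passed) = 0;
    };

    class ConstLr : public LrPolicy {
    public:
      explicit ConstLr(double lr) : learning_rate(lr) {}
      double LearningRate(const uint64_t num_sample_passed) {
        return learning_rate;  // constant, independent of training progress
      }

    private:
      double learning_rate;  // private: no subclass touches it directly
    };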
@@ -2,6 +2,7 @@
 #include <string>
 #include "parameter_optimizer.h"
+using namespace paddle;
 using namespace paddle::optimizer;
@@ -26,6 +27,7 @@ MATCH_ENUM_TYPE(int32_t, PADDLE_ELEMENT_TYPE_INT32);
 MATCH_ENUM_TYPE(uint32_t, PADDLE_ELEMENT_TYPE_UINT32);
 MATCH_ENUM_TYPE(int64_t, PADDLE_ELEMENT_TYPE_INT64);
 MATCH_ENUM_TYPE(uint64_t, PADDLE_ELEMENT_TYPE_UINT64);
+// TODO(zhihong): only the types below are implemented; need to fix
 MATCH_ENUM_TYPE(float, PADDLE_ELEMENT_TYPE_FLOAT32);
 MATCH_ENUM_TYPE(double, PADDLE_ELEMENT_TYPE_FLOAT64);
@@ -35,15 +37,20 @@ struct paddle_optimizer {
 paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
                                           const int config_proto_len,
-                                          const char** state,
-                                          const int state_size) {
+                                          const paddle_element_type data_type,
+                                          void* param_buffer,
+                                          int num_bytes,
+                                          const char* state,
+                                          const int state_len) {
   paddle_optimizer* optimizer = new paddle_optimizer;
   std::string config(config_proto, config_proto + config_proto_len);
   optimizer->impl = ParameterOptimizer::Create(config);
   if (state != nullptr) {
-    std::string s(*state, *state + state_size);
+    std::string s(state, state + state_len);
     optimizer->impl->DeSerializeState(s);
   }
+  Tensor* param = new Tensor(reinterpret_cast<float*>(param_buffer), num_bytes);
+  optimizer->impl->set_weight(param);
   return optimizer;
 }
@@ -57,28 +64,19 @@ int paddle_update_parameter(paddle_optimizer* o,
                             const void* grad_buffer,
                             int num_bytes) {
   // TODO(zhihong): datatype does not work yet; need to add the runtime datatype
-  auto grad_type = reinterpret_cast<const real*>(grad_buffer);
-  Tensor* gradient = new Tensor(const_cast<real*>(grad_type), num_bytes);
+  auto grad_type = reinterpret_cast<const float*>(grad_buffer);
+  Tensor* gradient = new Tensor(const_cast<float*>(grad_type), num_bytes);
   o->impl->Update(gradient);
   return PADDLE_SUCCESS;
 }
 
-int paddle_optimizer_set_weights(paddle_optimizer* o,
-                                 const paddle_element_type data_type,
-                                 void* param_buffer,
-                                 int num_bytes) {
-  // TODO(zhihong): datatype does not work yet; need to add the runtime datatype
-  Tensor* param = new Tensor(reinterpret_cast<real*>(param_buffer), num_bytes);
-  o->impl->set_weight(param);
-  return PADDLE_SUCCESS;
-}
-
-void* paddle_optimizer_get_weights(paddle_optimizer* o) {
-  void* buffer = (void*)o->impl->get_weight();
-  return buffer;
-}
+int paddle_optimizer_get_weights(paddle_optimizer* o, void** param_buffer) {
+  int param_size = 0;
+  *param_buffer = (void*)o->impl->get_weight(&param_size);
+  return param_size;
+}
 
-int paddle_optimizer_get_state(paddle_optimizer* o, const char* state) {
-  state = o->impl->SerializeState();
-  return PADDLE_SUCCESS;
+int paddle_optimizer_get_state(paddle_optimizer* o, const char** state) {
+  *state = o->impl->SerializeState();
+  return strlen(*state);
 }
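The net effect of this hunk on the C API lifecycle: the parameter buffer is handed over once at creation time (replacing paddle_optimizer_set_weights), and both getters now return a length instead of a status code. A hedged usage sketch follows; the config bytes stand in for a serialized OptimizerConfig proto, the header include name is not visible in this diff, and paddle_update_parameter's full parameter list is abbreviated in the hunk above, so that call appears only as a comment:

    // #include "optimizer.h"  // assumed header name for this C API

    int main() {
      unsigned char config[64] = {0};  // placeholder: serialized OptimizerConfig
      float params[1024] = {0};        // caller-owned parameter buffer

      paddle_optimizer* o = paddle_create_optimizer(
          config, sizeof(config),
          PADDLE_ELEMENT_TYPE_FLOAT32,  // datatype of param_buffer
          params, sizeof(params),       // weights are handed over at creation
          nullptr, 0);                  // no serialized state to restore

      // per step: paddle_update_parameter(o, ..., grad_buffer, num_bytes);

      void* weights = nullptr;
      int weight_len = paddle_optimizer_get_weights(o, &weights);  // returns length

      const char* state = nullptr;
      int state_len = paddle_optimizer_get_state(o, &state);  // returns strlen(state)

      (void)weight_len;
      (void)state_len;
      return 0;
    }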
@@ -3,19 +3,18 @@
 #include <stdbool.h>
 #include <stdint.h>
 
-/*! \brief optimizer export C API. which will be used in
-   Case A, on Trainer (On ParameterServer Client) optimize gradient
-
-   Case B, on ParameterServer side optimize gradient
-
-   To simplify the configuration parsing. optimizer *do not* parse any config
-   e.g. learning rate should be calculated by the caller
- */
+/**
+ * @brief optimizer library, independent of other modules,
+ * which will be used in:
+ * Case A, the gradient is optimized locally on the trainer.
+ *
+ * Case B, the gradient is optimized on the parameter server.
+ */
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-/*! \brief datatypes */
 typedef enum {
   PADDLE_ELEMENT_TYPE_INT32 = 0,
   PADDLE_ELEMENT_TYPE_UINT32 = 1,
@@ -25,7 +24,9 @@ typedef enum {
   PADDLE_ELEMENT_TYPE_FLOAT64 = 5,
 } paddle_element_type;
 
-/*! \brief execute status code */
+/**
+ * @brief execution status code
+ */
 const int32_t PADDLE_SUCCESS = 0;
 const int32_t PADDLE_ERROR = -1;
@@ -46,8 +47,11 @@ typedef struct paddle_optimizer paddle_optimizer;
  */
 paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
                                           const int config_proto_len,
-                                          const char** state,
-                                          const int state_size);
+                                          const paddle_element_type data_type,
+                                          void* param_buffer,
+                                          int num_bytes,
+                                          const char* state,
+                                          const int state_len);
 
 /**
  * @brief release optimizer
@@ -72,23 +76,17 @@ int paddle_update_parameter(paddle_optimizer* o,
 /**
  * @brief optimizer instance
- * @param data_type datatype of gradient
  * @param param_buffer, initialized parameter buffer
- * @param num_bytes, parameter size
- * @return return exec status
+ * @return return content length
  */
-int paddle_optimizer_set_weights(paddle_optimizer* o,
-                                 const paddle_element_type data_type,
-                                 void* param_buffer,
-                                 int num_bytes);
+int paddle_optimizer_get_weights(paddle_optimizer* o, void** param_buffer);
 
 /**
  * @brief optimizer instance
- * @return return content of parameter buffer in optimizer
+ * @param state, receives the SerializeState output
+ * @return return state_buffer length
  */
-void* paddle_optimizer_get_weights(paddle_optimizer* o);
-int paddle_optimizer_get_state(paddle_optimizer* o, const char* state);
+int paddle_optimizer_get_state(paddle_optimizer* o, const char** state);
 
 #ifdef __cplusplus
 }
......
@@ -24,7 +24,8 @@ ParameterOptimizer *ParameterOptimizer::Create(
                          config.linear_lr().lr_decay_a(),
                          config.linear_lr().lr_decay_b());
     // default
-    return nullptr;
+    LOG(WARNING) << "no LrPolicy selected; using ConstLr by default";
+    return new ConstLr(0.1);
   };
   LrPolicy *lr = select_lr_policy(config);
   auto select_optimizer =
@@ -36,29 +37,32 @@ ParameterOptimizer *ParameterOptimizer::Create(
             lr);
       }
       if (config.optimizer() == OptimizerConfig::Adadelta) {
-        return new AdagradOptimizer(
-            config.adagrad().epsilon(), config.adagrad().decay(), lr);
+        return new AdadeltaOptimizer(config.adadelta().rho(),
+                                     config.adadelta().epsilon(),
+                                     config.adadelta().decay(),
+                                     lr);
       }
       if (config.optimizer() == OptimizerConfig::Adagrad) {
         return new AdagradOptimizer(
             config.adagrad().epsilon(), config.adagrad().decay(), lr);
       }
       if (config.optimizer() == OptimizerConfig::Adam) {
-        return new AdadeltaOptimizer(config.adadelta().rho(),
-                                     config.adadelta().epsilon(),
-                                     config.adadelta().decay(),
-                                     lr);
+        return new AdamOptimizer(config.adam().beta_1(),
+                                 config.adam().beta_2(),
+                                 config.adam().epsilon(),
+                                 config.adam().decay(),
+                                 lr);
       }
       // default
-      return new SGDOptimizer(config.sgd().momentum(),
-                              config.sgd().decay(),
-                              config.sgd().nesterov(),
-                              lr);
+      LOG(WARNING) << "no Optimizer selected; using SGDOptimizer by default";
+      return new SGDOptimizer(0.0, 0.0, false, lr);
     };
   return select_optimizer(config);
 }
 
-real *ParameterOptimizer::get_weight() const {
+float *ParameterOptimizer::get_weight(int *param_size) const {
+  *param_size = (int)parameter_->size();
   return parameter_->get_buffer();
 }
......
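Beyond formatting, this hunk fixes two real bugs: the Adadelta branch previously constructed an AdagradOptimizer and the Adam branch an AdadeltaOptimizer, and both default paths now log a warning and return a usable object instead of returning nullptr (for the lr policy) or reading SGD config fields that were never set. The shape of the fallback, as a standalone sketch with the surrounding lambda plumbing elided:

    // Sketch: a factory that warns and returns a safe default instead of
    // nullptr, so callers can invoke lr->LearningRate(...) unconditionally.
    LrPolicy* select_lr_policy(const OptimizerConfig& config) {
      // ... branches for the configured policies (e.g. linear_lr) elided ...
      LOG(WARNING) << "no LrPolicy selected; using ConstLr by default";
      return new ConstLr(0.1);  // was: return nullptr;
    }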
@@ -25,11 +25,10 @@ public:
   virtual const char *SerializeState();
   virtual void DeSerializeState(const std::string &state);
   virtual void Update(const Tensor *gradient) = 0;
-  virtual real *get_weight() const;
+  virtual float *get_weight(int *param_size) const;
   virtual void set_weight(Tensor *parameter);
 
 protected:
-  OptimizerConfig config_;
   Tensor *parameter_;
   // learning rate policy
......
@@ -77,7 +77,8 @@ public:
       opts[i]->set_weight(p);
     }
     for (size_t i = 0; i < opts.size(); ++i) {
-      real* newp = (real*)opts[i]->get_weight();
+      int s = 0;
+      float* newp = (float*)opts[i]->get_weight(&s);
       for (size_t j = 0; j < size; ++j) {
         EXPECT_EQ(newp[j], (*p)[j]);
       }
......
@@ -9,14 +9,18 @@ namespace optimizer {
 class SGDOptimizer : public ParameterOptimizer {
 public:
   SGDOptimizer(double m, double d, bool n, LrPolicy* lr)
-      : ParameterOptimizer(lr), momentum_(m), decay_(d), nesterov_(n) {}
+      : ParameterOptimizer(lr),
+        momentums_(nullptr),
+        momentum_(m),
+        decay_(d),
+        nesterov_(n) {}
   virtual ~SGDOptimizer() { delete momentums_; }
   void Update(const Tensor* gradient);
   const char* SerializeState();
   void DeSerializeState(const std::string& state);
   void set_weight(Tensor* p);
-  real* get_weight() const;
+  float* get_weight(int* param_size) const;
 
 private:
   Tensor* momentums_;
......
@@ -16,7 +16,7 @@ void SGDOptimizer::set_weight(Tensor *p) {
 void SGDOptimizer::Update(const Tensor *gradient) {
   num_sample_passed_ += 1;
   double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
-  real velocity = 0.0;
+  float velocity = 0.0;
   Tensor &param = *parameter_;
   const Tensor &grad = *gradient;
   Tensor &m = *momentums_;
......
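The loop body of SGDOptimizer::Update is elided in this hunk. For reference, a conventional momentum/Nesterov SGD step with L2 decay, written against the names visible above (param, grad, m, velocity, momentum_, decay_, nesterov_); this is a hedged reconstruction, not the commit's verbatim code:

    // Hedged sketch of the elided update loop.
    for (size_t i = 0; i < param.size(); ++i) {
      if (momentum_ == 0.0) {
        // plain SGD step with L2 weight decay folded into the gradient
        velocity = -learning_rate * (grad[i] + decay_ * param[i]);
      } else {
        // heavy-ball momentum: accumulate a decaying velocity
        m[i] = momentum_ * m[i] - learning_rate * (grad[i] + decay_ * param[i]);
        velocity = m[i];
      }
      if (nesterov_) {
        // Nesterov look-ahead: apply one extra momentum-scaled step
        param[i] += momentum_ * velocity - learning_rate * grad[i];
      } else {
        param[i] += velocity;
      }
    }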