diff --git a/paddle/optimizer/Tensor.h b/paddle/optimizer/Tensor.h
index 9f68877e3ba9ef7ca740eab6a5d208f97c130f87..a00a59bc6af4d714a8250f0267017d1bbfc19108 100644
--- a/paddle/optimizer/Tensor.h
+++ b/paddle/optimizer/Tensor.h
@@ -46,7 +46,7 @@ protected:
 };

 // TODO(zhihong): design problem of dynamic datatype, need to fix it
-typedef TensorT<real> Tensor;
+typedef TensorT<float> Tensor;

 }  // namespace optimizer
 }  // namespace paddle
diff --git a/paddle/optimizer/adadelta_optimizer.h b/paddle/optimizer/adadelta_optimizer.h
index d17fae6b70a65b3ec4d81de2063e42a5dafab53c..4f6137cf2e5aca82a6f0b5afee8afec699bfe313 100644
--- a/paddle/optimizer/adadelta_optimizer.h
+++ b/paddle/optimizer/adadelta_optimizer.h
@@ -8,7 +8,13 @@ namespace optimizer {
 class AdadeltaOptimizer : public ParameterOptimizer {
 public:
   AdadeltaOptimizer(double rho, double epsilon, double decay, LrPolicy *lr)
-      : ParameterOptimizer(lr), rho_(rho), epsilon_(epsilon), decay_(decay) {}
+      : ParameterOptimizer(lr),
+        accum_gradient_(nullptr),
+        accum_delta_(nullptr),
+        update_delta_(nullptr),
+        rho_(rho),
+        epsilon_(epsilon),
+        decay_(decay) {}
   ~AdadeltaOptimizer() {
     if (accum_gradient_) delete accum_gradient_;
     if (accum_delta_) delete accum_delta_;
@@ -16,13 +22,11 @@ public:
   }
   void Update(const Tensor *gradient);
   void set_weight(Tensor *p);
-  real *get_weight() const;

 private:
   Tensor *accum_gradient_;
   Tensor *accum_delta_;
   Tensor *update_delta_;
-
   double rho_;
   double epsilon_;
   double decay_;
diff --git a/paddle/optimizer/adagrad_optimizer.h b/paddle/optimizer/adagrad_optimizer.h
index 6c00665c86c05d5c4c266e1ee8c1672d1224beb1..1b05e99754bfb29ed95b29450310dc7888f9d1d1 100644
--- a/paddle/optimizer/adagrad_optimizer.h
+++ b/paddle/optimizer/adagrad_optimizer.h
@@ -8,13 +8,15 @@ namespace optimizer {
 class AdagradOptimizer : public ParameterOptimizer {
 public:
   AdagradOptimizer(double epsilon, double decay, LrPolicy *lr)
-      : ParameterOptimizer(lr), epsilon_(epsilon), decay_(decay) {}
+      : ParameterOptimizer(lr),
+        accum_gradient_(nullptr),
+        epsilon_(epsilon),
+        decay_(decay) {}
   ~AdagradOptimizer() {
     if (accum_gradient_) delete accum_gradient_;
   }
   void Update(const Tensor *gradient);
   void set_weight(Tensor *p);
-  real *get_weight() const;

 private:
   Tensor *accum_gradient_;
diff --git a/paddle/optimizer/adam_optimizer.h b/paddle/optimizer/adam_optimizer.h
index 718039e451492f90c2be8db5d5def3ec5fdfb7a6..840927e6c7d59ee3d51e284fb2593508e812ffbd 100644
--- a/paddle/optimizer/adam_optimizer.h
+++ b/paddle/optimizer/adam_optimizer.h
@@ -10,6 +10,8 @@ public:
   AdamOptimizer(
       double beta_1, double beta_2, double epsilon, double decay, LrPolicy *lr)
       : ParameterOptimizer(lr),
+        momentums_(nullptr),
+        velocitys_(nullptr),
         beta_1_(beta_1),
         beta_2_(beta_2),
         epsilon_(epsilon),
@@ -20,7 +22,6 @@ public:
   }
   void Update(const Tensor *gradient);
   void set_weight(Tensor *p);
-  real *get_weight() const;

 private:
   Tensor *momentums_;
diff --git a/paddle/optimizer/lr_policy.h b/paddle/optimizer/lr_policy.h
index 51debf675f38688384610d860cc46ec4e80b6dea..ab7d1fb0fe6a980df0db52d04bf89f3cfb453147 100644
--- a/paddle/optimizer/lr_policy.h
+++ b/paddle/optimizer/lr_policy.h
@@ -20,7 +20,7 @@ public:
     return learning_rate;
   }

-protected:
+private:
   double learning_rate;
 };

diff --git a/paddle/optimizer/optimizer.cc b/paddle/optimizer/optimizer.cc
index 5076029494cd55e8e7f7c3f6494c05af432099c6..9572f163ddeaa72105455a1e034f3ae6d0f39d35 100644
--- a/paddle/optimizer/optimizer.cc
+++ b/paddle/optimizer/optimizer.cc
@@ -2,6 +2,7 @@
 #include <string>

 #include "parameter_optimizer.h"
+
 using namespace paddle;
 using namespace paddle::optimizer;

@@ -26,6 +27,7 @@ MATCH_ENUM_TYPE(int32_t, PADDLE_ELEMENT_TYPE_INT32);
 MATCH_ENUM_TYPE(uint32_t, PADDLE_ELEMENT_TYPE_UINT32);
 MATCH_ENUM_TYPE(int64_t, PADDLE_ELEMENT_TYPE_INT64);
 MATCH_ENUM_TYPE(uint64_t, PADDLE_ELEMENT_TYPE_UINT64);
+// TODO(zhihong): only the types below are implemented; need to fix
 MATCH_ENUM_TYPE(float, PADDLE_ELEMENT_TYPE_FLOAT32);
 MATCH_ENUM_TYPE(double, PADDLE_ELEMENT_TYPE_FLOAT64);

@@ -35,15 +37,20 @@ struct paddle_optimizer {

 paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
                                           const int config_proto_len,
-                                          const char** state,
-                                          const int state_size) {
+                                          const paddle_element_type data_type,
+                                          void* param_buffer,
+                                          int num_bytes,
+                                          const char* state,
+                                          const int state_len) {
   paddle_optimizer* optimizer = new paddle_optimizer;
   std::string config(config_proto, config_proto + config_proto_len);
   optimizer->impl = ParameterOptimizer::Create(config);
   if (state != nullptr) {
-    std::string s(*state, *state + state_size);
+    std::string s(state, state + state_len);
     optimizer->impl->DeSerializeState(s);
   }
+  Tensor* param = new Tensor(reinterpret_cast<float*>(param_buffer), num_bytes);
+  optimizer->impl->set_weight(param);
   return optimizer;
 }

@@ -57,28 +64,19 @@ int paddle_update_parameter(paddle_optimizer* o,
                             const void* grad_buffer,
                             int num_bytes) {
   // TOOD(zhihong): datatype not work. need to add the runtime datatype
-  auto grad_type = reinterpret_cast<const real*>(grad_buffer);
-  Tensor* gradient = new Tensor(const_cast<real*>(grad_type), num_bytes);
+  auto grad_type = reinterpret_cast<const float*>(grad_buffer);
+  Tensor* gradient = new Tensor(const_cast<float*>(grad_type), num_bytes);
   o->impl->Update(gradient);
   return PADDLE_SUCCESS;
 }

-int paddle_optimizer_set_weights(paddle_optimizer* o,
-                                 const paddle_element_type data_type,
-                                 void* param_buffer,
-                                 int num_bytes) {
-  // TOOD(zhihong): datatype not work. need to add the runtime datatype
-  Tensor* param = new Tensor(reinterpret_cast<real*>(param_buffer), num_bytes);
-  o->impl->set_weight(param);
-  return PADDLE_SUCCESS;
-}
-
-void* paddle_optimizer_get_weights(paddle_optimizer* o) {
-  void* buffer = (void*)o->impl->get_weight();
-  return buffer;
+int paddle_optimizer_get_weights(paddle_optimizer* o, void** param_buffer) {
+  int param_size = 0;
+  *param_buffer = (void*)o->impl->get_weight(&param_size);
+  return param_size;
 }

-int paddle_optimizer_get_state(paddle_optimizer* o, const char* state) {
-  state = o->impl->SerializeState();
-  return PADDLE_SUCCESS;
+int paddle_optimizer_get_state(paddle_optimizer* o, const char** state) {
+  *state = o->impl->SerializeState();
+  return strlen(*state);
 }
diff --git a/paddle/optimizer/optimizer.h b/paddle/optimizer/optimizer.h
index ee017eddabe3b348d96f9ef358b48b7807f38000..a5f468b06b54c8d483ca7c81bac5ea54d4c5d6d4 100644
--- a/paddle/optimizer/optimizer.h
+++ b/paddle/optimizer/optimizer.h
@@ -3,19 +3,18 @@
 #include <stdbool.h>
 #include <stdint.h>

-/*! \brief optimizer export C API. which will be used in
-    Case A, on Trainer (On ParameterServer Client) optimize gradient
-
-    Case B, on ParameterServer side optimize gradient
-
-    To simplify the configuration parsing. optimizer *do not* parse any config
-    e.g. learning rate should be calculated by the caller
+/**
+ * @brief optimizer library, independent of other modules,
+ * used in:
+ * Case A, the gradient is optimized locally on the trainer.
+ *
+ * Case B, the gradient is optimized on the parameter server.
  */
 #ifdef __cplusplus
 extern "C" {
 #endif

-/*! \brief datatypes */
+
 typedef enum {
   PADDLE_ELEMENT_TYPE_INT32 = 0,
   PADDLE_ELEMENT_TYPE_UINT32 = 1,
@@ -25,7 +24,9 @@ typedef enum {
   PADDLE_ELEMENT_TYPE_FLOAT64 = 5,
 } paddle_element_type;

-/*! \brief execute status code */
+/**
+ * @brief execution status code
+ */
 const int32_t PADDLE_SUCCESS = 0;
 const int32_t PADDLE_ERROR = -1;

@@ -46,8 +47,11 @@ typedef struct paddle_optimizer paddle_optimizer;
  */
 paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
                                           const int config_proto_len,
-                                          const char** state,
-                                          const int state_size);
+                                          const paddle_element_type data_type,
+                                          void* param_buffer,
+                                          int num_bytes,
+                                          const char* state,
+                                          const int state_len);

 /**
  * @brief release optimizer
@@ -72,23 +76,17 @@ int paddle_update_parameter(paddle_optimizer* o,
                             const void* grad_buffer,
                             int num_bytes);

 /**
  * @brief optimizer instance
- * @param data_type datatype of gradient
  * @param param_buffer, initilized parameter buffer
- * @param num_bytes, parameter size
- * @return return exec status
+ * @return length of the parameter buffer
  */
-int paddle_optimizer_set_weights(paddle_optimizer* o,
-                                 const paddle_element_type data_type,
-                                 void* param_buffer,
-                                 int num_bytes);
+int paddle_optimizer_get_weights(paddle_optimizer* o, void** param_buffer);

 /**
  * @brief optimizer instance
- * @return return content of parameter buffer in optimizer
+ * @param state buffer to receive the serialized training state
+ * @return length of the state buffer
  */
-void* paddle_optimizer_get_weights(paddle_optimizer* o);
-
-int paddle_optimizer_get_state(paddle_optimizer* o, const char* state);
+int paddle_optimizer_get_state(paddle_optimizer* o, const char** state);

 #ifdef __cplusplus
 }
diff --git a/paddle/optimizer/parameter_optimizer.cc b/paddle/optimizer/parameter_optimizer.cc
index 00e9b858551bbab95a234a9da46fb383b5d50778..1a96880c57464f814d8e64f705cb974eb0e4f682 100644
--- a/paddle/optimizer/parameter_optimizer.cc
+++ b/paddle/optimizer/parameter_optimizer.cc
@@ -24,7 +24,8 @@ ParameterOptimizer *ParameterOptimizer::Create(
                            config.linear_lr().lr_decay_a(),
                            config.linear_lr().lr_decay_b());
     // default
-    return nullptr;
+    LOG(WARNING) << "no LrPolicy selected, using ConstLr by default";
+    return new ConstLr(0.1);
   };
   LrPolicy *lr = select_lr_policy(config);
   auto select_optimizer =
@@ -36,29 +37,32 @@ ParameterOptimizer *ParameterOptimizer::Create(
                               lr);
     }
     if (config.optimizer() == OptimizerConfig::Adadelta) {
-      return new AdagradOptimizer(
-          config.adagrad().epsilon(), config.adagrad().decay(), lr);
+      return new AdadeltaOptimizer(config.adadelta().rho(),
+                                   config.adadelta().epsilon(),
+                                   config.adadelta().decay(),
+                                   lr);
     }
     if (config.optimizer() == OptimizerConfig::Adagrad) {
       return new AdagradOptimizer(
           config.adagrad().epsilon(), config.adagrad().decay(), lr);
     }
     if (config.optimizer() == OptimizerConfig::Adam) {
-      return new AdadeltaOptimizer(config.adadelta().rho(),
-                                   config.adadelta().epsilon(),
-                                   config.adadelta().decay(),
-                                   lr);
+      return new AdamOptimizer(config.adam().beta_1(),
+                               config.adam().beta_2(),
+                               config.adam().epsilon(),
+                               config.adam().decay(),
+                               lr);
     }
     // default
-    return new SGDOptimizer(config.sgd().momentum(),
-                            config.sgd().decay(),
-                            config.sgd().nesterov(),
-                            lr);
+    LOG(WARNING) << "no Optimizer selected, using SGDOptimizer by default";
+    return new SGDOptimizer(0.0, 0.0, false, lr);
   };

   return select_optimizer(config);
 }

-real *ParameterOptimizer::get_weight() const {
+float *ParameterOptimizer::get_weight(int *param_size) const {
+  *param_size = (int)parameter_->size();
   return parameter_->get_buffer();
 }
diff --git a/paddle/optimizer/parameter_optimizer.h b/paddle/optimizer/parameter_optimizer.h
index 33b3f837b43c2af3fc4372dc6d47b0aaf1894814..f65f1d71a4271b2317d3e5714435cd2e68f3a54c 100644
--- a/paddle/optimizer/parameter_optimizer.h
+++ b/paddle/optimizer/parameter_optimizer.h
@@ -25,11 +25,10 @@ public:
   virtual const char *SerializeState();
   virtual void DeSerializeState(const std::string &state);
   virtual void Update(const Tensor *gradient) = 0;
-  virtual real *get_weight() const;
+  virtual float *get_weight(int *param_size) const;
   virtual void set_weight(Tensor *parameter);

 protected:
-  OptimizerConfig config_;
   Tensor *parameter_;

   // learning rate policy
diff --git a/paddle/optimizer/parameter_optimizer_test.cpp b/paddle/optimizer/parameter_optimizer_test.cpp
index d9f2ed8e950fb7f9aff0638f75c094909ca5e329..d39d50a1abf4651650252ef55440959d71a1c5ac 100644
--- a/paddle/optimizer/parameter_optimizer_test.cpp
+++ b/paddle/optimizer/parameter_optimizer_test.cpp
@@ -77,7 +77,8 @@ public:
       opts[i]->set_weight(p);
     }
     for (size_t i = 0; i < opts.size(); ++i) {
-      real* newp = (real*)opts[i]->get_weight();
+      int s = 0;
+      float* newp = (float*)opts[i]->get_weight(&s);
       for (size_t j = 0; j < size; ++j) {
         EXPECT_EQ(newp[j], (*p)[j]);
       }
diff --git a/paddle/optimizer/sgd_optimizer.h b/paddle/optimizer/sgd_optimizer.h
index 284d0a4d0c7ea7cb627f810b3e8044f3aeabb0c4..1bb97349c7cdcf7a7604d9f10a9aa3eb91f47971 100644
--- a/paddle/optimizer/sgd_optimizer.h
+++ b/paddle/optimizer/sgd_optimizer.h
@@ -9,14 +9,18 @@ namespace optimizer {
 class SGDOptimizer : public ParameterOptimizer {
 public:
   SGDOptimizer(double m, double d, bool n, LrPolicy* lr)
-      : ParameterOptimizer(lr), momentum_(m), decay_(d), nesterov_(n) {}
+      : ParameterOptimizer(lr),
+        momentums_(nullptr),
+        momentum_(m),
+        decay_(d),
+        nesterov_(n) {}
   virtual ~SGDOptimizer() { delete momentums_; }
   void Update(const Tensor* gradient);
   const char* SerializeState();
   void DeSerializeState(const std::string& state);
   void set_weight(Tensor* p);
-  real* get_weight() const;
+  float* get_weight(int* param_size) const;

 private:
   Tensor* momentums_;
diff --git a/paddle/optimizer/sgd_optmizer.cc b/paddle/optimizer/sgd_optmizer.cc
index b40bf7c1020b0567808be395cd2c3e23810a621c..fea550f8d1d7c4c0051a12e880ce815c9e74ef43 100644
--- a/paddle/optimizer/sgd_optmizer.cc
+++ b/paddle/optimizer/sgd_optmizer.cc
@@ -16,7 +16,7 @@ void SGDOptimizer::set_weight(Tensor *p) {
 void SGDOptimizer::Update(const Tensor *gradient) {
   num_sample_passed_ += 1;
   double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
-  real velocity = 0.0;
+  float velocity = 0.0;
   Tensor &param = *parameter_;
   const Tensor &grad = *gradient;
   Tensor &m = *momentums_;
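For reference, a minimal caller-side sketch of how the revised C API fits together after this patch: the parameter buffer is now handed to paddle_create_optimizer up front (replacing the removed paddle_optimizer_set_weights), while paddle_optimizer_get_weights and paddle_optimizer_get_state return buffer lengths and pass the buffers back through out-parameters. The wrapper function name, the config buffer, and the buffer sizes below are illustrative assumptions, not part of the diff; paddle_update_parameter is left out because its full declaration sits outside the hunks shown above.

// Hypothetical usage sketch of the updated C API. Only the calls whose full
// signatures appear in this patch are used; everything else is assumed.
#include <vector>

#include "optimizer.h"

// `config_proto` is assumed to hold a serialized OptimizerConfig produced by
// the trainer-side configuration code; it is not constructed here.
void example_usage(const unsigned char* config_proto, int config_proto_len) {
  std::vector<float> param(512, 0.0f);  // parameter buffer owned by the caller

  // The parameter buffer is handed over at creation time; the former
  // paddle_optimizer_set_weights() call no longer exists. No serialized
  // training state is passed on this first run.
  paddle_optimizer* o = paddle_create_optimizer(config_proto,
                                                config_proto_len,
                                                PADDLE_ELEMENT_TYPE_FLOAT32,
                                                param.data(),
                                                static_cast<int>(param.size()),
                                                /*state=*/nullptr,
                                                /*state_len=*/0);

  // ... gradient updates via paddle_update_parameter() happen here; that
  // entry point is unchanged by this patch apart from the real -> float casts.

  // get_weights now reports the parameter length through its return value and
  // hands the buffer back through an out-parameter.
  void* weights = nullptr;
  int param_size = paddle_optimizer_get_weights(o, &weights);

  // get_state likewise returns the state length and fills the out-parameter
  // with the serialized training state.
  const char* state = nullptr;
  int state_len = paddle_optimizer_get_state(o, &state);

  (void)param_size;
  (void)state_len;
  // The optimizer is released through the library's existing release entry
  // point (declared in optimizer.h, outside the hunks shown above).
}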