diff --git a/paddle/optimizer/CMakeLists.txt b/paddle/optimizer/CMakeLists.txt index e93ba102945425197348587d9b9ef6f8af2ffe04..bafd8a9b97a97a0e90ec2f9b1a72c5f2c8beadf2 100644 --- a/paddle/optimizer/CMakeLists.txt +++ b/paddle/optimizer/CMakeLists.txt @@ -12,7 +12,6 @@ set(OPITMIZER_SRCS add_library(optimizer STATIC ${OPITMIZER_SRCS}) add_dependencies(optimizer gen_proto_cpp) -add_simple_unittest(tensor_test) add_simple_unittest(serialization_test) add_simple_unittest(parameter_optimizer_test) add_dependencies(parameter_optimizer_test optimizer) diff --git a/paddle/optimizer/adadelta_optimizer.cc b/paddle/optimizer/adadelta_optimizer.cc index d1c6571d9b47301c447cc005c2bd1b1931fbf4ee..465ad5e0d2089121a0f11ab916afe0420cbcfab7 100644 --- a/paddle/optimizer/adadelta_optimizer.cc +++ b/paddle/optimizer/adadelta_optimizer.cc @@ -27,23 +27,22 @@ void AdadeltaOptimizer::Update(const Tensor* gradient) { const char* AdadeltaOptimizer::SerializeState(int* state_len) { AdadeltaOptimizerState state; - state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_)); + // TODO(zhihong) : add lr_policy serialization state.set_num_sample_passed(num_sample_passed_); TensorToProto(*parameter_, state.mutable_parameter()); TensorToProto(*accum_gradient_, state.mutable_accum_gradient()); TensorToProto(*accum_delta_, state.mutable_accum_delta()); TensorToProto(*update_delta_, state.mutable_update_delta()); - - *state_len = CalStateSize(parameter_, accum_gradient_, accum_delta_, update_delta_); - return state.SerializeAsString().c_str(); + // NOTE(review): returning c_str() of a function-local std::string dangles once the string is destroyed; keep the buffer in static storage so the pointer stays valid for the caller (not thread-safe — TODO confirm callers copy before the next call). + static std::string str; + str = state.SerializeAsString(); + *state_len = str.size(); + return str.c_str(); } void AdadeltaOptimizer::DeserializeState(const std::string& str) { AdadeltaOptimizerState state; state.ParseFromString(str); - lr_policy_->set(state.learning_rate()); + // TODO(zhihong) : add lr_policy DeserializeState num_sample_passed_ = state.num_sample_passed(); ProtoToTensor(state.parameter(), parameter_); diff --git 
a/paddle/optimizer/adadelta_optimizer.h b/paddle/optimizer/adadelta_optimizer.h index 58a26ebb7a73afa89992cbf3964767486f610690..1d5eab097f57d049855dd171a1aa6f74c48ae0e7 100644 --- a/paddle/optimizer/adadelta_optimizer.h +++ b/paddle/optimizer/adadelta_optimizer.h @@ -10,17 +10,13 @@ public: AdadeltaOptimizer( Tensor *parameter, LrPolicy *lr, double rho, double epsilon, double decay) : ParameterOptimizer(parameter, lr), + accum_gradient_(new Tensor(parameter->size())), + accum_delta_(new Tensor(parameter->size())), + update_delta_(new Tensor(parameter->size())), rho_(rho), epsilon_(epsilon), - decay_(decay) { - size_t size = parameter->size(); - if (accum_gradient_) delete accum_gradient_; - accum_gradient_ = new Tensor(size); - if (accum_delta_) delete accum_delta_; - accum_delta_ = new Tensor(size); - if (update_delta_) delete update_delta_; - update_delta_ = new Tensor(size); - } + decay_(decay) {} + ~AdadeltaOptimizer() { if (accum_gradient_) delete accum_gradient_; if (accum_delta_) delete accum_delta_; diff --git a/paddle/optimizer/adagrad_optimizer.cc b/paddle/optimizer/adagrad_optimizer.cc index ebc4d9e83ae8527253dd15fb4987c2148acf807b..bdaa7877d2bc58c17c51b977852d4b6fec511ed2 100644 --- a/paddle/optimizer/adagrad_optimizer.cc +++ b/paddle/optimizer/adagrad_optimizer.cc @@ -19,19 +19,20 @@ void AdagradOptimizer::Update(const Tensor* gradient) { } const char* AdagradOptimizer::SerializeState(int* state_len) { AdagradOptimizerState state; - state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_)); + // TODO(zhihong) : add lr_policy serialization state.set_num_sample_passed(num_sample_passed_); TensorToProto(*parameter_, state.mutable_parameter()); TensorToProto(*accum_gradient_, state.mutable_accum_gradient()); - *state_len = CalStateSize(parameter_, accum_gradient_); - return state.SerializeAsString().c_str(); + // NOTE(review): do not return c_str() of a local std::string — it dangles at function exit; static storage keeps the pointer valid for the caller (not thread-safe). + static std::string str; + str = state.SerializeAsString(); + *state_len = str.size(); + return str.c_str(); } void AdagradOptimizer::DeserializeState(const
std::string& str) { AdagradOptimizerState state; state.ParseFromString(str); - lr_policy_->set(state.learning_rate()); + // TODO(zhihong) : add lr_policy DeserializeState num_sample_passed_ = state.num_sample_passed(); ProtoToTensor(state.parameter(), parameter_); ProtoToTensor(state.accum_gradient(), accum_gradient_); diff --git a/paddle/optimizer/adagrad_optimizer.h b/paddle/optimizer/adagrad_optimizer.h index 90fc1dd4ac900090d67ff41aa4131184eb3fb604..15d0a965ad0c6967e73b14b465168fa66eb8fba3 100644 --- a/paddle/optimizer/adagrad_optimizer.h +++ b/paddle/optimizer/adagrad_optimizer.h @@ -11,11 +11,10 @@ public: LrPolicy *lr, double epsilon, double decay) - : ParameterOptimizer(parameter, lr), epsilon_(epsilon), decay_(decay) { - size_t size = parameter->size(); - if (accum_gradient_) delete accum_gradient_; - accum_gradient_ = new Tensor(size); - } + : ParameterOptimizer(parameter, lr), + accum_gradient_(new Tensor(parameter->size())), + epsilon_(epsilon), + decay_(decay) {} ~AdagradOptimizer() { if (accum_gradient_) delete accum_gradient_; } diff --git a/paddle/optimizer/adam_optimizer.cc b/paddle/optimizer/adam_optimizer.cc index 53b3350d68f67dea31555b1952aeb9b47286cce4..96cd6e4a129af59e5094225fdb6234ec1d2609e3 100644 --- a/paddle/optimizer/adam_optimizer.cc +++ b/paddle/optimizer/adam_optimizer.cc @@ -24,20 +24,20 @@ void AdamOptimizer::Update(const Tensor *gradient) { const char *AdamOptimizer::SerializeState(int *state_len) { AdamOptimizerState state; - state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_)); + // TODO(zhihong) : add lr_policy serialization state.set_num_sample_passed(num_sample_passed_); TensorToProto(*parameter_, state.mutable_parameter()); TensorToProto(*velocitys_, state.mutable_momentums()); - - *state_len = CalStateSize(parameter_, momentums_, velocitys_); - return state.SerializeAsString().c_str(); + // NOTE(review): momentums_ is never serialized and velocitys_ is written into mutable_momentums() above — looks like a field-mapping bug; verify against the AdamOptimizerState proto. + // NOTE(review): c_str() of a local std::string dangles at function exit; static storage keeps the pointer valid for the caller (not thread-safe). + static std::string str; + str = state.SerializeAsString(); + *state_len = str.size(); + return str.c_str(); } void
AdamOptimizer::DeserializeState(const std::string &str) { AdamOptimizerState state; state.ParseFromString(str); - lr_policy_->set(state.learning_rate()); + // TODO(zhihong) : add lr_policy DeserializeState num_sample_passed_ = state.num_sample_passed(); ProtoToTensor(state.parameter(), parameter_); diff --git a/paddle/optimizer/adam_optimizer.h b/paddle/optimizer/adam_optimizer.h index 04bc01154fb6b274b142db99b37328f6b0cb4d4a..0ea4c8bb8470504282b4d6c12039791ce896e401 100644 --- a/paddle/optimizer/adam_optimizer.h +++ b/paddle/optimizer/adam_optimizer.h @@ -14,14 +14,12 @@ public: double epsilon, double decay) : ParameterOptimizer(parameter, lr), + momentums_(new Tensor(parameter->size())), + velocitys_(new Tensor(parameter->size())), beta_1_(beta_1), beta_2_(beta_2), epsilon_(epsilon), - decay_(decay) { - size_t size = parameter->size(); - momentums_ = new Tensor(size); - velocitys_ = new Tensor(size); - } + decay_(decay) {} ~AdamOptimizer() { if (momentums_) delete momentums_; if (velocitys_) delete velocitys_; diff --git a/paddle/optimizer/lr_policy.h b/paddle/optimizer/lr_policy.h index 686ba22671505b7975ebf8e76e13abbc7e67ba3d..be2bf89504ec7fb133b3a1064914756ff051a588 100644 --- a/paddle/optimizer/lr_policy.h +++ b/paddle/optimizer/lr_policy.h @@ -10,7 +10,8 @@ class LrPolicy { public: virtual ~LrPolicy() {} virtual double LearningRate(const uint64_t num_sample_passed) = 0; - virtual void set(double current_learning_rate) = 0; + virtual const char *SerializeState(int *state_len) = 0; + virtual void DeserializeState(const std::string &state) = 0; }; // constant learning rate policy @@ -20,9 +21,8 @@ public: double LearningRate(const uint64_t num_sample_passed) { return learning_rate; } - void set(double current_learning_rate) { - learning_rate = current_learning_rate; - } + const char *SerializeState(int *state_len); + void DeserializeState(const std::string &state); private: double learning_rate; @@ -35,9 +35,8 @@ public: double LearningRate(const uint64_t 
num_sample_passed) { return std::max(learning_rate - lr_decay_a * num_sample_passed, lr_decay_b); } - void set(double current_learning_rate) { - learning_rate = current_learning_rate; - } + const char *SerializeState(int *state_len); + void DeserializeState(const std::string &state); private: double learning_rate; diff --git a/paddle/optimizer/parameter_optimizer.h b/paddle/optimizer/parameter_optimizer.h index 658b22406d68bc1733e33d7d14de710f2eaeb1d9..d89c9abb791f947172078d4dce5b1c366852591b 100644 --- a/paddle/optimizer/parameter_optimizer.h +++ b/paddle/optimizer/parameter_optimizer.h @@ -19,7 +19,10 @@ public: */ ParameterOptimizer(Tensor *parameter, LrPolicy *lr) : parameter_(parameter), lr_policy_(lr), num_sample_passed_(0) {} - virtual ~ParameterOptimizer() { delete parameter_; }; + virtual ~ParameterOptimizer() { + delete parameter_; + delete lr_policy_; + } static ParameterOptimizer *Create(const std::string &config_proto, Tensor *parameter); diff --git a/paddle/optimizer/serialization.h b/paddle/optimizer/serialization.h index 07874502a5015235cdb6a00d1032c0178e10b1c7..21de3259a885b663da738f0b45695f796f1a936d 100644 --- a/paddle/optimizer/serialization.h +++ b/paddle/optimizer/serialization.h @@ -10,18 +10,6 @@ namespace paddle { namespace optimizer { -static unsigned CalStateSize() { return 0; } - -template -unsigned CalStateSize(const HEAD& head, const TAIL&... tail) { - return sizeof head + CalStateSize(tail...); -} - -template -unsigned CalStateSize(const Tensor* head, const TAIL&... 
tail) { - return head->size() + CalStateSize(tail...); -} - static void TensorToProto(const Tensor& tensor, TensorProto* proto) { proto->set_data_type(TensorProto::PADDLE_ELEMENT_TYPE_FLOAT32); std::stringstream os; diff --git a/paddle/optimizer/sgd_optimizer.cc b/paddle/optimizer/sgd_optimizer.cc index 8b4ea777d2dab7346ca1148816756c91423d3bc3..66843ecb4b24101aadf119c59d4a9bc55360592d 100644 --- a/paddle/optimizer/sgd_optimizer.cc +++ b/paddle/optimizer/sgd_optimizer.cc @@ -29,19 +29,20 @@ void SGDOptimizer::Update(const Tensor *gradient) { const char *SGDOptimizer::SerializeState(int *state_len) { SGDOptimizerState state; - state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_)); + // TODO(zhihong) : add lr_policy serialization state.set_num_sample_passed(num_sample_passed_); TensorToProto(*parameter_, state.mutable_parameter()); TensorToProto(*momentums_, state.mutable_momentums()); - *state_len = CalStateSize(parameter_, momentums_); - return state.SerializeAsString().c_str(); + // NOTE(review): c_str() of a local std::string dangles at function exit; static storage keeps the pointer valid for the caller (not thread-safe). + static std::string str; + str = state.SerializeAsString(); + *state_len = str.size(); + return str.c_str(); } void SGDOptimizer::DeserializeState(const std::string &str) { SGDOptimizerState state; state.ParseFromString(str); - lr_policy_->set(state.learning_rate()); + // TODO(zhihong) : add lr_policy DeserializeState num_sample_passed_ = state.num_sample_passed(); ProtoToTensor(state.parameter(), parameter_); diff --git a/paddle/optimizer/sgd_optimizer.h b/paddle/optimizer/sgd_optimizer.h index 1d4ea46f1a4aadfb6d9853f3c6d1c6bcf2ee1f48..b74a902e1aa40a7831b36ab826d72372a3588bcf 100644 --- a/paddle/optimizer/sgd_optimizer.h +++ b/paddle/optimizer/sgd_optimizer.h @@ -16,7 +16,6 @@ public: if (momentum_ != 0.0) { size_t size = parameter->size(); // TODO: fix it with align aware allocator bind to Tensor - if (momentums_) delete momentums_; momentums_ = new Tensor(size); } }