From 1814fc294e58356db774c1308463c0bde095ebfb Mon Sep 17 00:00:00 2001
From: dzhwinter
Date: Wed, 14 Jun 2017 10:20:08 +0800
Subject: [PATCH] "fix lr_policy serialization"

---
 paddle/optimizer/CMakeLists.txt        |  1 -
 paddle/optimizer/adadelta_optimizer.cc | 11 +++++------
 paddle/optimizer/adadelta_optimizer.h  | 14 +++++---------
 paddle/optimizer/adagrad_optimizer.cc  |  9 +++++----
 paddle/optimizer/adagrad_optimizer.h   |  9 ++++-----
 paddle/optimizer/adam_optimizer.cc     | 10 +++++-----
 paddle/optimizer/adam_optimizer.h      |  8 +++-----
 paddle/optimizer/lr_policy.h           | 13 ++++++-------
 paddle/optimizer/parameter_optimizer.h |  5 ++++-
 paddle/optimizer/serialization.h       | 12 ------------
 paddle/optimizer/sgd_optimizer.cc      |  9 +++++----
 paddle/optimizer/sgd_optimizer.h       |  1 -
 12 files changed, 42 insertions(+), 60 deletions(-)

diff --git a/paddle/optimizer/CMakeLists.txt b/paddle/optimizer/CMakeLists.txt
index e93ba102945..bafd8a9b97a 100644
--- a/paddle/optimizer/CMakeLists.txt
+++ b/paddle/optimizer/CMakeLists.txt
@@ -12,7 +12,6 @@ set(OPITMIZER_SRCS
 add_library(optimizer STATIC ${OPITMIZER_SRCS})
 add_dependencies(optimizer gen_proto_cpp)
 
-add_simple_unittest(tensor_test)
 add_simple_unittest(serialization_test)
 add_simple_unittest(parameter_optimizer_test)
 add_dependencies(parameter_optimizer_test optimizer)
diff --git a/paddle/optimizer/adadelta_optimizer.cc b/paddle/optimizer/adadelta_optimizer.cc
index d1c6571d9b4..465ad5e0d20 100644
--- a/paddle/optimizer/adadelta_optimizer.cc
+++ b/paddle/optimizer/adadelta_optimizer.cc
@@ -27,23 +27,22 @@ void AdadeltaOptimizer::Update(const Tensor* gradient) {
 
 const char* AdadeltaOptimizer::SerializeState(int* state_len) {
   AdadeltaOptimizerState state;
-  state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_));
+  // TODO(zhihong) : add lr_policy serialization
   state.set_num_sample_passed(num_sample_passed_);
 
   TensorToProto(*parameter_, state.mutable_parameter());
   TensorToProto(*accum_gradient_, state.mutable_accum_gradient());
   TensorToProto(*accum_delta_, state.mutable_accum_delta());
   TensorToProto(*update_delta_, state.mutable_update_delta());
-
-  *state_len =
-      CalStateSize(parameter_, accum_gradient_, accum_delta_, update_delta_);
-  return state.SerializeAsString().c_str();
+  auto str = state.SerializeAsString();
+  *state_len = str.size();
+  return str.c_str();
 }
 
 void AdadeltaOptimizer::DeserializeState(const std::string& str) {
   AdadeltaOptimizerState state;
   state.ParseFromString(str);
-  lr_policy_->set(state.learning_rate());
+  // TODO(zhihong) : add lr_policy DeserializeState
   num_sample_passed_ = state.num_sample_passed();
 
   ProtoToTensor(state.parameter(), parameter_);
diff --git a/paddle/optimizer/adadelta_optimizer.h b/paddle/optimizer/adadelta_optimizer.h
index 58a26ebb7a7..1d5eab097f5 100644
--- a/paddle/optimizer/adadelta_optimizer.h
+++ b/paddle/optimizer/adadelta_optimizer.h
@@ -10,17 +10,13 @@ public:
   AdadeltaOptimizer(
       Tensor *parameter, LrPolicy *lr, double rho, double epsilon, double decay)
       : ParameterOptimizer(parameter, lr),
+        accum_gradient_(new Tensor(parameter->size())),
+        accum_delta_(new Tensor(parameter->size())),
+        update_delta_(new Tensor(parameter->size())),
         rho_(rho),
         epsilon_(epsilon),
-        decay_(decay) {
-    size_t size = parameter->size();
-    if (accum_gradient_) delete accum_gradient_;
-    accum_gradient_ = new Tensor(size);
-    if (accum_delta_) delete accum_delta_;
-    accum_delta_ = new Tensor(size);
-    if (update_delta_) delete update_delta_;
-    update_delta_ = new Tensor(size);
-  }
+        decay_(decay) {}
+
   ~AdadeltaOptimizer() {
     if (accum_gradient_) delete accum_gradient_;
     if (accum_delta_) delete accum_delta_;
diff --git a/paddle/optimizer/adagrad_optimizer.cc b/paddle/optimizer/adagrad_optimizer.cc
index ebc4d9e83ae..bdaa7877d2b 100644
--- a/paddle/optimizer/adagrad_optimizer.cc
+++ b/paddle/optimizer/adagrad_optimizer.cc
@@ -19,19 +19,20 @@ void AdagradOptimizer::Update(const Tensor* gradient) {
 }
 const char* AdagradOptimizer::SerializeState(int* state_len) {
   AdagradOptimizerState state;
-  state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_));
+  // TODO(zhihong) : add lr_policy serialization
   state.set_num_sample_passed(num_sample_passed_);
 
   TensorToProto(*parameter_, state.mutable_parameter());
   TensorToProto(*accum_gradient_, state.mutable_accum_gradient());
-  *state_len = CalStateSize(parameter_, accum_gradient_);
-  return state.SerializeAsString().c_str();
+  auto str = state.SerializeAsString();
+  *state_len = str.size();
+  return str.c_str();
 }
 
 void AdagradOptimizer::DeserializeState(const std::string& str) {
   AdagradOptimizerState state;
   state.ParseFromString(str);
-  lr_policy_->set(state.learning_rate());
+  // TODO(zhihong) : add lr_policy DeserializeState
   num_sample_passed_ = state.num_sample_passed();
   ProtoToTensor(state.parameter(), parameter_);
   ProtoToTensor(state.accum_gradient(), accum_gradient_);
diff --git a/paddle/optimizer/adagrad_optimizer.h b/paddle/optimizer/adagrad_optimizer.h
index 90fc1dd4ac9..15d0a965ad0 100644
--- a/paddle/optimizer/adagrad_optimizer.h
+++ b/paddle/optimizer/adagrad_optimizer.h
@@ -11,11 +11,10 @@ public:
                    LrPolicy *lr,
                    double epsilon,
                    double decay)
-      : ParameterOptimizer(parameter, lr), epsilon_(epsilon), decay_(decay) {
-    size_t size = parameter->size();
-    if (accum_gradient_) delete accum_gradient_;
-    accum_gradient_ = new Tensor(size);
-  }
+      : ParameterOptimizer(parameter, lr),
+        accum_gradient_(new Tensor(parameter->size())),
+        epsilon_(epsilon),
+        decay_(decay) {}
   ~AdagradOptimizer() {
     if (accum_gradient_) delete accum_gradient_;
   }
diff --git a/paddle/optimizer/adam_optimizer.cc b/paddle/optimizer/adam_optimizer.cc
index 53b3350d68f..96cd6e4a129 100644
--- a/paddle/optimizer/adam_optimizer.cc
+++ b/paddle/optimizer/adam_optimizer.cc
@@ -24,20 +24,20 @@ void AdamOptimizer::Update(const Tensor *gradient) {
 
 const char *AdamOptimizer::SerializeState(int *state_len) {
   AdamOptimizerState state;
-  state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_));
+  // TODO(zhihong) : add lr_policy serialization
   state.set_num_sample_passed(num_sample_passed_);
 
   TensorToProto(*parameter_, state.mutable_parameter());
   TensorToProto(*velocitys_, state.mutable_momentums());
-
-  *state_len = CalStateSize(parameter_, momentums_, velocitys_);
-  return state.SerializeAsString().c_str();
+  auto str = state.SerializeAsString();
+  *state_len = str.size();
+  return str.c_str();
 }
 
 void AdamOptimizer::DeserializeState(const std::string &str) {
   AdamOptimizerState state;
   state.ParseFromString(str);
-  lr_policy_->set(state.learning_rate());
+  // TODO(zhihong) : add lr_policy DeserializeState
   num_sample_passed_ = state.num_sample_passed();
 
   ProtoToTensor(state.parameter(), parameter_);
diff --git a/paddle/optimizer/adam_optimizer.h b/paddle/optimizer/adam_optimizer.h
index 04bc01154fb..0ea4c8bb847 100644
--- a/paddle/optimizer/adam_optimizer.h
+++ b/paddle/optimizer/adam_optimizer.h
@@ -14,14 +14,12 @@ public:
                 double epsilon,
                 double decay)
       : ParameterOptimizer(parameter, lr),
+        momentums_(new Tensor(parameter->size())),
+        velocitys_(new Tensor(parameter->size())),
         beta_1_(beta_1),
         beta_2_(beta_2),
         epsilon_(epsilon),
-        decay_(decay) {
-    size_t size = parameter->size();
-    momentums_ = new Tensor(size);
-    velocitys_ = new Tensor(size);
-  }
+        decay_(decay) {}
   ~AdamOptimizer() {
     if (momentums_) delete momentums_;
     if (velocitys_) delete velocitys_;
diff --git a/paddle/optimizer/lr_policy.h b/paddle/optimizer/lr_policy.h
index 686ba226715..be2bf89504e 100644
--- a/paddle/optimizer/lr_policy.h
+++ b/paddle/optimizer/lr_policy.h
@@ -10,7 +10,8 @@ class LrPolicy {
 public:
   virtual ~LrPolicy() {}
   virtual double LearningRate(const uint64_t num_sample_passed) = 0;
-  virtual void set(double current_learning_rate) = 0;
+  virtual const char *SerializeState(int *state_len) = 0;
+  virtual void DeserializeState(const std::string &state) = 0;
 };
 
 // constant learning rate policy
@@ -20,9 +21,8 @@ public:
   double LearningRate(const uint64_t num_sample_passed) {
     return learning_rate;
   }
-  void set(double current_learning_rate) {
-    learning_rate = current_learning_rate;
-  }
+  const char *SerializeState(int *state_len);
+  void DeserializeState(const std::string &state);
 
 private:
   double learning_rate;
@@ -35,9 +35,8 @@ public:
   double LearningRate(const uint64_t num_sample_passed) {
     return std::max(learning_rate - lr_decay_a * num_sample_passed, lr_decay_b);
   }
-  void set(double current_learning_rate) {
-    learning_rate = current_learning_rate;
-  }
+  const char *SerializeState(int *state_len);
+  void DeserializeState(const std::string &state);
 
 private:
   double learning_rate;
diff --git a/paddle/optimizer/parameter_optimizer.h b/paddle/optimizer/parameter_optimizer.h
index 658b22406d6..d89c9abb791 100644
--- a/paddle/optimizer/parameter_optimizer.h
+++ b/paddle/optimizer/parameter_optimizer.h
@@ -19,7 +19,10 @@ public:
    */
   ParameterOptimizer(Tensor *parameter, LrPolicy *lr)
       : parameter_(parameter), lr_policy_(lr), num_sample_passed_(0) {}
-  virtual ~ParameterOptimizer() { delete parameter_; };
+  virtual ~ParameterOptimizer() {
+    delete parameter_;
+    delete lr_policy_;
+  }
 
   static ParameterOptimizer *Create(const std::string &config_proto,
                                     Tensor *parameter);
diff --git a/paddle/optimizer/serialization.h b/paddle/optimizer/serialization.h
index 07874502a50..21de3259a88 100644
--- a/paddle/optimizer/serialization.h
+++ b/paddle/optimizer/serialization.h
@@ -10,18 +10,6 @@
 namespace paddle {
 namespace optimizer {
 
-static unsigned CalStateSize() { return 0; }
-
-template <typename HEAD, typename... TAIL>
-unsigned CalStateSize(const HEAD& head, const TAIL&... tail) {
-  return sizeof head + CalStateSize(tail...);
-}
-
-template <typename... TAIL>
-unsigned CalStateSize(const Tensor* head, const TAIL&... tail) {
-  return head->size() + CalStateSize(tail...);
-}
-
 static void TensorToProto(const Tensor& tensor, TensorProto* proto) {
   proto->set_data_type(TensorProto::PADDLE_ELEMENT_TYPE_FLOAT32);
   std::stringstream os;
diff --git a/paddle/optimizer/sgd_optimizer.cc b/paddle/optimizer/sgd_optimizer.cc
index 8b4ea777d2d..66843ecb4b2 100644
--- a/paddle/optimizer/sgd_optimizer.cc
+++ b/paddle/optimizer/sgd_optimizer.cc
@@ -29,19 +29,20 @@ void SGDOptimizer::Update(const Tensor *gradient) {
 
 const char *SGDOptimizer::SerializeState(int *state_len) {
   SGDOptimizerState state;
-  state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_));
+  // TODO(zhihong) : add lr_policy serialization
   state.set_num_sample_passed(num_sample_passed_);
 
   TensorToProto(*parameter_, state.mutable_parameter());
   TensorToProto(*momentums_, state.mutable_momentums());
-  *state_len = CalStateSize(parameter_, momentums_);
-  return state.SerializeAsString().c_str();
+  auto str = state.SerializeAsString();
+  *state_len = str.size();
+  return str.c_str();
 }
 
 void SGDOptimizer::DeserializeState(const std::string &str) {
   SGDOptimizerState state;
   state.ParseFromString(str);
-  lr_policy_->set(state.learning_rate());
+  // TODO(zhihong) : add lr_policy DeserializeState
   num_sample_passed_ = state.num_sample_passed();
 
   ProtoToTensor(state.parameter(), parameter_);
diff --git a/paddle/optimizer/sgd_optimizer.h b/paddle/optimizer/sgd_optimizer.h
index 1d4ea46f1a4..b74a902e1aa 100644
--- a/paddle/optimizer/sgd_optimizer.h
+++ b/paddle/optimizer/sgd_optimizer.h
@@ -16,7 +16,6 @@ public:
     if (momentum_ != 0.0) {
       size_t size = parameter->size();
       // TODO: fix it with align aware allocator bind to Tensor
-      if (momentums_) delete momentums_;
       momentums_ = new Tensor(size);
     }
   }
-- 
GitLab
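
Note: this patch only declares SerializeState/DeserializeState on LrPolicy and its subclasses; the definitions are deferred, which is why the optimizers carry TODO(zhihong) markers instead of serializing the policy state. Below is a minimal, self-contained sketch of what one such implementation could look like. The plain-string state format, the ConstLrSketch name, and the member buffer that keeps the returned pointer valid are assumptions of the sketch, not anything specified by the patch.

#include <cstdint>
#include <string>

// Interface as introduced by the patch above.
class LrPolicy {
public:
  virtual ~LrPolicy() {}
  virtual double LearningRate(const uint64_t num_sample_passed) = 0;
  virtual const char *SerializeState(int *state_len) = 0;
  virtual void DeserializeState(const std::string &state) = 0;
};

// Hypothetical constant-rate policy; not the ConstLr shipped in lr_policy.h.
class ConstLrSketch : public LrPolicy {
public:
  explicit ConstLrSketch(double lr) : learning_rate_(lr) {}
  double LearningRate(const uint64_t num_sample_passed) { return learning_rate_; }
  const char *SerializeState(int *state_len) {
    // Encode the state and report its length, mirroring the
    // "serialize first, then take the size" pattern used by the optimizers.
    buffer_ = std::to_string(learning_rate_);
    *state_len = static_cast<int>(buffer_.size());
    return buffer_.c_str();  // stays valid while this policy object lives
  }
  void DeserializeState(const std::string &state) {
    learning_rate_ = std::stod(state);
  }

private:
  double learning_rate_;
  std::string buffer_;  // owns the serialized bytes between calls
};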