diff --git a/paddle/math/tests/CMakeLists.txt b/paddle/math/tests/CMakeLists.txt
index bdecba0869d9cda47cf7bcd03f898804fdb9a829..ceb96b2e250d8e04ffb2b1d8c77ad498dca91cf3 100644
--- a/paddle/math/tests/CMakeLists.txt
+++ b/paddle/math/tests/CMakeLists.txt
@@ -31,4 +31,3 @@ add_simple_unittest(test_FPException)
 add_simple_unittest(test_GpuProfiler)
 add_simple_unittest(test_BaseMatrix)
 add_simple_unittest(test_Matrix)
-add_simple_unittest(test_Matrix2)
diff --git a/paddle/optimizer/adam_optimizer.cc b/paddle/optimizer/adam_optimizer.cc
index 96cd6e4a129af59e5094225fdb6234ec1d2609e3..bfe438ec936bf30008e1eee118ca02e2c7b30770 100644
--- a/paddle/optimizer/adam_optimizer.cc
+++ b/paddle/optimizer/adam_optimizer.cc
@@ -28,7 +28,8 @@ const char *AdamOptimizer::SerializeState(int *state_len) {
 
   state.set_num_sample_passed(num_sample_passed_);
   TensorToProto(*parameter_, state.mutable_parameter());
-  TensorToProto(*velocitys_, state.mutable_momentums());
+  TensorToProto(*momentums_, state.mutable_momentums());
+  TensorToProto(*velocitys_, state.mutable_velocitys());
   auto str = state.SerializeAsString();
   *state_len = str.size();
   return str.c_str();
diff --git a/paddle/optimizer/lr_policy.h b/paddle/optimizer/lr_policy.h
index be2bf89504ec7fb133b3a1064914756ff051a588..d8e33ad37ab4c019a36f63f34babe65cf8c8fb16 100644
--- a/paddle/optimizer/lr_policy.h
+++ b/paddle/optimizer/lr_policy.h
@@ -21,8 +21,8 @@ public:
   double LearningRate(const uint64_t num_sample_passed) {
     return learning_rate;
   }
-  const char *SerializeState(int *state_len);
-  void DeserializeState(const std::string &state);
+  const char *SerializeState(int *state_len) { return nullptr; }
+  void DeserializeState(const std::string &state) {}
 
 private:
   double learning_rate;
@@ -35,8 +35,13 @@ public:
   double LearningRate(const uint64_t num_sample_passed) {
     return std::max(learning_rate - lr_decay_a * num_sample_passed, lr_decay_b);
   }
-  const char *SerializeState(int *state_len);
-  void DeserializeState(const std::string &state);
+  const char *SerializeState(int *state_len) {
+    // TODO(zhihong) : add lr_policy serialization
+    return nullptr;
+  }
+  void DeserializeState(const std::string &state) {
+    // TODO(zhihong) : add lr_policy serialization
+  }
 
 private:
   double learning_rate;
diff --git a/paddle/optimizer/parameter_optimizer.cc b/paddle/optimizer/parameter_optimizer.cc
index 38df3b75d77ea7ecb243e26bb387cdcc5cd441ca..dd018037bdafeac60633c3130b5fbaac5cf33482 100644
--- a/paddle/optimizer/parameter_optimizer.cc
+++ b/paddle/optimizer/parameter_optimizer.cc
@@ -13,13 +13,13 @@ namespace optimizer {
 ParameterOptimizer *ParameterOptimizer::Create(const std::string &config_proto,
                                                Tensor *parameter) {
   paddle::OptimizerConfig config;
-  CHECK(config.ParseFromString(config_proto) == 0)
+  CHECK(config.ParseFromString(config_proto) == true)
       << "failed parse optimizer config";
 
   auto select_lr_policy = [=](const OptimizerConfig &config) -> LrPolicy * {
-    if (config.lr_policy() == OptimizerConfig::ConstLr)
+    if (config.lr_policy() == OptimizerConfig::Const)
       return new ConstLr(config.const_lr().learning_rate());
-    if (config.lr_policy() == OptimizerConfig::LinearLr)
+    if (config.lr_policy() == OptimizerConfig::Linear)
       return new LinearLr(config.linear_lr().learning_rate(),
                           config.linear_lr().lr_decay_a(),
                           config.linear_lr().lr_decay_b());
diff --git a/paddle/optimizer/parameter_optimizer_test.cpp b/paddle/optimizer/parameter_optimizer_test.cpp
index afacd6d54a79eeea3e55b994af1441768bc03581..f599b74d71c4b94ca1c04c0d94649a32baa52311 100644
--- a/paddle/optimizer/parameter_optimizer_test.cpp
+++ b/paddle/optimizer/parameter_optimizer_test.cpp
@@ -2,11 +2,8 @@
 #include <cmath>
 #include <map>
 #include <vector>
-#include "adadelta_optimizer.h"
-#include "adagrad_optimizer.h"
-#include "adam_optimizer.h"
 #include "gtest/gtest.h"
-#include "sgd_optimizer.h"
+#include "lr_policy.h"
 
 using namespace paddle;
 using namespace paddle::optimizer;
@@ -41,12 +38,12 @@ public:
   virtual void TearDown() {}
 
   void CreateSGD() {
-    Tensor* parameter = FillTensor(kSize);
+    Tensor* parameter = FixedTensor(kSize);
     config_.set_optimizer(OptimizerConfig::SGD);
     config_.mutable_sgd()->set_momentum(0.0);
     config_.mutable_sgd()->set_decay(0.0);
     config_.mutable_sgd()->set_nesterov(false);
-    config_.set_lr_policy(OptimizerConfig::ConstLr);
+    config_.set_lr_policy(OptimizerConfig::Const);
     config_.mutable_const_lr()->set_learning_rate(0.1);
 
     std::string str = config_.SerializeAsString();
@@ -62,7 +59,7 @@ public:
     config_.mutable_adam()->set_beta_2(0.1);
     config_.mutable_adam()->set_epsilon(1e-3);
     config_.mutable_adam()->set_decay(0.0);
-    config_.set_lr_policy(OptimizerConfig::ConstLr);
+    config_.set_lr_policy(OptimizerConfig::Const);
     config_.mutable_const_lr()->set_learning_rate(0.1);
     std::string str = config_.SerializeAsString();
     ParameterOptimizer* opt = ParameterOptimizer::Create(str, parameter);
@@ -90,12 +87,13 @@ public:
   void TestCheckPoint() {
     std::map<OptimizerConfig::Optimizer, int> expected_state_len = {
-        {OptimizerConfig::SGD, kSize}, {OptimizerConfig::Adam, kSize * 3},
+        {OptimizerConfig::SGD, kSize * sizeof(float) + sizeof(double)},
+        {OptimizerConfig::Adam, kSize * 3 * sizeof(float) + sizeof(double)},
     };
     for (size_t i = 0; i < opts_.size(); ++i) {
       int state_len = 0;
       std::string state = opts_[i]->SerializeState(&state_len);
-      EXPECT_EQ(state_len, expected_state_len[opts_table_[i]]);
+      EXPECT_EQ(state_len, expected_state_len[opts_table_[i + 1]]);
       opts_[i]->DeserializeState(state);
     }
   }
 
diff --git a/paddle/optimizer/sgd_optimizer.cc b/paddle/optimizer/sgd_optimizer.cc
index 66843ecb4b24101aadf119c59d4a9bc55360592d..252f205bb07de7788b21994d7ae7dcc687b1f3c0 100644
--- a/paddle/optimizer/sgd_optimizer.cc
+++ b/paddle/optimizer/sgd_optimizer.cc
@@ -29,11 +29,10 @@ void SGDOptimizer::Update(const Tensor *gradient) {
 }
 
 const char *SGDOptimizer::SerializeState(int *state_len) {
   SGDOptimizerState state;
-  // TODO(zhihong) : add lr_policy serialization
   state.set_num_sample_passed(num_sample_passed_);
   TensorToProto(*parameter_, state.mutable_parameter());
-  TensorToProto(*momentums_, state.mutable_momentums());
+  if (momentum_ != 0.0) TensorToProto(*momentums_, state.mutable_momentums());
   auto str = state.SerializeAsString();
   *state_len = str.size();
   return str.c_str();
@@ -42,9 +41,8 @@ const char *SGDOptimizer::SerializeState(int *state_len) {
 
 void SGDOptimizer::DeserializeState(const std::string &str) {
   SGDOptimizerState state;
   state.ParseFromString(str);
-  // TODO(zhihong) : add lr_policy DeserializeState
   num_sample_passed_ = state.num_sample_passed();
-  ProtoToTensor(state.parameter(), parameter_);
-  ProtoToTensor(state.parameter(), momentums_);
+  ProtoToTensor(state.parameter(), parameter_);
+  if (momentum_ != 0.0) ProtoToTensor(state.momentums(), momentums_);
 }
diff --git a/proto/OptimizerConfig.proto b/proto/OptimizerConfig.proto
index aab2fdad693f1e2ec5fdf9ae0f1843e6f196aa7f..56bda35be47729a57ce790c8e7ccef196d55a3e2 100644
--- a/proto/OptimizerConfig.proto
+++ b/proto/OptimizerConfig.proto
@@ -53,12 +53,12 @@ message AdamConfig {
   optional double decay = 44;
 }
 
-message ConstLr {
+message ConstLrConfig {
   // learninRate Policy
   required double learning_rate = 1 [default = 1.0];
 }
 
-message LinearLr {
+message LinearLrConfig {
   // learninRate Policy
   required double learning_rate = 1 [default = 1.0];
   optional double lr_decay_a = 2;
@@ -139,12 +139,12 @@ message OptimizerConfig {
   optional AdamConfig adam = 6;
 
   enum LrPolicy {
-    ConstLr = 0;
-    LinearLr = 1;
+    Const = 0;
+    Linear = 1;
   }
   required LrPolicy lr_policy = 11;
-  optional ConstLr const_lr = 12;
-  optional LinearLr linear_lr = 13;
+  optional ConstLrConfig const_lr = 12;
+  optional LinearLrConfig linear_lr = 13;
 
   // common config of optimizer
   // gradient clip when L2 exceeding value