Commit b72e8aa3 authored by D dzhwinter

"seperate serialization proto state"

Parent b7e68e06
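For context, this commit splits the single shared OptimizerState protobuf message into one state message per optimizer (SGDOptimizerState, AdadeltaOptimizerState, AdagradOptimizerState, AdamOptimizerState) and renames DeSerializeState to DeserializeState. Below is a minimal sketch of the checkpoint round-trip the new code follows, shown for SGD; the helper names SaveSGDState/LoadSGDState and the generated header name OptimizerConfig.pb.h are assumptions for illustration, not part of the diff.

#include <string>
#include "OptimizerConfig.pb.h"  // assumed name of the header generated from OptimizerConfig.proto
#include "serialization.h"       // TensorToProto / ProtoToTensor
#include "tensor.h"

namespace paddle {
namespace optimizer {

// Save: pack the learning-rate bookkeeping and the state tensors into the
// optimizer-specific message, then serialize it to a byte string.
std::string SaveSGDState(double lr, double num_sample_passed,
                         const Tensor& parameter, const Tensor& momentums) {
  SGDOptimizerState state;
  state.set_learning_rate(lr);
  state.set_num_sample_passed(num_sample_passed);
  TensorToProto(parameter, state.mutable_parameter());
  TensorToProto(momentums, state.mutable_momentums());
  return state.SerializeAsString();
}

// Restore: parse the byte string back and copy the tensors out again.
void LoadSGDState(const std::string& str, Tensor* parameter, Tensor* momentums) {
  SGDOptimizerState state;
  state.ParseFromString(str);
  ProtoToTensor(state.parameter(), parameter);
  ProtoToTensor(state.momentums(), momentums);
}

}  // namespace optimizer
}  // namespace paddle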
......@@ -6,12 +6,13 @@ set(OPITMIZER_SRCS
adam_optimizer.cc
optimizer.cc
parameter_optimizer.cc
sgd_optmizer.cc
sgd_optimizer.cc
)
add_library(optimizer STATIC ${OPITMIZER_SRCS})
add_dependencies(optimizer gen_proto_cpp)
add_simple_unittest(tensor_test)
add_simple_unittest(serialization_test)
add_simple_unittest(parameter_optimizer_test)
add_dependencies(parameter_optimizer_test optimizer)
......@@ -17,16 +17,10 @@ public:
TensorT(size_t size) : height_(1), width_(size) { data_ = new T[size]; }
TensorT(T* data, size_t size) : height_(1), width_(size), data_(data) {}
TensorT(T* data, size_t h, size_t w) : height_(h), width_(w), data_(data_) {}
TensorT(const TensorT& t)
: TensorT(1, t.size(), 0, t.get_buffer(), false, false) {}
~TensorT() {
if (data_) delete data_;
}
TensorT& operator=(const TensorT& t) {
this->width_ = t.size();
this->data_ = t.get_buffer();
}
T* get_buffer() { return this->data_; }
T& operator[](const size_t idx) {
CHECK(idx >= 0 && idx < this->width_) << "out of index range";
......
......@@ -26,7 +26,7 @@ void AdadeltaOptimizer::Update(const Tensor* gradient) {
}
const char* AdadeltaOptimizer::SerializeState(int* state_len) {
OptimizerState state;
AdadeltaOptimizerState state;
state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_));
state.set_num_sample_passed(num_sample_passed_);
......@@ -34,22 +34,14 @@ const char* AdadeltaOptimizer::SerializeState(int* state_len) {
TensorToProto(*accum_gradient_, state.mutable_accum_gradient());
TensorToProto(*accum_delta_, state.mutable_accum_delta());
TensorToProto(*update_delta_, state.mutable_update_delta());
state.set_nesterov(epsilon_);
state.set_momentum(rho_);
state.set_decay(decay_);
// can be used when memory alignment to system
*state_len += CalStateSize(parameter_,
accum_gradient_,
accum_delta_,
update_delta_,
rho_,
epsilon_,
decay_);
*state_len =
CalStateSize(parameter_, accum_gradient_, accum_delta_, update_delta_);
return state.SerializeAsString().c_str();
}
void AdadeltaOptimizer::DeSerializeState(const std::string& str) {
OptimizerState state;
void AdadeltaOptimizer::DeserializeState(const std::string& str) {
AdadeltaOptimizerState state;
state.ParseFromString(str);
lr_policy_->set(state.learning_rate());
num_sample_passed_ = state.num_sample_passed();
......@@ -58,6 +50,7 @@ void AdadeltaOptimizer::DeSerializeState(const std::string& str) {
ProtoToTensor(state.accum_gradient(), accum_gradient_);
ProtoToTensor(state.accum_delta(), accum_delta_);
ProtoToTensor(state.update_delta(), update_delta_);
}
} // namespace optimizer
} // namespace optimizer
} // namespace paddle
......@@ -13,7 +13,7 @@ public:
rho_(rho),
epsilon_(epsilon),
decay_(decay) {
size_t size = p->size();
size_t size = parameter->size();
if (accum_gradient_) delete accum_gradient_;
accum_gradient_ = new Tensor(size);
if (accum_delta_) delete accum_delta_;
......@@ -28,7 +28,7 @@ public:
}
void Update(const Tensor *gradient);
const char *SerializeState(int *state_len);
void DeSerializeState(const std::string &state);
void DeserializeState(const std::string &state);
private:
Tensor *accum_gradient_;
......
......@@ -17,8 +17,25 @@ void AdagradOptimizer::Update(const Tensor* gradient) {
learning_rate * decay_ * param[i];
}
}
const char* SGDOptimizer::SerializeState(int* state_len) { NIMPL; }
const char* AdagradOptimizer::SerializeState(int* state_len) {
AdagradOptimizerState state;
state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_));
state.set_num_sample_passed(num_sample_passed_);
TensorToProto(*parameter_, state.mutable_parameter());
TensorToProto(*accum_gradient_, state.mutable_accum_gradient());
*state_len = CalStateSize(parameter_, accum_gradient_);
return state.SerializeAsString().c_str();
}
void AdagradOptimizer::DeserializeState(const std::string& str) {
AdagradOptimizerState state;
state.ParseFromString(str);
lr_policy_->set(state.learning_rate());
num_sample_passed_ = state.num_sample_passed();
ProtoToTensor(state.parameter(), parameter_);
ProtoToTensor(state.accum_gradient(), accum_gradient_);
}
void SGDOptimizer::DeSerializeState(const std::string& str) { NIMPL; }
// namespace optimizer
} // namespace optimizer
} // namespace paddle
......@@ -12,7 +12,7 @@ public:
double epsilon,
double decay)
: ParameterOptimizer(parameter, lr), epsilon_(epsilon), decay_(decay) {
size_t size = p->size();
size_t size = parameter->size();
if (accum_gradient_) delete accum_gradient_;
accum_gradient_ = new Tensor(size);
}
......@@ -21,7 +21,7 @@ public:
}
void Update(const Tensor *gradient);
const char *SerializeState(int *state_len);
void DeSerializeState(const std::string &state);
void DeserializeState(const std::string &state);
private:
Tensor *accum_gradient_;
......
......@@ -22,32 +22,26 @@ void AdamOptimizer::Update(const Tensor *gradient) {
}
}
const char *AdadeltaOptimizer::SerializeState(int *state_len) {
OptimizerState state;
const char *AdamOptimizer::SerializeState(int *state_len) {
AdamOptimizerState state;
state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_));
state.set_num_sample_passed(num_sample_passed_);
TensorToProto(*parameter_, state.mutable_parameter());
TensorToProto(*velocitys_, state.mutable_momentums());
state.set_beta_1(beta_1_);
state.set_beta_2(beta_2_);
state.set_decay(decay_);
*state_len += CalStateSize(
parameter_, momentums_, velocitys_, beta_1_, beta_2, epsilon_ decay_);
*state_len = CalStateSize(parameter_, momentums_, velocitys_);
return state.SerializeAsString().c_str();
}
void AdadeltaOptimizer::DeSerializeState(const std::string &str) {
OptimizerState state;
void AdamOptimizer::DeserializeState(const std::string &str) {
AdamOptimizerState state;
state.ParseFromString(str);
lr_policy_->set(state.learning_rate());
num_sample_passed_ = state.num_sample_passed();
ProtoToTensor(state.parameter(), parameter_);
ProtoToTensor(state.velocitys(), velocitys__);
beta_1_ = state.beta_1();
beta_2_ = state.beta_2();
ProtoToTensor(state.velocitys(), velocitys_);
}
} // namespace optimizer
} // namespace paddle
......@@ -8,7 +8,8 @@ namespace optimizer {
class AdamOptimizer : public ParameterOptimizer {
public:
AdamOptimizer(Tensor *parameter,
LrPolicy *lr double beta_1,
LrPolicy *lr,
double beta_1,
double beta_2,
double epsilon,
double decay)
......@@ -17,7 +18,7 @@ public:
beta_2_(beta_2),
epsilon_(epsilon),
decay_(decay) {
size_t size = p->size();
size_t size = parameter->size();
momentums_ = new Tensor(size);
velocitys_ = new Tensor(size);
}
......@@ -26,6 +27,8 @@ public:
if (velocitys_) delete velocitys_;
}
void Update(const Tensor *gradient);
const char *SerializeState(int *state_len);
void DeserializeState(const std::string &state);
private:
Tensor *momentums_;
......
......@@ -49,7 +49,7 @@ paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
optimizer->impl = ParameterOptimizer::Create(config, parameter);
if (state != nullptr) {
std::string s(state, state + state_len);
optimizer->impl->DeSerializeState(s);
optimizer->impl->DeserializeState(s);
}
return optimizer;
}
......
......@@ -75,14 +75,14 @@ int paddle_update_parameter(paddle_optimizer* o,
int num_bytes);
/**
* @brief optimizer instance
* @brief optimizer for getting the parameter buffer
* @param param_buffer, initilized parameter buffer
* @return return content length
*/
int paddle_optimizer_get_weights(paddle_optimizer* o, void** param_buffer);
/**
* @brief optimzizer instance
* @brief optimizer for saving training state
* @param training state for receive SerializeState
* @return return state_buffer length
*/
......
......@@ -62,7 +62,7 @@ ParameterOptimizer *ParameterOptimizer::Create(const std::string &config_proto,
<< "have not select any Optimizer. use SGDOptimizer in default";
return new SGDOptimizer(parameter, lr, 0.0, 0.0, false);
};
return select_optimizer(config);
return select_optimizer(parameter, config);
}
float *ParameterOptimizer::get_weight(int *param_size) const {
......
......@@ -8,10 +8,6 @@
#include "serialization.h"
#include "tensor.h"
// Not Implemented Yet, macro
#define NIMPL crash(__PRETTY_FUNCTION__, " not implemented yet")
namespace paddle {
namespace optimizer {
......@@ -30,7 +26,7 @@ public:
virtual void Update(const Tensor *gradient) = 0;
virtual float *get_weight(int *param_size) const;
virtual const char *SerializeState(int *state_len) = 0;
virtual void DeSerializeState(const std::string &state) = 0;
virtual void DeserializeState(const std::string &state) = 0;
protected:
Tensor *parameter_;
......
#include "parameter_optimizer.h"
#include <cmath>
#include <tuple>
#include <map>
#include <vector>
#include "adadelta_optimizer.h"
#include "adagrad_optimizer.h"
#include "adam_optimizer.h"
#include "gtest/gtest.h"
#include "sgd_optimizer.h"
using namespace paddle;
using namespace paddle::optimizer;
Tensor* FillTensor(size_t size) {
Tensor* param = new Tensor(size);
Tensor& p = *param;
for (auto i = 0; i < p.size(); ++i) {
for (size_t i = 0; i < p.size(); ++i) {
p[i] = (float)rand() / (float)RAND_MAX;
}
return param;
......@@ -22,7 +23,7 @@ Tensor* FillTensor(size_t size) {
Tensor* FixedTensor(size_t size) {
Tensor* param = new Tensor(size);
Tensor& p = *param;
for (auto i = 0; i < p.size(); ++i) {
for (size_t i = 0; i < p.size(); ++i) {
p[i] = i;
}
return param;
......@@ -31,7 +32,7 @@ Tensor* FixedTensor(size_t size) {
class OptimizerTest : public testing::Test {
public:
// init tensor shape
const size_t size = 5;
const size_t kSize = 5;
virtual void SetUp() {
CreateSGD();
......@@ -40,68 +41,77 @@ public:
virtual void TearDown() {}
void CreateSGD() {
config.set_optimizer(OptimizerConfig::SGD);
config.mutable_sgd()->set_momentum(0.0);
config.mutable_sgd()->set_decay(0.0);
config.mutable_sgd()->set_nesterov(false);
config.set_lr_policy(OptimizerConfig::ConstLr);
config.mutable_const_lr()->set_learning_rate(0.1);
Tensor* parameter = FillTensor(kSize);
config_.set_optimizer(OptimizerConfig::SGD);
config_.mutable_sgd()->set_momentum(0.0);
config_.mutable_sgd()->set_decay(0.0);
config_.mutable_sgd()->set_nesterov(false);
config_.set_lr_policy(OptimizerConfig::ConstLr);
config_.mutable_const_lr()->set_learning_rate(0.1);
ParameterOptimizer* opt =
ParameterOptimizer::Create(config.SerializeAsString());
opts.push_back(opt);
ParameterOptimizer::Create(config_.SerializeAsString(), parameter);
opts_.push_back(opt);
opts_table_[opts_.size()] = OptimizerConfig::SGD;
}
void CreateAdam() {
config.set_optimizer(OptimizerConfig::Adam);
config.mutable_adam()->set_beta_1(0.9);
config.mutable_adam()->set_beta_2(0.1);
config.mutable_adam()->set_epsilon(1e-3);
config.mutable_adam()->set_decay(0.0);
config.set_lr_policy(OptimizerConfig::ConstLr);
config.mutable_const_lr()->set_learning_rate(0.1);
Tensor* parameter = FixedTensor(kSize);
config_.set_optimizer(OptimizerConfig::Adam);
config_.mutable_adam()->set_beta_1(0.9);
config_.mutable_adam()->set_beta_2(0.1);
config_.mutable_adam()->set_epsilon(1e-3);
config_.mutable_adam()->set_decay(0.0);
config_.set_lr_policy(OptimizerConfig::ConstLr);
config_.mutable_const_lr()->set_learning_rate(0.1);
ParameterOptimizer* opt =
ParameterOptimizer::Create(config.SerializeAsString());
opts.push_back(opt);
}
void TestSetWeight() {
Tensor* p = FillTensor(size);
for (size_t i = 0; i < opts.size(); ++i) {
opts[i]->set_weight(p);
}
ParameterOptimizer::Create(config_.SerializeAsString(), parameter);
opts_.push_back(opt);
opts_table_[opts_.size()] = OptimizerConfig::Adam;
}
void TestGetWeight() {
Tensor* p = FixedTensor(size);
for (size_t i = 0; i < opts.size(); ++i) {
opts[i]->set_weight(p);
}
for (size_t i = 0; i < opts.size(); ++i) {
Tensor* p = FixedTensor(kSize);
for (size_t i = 0; i < opts_.size(); ++i) {
int s = 0;
float* newp = (float*)opts[i]->get_weight(&s);
for (size_t j = 0; j < size; ++j) {
float* newp = (float*)opts_[i]->get_weight(&s);
for (size_t j = 0; j < kSize; ++j) {
EXPECT_EQ(newp[j], (*p)[j]);
}
}
}
void TestUpdate() {
Tensor* g = FixedTensor(size);
for (size_t i = 0; i < opts.size(); ++i) {
opts[i]->Update(g);
Tensor* g = FixedTensor(kSize);
for (size_t i = 0; i < opts_.size(); ++i) {
opts_[i]->Update(g);
}
}
void TestCheckPoint() {
std::map<OptimizerConfig::Optimizer, int> expected_state_len = {
{OptimizerConfig::SGD, kSize}, {OptimizerConfig::Adam, kSize * 3},
};
for (size_t i = 0; i < opts_.size(); ++i) {
int state_len = 0;
std::string state = opts_[i]->SerializeState(&state_len);
EXPECT_EQ(state_len, expected_state_len[opts_table_[i]]);
opts_[i]->DeserializeState(state);
}
}
private:
std::vector<ParameterOptimizer*> opts;
OptimizerConfig config;
std::vector<ParameterOptimizer*> opts_;
std::map<int, OptimizerConfig::Optimizer> opts_table_;
OptimizerConfig config_;
};
TEST_F(OptimizerTest, test_set_get_weight) {
TestSetWeight();
TestGetWeight();
}
TEST_F(OptimizerTest, TestGetWeight) { TestGetWeight(); }
TEST_F(OptimizerTest, TestUpdate) { TestUpdate(); }
TEST_F(OptimizerTest, TestCheckPoint) { TestCheckPoint(); }
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
......
......@@ -10,15 +10,16 @@
namespace paddle {
namespace optimizer {
static unsigned CalStateSize(int* state_len) { return 0; }
static unsigned CalStateSize() { return 0; }
template <typename HEAD, typename... TAIL>
unsigned CalStateSize(const HEAD& head, const TAIL&... tail) {
if (std::is_fundamental<HEAD>::value) {
return sizeof head + CalStateSize(tail...);
} else {
return sizeof(head[0]) * head->size() + CalStateSize(tail...);
}
}
template <typename... TAIL>
unsigned CalStateSize(const Tensor* head, const TAIL&... tail) {
return head->size() + CalStateSize(tail...);
}
static void TensorToProto(const Tensor& tensor, TensorProto* proto) {
......@@ -32,7 +33,6 @@ static void TensorToProto(const Tensor& tensor, TensorProto* proto) {
}
static void ProtoToTensor(const TensorProto& proto, Tensor* tensor) {
CHECK(proto.size() == tensor->size()) << "unmatch shape of proto and tensor";
std::stringstream sin;
for (auto i = 0; i < proto.content_size(); ++i) {
sin << proto.content(i);
......
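The new CalStateSize overloads above replace the old std::is_fundamental branch with a Tensor*-specific overload plus an empty base case, so the reported state length is simply the sum of the element counts of the tensors passed in. A self-contained sketch of how the recursion unrolls; MiniTensor is a hypothetical stand-in for the real Tensor class, used only so the example compiles on its own.

#include <cstddef>
#include <iostream>

struct MiniTensor {
  size_t n;
  size_t size() const { return n; }
};

// Base case: no arguments left, nothing to count.
static unsigned CalStateSize() { return 0; }

// Each tensor argument contributes its element count, then recurse on the rest.
template <typename... TAIL>
unsigned CalStateSize(const MiniTensor* head, const TAIL&... tail) {
  return head->size() + CalStateSize(tail...);
}

int main() {
  MiniTensor parameter{5}, momentums{5}, velocitys{5};
  // Mirrors AdamOptimizer::SerializeState: parameter + momentums + velocitys.
  std::cout << CalStateSize(&parameter, &momentums, &velocitys) << "\n";  // prints 15
  return 0;
}

This element count is what parameter_optimizer_test checks against (kSize for SGD, kSize * 3 for Adam).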
#include "serialization.h"
#include "gtest/gtest.h"
#include "tensor.h"
using namespace paddle;
using namespace paddle::optimizer;
TEST(Tensor, indexer) {
Tensor t(3);
for (auto i = 0; i < t.size(); ++i) {
TEST(TensorToProto, Case1) {
Tensor t(3), t1(3);
for (size_t i = 0; i < t.size(); ++i) {
t[i] = i;
t1[i] = 0;
}
TensorProto proto;
TensorToProto(t, &proto);
ProtoToTensor(proto, &t1);
for (size_t i = 0; i < t1.size(); ++i) {
EXPECT_EQ(t1[i], t[i]);
}
ASSERT_EQ(t[2], 2);
ASSERT_EQ(t[1], 1);
}
int main(int argc, char** argv) {
......
#include "serialization.h"
#include "sgd_optimizer.h"
#include "serialization.h"
namespace paddle {
namespace optimizer {
......@@ -28,29 +28,24 @@ void SGDOptimizer::Update(const Tensor *gradient) {
}
const char *SGDOptimizer::SerializeState(int *state_len) {
OptimizerState state;
SGDOptimizerState state;
state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_));
state.set_num_sample_passed(num_sample_passed_);
TensorToProto(*parameter_, state.mutable_parameter());
TensorToProto(*momentums_, state.mutable_momentums());
state.set_momentum(momentum_);
state.set_decay(decay_);
state.set_nesterov(nesterov_);
*state_len +=
CalStateSize(parameter_, momentums_, momentum_, decay_, nesterov_);
*state_len = CalStateSize(parameter_, momentums_);
return state.SerializeAsString().c_str();
}
void SGDOptimizer::DeSerializeState(const std::string &str) {
OptimizerState state;
void SGDOptimizer::DeserializeState(const std::string &str) {
SGDOptimizerState state;
state.ParseFromString(str);
lr_policy_->set(state.learning_rate());
num_sample_passed_ = state.num_sample_passed();
ProtoToTensor(state.parameter(), parameter_);
ProtoToTensor(state.parameter(), momentums_);
momentum_ = state.momentum();
}
} // namespace optimizer
......
......@@ -14,7 +14,7 @@ public:
decay_(d),
nesterov_(n) {
if (momentum_ != 0.0) {
size_t size = p->size();
size_t size = parameter->size();
// TODO: fix it with align aware allocator bind to Tensor
if (momentums_) delete momentums_;
momentums_ = new Tensor(size);
......@@ -25,7 +25,7 @@ public:
}
void Update(const Tensor* gradient);
const char* SerializeState(int* state_len);
void DeSerializeState(const std::string& state);
void DeserializeState(const std::string& state);
private:
Tensor* momentums_;
......
......@@ -78,36 +78,51 @@ enum DataType {
repeated bytes content = 2;
}
message OptimizerState {
message SGDOptimizerState {
// learning rate policy
optional double learning_rate = 101;
optional double lr_decay_a = 102;
optional double lr_decay_b = 103;
optional double num_sample_passed = 104;
// momentum
optional TensorProto parameter = 105;
optional TensorProto momentums = 1;
// state
optional TensorProto parameter = 1;
optional TensorProto momentums = 2;
}
// adadelta
message AdadeltaOptimizerState {
// learning rate policy
optional double learning_rate = 101;
optional double lr_decay_a = 102;
optional double lr_decay_b = 103;
optional double num_sample_passed = 104;
// state
optional TensorProto parameter = 1;
optional TensorProto accum_gradient = 2;
optional TensorProto accum_delta = 3;
optional TensorProto update_delta = 4;
}
// adam
optional TensorProto velocitys = 5;
// momentum
optional double momentum = 6;
optional double decay = 7;
optional bool nesterov = 8;
// adadelta
optional double rho = 9;
optional double epsilon = 10;
// adam
optional double beta_1 = 11;
optional double beta_2 = 12;
message AdagradOptimizerState {
// learning rate policy
optional double learning_rate = 101;
optional double lr_decay_a = 102;
optional double lr_decay_b = 103;
optional double num_sample_passed = 104;
// state
optional TensorProto parameter = 1;
optional TensorProto accum_gradient = 2;
}
message AdamOptimizerState {
// learning rate policy
optional double learning_rate = 101;
optional double lr_decay_a = 102;
optional double lr_decay_b = 103;
optional double num_sample_passed = 104;
// state
optional TensorProto parameter = 1;
optional TensorProto momentums = 2;
optional TensorProto velocitys = 3;
}
message OptimizerConfig {
......
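Each of the new per-optimizer messages keeps the learning-rate-policy bookkeeping at the same tag numbers (101-104) and numbers its state tensors from 1, so the common fields line up across optimizers. A short, hedged round-trip sketch at the protobuf level, analogous to what the serialization_test above does for TensorProto; the generated header name OptimizerConfig.pb.h is assumed, and namespace qualification is omitted because the proto package is not shown in this hunk.

#include <cassert>
#include <string>
#include "OptimizerConfig.pb.h"  // assumed generated header name

int main() {
  AdamOptimizerState saved;
  saved.set_learning_rate(0.1);
  saved.set_num_sample_passed(1024);

  // Serialize to a byte string, as AdamOptimizer::SerializeState does ...
  std::string bytes = saved.SerializeAsString();

  // ... then parse it back, as AdamOptimizer::DeserializeState does.
  AdamOptimizerState restored;
  restored.ParseFromString(bytes);
  assert(restored.learning_rate() == saved.learning_rate());
  assert(restored.num_sample_passed() == saved.num_sample_passed());
  return 0;
}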