diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index ae7359073494bd9cb6b70b12af4daca064179556..51ffba5c74d82b7f24d5ab6218e47479c4d18658 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -72,21 +72,34 @@ func newOptimizer(paramWithConfigs ParameterWithConfig, State []byte) *optimizer } o.config = c - o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)), - C.paddle_element_type(p.ElementType), cbuffer, C.int(paramBufferSize), (*C.char)(cstate), C.int(len(s))) + o.opt = C.paddle_create_optimizer( + (*C.uchar)(&c[0]), + C.int(len(c)), + C.paddle_element_type(p.ElementType), + cbuffer, + C.int(paramBufferSize), + (*C.char)(cstate), + C.int(len(s)), + ) return o } func (o *optimizer) GetWeights() []byte { var buffer unsafe.Pointer + // we do not own the buffer, no need to free later. bufferLen := C.paddle_optimizer_get_weights(o.opt, &buffer) return cArrayToSlice(buffer, int(bufferLen)*C.sizeof_float) } func (o *optimizer) GetStates() []byte { var cbuffer *C.char + // we owns the state buffer, need to free later. cbufferLen := C.paddle_optimizer_get_state(o.opt, &cbuffer) - return cArrayToSlice(unsafe.Pointer(cbuffer), int(cbufferLen)) + buf := cArrayToSlice(unsafe.Pointer(cbuffer), int(cbufferLen)) + cpy := make([]byte, len(buf)) + copy(cpy, buf) + C.free(unsafe.Pointer(cbuffer)) + return cpy } func (o *optimizer) UpdateParameter(g Gradient) error { diff --git a/go/pserver/optimizer_test.go b/go/pserver/optimizer_test.go index d001e6993e6aed2f5829c1b86928af30f4900c8a..565f56dc286d214e7e9a3ddce389d92d21569cd5 100644 --- a/go/pserver/optimizer_test.go +++ b/go/pserver/optimizer_test.go @@ -15,8 +15,12 @@ package pserver import ( + "encoding/binary" "io/ioutil" + "math" "testing" + + "github.com/stretchr/testify/assert" ) func TestOptimizerCreateRelease(t *testing.T) { @@ -36,3 +40,39 @@ func TestOptimizerCreateRelease(t *testing.T) { o := newOptimizer(param, nil) o.Cleanup() } + +func float32Bytes(float float32) []byte { + bits := math.Float32bits(float) + bytes := make([]byte, 4) + binary.LittleEndian.PutUint32(bytes, bits) + return bytes +} + +func TestOptimizerState(t *testing.T) { + p := Parameter{ + Name: "a", + ElementType: Int32, + } + weights := float32Bytes(100) + p.Content = weights + config, err := ioutil.ReadFile("./client/c/test/testdata/optimizer.pb") + if err != nil { + t.Fatalf("read optimizer proto failed") + } + param := ParameterWithConfig{ + Param: p, + Config: config, + } + o := newOptimizer(param, nil) + s := o.GetStates() + + // clear param content and check if the state is restored. + param.Param.Content = float32Bytes(300) + o1 := newOptimizer(param, s) + s1 := o1.GetStates() + assert.Equal(t, s, s1) + assert.Equal(t, weights, o.GetWeights()) + assert.Equal(t, weights, o1.GetWeights()) + o.Cleanup() + o1.Cleanup() +} diff --git a/go/pserver/service.go b/go/pserver/service.go index 25751540a9a2dff043c14e0912bfab1aaa938ab4..29e953acdd8ae16d13cf2307e212f8a18f0f2190 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -297,6 +297,13 @@ func (s *Service) checkpoint() (err error) { return } + if _, err = os.Stat(s.checkpointPath); os.IsNotExist(err) { + err = os.MkdirAll(s.checkpointPath, os.ModePerm) + if err != nil { + return + } + } + id := uuid.NewV4().String() p := path.Join(s.checkpointPath, id) f, err := os.Create(p) diff --git a/paddle/optimizer/adadelta_optimizer.cc b/paddle/optimizer/adadelta_optimizer.cc index 6eec5d846fa5ef6b25e7646200dad1d452dda806..34913c405075ed72af30ed056f74e8b4d7482488 100644 --- a/paddle/optimizer/adadelta_optimizer.cc +++ b/paddle/optimizer/adadelta_optimizer.cc @@ -25,19 +25,17 @@ void AdadeltaOptimizer::Update(const Tensor* gradient) { } } -const char* AdadeltaOptimizer::SerializeState(int* state_len) { +std::string AdadeltaOptimizer::SerializeState() { AdadeltaOptimizerState state; state.set_num_sample_passed(num_sample_passed_); - std::string lr_str = this->lr_policy_->SerializeState(state_len); + std::string lr_str = this->lr_policy_->SerializeState(); state.mutable_lr_state()->ParseFromString(lr_str); TensorToProto(*parameter_, state.mutable_parameter()); TensorToProto(*accum_gradient_, state.mutable_accum_gradient()); TensorToProto(*accum_delta_, state.mutable_accum_delta()); TensorToProto(*update_delta_, state.mutable_update_delta()); - auto str = state.SerializeAsString(); - *state_len += str.size(); - return str.c_str(); + return state.SerializeAsString(); } void AdadeltaOptimizer::DeserializeState(const std::string& str) { diff --git a/paddle/optimizer/adadelta_optimizer.h b/paddle/optimizer/adadelta_optimizer.h index 1d5eab097f57d049855dd171a1aa6f74c48ae0e7..bc634ee46d60abc9ffc4a31abac5c2f8edaf7aba 100644 --- a/paddle/optimizer/adadelta_optimizer.h +++ b/paddle/optimizer/adadelta_optimizer.h @@ -23,7 +23,7 @@ public: if (update_delta_) delete update_delta_; } void Update(const Tensor *gradient); - const char *SerializeState(int *state_len); + std::string SerializeState(); void DeserializeState(const std::string &state); private: diff --git a/paddle/optimizer/adagrad_optimizer.cc b/paddle/optimizer/adagrad_optimizer.cc index 5b92610ac547ee11cedf2e49e4d7f1db4b2da646..d915ffb8705eaa96bc96b8071a2c534d4d472273 100644 --- a/paddle/optimizer/adagrad_optimizer.cc +++ b/paddle/optimizer/adagrad_optimizer.cc @@ -17,17 +17,15 @@ void AdagradOptimizer::Update(const Tensor* gradient) { learning_rate * decay_ * param[i]; } } -const char* AdagradOptimizer::SerializeState(int* state_len) { +std::string AdagradOptimizer::SerializeState() { AdagradOptimizerState state; state.set_num_sample_passed(num_sample_passed_); - std::string lr_str = this->lr_policy_->SerializeState(state_len); + std::string lr_str = this->lr_policy_->SerializeState(); state.mutable_lr_state()->ParseFromString(lr_str); TensorToProto(*parameter_, state.mutable_parameter()); TensorToProto(*accum_gradient_, state.mutable_accum_gradient()); - auto str = state.SerializeAsString(); - *state_len += str.size(); - return str.c_str(); + return state.SerializeAsString(); } void AdagradOptimizer::DeserializeState(const std::string& str) { diff --git a/paddle/optimizer/adagrad_optimizer.h b/paddle/optimizer/adagrad_optimizer.h index 15d0a965ad0c6967e73b14b465168fa66eb8fba3..b2935f8aff87f710f508c5c5757dd36526ca63f9 100644 --- a/paddle/optimizer/adagrad_optimizer.h +++ b/paddle/optimizer/adagrad_optimizer.h @@ -19,7 +19,7 @@ public: if (accum_gradient_) delete accum_gradient_; } void Update(const Tensor *gradient); - const char *SerializeState(int *state_len); + std::string SerializeState(); void DeserializeState(const std::string &state); private: diff --git a/paddle/optimizer/adam_optimizer.cc b/paddle/optimizer/adam_optimizer.cc index 1ebb6b1e0f7b4edcbac1b28319fd4de576f85f6a..18e5896a22dc8a3c6292293fffc36ca9e3737b4c 100644 --- a/paddle/optimizer/adam_optimizer.cc +++ b/paddle/optimizer/adam_optimizer.cc @@ -22,18 +22,16 @@ void AdamOptimizer::Update(const Tensor *gradient) { } } -const char *AdamOptimizer::SerializeState(int *state_len) { +std::string AdamOptimizer::SerializeState() { AdamOptimizerState state; - std::string lr_str = this->lr_policy_->SerializeState(state_len); + std::string lr_str = this->lr_policy_->SerializeState(); state.mutable_lr_state()->ParseFromString(lr_str); state.set_num_sample_passed(num_sample_passed_); TensorToProto(*parameter_, state.mutable_parameter()); TensorToProto(*momentums_, state.mutable_momentums()); TensorToProto(*velocitys_, state.mutable_velocitys()); - auto str = state.SerializeAsString(); - *state_len += str.size(); - return str.c_str(); + return state.SerializeAsString(); } void AdamOptimizer::DeserializeState(const std::string &str) { diff --git a/paddle/optimizer/adam_optimizer.h b/paddle/optimizer/adam_optimizer.h index 0ea4c8bb8470504282b4d6c12039791ce896e401..d25cdc0731f65e9875d2fbf67783cce62d88af60 100644 --- a/paddle/optimizer/adam_optimizer.h +++ b/paddle/optimizer/adam_optimizer.h @@ -25,7 +25,7 @@ public: if (velocitys_) delete velocitys_; } void Update(const Tensor *gradient); - const char *SerializeState(int *state_len); + std::string SerializeState(); void DeserializeState(const std::string &state); private: diff --git a/paddle/optimizer/lr_policy.h b/paddle/optimizer/lr_policy.h index 036c376e10f465c2866a230caf9224f4af5478bc..bbb1ee48214cecdc6b6cd2a400cc9d12d5e8b64a 100644 --- a/paddle/optimizer/lr_policy.h +++ b/paddle/optimizer/lr_policy.h @@ -10,7 +10,7 @@ class LrPolicy { public: virtual ~LrPolicy() {} virtual double LearningRate(const uint64_t num_sample_passed) = 0; - virtual const char *SerializeState(int *state_len) = 0; + virtual std::string SerializeState() = 0; virtual void DeserializeState(const std::string &state) = 0; }; @@ -21,12 +21,10 @@ public: double LearningRate(const uint64_t num_sample_passed) { return learning_rate_; } - const char *SerializeState(int *state_len) { + std::string SerializeState() { LrPolicyState state; state.set_learning_rate(learning_rate_); - auto str = state.SerializeAsString(); - *state_len = str.size(); - return str.c_str(); + return state.SerializeAsString(); } void DeserializeState(const std::string &str) { LrPolicyState state; @@ -46,14 +44,12 @@ public: return std::max(learning_rate_ - lr_decay_a_ * num_sample_passed, lr_decay_b_); } - const char *SerializeState(int *state_len) { + std::string SerializeState() { LrPolicyState state; state.set_learning_rate(learning_rate_); state.set_lr_decay_a(lr_decay_a_); state.set_lr_decay_b(lr_decay_b_); - auto str = state.SerializeAsString(); - *state_len = str.size(); - return str.c_str(); + return state.SerializeAsString(); } void DeserializeState(const std::string &str) { LrPolicyState state; diff --git a/paddle/optimizer/optimizer.cc b/paddle/optimizer/optimizer.cc index eb7125adee769c97e16986cabf06ea389bf4c143..a2af139d012433214b825bd68289708098b76da8 100644 --- a/paddle/optimizer/optimizer.cc +++ b/paddle/optimizer/optimizer.cc @@ -1,4 +1,7 @@ #include "optimizer.h" +#include +#include +#include #include #include "parameter_optimizer.h" @@ -78,7 +81,13 @@ int paddle_optimizer_get_weights(paddle_optimizer* o, void** param_buffer) { } int paddle_optimizer_get_state(paddle_optimizer* o, const char** state) { - int state_len = 0; - *state = o->impl->SerializeState(&state_len); + std::string s = o->impl->SerializeState(); + int state_len = s.size(); + + if (state_len > 0) { + *state = (char*)std::malloc(state_len); + std::memcpy((void*)*state, (const void*)s.c_str(), state_len); + } + return state_len; } diff --git a/paddle/optimizer/parameter_optimizer.cc b/paddle/optimizer/parameter_optimizer.cc index f6218037925649e741d17f49af972ce2d50f8d3d..db0714635f9366b0404019688daf4708b4a0052f 100644 --- a/paddle/optimizer/parameter_optimizer.cc +++ b/paddle/optimizer/parameter_optimizer.cc @@ -32,6 +32,7 @@ ParameterOptimizer *ParameterOptimizer::Create(const std::string &config_proto, Tensor *parameter, const OptimizerConfig &config) -> ParameterOptimizer * { if (config.optimizer() == OptimizerConfig::SGD) { + LOG(INFO) << "creating SGD optimizer"; return new SGDOptimizer(parameter, lr, config.sgd().momentum(), @@ -39,6 +40,7 @@ ParameterOptimizer *ParameterOptimizer::Create(const std::string &config_proto, config.sgd().nesterov()); } if (config.optimizer() == OptimizerConfig::Adadelta) { + LOG(INFO) << "creating Adadelta optimizer"; return new AdadeltaOptimizer(parameter, lr, config.adadelta().rho(), @@ -46,10 +48,12 @@ ParameterOptimizer *ParameterOptimizer::Create(const std::string &config_proto, config.adadelta().decay()); } if (config.optimizer() == OptimizerConfig::Adagrad) { + LOG(INFO) << "creating Adagrad optimizer"; return new AdagradOptimizer( parameter, lr, config.adagrad().epsilon(), config.adagrad().decay()); } if (config.optimizer() == OptimizerConfig::Adam) { + LOG(INFO) << "creating Adam optimizer"; return new AdamOptimizer(parameter, lr, config.adam().beta_1(), diff --git a/paddle/optimizer/parameter_optimizer.h b/paddle/optimizer/parameter_optimizer.h index d89c9abb791f947172078d4dce5b1c366852591b..8319f84e1b820adf5cc0006045f2e13dffa91797 100644 --- a/paddle/optimizer/parameter_optimizer.h +++ b/paddle/optimizer/parameter_optimizer.h @@ -28,7 +28,7 @@ public: Tensor *parameter); virtual void Update(const Tensor *gradient) = 0; virtual float *get_weight(int *param_size) const; - virtual const char *SerializeState(int *state_len) = 0; + virtual std::string SerializeState() = 0; virtual void DeserializeState(const std::string &state) = 0; protected: diff --git a/paddle/optimizer/parameter_optimizer_test.cpp b/paddle/optimizer/parameter_optimizer_test.cpp index edf4ae37a9beee2911d23dd1ab23e67a18065b1b..c88fa11748716693355042d1784b33d7cfb616f1 100644 --- a/paddle/optimizer/parameter_optimizer_test.cpp +++ b/paddle/optimizer/parameter_optimizer_test.cpp @@ -85,6 +85,7 @@ public: for (size_t i = 0; i < opts_.size(); ++i) { int s = 0; float* newp = (float*)opts_[i]->get_weight(&s); + EXPECT_EQ(s, kSize); for (size_t j = 0; j < kSize; ++j) { EXPECT_EQ(newp[j], (*p)[j]); } @@ -99,10 +100,20 @@ public: } void TestCheckPoint() { + paddle::optimizer::Tensor* p = FixedTensor(kSize); for (size_t i = 0; i < opts_.size(); ++i) { - int state_len = 0; - std::string state = opts_[i]->SerializeState(&state_len); + auto state = opts_[i]->SerializeState(); + opts_[i]->DeserializeState(state); + auto state1 = opts_[i]->SerializeState(); opts_[i]->DeserializeState(state); + EXPECT_EQ(state, state1); + + int s = 0; + float* newp = (float*)opts_[i]->get_weight(&s); + EXPECT_EQ(s, kSize); + for (size_t j = 0; j < kSize; ++j) { + EXPECT_EQ(newp[j], (*p)[j]); + } } } diff --git a/paddle/optimizer/serialization_test.cpp b/paddle/optimizer/serialization_test.cpp index e4d97cbdba545c4ba5adf5b30efd3fc9f3f744ee..4c416f55ee0bd70f9ec6e288b08a5399d8b2bf39 100644 --- a/paddle/optimizer/serialization_test.cpp +++ b/paddle/optimizer/serialization_test.cpp @@ -21,7 +21,22 @@ TEST(TensorToProto, Case1) { paddle::optimizer::Tensor t(3), t1(3); for (size_t i = 0; i < t.size(); ++i) { t[i] = i; - t1[i] = 0; + t1[i] = 10; + } + + paddle::TensorProto proto; + paddle::optimizer::TensorToProto(t, &proto); + paddle::optimizer::ProtoToTensor(proto, &t1); + for (size_t i = 0; i < t1.size(); ++i) { + EXPECT_EQ(t1[i], t[i]); + } +} + +TEST(TensorToProto, Case2) { + paddle::optimizer::Tensor t(1), t1(1); + for (size_t i = 0; i < t.size(); ++i) { + t[i] = i; + t1[i] = 10; } paddle::TensorProto proto; diff --git a/paddle/optimizer/sgd_optimizer.cc b/paddle/optimizer/sgd_optimizer.cc index 15418faa840c19e776f293700ee886991754fb04..bf2540ecb092437e57a5970264559dc3c6ab4167 100644 --- a/paddle/optimizer/sgd_optimizer.cc +++ b/paddle/optimizer/sgd_optimizer.cc @@ -27,16 +27,14 @@ void SGDOptimizer::Update(const Tensor *gradient) { } } -const char *SGDOptimizer::SerializeState(int *state_len) { +std::string SGDOptimizer::SerializeState() { SGDOptimizerState state; state.set_num_sample_passed(num_sample_passed_); - std::string lr_str = this->lr_policy_->SerializeState(state_len); + std::string lr_str = this->lr_policy_->SerializeState(); state.mutable_lr_state()->ParseFromString(lr_str); TensorToProto(*parameter_, state.mutable_parameter()); if (momentum_ != 0.0) TensorToProto(*momentums_, state.mutable_momentums()); - auto str = state.SerializeAsString(); - *state_len += str.size(); - return str.c_str(); + return state.SerializeAsString(); } void SGDOptimizer::DeserializeState(const std::string &str) { diff --git a/paddle/optimizer/sgd_optimizer.h b/paddle/optimizer/sgd_optimizer.h index b74a902e1aa40a7831b36ab826d72372a3588bcf..6e1a0f0d3f9ecfeb51ccb355d65985a2e6388fb0 100644 --- a/paddle/optimizer/sgd_optimizer.h +++ b/paddle/optimizer/sgd_optimizer.h @@ -23,7 +23,7 @@ public: if (momentums_) delete momentums_; } void Update(const Tensor* gradient); - const char* SerializeState(int* state_len); + std::string SerializeState(); void DeserializeState(const std::string& state); private: