Commit b72e8aa3 in Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)
Author: dzhwinter
Date: Jun 14, 2017
Parent: b7e68e06

    "seperate serialization proto state"

Showing 18 changed files with 176 additions and 134 deletions (+176 / -134):
  paddle/optimizer/CMakeLists.txt                 +2   -1
  paddle/optimizer/Tensor.h                       +0   -6
  paddle/optimizer/adadelta_optimizer.cc          +8  -15
  paddle/optimizer/adadelta_optimizer.h           +2   -2
  paddle/optimizer/adagrad_optimizer.cc          +20   -3
  paddle/optimizer/adagrad_optimizer.h            +2   -2
  paddle/optimizer/adam_optimizer.cc              +6  -12
  paddle/optimizer/adam_optimizer.h               +5   -2
  paddle/optimizer/optimizer.cc                   +1   -1
  paddle/optimizer/optimizer.h                    +2   -2
  paddle/optimizer/parameter_optimizer.cc         +1   -1
  paddle/optimizer/parameter_optimizer.h          +1   -5
  paddle/optimizer/parameter_optimizer_test.cpp  +53  -43
  paddle/optimizer/serialization.h                +7   -7
  paddle/optimizer/serialization_test.cpp        +24   -0
  paddle/optimizer/sgd_optimizer.cc               +5  -10
  paddle/optimizer/sgd_optimizer.h                +2   -2
  proto/OptimizerConfig.proto                    +35  -20
paddle/optimizer/CMakeLists.txt
@@ -6,12 +6,13 @@ set(OPITMIZER_SRCS
     adam_optimizer.cc
     optimizer.cc
     parameter_optimizer.cc
-    sgd_optmizer.cc
+    sgd_optimizer.cc
     )
 
 add_library(optimizer STATIC ${OPITMIZER_SRCS})
 add_dependencies(optimizer gen_proto_cpp)
 
 add_simple_unittest(tensor_test)
+add_simple_unittest(serialization_test)
 add_simple_unittest(parameter_optimizer_test)
 add_dependencies(parameter_optimizer_test optimizer)
paddle/optimizer/Tensor.h
@@ -17,16 +17,10 @@ public:
   TensorT(size_t size) : height_(1), width_(size) { data_ = new T[size]; }
   TensorT(T* data, size_t size) : height_(1), width_(size), data_(data) {}
   TensorT(T* data, size_t h, size_t w) : height_(h), width_(w), data_(data_) {}
-  TensorT(const TensorT& t)
-      : TensorT(1, t.size(), 0, t.get_buffer(), false, false) {}
-  ~TensorT() {
-    if (data_) delete data_;
-  }
-  TensorT& operator=(const TensorT& t) {
-    this->width_ = t.size();
-    this->data_ = t.get_buffer();
-  }
   T* get_buffer() { return this->data_; }
   T& operator[](const size_t idx) {
     CHECK(idx >= 0 && idx < this->width_) << "out of index range";
paddle/optimizer/adadelta_optimizer.cc
@@ -26,7 +26,7 @@ void AdadeltaOptimizer::Update(const Tensor* gradient) {
 }
 
 const char* AdadeltaOptimizer::SerializeState(int* state_len) {
-  OptimizerState state;
+  AdadeltaOptimizerState state;
   state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_));
   state.set_num_sample_passed(num_sample_passed_);
@@ -34,22 +34,14 @@ const char* AdadeltaOptimizer::SerializeState(int* state_len) {
   TensorToProto(*accum_gradient_, state.mutable_accum_gradient());
   TensorToProto(*accum_delta_, state.mutable_accum_delta());
   TensorToProto(*update_delta_, state.mutable_update_delta());
-  state.set_nesterov(epsilon_);
-  state.set_momentum(rho_);
-  state.set_decay(decay_);
-  // can be used when memory alignment to system
-  *state_len += CalStateSize(parameter_,
-                             accum_gradient_,
-                             accum_delta_,
-                             update_delta_,
-                             rho_,
-                             epsilon_,
-                             decay_);
+  *state_len =
+      CalStateSize(parameter_, accum_gradient_, accum_delta_, update_delta_);
   return state.SerializeAsString().c_str();
 }
 
-void AdadeltaOptimizer::DeSerializeState(const std::string& str) {
-  OptimizerState state;
+void AdadeltaOptimizer::DeserializeState(const std::string& str) {
+  AdadeltaOptimizerState state;
   state.ParseFromString(str);
   lr_policy_->set(state.learning_rate());
   num_sample_passed_ = state.num_sample_passed();
@@ -58,6 +50,7 @@ void AdadeltaOptimizer::DeSerializeState(const std::string& str) {
   ProtoToTensor(state.accum_gradient(), accum_gradient_);
   ProtoToTensor(state.accum_delta(), accum_delta_);
   ProtoToTensor(state.update_delta(), update_delta_);
 }
+
 }  // namespace optimizer
 }  // namespace paddle
paddle/optimizer/adadelta_optimizer.h
@@ -13,7 +13,7 @@ public:
         rho_(rho),
         epsilon_(epsilon),
         decay_(decay) {
-    size_t size = p->size();
+    size_t size = parameter->size();
     if (accum_gradient_) delete accum_gradient_;
     accum_gradient_ = new Tensor(size);
     if (accum_delta_) delete accum_delta_;
@@ -28,7 +28,7 @@ public:
   }
   void Update(const Tensor* gradient);
   const char* SerializeState(int* state_len);
-  void DeSerializeState(const std::string& state);
+  void DeserializeState(const std::string& state);
 
 private:
   Tensor* accum_gradient_;
paddle/optimizer/adagrad_optimizer.cc
@@ -17,8 +17,25 @@ void AdagradOptimizer::Update(const Tensor* gradient) {
                learning_rate * decay_ * param[i];
   }
 }
-const char* SGDOptimizer::SerializeState(int* state_len) { NIMPL; }
+const char* AdagradOptimizer::SerializeState(int* state_len) {
+  AdagradOptimizerState state;
+  state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_));
+  state.set_num_sample_passed(num_sample_passed_);
+
+  TensorToProto(*parameter_, state.mutable_parameter());
+  TensorToProto(*accum_gradient_, state.mutable_accum_gradient());
+  *state_len = CalStateSize(parameter_, accum_gradient_);
+  return state.SerializeAsString().c_str();
+}
+
+void AdagradOptimizer::DeserializeState(const std::string& str) {
+  AdagradOptimizerState state;
+  state.ParseFromString(str);
+  lr_policy_->set(state.learning_rate());
+  num_sample_passed_ = state.num_sample_passed();
+
+  ProtoToTensor(state.parameter(), parameter_);
+  ProtoToTensor(state.accum_gradient(), accum_gradient_);
+}
-void SGDOptimizer::DeSerializeState(const std::string& str) { NIMPL; }
 
 }  // namespace optimizer
 }  // namespace paddle
paddle/optimizer/adagrad_optimizer.h
@@ -12,7 +12,7 @@ public:
                    double epsilon,
                    double decay)
       : ParameterOptimizer(parameter, lr), epsilon_(epsilon), decay_(decay) {
-    size_t size = p->size();
+    size_t size = parameter->size();
     if (accum_gradient_) delete accum_gradient_;
     accum_gradient_ = new Tensor(size);
   }
@@ -21,7 +21,7 @@ public:
   }
   void Update(const Tensor* gradient);
   const char* SerializeState(int* state_len);
-  void DeSerializeState(const std::string& state);
+  void DeserializeState(const std::string& state);
 
 private:
   Tensor* accum_gradient_;
paddle/optimizer/adam_optimizer.cc
@@ -22,32 +22,26 @@ void AdamOptimizer::Update(const Tensor *gradient) {
   }
 }
 
-const char *AdadeltaOptimizer::SerializeState(int *state_len) {
-  OptimizerState state;
+const char *AdamOptimizer::SerializeState(int *state_len) {
+  AdamOptimizerState state;
   state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_));
   state.set_num_sample_passed(num_sample_passed_);
 
   TensorToProto(*parameter_, state.mutable_parameter());
   TensorToProto(*velocitys_, state.mutable_momentums());
-  state.set_beta_1(beta_1_);
-  state.set_beta_2(beta_2_);
-  state.set_decay(decay_);
-  *state_len += CalStateSize(
-      parameter_, momentums_, velocitys_, beta_1_, beta_2, epsilon_ decay_);
+  *state_len = CalStateSize(parameter_, momentums_, velocitys_);
   return state.SerializeAsString().c_str();
 }
 
-void AdadeltaOptimizer::DeSerializeState(const std::string &str) {
-  OptimizerState state;
+void AdamOptimizer::DeserializeState(const std::string &str) {
+  AdamOptimizerState state;
   state.ParseFromString(str);
   lr_policy_->set(state.learning_rate());
   num_sample_passed_ = state.num_sample_passed();
 
   ProtoToTensor(state.parameter(), parameter_);
-  ProtoToTensor(state.velocitys(), velocitys__);
-  beta_1_ = state.beta_1();
-  beta_2_ = state.beta_2();
+  ProtoToTensor(state.velocitys(), velocitys_);
 }
 }  // namespace optimizer
 }  // namespace paddle
paddle/optimizer/adam_optimizer.h
@@ -8,7 +8,8 @@ namespace optimizer {
 class AdamOptimizer : public ParameterOptimizer {
 public:
   AdamOptimizer(Tensor *parameter,
-                LrPolicy *lr double beta_1,
+                LrPolicy *lr,
+                double beta_1,
                 double beta_2,
                 double epsilon,
                 double decay)
@@ -17,7 +18,7 @@ public:
         beta_2_(beta_2),
         epsilon_(epsilon),
         decay_(decay) {
-    size_t size = p->size();
+    size_t size = parameter->size();
     momentums_ = new Tensor(size);
     velocitys_ = new Tensor(size);
   }
@@ -26,6 +27,8 @@ public:
     if (velocitys_) delete velocitys_;
   }
   void Update(const Tensor *gradient);
+  const char *SerializeState(int *state_len);
+  void DeserializeState(const std::string &state);
 
 private:
   Tensor *momentums_;
paddle/optimizer/optimizer.cc
@@ -49,7 +49,7 @@ paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
   optimizer->impl = ParameterOptimizer::Create(config, parameter);
   if (state != nullptr) {
     std::string s(state, state + state_len);
-    optimizer->impl->DeSerializeState(s);
+    optimizer->impl->DeserializeState(s);
   }
   return optimizer;
 }
paddle/optimizer/optimizer.h
@@ -75,14 +75,14 @@ int paddle_update_parameter(paddle_optimizer* o,
                             int num_bytes);
 
 /**
- * @brief optimizer instance
+ * @brief optimizer for get parameter buffer
  * @param param_buffer, initilized parameter buffer
  * @return return content length
  */
 int paddle_optimizer_get_weights(paddle_optimizer* o, void** param_buffer);
 
 /**
- * @brief optimzizer instance
+ * @brief optimzizer for saving training state
  * @param training state for receive SerializeState
  * @return return state_buffer length
  */
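As a point of reference for the two doc comments above, a minimal sketch of how a host program might read the parameter buffer back through this C API. Only the paddle_optimizer_get_weights declaration comes from this header; the handle construction, the float element type, and the interpretation of the returned content length are assumptions made for illustration.

    #include <cstdio>
    #include "optimizer.h"  // paddle_optimizer_get_weights (this diff)

    // Sketch only: `opt` is assumed to be a valid handle created elsewhere
    // via paddle_create_optimizer(...); error handling is omitted.
    void dump_weights(paddle_optimizer* opt) {
      void* param_buffer = nullptr;
      int content_len = paddle_optimizer_get_weights(opt, &param_buffer);
      if (content_len > 0 && param_buffer != nullptr) {
        // Assumption: the buffer holds the optimizer's float parameters.
        const float* weights = static_cast<const float*>(param_buffer);
        std::printf("content length %d, first weight %f\n", content_len, weights[0]);
      }
    }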
paddle/optimizer/parameter_optimizer.cc
@@ -62,7 +62,7 @@ ParameterOptimizer *ParameterOptimizer::Create(const std::string &config_proto,
         << "have not select any Optimizer. use SGDOptimizer in default";
     return new SGDOptimizer(parameter, lr, 0.0, 0.0, false);
   };
-  return select_optimizer(config);
+  return select_optimizer(parameter, config);
 }
 
 float *ParameterOptimizer::get_weight(int *param_size) const {
paddle/optimizer/parameter_optimizer.h
@@ -8,10 +8,6 @@
 #include "serialization.h"
 #include "tensor.h"
 
-// Not Implemen Yet, macro
-#define NIMPL crash(__PRETTY_FUNCTION__, " not implemented yet")
-
 namespace paddle {
 namespace optimizer {
@@ -30,7 +26,7 @@ public:
   virtual void Update(const Tensor *gradient) = 0;
   virtual float *get_weight(int *param_size) const;
   virtual const char *SerializeState(int *state_len) = 0;
-  virtual void DeSerializeState(const std::string &state) = 0;
+  virtual void DeserializeState(const std::string &state) = 0;
 
 protected:
   Tensor *parameter_;
paddle/optimizer/parameter_optimizer_test.cpp
 #include "parameter_optimizer.h"
 #include <cmath>
-#include <tuple>
+#include <map>
 #include <vector>
 #include "adadelta_optimizer.h"
 #include "adagrad_optimizer.h"
 #include "adam_optimizer.h"
 #include "gtest/gtest.h"
 #include "sgd_optimizer.h"
 
 using namespace paddle;
 using namespace paddle::optimizer;
 
 Tensor* FillTensor(size_t size) {
   Tensor* param = new Tensor(size);
   Tensor& p = *param;
-  for (auto i = 0; i < p.size(); ++i) {
+  for (size_t i = 0; i < p.size(); ++i) {
     p[i] = (float)rand() / (float)RAND_MAX;
   }
   return param;
@@ -22,7 +23,7 @@ Tensor* FillTensor(size_t size) {
 Tensor* FixedTensor(size_t size) {
   Tensor* param = new Tensor(size);
   Tensor& p = *param;
-  for (auto i = 0; i < p.size(); ++i) {
+  for (size_t i = 0; i < p.size(); ++i) {
     p[i] = i;
   }
   return param;
@@ -31,7 +32,7 @@ Tensor* FixedTensor(size_t size) {
 class OptimizerTest : public testing::Test {
 public:
   // init tensor shape
-  const size_t size = 5;
+  const size_t kSize = 5;
 
   virtual void SetUp() {
     CreateSGD();
@@ -40,68 +41,77 @@ public:
   virtual void TearDown() {}
 
   void CreateSGD() {
-    config.set_optimizer(OptimizerConfig::SGD);
-    config.mutable_sgd()->set_momentum(0.0);
-    config.mutable_sgd()->set_decay(0.0);
-    config.mutable_sgd()->set_nesterov(false);
-    config.set_lr_policy(OptimizerConfig::ConstLr);
-    config.mutable_const_lr()->set_learning_rate(0.1);
+    Tensor* parameter = FillTensor(kSize);
+    config_.set_optimizer(OptimizerConfig::SGD);
+    config_.mutable_sgd()->set_momentum(0.0);
+    config_.mutable_sgd()->set_decay(0.0);
+    config_.mutable_sgd()->set_nesterov(false);
+    config_.set_lr_policy(OptimizerConfig::ConstLr);
+    config_.mutable_const_lr()->set_learning_rate(0.1);
 
     ParameterOptimizer* opt =
-        ParameterOptimizer::Create(config.SerializeAsString());
-    opts.push_back(opt);
+        ParameterOptimizer::Create(config_.SerializeAsString(), parameter);
+    opts_.push_back(opt);
+    opts_table_[opts_.size()] = OptimizerConfig::SGD;
   }
 
   void CreateAdam() {
-    config.set_optimizer(OptimizerConfig::Adam);
-    config.mutable_adam()->set_beta_1(0.9);
-    config.mutable_adam()->set_beta_2(0.1);
-    config.mutable_adam()->set_epsilon(1e-3);
-    config.mutable_adam()->set_decay(0.0);
-    config.set_lr_policy(OptimizerConfig::ConstLr);
-    config.mutable_const_lr()->set_learning_rate(0.1);
+    Tensor* parameter = FixedTensor(kSize);
+    config_.set_optimizer(OptimizerConfig::Adam);
+    config_.mutable_adam()->set_beta_1(0.9);
+    config_.mutable_adam()->set_beta_2(0.1);
+    config_.mutable_adam()->set_epsilon(1e-3);
+    config_.mutable_adam()->set_decay(0.0);
+    config_.set_lr_policy(OptimizerConfig::ConstLr);
+    config_.mutable_const_lr()->set_learning_rate(0.1);
 
     ParameterOptimizer* opt =
-        ParameterOptimizer::Create(config.SerializeAsString());
-    opts.push_back(opt);
-  }
-
-  void TestSetWeight() {
-    Tensor* p = FillTensor(size);
-    for (size_t i = 0; i < opts.size(); ++i) {
-      opts[i]->set_weight(p);
-    }
+        ParameterOptimizer::Create(config_.SerializeAsString(), parameter);
+    opts_.push_back(opt);
+    opts_table_[opts_.size()] = OptimizerConfig::Adam;
   }
 
   void TestGetWeight() {
-    Tensor* p = FixedTensor(size);
-    for (size_t i = 0; i < opts.size(); ++i) {
-      opts[i]->set_weight(p);
-    }
-    for (size_t i = 0; i < opts.size(); ++i) {
+    Tensor* p = FixedTensor(kSize);
+    for (size_t i = 0; i < opts_.size(); ++i) {
       int s = 0;
-      float* newp = (float*)opts[i]->get_weight(&s);
-      for (size_t j = 0; j < size; ++j) {
+      float* newp = (float*)opts_[i]->get_weight(&s);
+      for (size_t j = 0; j < kSize; ++j) {
         EXPECT_EQ(newp[j], (*p)[j]);
       }
     }
   }
 
   void TestUpdate() {
-    Tensor* g = FixedTensor(size);
-    for (size_t i = 0; i < opts.size(); ++i) {
-      opts[i]->Update(g);
+    Tensor* g = FixedTensor(kSize);
+    for (size_t i = 0; i < opts_.size(); ++i) {
+      opts_[i]->Update(g);
     }
   }
 
+  void TestCheckPoint() {
+    std::map<OptimizerConfig::Optimizer, int> expected_state_len = {
+        {OptimizerConfig::SGD, kSize}, {OptimizerConfig::Adam, kSize * 3},
+    };
+    for (size_t i = 0; i < opts_.size(); ++i) {
+      int state_len = 0;
+      std::string state = opts_[i]->SerializeState(&state_len);
+      EXPECT_EQ(state_len, expected_state_len[opts_table_[i]]);
+      opts_[i]->DeserializeState(state);
+    }
+  }
+
 private:
-  std::vector<ParameterOptimizer*> opts;
-  OptimizerConfig config;
+  std::vector<ParameterOptimizer*> opts_;
+  std::map<int, OptimizerConfig::Optimizer> opts_table_;
+  OptimizerConfig config_;
 };
 
-TEST_F(OptimizerTest, test_set_get_weight) {
-  TestSetWeight();
-  TestGetWeight();
-}
+TEST_F(OptimizerTest, TestGetWeight) { TestGetWeight(); }
 
 TEST_F(OptimizerTest, TestUpdate) { TestUpdate(); }
+TEST_F(OptimizerTest, TestCheckPoint) { TestCheckPoint(); }
 
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
paddle/optimizer/serialization.h
@@ -10,15 +10,16 @@
 namespace paddle {
 namespace optimizer {
 
-static unsigned CalStateSize(int* state_len) { return 0; }
+static unsigned CalStateSize() { return 0; }
 
 template <typename HEAD, typename... TAIL>
 unsigned CalStateSize(const HEAD& head, const TAIL&... tail) {
-  if (std::is_fundamental<HEAD>::value) {
-    return sizeof head + CalStateSize(tail...);
-  } else {
-    return sizeof(head[0]) * head->size() + CalStateSize(tail...);
-  }
+  return sizeof head + CalStateSize(tail...);
+}
+
+template <typename... TAIL>
+unsigned CalStateSize(const Tensor* head, const TAIL&... tail) {
+  return head->size() + CalStateSize(tail...);
 }
 
 static void TensorToProto(const Tensor& tensor, TensorProto* proto) {
@@ -32,7 +33,6 @@ static void TensorToProto(const Tensor& tensor, TensorProto* proto) {
 }
 
 static void ProtoToTensor(const TensorProto& proto, Tensor* tensor) {
-  CHECK(proto.size() == tensor->size()) << "unmatch shape of proto and tensor";
   std::stringstream sin;
   for (auto i = 0; i < proto.content_size(); ++i) {
     sin << proto.content(i);
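For orientation, a small sketch of how the CalStateSize recursion above composes; the values are invented, and the call exercises only the generic template plus the zero-argument terminator (the Tensor* overload added in this diff contributes head->size() per tensor in the same way).

    #include "serialization.h"  // CalStateSize (this diff)

    // Sketch: each fundamental argument contributes sizeof(arg), and the
    // recursion bottoms out in the zero-argument overload returning 0.
    unsigned ExampleHyperparamStateSize() {
      double momentum = 0.9;   // invented values, for illustration only
      double decay = 1e-4;
      bool nesterov = false;
      // sizeof(double) + sizeof(double) + sizeof(bool) + 0
      return CalStateSize(momentum, decay, nesterov);
    }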
paddle/optimizer/Tensor_test.cpp → paddle/optimizer/serialization_test.cpp
 #include "serialization.h"
 #include "gtest/gtest.h"
 #include "tensor.h"
 
 using namespace paddle;
 using namespace paddle::optimizer;
 
-TEST(Tensor, indexer) {
-  Tensor t(3);
-  for (auto i = 0; i < t.size(); ++i) {
+TEST(TensorToProto, Case1) {
+  Tensor t(3), t1(3);
+  for (size_t i = 0; i < t.size(); ++i) {
     t[i] = i;
+    t1[i] = 0;
   }
+  TensorProto proto;
+  TensorToProto(t, &proto);
+  ProtoToTensor(proto, &t1);
+  for (size_t i = 0; i < t1.size(); ++i) {
+    EXPECT_EQ(t1[i], t[i]);
+  }
-  ASSERT_EQ(t[2], 2);
-  ASSERT_EQ(t[1], 1);
 }
 
 int main(int argc, char** argv) {
paddle/optimizer/sgd_optmizer.cc → paddle/optimizer/sgd_optimizer.cc
-#include "serialization.h"
 #include "sgd_optimizer.h"
+#include "serialization.h"
 
 namespace paddle {
 namespace optimizer {
@@ -28,29 +28,24 @@ void SGDOptimizer::Update(const Tensor *gradient) {
 }
 
 const char *SGDOptimizer::SerializeState(int *state_len) {
-  OptimizerState state;
+  SGDOptimizerState state;
   state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_));
   state.set_num_sample_passed(num_sample_passed_);
 
   TensorToProto(*parameter_, state.mutable_parameter());
   TensorToProto(*momentums_, state.mutable_momentums());
-  state.set_momentum(momentum_);
-  state.set_decay(decay_);
-  state.set_nesterov(nesterov_);
-  *state_len +=
-      CalStateSize(parameter_, momentums_, momentum_, decay_, nesterov_);
+  *state_len = CalStateSize(parameter_, momentums_);
   return state.SerializeAsString().c_str();
 }
 
-void SGDOptimizer::DeSerializeState(const std::string &str) {
-  OptimizerState state;
+void SGDOptimizer::DeserializeState(const std::string &str) {
+  SGDOptimizerState state;
   state.ParseFromString(str);
   lr_policy_->set(state.learning_rate());
   num_sample_passed_ = state.num_sample_passed();
 
   ProtoToTensor(state.parameter(), parameter_);
   ProtoToTensor(state.parameter(), momentums_);
-  momentum_ = state.momentum();
 }
 
 }  // namespace optimizer
paddle/optimizer/sgd_optimizer.h
@@ -14,7 +14,7 @@ public:
         decay_(d),
         nesterov_(n) {
     if (momentum_ != 0.0) {
-      size_t size = p->size();
+      size_t size = parameter->size();
       // TODO: fix it with align aware allocator bind to Tensor
       if (momentums_) delete momentums_;
       momentums_ = new Tensor(size);
@@ -25,7 +25,7 @@ public:
   }
   void Update(const Tensor* gradient);
   const char* SerializeState(int* state_len);
-  void DeSerializeState(const std::string& state);
+  void DeserializeState(const std::string& state);
 
 private:
   Tensor* momentums_;
proto/OptimizerConfig.proto
@@ -78,36 +78,51 @@ enum DataType {
   repeated bytes content = 2;
 }
 
-message OptimizerState {
+message SGDOptimizerState {
   // learning rate policy
   optional double learning_rate = 101;
   optional double lr_decay_a = 102;
   optional double lr_decay_b = 103;
   optional double num_sample_passed = 104;
-  // momentum
-  optional TensorProto parameter = 105;
-  optional TensorProto momentums = 1;
+  // state
+  optional TensorProto parameter = 1;
+  optional TensorProto momentums = 2;
 }
 
+// adadelta
+message AdadeltaOptimizerState {
+  // learning rate policy
+  optional double learning_rate = 101;
+  optional double lr_decay_a = 102;
+  optional double lr_decay_b = 103;
+  optional double num_sample_passed = 104;
+  // state
+  optional TensorProto parameter = 1;
+  optional TensorProto accum_gradient = 2;
+  optional TensorProto accum_delta = 3;
+  optional TensorProto update_delta = 4;
+}
-  // adam
-  optional TensorProto velocitys = 5;
-  // momentum
-  optional double momentum = 6;
-  optional double decay = 7;
-  optional bool nesterov = 8;
-  // adadelta
-  optional double rho = 9;
-  optional double epsilon = 10;
-  // adam
-  optional double beta_1 = 11;
-  optional double beta_2 = 12;
+
+message AdagradOptimizerState {
+  // learning rate policy
+  optional double learning_rate = 101;
+  optional double lr_decay_a = 102;
+  optional double lr_decay_b = 103;
+  optional double num_sample_passed = 104;
+  // state
+  optional TensorProto parameter = 1;
+  optional TensorProto accum_gradient = 2;
+}
+
+message AdamOptimizerState {
+  // learning rate policy
+  optional double learning_rate = 101;
+  optional double lr_decay_a = 102;
+  optional double lr_decay_b = 103;
+  optional double num_sample_passed = 104;
+  // state
+  optional TensorProto parameter = 1;
+  optional TensorProto momentums = 2;
+  optional TensorProto velocitys = 3;
+}
 
 message OptimizerConfig {
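To make the effect of the split concrete, here is a hedged sketch of a checkpoint round trip through one of the per-optimizer messages defined above. Only the message and field names from this .proto and the protobuf calls already used in this commit (SerializeAsString, ParseFromString) come from the source; the generated header name, the omitted namespace qualification, and the field values are assumptions.

    #include <string>
    #include "OptimizerConfig.pb.h"  // assumed name of the generated header

    // Sketch: SGD state now lives in its own message, carrying only the
    // learning-rate policy fields and the SGD tensors (values invented).
    std::string CheckpointSgdState() {
      SGDOptimizerState state;
      state.set_learning_rate(0.1);
      state.set_num_sample_passed(1024);
      // In the optimizer code, state.mutable_parameter() and
      // state.mutable_momentums() are filled via TensorToProto(...).
      return state.SerializeAsString();
    }

    void RestoreSgdState(const std::string& blob) {
      SGDOptimizerState state;
      state.ParseFromString(blob);
      double lr = state.learning_rate();  // typed access, per-optimizer schema
      (void)lr;
    }

The point of the separation is that optimizer-specific fields no longer share one catch-all OptimizerState: velocitys, for example, now appears only in AdamOptimizerState, and accum_delta/update_delta only in AdadeltaOptimizerState.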