Commit e4567962

update with comment

Authored June 10, 2017 by dzhwinter
Parent: f5ff2838

Showing 14 changed files with 210 additions and 95 deletions (+210 -95)
paddle/optimizer/adadelta_optimizer.cc     +35 -9
paddle/optimizer/adadelta_optimizer.h      +14 -4
paddle/optimizer/adagrad_optimizer.cc       +3 -7
paddle/optimizer/adagrad_optimizer.h        +9 -6
paddle/optimizer/adam_optimizer.cc         +28 -0
paddle/optimizer/adam_optimizer.h           +6 -3
paddle/optimizer/lr_policy.h                +7 -0
paddle/optimizer/optimizer.cc               +6 -5
paddle/optimizer/parameter_optimizer.cc    +18 -16
paddle/optimizer/parameter_optimizer.h     +11 -8
paddle/optimizer/serialization.h           +12 -0
paddle/optimizer/sgd_optimizer.h           +14 -8
paddle/optimizer/sgd_optmizer.cc           +16 -23
proto/OptimizerConfig.proto                +31 -6
paddle/optimizer/adadelta_optimizer.cc

```diff
@@ -5,14 +5,6 @@
 namespace paddle {
 namespace optimizer {

-void AdadeltaOptimizer::set_weight(Tensor *p) {
-  parameter_ = p;
-  size_t size = p->size();
-  accum_gradient_ = new Tensor(size);
-  accum_delta_ = new Tensor(size);
-  update_delta_ = new Tensor(size);
-}
-
 void AdadeltaOptimizer::Update(const Tensor *gradient) {
   num_sample_passed_ += 1;
   double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
@@ -32,5 +24,39 @@ void AdadeltaOptimizer::Update(const Tensor* gradient) {
     param[i] -=
         learning_rate * update_d[i] + learning_rate * decay_ * param[i];
   }
 }
+
+const char *AdadeltaOptimizer::SerializeState(int *state_len) {
+  OptimizerState state;
+  state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_));
+  state.set_num_sample_passed(num_sample_passed_);
+
+  TensorToProto(*parameter_, state.mutable_parameter());
+  TensorToProto(*accum_gradient_, state.mutable_accum_gradient());
+  TensorToProto(*accum_delta_, state.mutable_accum_delta());
+  TensorToProto(*update_delta_, state.mutable_update_delta());
+  state.set_nesterov(epsilon_);
+  state.set_momentum(rho_);
+  state.set_decay(decay_);
+  *state_len += CalStateSize(parameter_,
+                             accum_gradient_,
+                             accum_delta_,
+                             update_delta_,
+                             rho_,
+                             epsilon_,
+                             decay_);
+  return state.SerializeAsString().c_str();
+}
+
+void AdadeltaOptimizer::DeSerializeState(const std::string &str) {
+  OptimizerState state;
+  state.ParseFromString(str);
+
+  lr_policy_->set(state.learning_rate());
+  num_sample_passed_ = state.num_sample_passed();
+  ProtoToTensor(state.parameter(), parameter_);
+  ProtoToTensor(state.accum_gradient(), accum_gradient_);
+  ProtoToTensor(state.accum_delta(), accum_delta_);
+  ProtoToTensor(state.update_delta(), update_delta_);
+}
 }  // namespace optimizer
 }  // namespace paddle
```
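A side note on the `return state.SerializeAsString().c_str();` pattern introduced above: `SerializeAsString()` returns a temporary `std::string`, so the returned `const char *` dangles as soon as the return statement completes. A minimal self-contained sketch of the hazard and one common workaround (`StateHolder` and `MakeState` are illustrative names, not from this commit):

```cpp
#include <iostream>
#include <string>

// Stand-in for state.SerializeAsString().
std::string MakeState() { return std::string("serialized-bytes"); }

// Dangling: the temporary std::string dies at the end of the return statement.
const char *BadSerialize() { return MakeState().c_str(); }

// Workaround: keep the bytes alive in the object that hands out the pointer.
class StateHolder {
 public:
  const char *Serialize(int *state_len) {
    cached_ = MakeState();
    *state_len = static_cast<int>(cached_.size());
    return cached_.c_str();  // valid until the next Serialize() call
  }

 private:
  std::string cached_;
};

int main() {
  StateHolder holder;
  int len = 0;
  const char *state = holder.Serialize(&len);
  std::cout << len << " bytes: " << state << "\n";
  return 0;
}
```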
paddle/optimizer/adadelta_optimizer.h

```diff
@@ -7,21 +7,31 @@ namespace optimizer {
 class AdadeltaOptimizer : public ParameterOptimizer {
 public:
-  AdadeltaOptimizer(double rho, double epsilon, double decay, LrPolicy *lr)
-      : ParameterOptimizer(lr),
+  AdadeltaOptimizer(
+      Tensor *parameter, LrPolicy *lr, double rho, double epsilon, double decay)
+      : ParameterOptimizer(parameter, lr),
+        accum_gradient_(nullptr),
+        accum_delta_(nullptr),
+        update_delta_(nullptr),
         rho_(rho),
         epsilon_(epsilon),
-        decay_(decay) {}
+        decay_(decay) {
+    size_t size = p->size();
+    if (accum_gradient_) delete accum_gradient_;
+    accum_gradient_ = new Tensor(size);
+    if (accum_delta_) delete accum_delta_;
+    accum_delta_ = new Tensor(size);
+    if (update_delta_) delete update_delta_;
+    update_delta_ = new Tensor(size);
+  }
+  ~AdadeltaOptimizer() {
+    if (accum_gradient_) delete accum_gradient_;
+    if (accum_delta_) delete accum_delta_;
+    if (update_delta_) delete update_delta_;
+  }
   void Update(const Tensor *gradient);
-  void set_weight(Tensor *p);
+  const char *SerializeState(int *state_len);
+  void DeSerializeState(const std::string &state);

 private:
   Tensor *accum_gradient_;
```
paddle/optimizer/adagrad_optimizer.cc

```diff
@@ -5,12 +5,6 @@
 namespace paddle {
 namespace optimizer {

-void AdagradOptimizer::set_weight(Tensor *p) {
-  parameter_ = p;
-  size_t size = p->size();
-  accum_gradient_ = new Tensor(size);
-}
-
 void AdagradOptimizer::Update(const Tensor *gradient) {
   num_sample_passed_ += 1;
   double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
@@ -23,6 +17,8 @@ void AdagradOptimizer::Update(const Tensor* gradient) {
         learning_rate * decay_ * param[i];
   }
 }
+const char *SGDOptimizer::SerializeState(int *state_len) { NIMPL; }
+void SGDOptimizer::DeSerializeState(const std::string &str) { NIMPL; }
 }  // namespace optimizer
 }  // namespace paddle
```
paddle/optimizer/adagrad_optimizer.h

```diff
@@ -7,16 +7,19 @@ namespace optimizer {
 class AdagradOptimizer : public ParameterOptimizer {
 public:
-  AdagradOptimizer(double epsilon, double decay, LrPolicy *lr)
-      : ParameterOptimizer(lr),
-        accum_gradient_(nullptr),
-        epsilon_(epsilon),
-        decay_(decay) {}
+  AdagradOptimizer(
+      Tensor *parameter, LrPolicy *lr, double epsilon, double decay)
+      : ParameterOptimizer(parameter, lr), epsilon_(epsilon), decay_(decay) {
+    size_t size = p->size();
+    if (accum_gradient_) delete accum_gradient_;
+    accum_gradient_ = new Tensor(size);
+  }
+  ~AdagradOptimizer() {
+    if (accum_gradient_) delete accum_gradient_;
+  }
   void Update(const Tensor *gradient);
-  void set_weight(Tensor *p);

 private:
   Tensor *accum_gradient_;
```
paddle/optimizer/adam_optimizer.cc

```diff
@@ -28,5 +28,33 @@ void AdamOptimizer::Update(const Tensor *gradient) {
                 learning_rate * (m[i] / std::sqrt(v[i] + epsilon_) + decay_ * param[i]);
   }
 }
+
+const char *AdadeltaOptimizer::SerializeState(int *state_len) {
+  OptimizerState state;
+  state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_));
+  state.set_num_sample_passed(num_sample_passed_);
+
+  TensorToProto(*parameter_, state.mutable_parameter());
+  TensorToProto(*velocitys_, state.mutable_momentums());
+  state.set_beta_1(beta_1_);
+  state.set_beta_2(beta_2_);
+  state.set_decay(decay_);
+  *state_len += CalStateSize(
+      parameter_, momentums_, velocitys_, beta_1_, beta_2_, epsilon_, decay_);
+  return state.SerializeAsString().c_str();
+}
+
+void AdadeltaOptimizer::DeSerializeState(const std::string &str) {
+  OptimizerState state;
+  state.ParseFromString(str);
+
+  lr_policy_->set(state.learning_rate());
+  num_sample_passed_ = state.num_sample_passed();
+  ProtoToTensor(state.parameter(), parameter_);
+  ProtoToTensor(state.velocitys(), velocitys_);
+  beta_1_ = state.beta_1();
+  beta_2_ = state.beta_2();
+}
 }  // namespace optimizer
 }  // namespace paddle
```

(The two added functions in this file really are qualified `AdadeltaOptimizer::` in the commit, apparently a copy-paste slip.)
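The context line at the top of this hunk is the Adam parameter step; the moving-average updates for `m` and `v` sit in the collapsed part of the diff. For orientation, a self-contained sketch of a textbook Adam-style step that ends with the visible line (the `m`/`v` recurrences below are the standard ones, assumed rather than copied from this file):

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  std::vector<double> param{1.0, -2.0}, grad{0.1, -0.3};
  std::vector<double> m(2, 0.0), v(2, 0.0);
  const double beta_1 = 0.9, beta_2 = 0.999, epsilon = 1e-8;
  const double learning_rate = 0.001, decay = 0.0;

  for (size_t i = 0; i < param.size(); ++i) {
    // Standard exponential moving averages of the gradient and its square.
    m[i] = beta_1 * m[i] + (1.0 - beta_1) * grad[i];
    v[i] = beta_2 * v[i] + (1.0 - beta_2) * grad[i] * grad[i];
    // The step shown in the diff's context line:
    param[i] -=
        learning_rate * (m[i] / std::sqrt(v[i] + epsilon) + decay * param[i]);
    std::printf("param[%zu] = %.9f\n", i, param[i]);
  }
  return 0;
}
```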
paddle/optimizer/adam_optimizer.h

```diff
@@ -7,9 +7,12 @@ namespace optimizer {
 class AdamOptimizer : public ParameterOptimizer {
 public:
-  AdamOptimizer(
-      double beta_1, double beta_2, double epsilon, double decay, LrPolicy *lr)
-      : ParameterOptimizer(lr),
+  AdamOptimizer(Tensor *parameter,
+                LrPolicy *lr,
+                double beta_1,
+                double beta_2,
+                double epsilon,
+                double decay)
+      : ParameterOptimizer(parameter, lr),
+        momentums_(nullptr),
+        velocitys_(nullptr),
         beta_1_(beta_1),
```
paddle/optimizer/lr_policy.h

```diff
@@ -10,6 +10,7 @@ class LrPolicy {
 public:
   virtual ~LrPolicy() {}
   virtual double LearningRate(const uint64_t num_sample_passed) = 0;
+  virtual void set(double current_learning_rate) = 0;
 };

 // constant learning rate policy
@@ -19,6 +20,9 @@ public:
   double LearningRate(const uint64_t num_sample_passed) {
     return learning_rate;
   }
+  void set(double current_learning_rate) {
+    learning_rate = current_learning_rate;
+  }

 private:
   double learning_rate;
@@ -31,6 +35,9 @@ public:
   double LearningRate(const uint64_t num_sample_passed) {
     return std::max(learning_rate - lr_decay_a * num_sample_passed, lr_decay_b);
   }
+  void set(double current_learning_rate) {
+    learning_rate = current_learning_rate;
+  }

 private:
   double learning_rate;
```
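The new `set()` hook exists so `DeSerializeState` can push a checkpointed learning rate back into the policy (`lr_policy_->set(state.learning_rate())` in the optimizer sources). A minimal self-contained sketch of that flow; only the "constant learning rate policy" comment is visible in the hunk, so the class name `ConstLr` here is illustrative:

```cpp
#include <cstdint>
#include <iostream>

class LrPolicy {
 public:
  virtual ~LrPolicy() {}
  virtual double LearningRate(const uint64_t num_sample_passed) = 0;
  virtual void set(double current_learning_rate) = 0;
};

// Trimmed-down constant-rate policy mirroring the header above.
class ConstLr : public LrPolicy {
 public:
  explicit ConstLr(double lr) : learning_rate(lr) {}
  double LearningRate(const uint64_t) override { return learning_rate; }
  void set(double current_learning_rate) override {
    learning_rate = current_learning_rate;
  }

 private:
  double learning_rate;
};

int main() {
  ConstLr policy(0.1);
  // What DeSerializeState does: restore the checkpointed rate into the policy.
  policy.set(0.01);
  std::cout << policy.LearningRate(0) << "\n";  // prints 0.01
  return 0;
}
```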
paddle/optimizer/optimizer.cc

```diff
@@ -44,13 +44,13 @@ paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
                                           const int state_len) {
   paddle_optimizer *optimizer = new paddle_optimizer;
   std::string config(config_proto, config_proto + config_proto_len);
-  optimizer->impl = ParameterOptimizer::Create(config);
+  Tensor *parameter =
+      new Tensor(reinterpret_cast<float *>(param_buffer), num_bytes);
+  optimizer->impl = ParameterOptimizer::Create(config, parameter);
   if (state != nullptr) {
     std::string s(state, state + state_len);
     optimizer->impl->DeSerializeState(s);
   }
-  Tensor *param = new Tensor(reinterpret_cast<float *>(param_buffer), num_bytes);
-  optimizer->impl->set_weight(param);
   return optimizer;
 }
@@ -77,6 +77,7 @@ int paddle_optimizer_get_weights(paddle_optimizer* o, void** param_buffer) {
 }
 int paddle_optimizer_get_state(paddle_optimizer *o, const char **state) {
-  *state = o->impl->SerializeState();
-  return strlen(*state);
+  int state_len = 0;
+  *state = o->impl->SerializeState(&state_len);
+  return state_len;
 }
```
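Returning an explicit length instead of `strlen(*state)` matters because protobuf wire data routinely contains embedded NUL bytes, which `strlen` would truncate at. A self-contained sketch of the difference, using stand-in types rather than the real `paddle_optimizer` structs:

```cpp
#include <cstring>
#include <iostream>
#include <string>

// Stand-in for optimizer->impl in this file.
struct Impl {
  std::string state_bytes{"ab\0cd", 5};  // embedded NUL: strlen sees only "ab"
  const char *SerializeState(int *state_len) {
    *state_len += static_cast<int>(state_bytes.size());
    return state_bytes.data();
  }
};
struct OptimizerDemo {
  Impl *impl;
};

int get_state(OptimizerDemo *o, const char **state) {
  int state_len = 0;
  *state = o->impl->SerializeState(&state_len);
  return state_len;  // 5, where strlen(*state) would report 2
}

int main() {
  Impl impl;
  OptimizerDemo o{&impl};
  const char *s = nullptr;
  std::cout << get_state(&o, &s) << " vs strlen " << std::strlen(s) << "\n";
  return 0;
}
```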
paddle/optimizer/parameter_optimizer.cc

```diff
@@ -10,8 +10,8 @@
 namespace paddle {
 namespace optimizer {

-ParameterOptimizer *ParameterOptimizer::Create(
-    const std::string &config_proto) {
+ParameterOptimizer *ParameterOptimizer::Create(const std::string &config_proto,
+                                               Tensor *parameter) {
   paddle::OptimizerConfig config;
   CHECK(config.ParseFromString(config_proto) == 0)
       << "failed parse optimizer config";
@@ -29,34 +29,38 @@ ParameterOptimizer *ParameterOptimizer::Create(
   };
   LrPolicy *lr = select_lr_policy(config);
   auto select_optimizer =
-      [=](const OptimizerConfig &config) -> ParameterOptimizer * {
+      [=](Tensor *parameter,
+          const OptimizerConfig &config) -> ParameterOptimizer * {
     if (config.optimizer() == OptimizerConfig::SGD) {
-      return new SGDOptimizer(config.sgd().momentum(),
+      return new SGDOptimizer(parameter,
+                              lr,
+                              config.sgd().momentum(),
                               config.sgd().decay(),
-                              config.sgd().nesterov(),
-                              lr);
+                              config.sgd().nesterov());
     }
     if (config.optimizer() == OptimizerConfig::Adadelta) {
-      return new AdadeltaOptimizer(config.adadelta().rho(),
+      return new AdadeltaOptimizer(parameter,
+                                   lr,
+                                   config.adadelta().rho(),
                                    config.adadelta().epsilon(),
-                                   config.adadelta().decay(),
-                                   lr);
+                                   config.adadelta().decay());
     }
     if (config.optimizer() == OptimizerConfig::Adagrad) {
       return new AdagradOptimizer(
-          config.adagrad().epsilon(), config.adagrad().decay(), lr);
+          parameter, lr, config.adagrad().epsilon(), config.adagrad().decay());
     }
     if (config.optimizer() == OptimizerConfig::Adam) {
-      return new AdamOptimizer(config.adam().beta_1(),
+      return new AdamOptimizer(parameter,
+                               lr,
+                               config.adam().beta_1(),
                                config.adam().beta_2(),
                                config.adam().epsilon(),
-                               config.adam().decay(),
-                               lr);
+                               config.adam().decay());
     }
     // default
     LOG(WARNING) << "have not select any Optimizer. use SGDOptimizer in default";
-    return new SGDOptimizer(0.0, 0.0, false, lr);
+    return new SGDOptimizer(parameter, lr, 0.0, 0.0, false);
   };
   return select_optimizer(config);
 }
@@ -66,7 +70,5 @@ float *ParameterOptimizer::get_weight(int *param_size) const {
   return parameter_->get_buffer();
 }
-void ParameterOptimizer::set_weight(Tensor *p) {
-  parameter_ = p;
-}
 }  // namespace optimizer
 }  // namespace paddle
```
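`Create` now threads the caller-supplied `parameter` tensor through the selection lambda, so each optimizer is fully constructed in one step instead of receiving its weights later via `set_weight`. Note that in the hunk the lambda gains a `Tensor *` parameter while the visible call site still reads `select_optimizer(config)`; the sketch below assumes the tensor is actually passed through. All types here are placeholders, not Paddle's:

```cpp
#include <cstddef>
#include <iostream>
#include <memory>
#include <string>

struct Tensor {
  size_t size;
};

struct Opt {
  virtual ~Opt() {}
  virtual const char *name() const = 0;
};

struct SGD : Opt {
  SGD(Tensor *p, double momentum) : p_(p), momentum_(momentum) {}
  const char *name() const override { return "sgd"; }
  Tensor *p_;
  double momentum_;
};

struct Adam : Opt {
  Adam(Tensor *p, double beta_1) : p_(p), beta_1_(beta_1) {}
  const char *name() const override { return "adam"; }
  Tensor *p_;
  double beta_1_;
};

// Same shape as Create: a pre-built parameter threaded through a selector.
Opt *Create(const std::string &kind, Tensor *parameter) {
  auto select_optimizer = [=](Tensor *param) -> Opt * {
    if (kind == "sgd") return new SGD(param, 0.9);
    if (kind == "adam") return new Adam(param, 0.9);
    return new SGD(param, 0.0);  // default branch, as in the diff
  };
  return select_optimizer(parameter);
}

int main() {
  Tensor t{128};
  std::unique_ptr<Opt> o(Create("adam", &t));
  std::cout << o->name() << "\n";  // adam
  return 0;
}
```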
paddle/optimizer/parameter_optimizer.h

```diff
@@ -5,32 +5,35 @@
 #include <string>
 #include "OptimizerConfig.pb.h"
 #include "lr_policy.h"
+#include "serialization.h"
 #include "tensor.h"

+// Not Implement Yet, macro
+#define NIMPL crash(__PRETTY_FUNCTION__, " not implemented yet")
+
 namespace paddle {
 namespace optimizer {

 const std::string kOptimizerVersion = "1.0";

 class ParameterOptimizer {
 public:
   /**
    * @brief update hook for algorithm need to traverse parameter more than
    * once.
    */
-  ParameterOptimizer(LrPolicy *lr) : lr_policy_(lr), num_sample_passed_(0) {}
+  ParameterOptimizer(Tensor *parameter, LrPolicy *lr)
+      : parameter_(parameter), lr_policy_(lr), num_sample_passed_(0) {}
   virtual ~ParameterOptimizer() { delete parameter_; };

-  static ParameterOptimizer *Create(const std::string &config_proto);
-  virtual const char *SerializeState();
-  virtual void DeSerializeState(const std::string &state);
+  static ParameterOptimizer *Create(const std::string &config_proto,
+                                    Tensor *parameter);
   virtual void Update(const Tensor *gradient) = 0;
   virtual float *get_weight(int *param_size) const;
-  virtual void set_weight(Tensor *parameter);
+  virtual const char *SerializeState(int *state_len) = 0;
+  virtual void DeSerializeState(const std::string &state) = 0;

 protected:
   Tensor *parameter_;
   // learning rate policy
   LrPolicy *lr_policy_;
   uint64_t num_sample_passed_;
```
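With `parameter_` now initialized in the constructor and deleted in `~ParameterOptimizer`, the optimizer owns the tensor handed to `Create`; the caller must not free or reuse it. A small sketch of that ownership contract, with placeholder types:

```cpp
#include <cstddef>
#include <iostream>

struct Tensor {
  explicit Tensor(size_t n) : size(n) {}
  size_t size;
};

class OwningOptimizerDemo {
 public:
  // Takes ownership of parameter, as the header above now does.
  explicit OwningOptimizerDemo(Tensor *parameter) : parameter_(parameter) {}
  ~OwningOptimizerDemo() { delete parameter_; }

 private:
  Tensor *parameter_;
};

int main() {
  // The optimizer frees the tensor; deleting it here too would double-free.
  OwningOptimizerDemo opt(new Tensor(512));
  std::cout << "optimizer owns the tensor\n";
  return 0;
}
```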
paddle/optimizer/serialization.h

```diff
@@ -2,6 +2,7 @@
 #include <sstream>
 #include <string>
+#include <type_traits>
 #include "OptimizerConfig.pb.h"
 #include "paddle/utils/Logging.h"
 #include "tensor.h"
@@ -9,6 +10,17 @@
 namespace paddle {
 namespace optimizer {

+inline unsigned CalStateSize(int *state_len) { return 0; }
+
+template <typename HEAD, typename... TAIL>
+unsigned CalStateSize(const HEAD &head, const TAIL &... tail) {
+  if (std::is_fundamental<HEAD>::value) {
+    return sizeof head + CalStateSize(tail...);
+  } else {
+    return sizeof(head[0] * head->size()) + CalStateSize(tail...);
+  }
+}
+
 static void TensorToProto(const Tensor &tensor, TensorProto *proto) {
   proto->set_data_type(TensorProto::PADDLE_ELEMENT_TYPE_FLOAT32);
   proto->set_size(tensor.size());
```
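Two things about the recursion above are worth tracing: the base case takes an `int *state_len` that the recursive call `CalStateSize(tail...)` never supplies, so the recursion has no terminating overload to land on, and because C++11 has no `if constexpr`, `head[0]` and `head->size()` must compile even when `HEAD` is a fundamental type such as `double`. A self-contained sketch of the same variadic size computation with both points addressed through `enable_if` overloads (an assumed correction, not the commit's code):

```cpp
#include <cstddef>
#include <iostream>
#include <type_traits>
#include <vector>

// Stand-in for paddle's Tensor: anything with size() and indexable storage.
struct FakeTensor {
  std::vector<float> data;
  size_t size() const { return data.size(); }
};

// Zero-argument base case terminates the recursion.
inline unsigned CalStateSize() { return 0; }

// Fundamental types contribute sizeof(head).
template <typename HEAD, typename... TAIL>
typename std::enable_if<std::is_fundamental<HEAD>::value, unsigned>::type
CalStateSize(const HEAD &head, const TAIL &... tail) {
  return sizeof(head) + CalStateSize(tail...);
}

// Tensor pointers contribute element-size times element-count.
template <typename HEAD, typename... TAIL>
typename std::enable_if<!std::is_fundamental<HEAD>::value, unsigned>::type
CalStateSize(const HEAD &head, const TAIL &... tail) {
  return sizeof(head->data[0]) * head->size() + CalStateSize(tail...);
}

int main() {
  FakeTensor t;
  t.data.resize(8);
  double rho = 0.9;
  bool nesterov = false;
  // 8 floats (32 bytes) + one double (8) + one bool (1) = 41
  std::cout << CalStateSize(&t, rho, nesterov) << "\n";
  return 0;
}
```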
paddle/optimizer/sgd_optimizer.h

```diff
@@ -7,20 +7,26 @@ namespace optimizer {
 class SGDOptimizer : public ParameterOptimizer {
 public:
-  SGDOptimizer(double m, double d, bool n, LrPolicy *lr)
-      : ParameterOptimizer(lr),
+  SGDOptimizer(Tensor *parameter, LrPolicy *lr, double m, double d, bool n)
+      : ParameterOptimizer(parameter, lr),
         momentums_(nullptr),
         momentum_(m),
         decay_(d),
-        nesterov_(n) {}
-  virtual ~SGDOptimizer() { delete momentums_; }
+        nesterov_(n) {
+    if (momentum_ != 0.0) {
+      size_t size = p->size();
+      // TODO: fix it with align aware allocator bind to Tensor
+      if (momentums_) delete momentums_;
+      momentums_ = new Tensor(size);
+    }
+  }
+  virtual ~SGDOptimizer() {
+    if (momentums_) delete momentums_;
+  }
   void Update(const Tensor *gradient);
-  const char *SerializeState();
+  const char *SerializeState(int *state_len);
   void DeSerializeState(const std::string &state);
   void set_weight(Tensor *p);
   float *get_weight(int *param_size) const;

 private:
   Tensor *momentums_;
   double momentum_;
```
paddle/optimizer/sgd_optmizer.cc

```diff
@@ -4,15 +4,6 @@
 namespace paddle {
 namespace optimizer {

-void SGDOptimizer::set_weight(Tensor *p) {
-  parameter_ = p;
-  if (momentum_ != 0.0) {
-    size_t size = p->size();
-    // TODO: fix it with align aware allocator bind to Tensor
-    momentums_ = new Tensor(size);
-  }
-}
-
 void SGDOptimizer::Update(const Tensor *gradient) {
   num_sample_passed_ += 1;
   double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
@@ -36,28 +27,30 @@ void SGDOptimizer::Update(const Tensor *gradient) {
   }
 }

-const char *SGDOptimizer::SerializeState() {
+const char *SGDOptimizer::SerializeState(int *state_len) {
   OptimizerState state;
-  // version is a global const value
-  state.set_version(kOptimizerVersion);
-  TensorToProto(*parameter_, state.add_data());
-  TensorToProto(*momentums_, state.add_data());
-  state.add_hyperparam(momentum_);
+  state.set_learning_rate(lr_policy_->LearningRate(num_sample_passed_));
+  state.set_num_sample_passed(num_sample_passed_);
+  TensorToProto(*parameter_, state.mutable_parameter());
+  TensorToProto(*momentums_, state.mutable_momentums());
+  state.set_momentum(momentum_);
+  state.set_decay(decay_);
+  state.set_nesterov(nesterov_);
+  *state_len +=
+      CalStateSize(parameter_, momentums_, momentum_, decay_, nesterov_);
   return state.SerializeAsString().c_str();
 }

 void SGDOptimizer::DeSerializeState(const std::string &str) {
   OptimizerState state;
   state.ParseFromString(str);
-  CHECK(state.version() == kOptimizerVersion)
-      << "error version of state"
-      << "expected : " << kOptimizerVersion << "get : " << state.version();
+  lr_policy_->set(state.learning_rate());
   num_sample_passed_ = state.num_sample_passed();
-  ProtoToTensor(state.data(0), parameter_);
-  if (state.data_size() == 2) {
-    ProtoToTensor(state.data(1), momentums_);
-    momentum_ = state.hyperparam(0);
-  }
+  ProtoToTensor(state.parameter(), parameter_);
+  ProtoToTensor(state.parameter(), momentums_);
+  momentum_ = state.momentum();
 }
 }  // namespace optimizer
```
proto/OptimizerConfig.proto

```diff
@@ -5,13 +5,14 @@ option optimize_for = LITE_RUNTIME;
 package paddle;

 message SGDConfig {
   // SGD
   // momentum: float >= 0. Parameter updates momentum.
   // decay: float >= 0. Learning rate decay over each update.
   // nesterov: boolean. Whether to apply Nesterov momentum.
   optional double momentum = 21 [default = 0.0];
   optional double decay = 23 [default = 0.0];
   optional bool nesterov = 24 [default = false];
 }
@@ -75,14 +76,38 @@ enum DataType {
   }
   required DataType data_type = 1;
   repeated bytes content = 2;
+  optional uint64 size = 3;
 }

 message OptimizerState {
-  // match old training state with format parser
-  required string version = 100;
-  repeated TensorProto data = 1;
-  repeated double hyperparam = 3;
+  optional double learning_rate = 101;
+  optional double lr_decay_a = 102;
+  optional double lr_decay_b = 103;
+  optional double num_sample_passed = 104;
+  // momentum
+  optional TensorProto parameter = 105;
+  optional TensorProto momentums = 1;
+  // adadelta
+  optional TensorProto accum_gradient = 2;
+  optional TensorProto accum_delta = 3;
+  optional TensorProto update_delta = 4;
+  // adam
+  optional TensorProto velocitys = 5;
+  // momentum
+  optional double momentum = 6;
+  optional double decay = 7;
+  optional bool nesterov = 8;
+  // adadelta
+  optional double rho = 9;
+  optional double epsilon = 10;
+  // adam
+  optional double beta_1 = 11;
+  optional double beta_2 = 12;
 }

 message OptimizerConfig {
```
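Since `OptimizerState` is now the single container for every optimizer's checkpoint, a round trip through it is plain protobuf serialization. A short sketch, assuming `OptimizerConfig.pb.h` has been generated from this file with `protoc --cpp_out`:

```cpp
#include <iostream>
#include <string>
#include "OptimizerConfig.pb.h"  // generated by protoc from this proto file

int main() {
  paddle::OptimizerState state;
  state.set_learning_rate(0.01);
  state.set_num_sample_passed(1024);
  state.set_momentum(0.9);

  // Wire-format round trip, as SerializeState/DeSerializeState do.
  std::string wire;
  state.SerializeToString(&wire);

  paddle::OptimizerState restored;
  restored.ParseFromString(wire);
  std::cout << restored.momentum() << "\n";  // 0.9
  return 0;
}
```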