机器未来 / Paddle (forked from PaddlePaddle / Paddle)
Commit 8b6f374f, authored May 23, 2017 by Yibing Liu, committed via GitHub on May 23, 2017
Merge pull request #2216 from kuke/enable_grad_clipping_dev
Enable the setting of global gradient clipping threshold
Parents: 285bee54 5cf2b2e8
Showing 6 changed files with 25 additions and 8 deletions (+25 -8)
paddle/parameter/FirstOrderOptimizer.cpp            +15 -6
paddle/parameter/OptimizerWithRegularizer.cpp        +2 -1
paddle/parameter/ParameterOptimizer.h                +2 -0
proto/TrainerConfig.proto                            +3 -0
python/paddle/trainer/config_parser.py               +1 -0
python/paddle/trainer_config_helpers/optimizers.py   +2 -1
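Taken together, these changes expose a single global clipping threshold: the Python settings() helper now accepts gradient_clipping_threshold, the value is forwarded into OptimizationConfig, and the C++ optimizers apply it to every parameter that does not set its own threshold. A minimal sketch of a trainer config using the new option; the batch size, learning rate, and optimizer choice are illustrative assumptions, only gradient_clipping_threshold is introduced by this commit:

from paddle.trainer_config_helpers import *

settings(
    batch_size=128,
    learning_rate=1e-3,
    learning_method=MomentumOptimizer(),
    # New: one global threshold, applied to any parameter that does not
    # define a local gradient_clipping_threshold of its own.
    gradient_clipping_threshold=25.0)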
paddle/parameter/FirstOrderOptimizer.cpp
@@ -161,6 +161,7 @@ void AdaDeltaParameterOptimizer::update(const VectorPtr vecs[],
                                         const ParameterConfig& config,
                                         size_t sparseId) const {
+  CHECK(sparseId == -1LU) << "Sparse update is not supported";
   BaseMatrix& value = *vecs[PARAMETER_VALUE];
   BaseMatrix& grad = *vecs[PARAMETER_GRADIENT];
   BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM];
@@ -265,6 +266,7 @@ void AdamParameterOptimizer::update(const VectorPtr vecs[],
                                    const ParameterConfig& config,
                                    size_t sparseId) const {
+  CHECK(sparseId == -1UL) << "Sparse update is not supported";
   real beta1_power = std::pow(beta1_, step_);
   real beta2_power = std::pow(beta2_, step_);
   real learningRate = config.learning_rate() * learningRate_;
@@ -303,18 +305,25 @@ void AdamaxParameterOptimizer::update(const VectorPtr vecs[],
 void OptimizerWithGradientClipping::update(const VectorPtr vecs[],
                                            const ParameterConfig& config,
                                            size_t sparseId) const {
+  real globalThreshold = optConfig_.gradient_clipping_threshold();
+  real localThreshold = config.gradient_clipping_threshold();
+
+  // Use local gradient clipping threshold if it's enabled,
+  // otherwise using the global one.
+  real threshold = localThreshold > 0.0f ? localThreshold : globalThreshold;
+  std::string field = localThreshold > 0.0f ? "local" : "global";
+
   real maxAbsGrad = vecs[PARAMETER_GRADIENT]->getAbsMax();
-  if (maxAbsGrad > config.gradient_clipping_threshold()) {
+  if (maxAbsGrad > threshold) {
     if (FLAGS_log_clipping) {
       real avgAbsGrad = vecs[PARAMETER_GRADIENT]->getAbsSum() /
                         vecs[PARAMETER_GRADIENT]->getSize();
-      LOG(INFO) << "parameter=" << config.name() << " need clipping,"
-                << " max grad=" << maxAbsGrad << " avg grad=" << avgAbsGrad;
+      LOG(INFO) << "parameter=" << config.name() << " need clipping by " << field
+                << " threshold=" << threshold << ", max grad=" << maxAbsGrad
+                << ", avg grad=" << avgAbsGrad;
     }
-    vecs[PARAMETER_GRADIENT]->clip(-config.gradient_clipping_threshold(),
-                                   config.gradient_clipping_threshold());
+    vecs[PARAMETER_GRADIENT]->clip(-threshold, threshold);
   }
   optimizer_->update(vecs, config, sparseId);
 }
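The update above is where the global value takes effect: a parameter-local threshold, when positive, overrides the global one, and the gradient vector is clipped element-wise to [-threshold, threshold] only when its largest absolute entry exceeds the threshold. A rough NumPy sketch of that rule, using illustrative names rather than Paddle's C++ API:

import numpy as np

def clip_gradient(grad, local_threshold, global_threshold):
    # Local threshold wins when it is enabled (> 0); otherwise fall back to the global one.
    threshold = local_threshold if local_threshold > 0.0 else global_threshold
    # In Paddle the clipping optimizer is only created when some threshold is positive.
    if threshold > 0.0 and np.abs(grad).max() > threshold:
        grad = np.clip(grad, -threshold, threshold)
    return grad

g = np.array([0.5, -30.0, 3.0])
print(clip_gradient(g, local_threshold=0.0, global_threshold=25.0))  # the -30.0 entry is clipped to -25.0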
paddle/parameter/OptimizerWithRegularizer.cpp
@@ -131,7 +131,8 @@ ParameterOptimizer* OptimizerWithRegularizer::create(
     bool inPserver) {
   ParameterOptimizer* optimizer = ParameterOptimizer::create(optConfig, inPserver);
-  if (paraConfig.gradient_clipping_threshold() > 0.0f &&
+  if ((optConfig.gradient_clipping_threshold() > 0.0f ||
+       paraConfig.gradient_clipping_threshold() > 0.0f) &&
       !dynamic_cast<AddOptimizer*>(optimizer)) {
     optimizer = new OptimizerWithGradientClipping(optConfig, optimizer);
   }
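This is the piece that actually enables the global setting: previously the OptimizerWithGradientClipping wrapper was only created when the parameter's own threshold was positive, while now a positive global threshold in the optimization config is sufficient (except for AddOptimizer). A small runnable sketch of the new condition, with plain arguments standing in for the C++ config objects:

def needs_clipping_wrapper(opt_conf_threshold, para_conf_threshold, is_add_optimizer):
    # Mirrors the new condition: wrap when either the global or the
    # parameter-local threshold is enabled, except for AddOptimizer.
    return ((opt_conf_threshold > 0.0 or para_conf_threshold > 0.0)
            and not is_add_optimizer)

print(needs_clipping_wrapper(25.0, 0.0, False))  # True: a global threshold alone now suffices
print(needs_clipping_wrapper(0.0, 0.0, False))   # False: clipping stays off by default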
paddle/parameter/ParameterOptimizer.h
@@ -167,6 +167,7 @@ public:
     }
     parameterTypes_.push_back(type);
   }
+  real getLearningRate() const { return learningRate_; }
   virtual void setNoDecay() { applyDecay_ = false; }

@@ -201,6 +202,7 @@ protected:
    * so, if lr change in StartBatch, please assign to learningRate_
    */
   real learningRate_;
+  std::unique_ptr<LearningRateScheduler> learningRateScheduler_;
   int64_t pass_;  // current training pass (starting from 0)
   bool firstTime_;
proto/TrainerConfig.proto
@@ -128,6 +128,9 @@ message OptimizationConfig {
   // when async_lagged_grad_discard_ratio * num_gradient_servers commit passed,
   // current async gradient will be discard silently.
   optional double async_lagged_grad_discard_ratio = 37 [default = 1.5];
+
+  // global threshold for gradient clipping
+  optional double gradient_clipping_threshold = 38 [default = 0.0];
 };

 message TrainerConfig {
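The new field gets tag 38 and a default of 0.0, so clipping stays disabled unless a positive threshold is configured. A small sketch of touching the field through the generated protobuf bindings; the module path paddle.proto.TrainerConfig_pb2 is an assumption about how the bindings are packaged, not something this diff shows:

# Assumes protobuf Python bindings generated from proto/TrainerConfig.proto.
from paddle.proto.TrainerConfig_pb2 import OptimizationConfig

opt_conf = OptimizationConfig()
print(opt_conf.gradient_clipping_threshold)   # 0.0: disabled by default
opt_conf.gradient_clipping_threshold = 25.0   # enable a global clipping threshold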
python/paddle/trainer/config_parser.py
@@ -3377,6 +3377,7 @@ settings = dict(
     algorithm='async_sgd',
     async_lagged_grad_discard_ratio=1.5,
     learning_method='momentum',
+    gradient_clipping_threshold=None,
     num_batches_per_send_parameter=None,
     num_batches_per_get_parameter=None,
     center_parameter_update_method=None,
python/paddle/trainer_config_helpers/optimizers.py
@@ -408,7 +408,8 @@ def settings(batch_size,
     args = [
         'batch_size', 'learning_rate', 'learning_rate_decay_a',
-        'learning_rate_decay_b', 'learning_rate_schedule', 'learning_rate_args'
+        'learning_rate_decay_b', 'learning_rate_schedule', 'learning_rate_args',
+        'gradient_clipping_threshold'
     ]
     kwargs = dict()
     kwargs['algorithm'] = algorithm