Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
adc26dff
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
adc26dff
编写于
1月 15, 2018
作者:
F
fengjiayi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
developing GradientClipByGlobalNorm
上级
d23ea4ef
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
53 addition
and
21 deletion
+53
-21
python/paddle/v2/fluid/clip.py
python/paddle/v2/fluid/clip.py
+47
-7
python/paddle/v2/fluid/layers/ops.py
python/paddle/v2/fluid/layers/ops.py
+6
-14
未找到文件。
python/paddle/v2/fluid/clip.py
浏览文件 @
adc26dff
import
functools
import
layers
from
framework
import
Variable
from
.
import
core
__all__
=
[
...
...
@@ -44,7 +45,7 @@ def error_clip_callback(block, context):
class
BaseGradientClipAttr
(
object
):
def
process_context
(
self
,
context
,
p
_g
):
def
process_context
(
self
,
context
,
p
aram
,
grad
):
raise
NotImplementedError
()
def
create_operators
(
self
,
param
,
grad
):
...
...
@@ -52,7 +53,7 @@ class BaseGradientClipAttr(object):
class
NullGradientClipAttr
(
BaseGradientClipAttr
):
def
process_context
(
self
,
context
,
p
_g
):
def
process_context
(
self
,
context
,
p
aram
,
grad
):
pass
def
create_operators
(
self
,
param
,
grad
):
...
...
@@ -69,7 +70,7 @@ class GradientClipByValue(BaseGradientClipAttr):
self
.
max
=
max
self
.
min
=
min
def
process_context
(
self
,
context
,
p
_g
):
def
process_context
(
self
,
context
,
p
aram
,
grad
):
pass
def
create_operators
(
self
,
param
,
grad
):
...
...
@@ -81,7 +82,7 @@ class GradientClipByNorm(BaseGradientClipAttr):
def
__init__
(
self
,
clip_norm
):
self
.
clip_norm
=
clip_norm
def
process_context
(
self
,
context
,
p
_g
):
def
process_context
(
self
,
context
,
p
aram
,
grad
):
pass
def
create_operators
(
self
,
param
,
grad
):
...
...
@@ -89,6 +90,46 @@ class GradientClipByNorm(BaseGradientClipAttr):
return
param
,
new_grad
class
GradientClipByGlobalNorm
(
BaseGradientClipAttr
):
global_norm_var
=
None
clip_norm_var
=
None
ratio_var
=
None
@
classmethod
def
init
(
cls
,
clip_norm
):
cls
.
global_norm_var
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
"float32"
,
value
=
0.0
)
cls
.
clip_norm_var
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
"float32"
,
value
=
clip_norm
)
def
__init__
(
self
):
if
not
(
isinstance
(
self
.
__class__
.
global_norm_var
,
Variable
)
and
isinstance
(
self
.
__class__
.
clip_norm_var
,
Variable
)):
raise
ValueError
(
"Class 'GradientClipByGlobalNorm' has not been properly initialized. Please call GradientClipByGlobalNorm.init() first."
)
def
process_context
(
self
,
context
,
param
,
grad
):
local_norm_var
=
layers
.
reduce_sum
(
x
=
layers
.
pow
(
x
=
grad
,
factor
=
2
),
reduce_all
=
True
)
layers
.
sums
(
input
=
[
local_norm_var
,
self
.
__class__
.
global_norm_var
],
out
=
[
self
.
__class__
.
global_norm_var
])
def
create_operators
(
self
,
param
,
grad
):
if
self
.
__class__
.
ratio_var
is
None
:
self
.
__class__
.
global_norm_var
=
layers
.
sqrt
(
x
=
self
.
__class__
.
global_norm_var
)
self
.
__class__
.
ratio_var
=
layers
.
elementwise_div
(
x
=
self
.
__class__
.
clip_norm_var
,
y
=
layers
.
elementwise_max
(
x
=
self
.
__class__
.
clip_norm_var
,
y
=
self
.
__class__
.
global_norm_var
))
# 缺乏elementwise_max
# 没法将ratio_var送给scale_op。
# new_grad = layers.
def
append_gradient_clip_ops
(
param_grad
):
context
=
dict
()
create_op_callbacks
=
[]
...
...
@@ -98,10 +139,9 @@ def append_gradient_clip_ops(param_grad):
clip_attr
=
NullGradientClipAttr
()
if
not
isinstance
(
clip_attr
,
BaseGradientClipAttr
):
raise
TypeError
(
"clip attribute should be an instance of BaseGradientClippingAttr"
)
"clip attribute should be an instance of BaseGradientClipAttr"
)
clip_attr
.
process_context
(
context
=
context
,
p
_g
=
param_grad
)
clip_attr
.
process_context
(
context
=
context
,
p
aram
=
p
,
grad
=
g
)
create_op_callbacks
.
append
(
functools
.
partial
(
clip_attr
.
create_operators
,
param
=
p
,
grad
=
g
))
...
...
python/paddle/v2/fluid/layers/ops.py
浏览文件 @
adc26dff
from
..registry
import
register_layer
__activations__
=
[
'abs'
,
'tanh'
,
'sigmoid'
,
'relu'
,
'sqrt'
,
'ceil'
,
'floor'
,
'log'
,
'round'
'abs'
,
'tanh'
,
'sigmoid'
,
'relu'
,
'sqrt'
,
'ceil'
,
'floor'
,
'log'
,
'round'
,
'pow'
]
__all__
=
[
'mean'
,
'mul'
,
'reshape'
,
'scale'
,
'transpose'
,
'sigmoid_cross_entropy_with_logits'
,
'elementwise_add'
,
'elementwise_div'
,
'elementwise_sub'
,
'elementwise_mul'
,
'clip'
,
'clip_by_norm'
,
'sequence_softmax'
,
'mean'
,
'mul'
,
'reshape'
,
'scale'
,
'transpose'
,
'sigmoid_cross_entropy_with_logits'
,
'elementwise_add'
,
'elementwise_div'
,
'elementwise_sub'
,
'elementwise_mul'
,
'clip'
,
'clip_by_norm'
,
'sequence_softmax'
,
'reduce_sum'
]
+
__activations__
for
_OP
in
set
(
__all__
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录