Commit e79df7c1

Authored Jun 02, 2020 by mindspore-ci-bot
Committed by Gitee on Jun 02, 2020

!1697 optimize clip_norm

Merge pull request !1697 from chenhaozhe/bert-optimization

Parents: 75f791d8 435fc12e
Showing 5 changed files with 33 additions and 7 deletions (+33 −7)
mindspore/ccsrc/session/session_basic.cc       +1  −1
mindspore/nn/layer/basic.py                    +12 −1
model_zoo/bert/src/bert_for_pre_training.py    +1  −2
model_zoo/bert/src/config.py                   +3  −3
tests/ut/python/nn/test_clip_by_norm.py        +16 −0
mindspore/ccsrc/session/session_basic.cc

@@ -588,7 +588,7 @@ std::shared_ptr<KernelGraph> SessionBasic::ConstructKernelGraph(const FuncGraphP
   graph->set_output_null(is_trace_back);
   AddParameterToGraphInputs(func_graph->parameters(), graph.get());
   MS_EXCEPTION_IF_NULL(context_);
-  FuncGraphManagerPtr manager = context_->manager();
+  FuncGraphManagerPtr manager = MakeManager({graph});
   if (manager) {
     manager->AddFuncGraph(graph);
     graph->set_manager(manager);
mindspore/nn/layer/basic.py

@@ -22,6 +22,7 @@ from mindspore.ops import operations as P
 from mindspore.ops import functional as F
 from mindspore.ops.functional import identity
 from mindspore.ops.operations import _inner_ops as inner
+from mindspore.ops.primitive import constexpr
 from mindspore.common.parameter import Parameter
 from mindspore._extends import cell_attr_register
 from mindspore.common.api import ms_function

@@ -236,6 +237,13 @@ class Dense(Cell):
         return str_info


+@constexpr
+def _is_equal_one(x):
+    if x is None:
+        return False
+    return bool(x.asnumpy().mean() == 1.0)
+
+
 class ClipByNorm(Cell):
     r"""
     Clips tensor values to a maximum :math:`L_2`-norm.

@@ -290,7 +298,10 @@ class ClipByNorm(Cell):
         l2sum_safe = self.select_(cond, l2sum, self.cast(ones_, self.dtype(l2sum)))
         l2norm = self.select_(cond, self.sqrt(l2sum_safe), l2sum)

-        intermediate = x * clip_norm
+        if _is_equal_one(clip_norm):
+            intermediate = x
+        else:
+            intermediate = x * clip_norm
         max_norm = self.max_op(l2norm, clip_norm)
         values_clip = self.cast(intermediate, mstype.float32) / self.expand_dims(max_norm, -1)
         values_clip = self.reshape(values_clip, self.shape(x))
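
The new `_is_equal_one` helper is decorated with `@constexpr`, so it is evaluated while the graph is being compiled rather than while it runs: if `clip_norm` is a compile-time constant equal to 1.0, the redundant `x * clip_norm` multiply is dropped from the graph, and the `x is None` guard suggests that a `clip_norm` known only at run time reaches the helper as `None`, in which case the original multiply is kept. The following is a minimal sketch of the same pattern, not code from this commit; the `ScaleUnlessOne` cell and `_is_one` helper are hypothetical and assume `constexpr` behaves as it does in this version of `mindspore/nn/layer/basic.py`.

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops.primitive import constexpr


@constexpr
def _is_one(value):
    # Evaluated during graph construction; a non-constant input shows up as None.
    if value is None:
        return False
    return bool(value.asnumpy().mean() == 1.0)


class ScaleUnlessOne(nn.Cell):
    """Hypothetical cell: multiply x by `scale`, skipping the multiply when scale == 1."""

    def __init__(self, scale):
        super(ScaleUnlessOne, self).__init__()
        self.scale = Tensor(np.array([scale]).astype(np.float32))

    def construct(self, x):
        if _is_one(self.scale):  # resolved at compile time, so only one branch stays in the graph
            return x
        return x * self.scale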
model_zoo/bert/src/bert_for_pre_training.py

@@ -32,7 +32,6 @@ from .bert_model import BertModel
 GRADIENT_CLIP_TYPE = 1
 GRADIENT_CLIP_VALUE = 1.0
-_nn_clip_by_norm = nn.ClipByNorm()
 clip_grad = C.MultitypeFuncGraph("clip_grad")

@@ -57,7 +56,7 @@ def _clip_grad(clip_type, clip_value, grad):
         new_grad = C.clip_by_value(grad, F.cast(F.tuple_to_array((-clip_value,)), dt),
                                    F.cast(F.tuple_to_array((clip_value,)), dt))
     else:
-        new_grad = _nn_clip_by_norm(grad, F.cast(F.tuple_to_array((clip_value,)), dt))
+        new_grad = nn.ClipByNorm()(grad, F.cast(F.tuple_to_array((clip_value,)), dt))
     return new_grad
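
In `bert_for_pre_training.py` the module-level `_nn_clip_by_norm = nn.ClipByNorm()` instance is removed and the cell is now constructed at the call site inside `_clip_grad`, keeping the gradient-clipping hook self-contained. The sketch below shows how a `clip_grad` MultitypeFuncGraph like this one is typically applied to a tuple of gradients; this wiring is not part of the diff, the import path reflects the model_zoo layout as an assumption, and the `ClipGradients` wrapper is illustrative.

import mindspore.nn as nn
from mindspore.ops import composite as C
from mindspore.ops import functional as F

# Assumed import path for the names defined in the file above (model_zoo/bert/src).
from src.bert_for_pre_training import clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE


class ClipGradients(nn.Cell):
    """Hypothetical wrapper: apply clip_grad to every gradient in a tuple."""

    def __init__(self):
        super(ClipGradients, self).__init__()
        self.hyper_map = C.HyperMap()

    def construct(self, grads):
        # Bind the clip type and value first, then map over the gradient tuple.
        return self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)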
model_zoo/bert/src/config.py

@@ -56,7 +56,7 @@ if cfg.bert_network == 'base':
     bert_net_cfg = BertConfig(
         batch_size=32,
         seq_length=128,
-        vocab_size=21136,
+        vocab_size=21128,
         hidden_size=768,
         num_hidden_layers=12,
         num_attention_heads=12,

@@ -77,7 +77,7 @@ if cfg.bert_network == 'nezha':
     bert_net_cfg = BertConfig(
         batch_size=32,
         seq_length=128,
-        vocab_size=21136,
+        vocab_size=21128,
         hidden_size=1024,
         num_hidden_layers=24,
         num_attention_heads=16,

@@ -98,7 +98,7 @@ if cfg.bert_network == 'large':
     bert_net_cfg = BertConfig(
         batch_size=16,
         seq_length=512,
-        vocab_size=30528,
+        vocab_size=30522,
         hidden_size=1024,
         num_hidden_layers=24,
         num_attention_heads=16,
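
The `config.py` change only corrects the vocabulary sizes: 21128 is the size of the Google Chinese BERT `vocab.txt`, which the base and nezha settings appear to target, and 30522 is the size of the English uncased BERT vocabulary used by the large setting. A quick way to confirm that `vocab_size` matches the vocabulary file you actually train with is sketched below; it is not from the repository, and the `vocab.txt` path is illustrative.

def count_vocab_entries(vocab_path):
    """Count the tokens in a WordPiece vocab.txt (one token per non-empty line)."""
    with open(vocab_path, "r", encoding="utf-8") as vocab_file:
        return sum(1 for line in vocab_file if line.strip())


if __name__ == "__main__":
    # Expect 21128 for the Chinese BERT vocabulary (base/nezha configs above)
    # and 30522 for the English uncased vocabulary (large config above).
    print(count_vocab_entries("vocab.txt"))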
tests/ut/python/nn/test_clip_by_norm.py

@@ -26,3 +26,19 @@ def test_clip_by_norm():
     x = Tensor(np.array([[-2, 0, 0], [0, 3, 4]]).astype(np.float32))
     clip_norm = Tensor(np.array([1]).astype(np.float32))
     clip_by_norm(x, clip_norm)
+
+
+@non_graph_engine
+def test_clip_by_norm_const():
+    class Network(nn.Cell):
+        def __init__(self):
+            super(Network, self).__init__()
+            self.norm_value = Tensor(np.array([1]).astype(np.float32))
+            self.clip = nn.ClipByNorm()
+
+        def construct(self, x):
+            return self.clip(x, self.norm_value)
+
+    net = Network()
+    x = Tensor(np.array([[-2, 0, 0], [0, 3, 4]]).astype(np.float32))
+    output = net(x)
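
The new `test_clip_by_norm_const` case exercises the constant-`clip_norm` path added in `basic.py`: the norm value is a Tensor attribute of the cell, so `_is_equal_one` can see it at graph-compile time and the `intermediate = x` branch is taken. A small, hedged extension of the test is sketched below; it only asserts a property that holds regardless of which reduction axis this version of `ClipByNorm` uses internally.

import numpy as np


def check_no_amplification(x, output, tol=1e-6):
    """With clip_norm == 1, the divisor max(l2norm, clip_norm) is at least 1,
    so clipping must never increase any element's magnitude."""
    assert np.all(np.abs(output.asnumpy()) <= np.abs(x.asnumpy()) + tol)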