Commit 55dd9d98

Authored Jun 02, 2020 by mindspore-ci-bot; committed via Gitee on Jun 02, 2020.
!1801 add bert performance test case
Merge pull request !1801 from wanghua/master
Parents: ee625262, 3f536ea1
Showing 1 changed file with 104 additions and 10 deletions (+104, -10).
tests/st/networks/models/bert/bert_tdt_lossscale.py (+104, -10):
@@ -16,6 +16,7 @@
 """train bert network without lossscale"""
 import os
+import time
 import pytest
 import numpy as np
@@ -85,14 +86,23 @@ def get_config(version='base', batch_size=1):
     return bert_config

-def me_de_train_dataset():
+def me_de_train_dataset(sink_mode=False):
     """test me de train dataset"""
     # apply repeat operations
     repeat_count = 1
+    batch_size = 16
     ds = de.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["input_ids", "input_mask", "segment_ids",
                                                                 "next_sentence_labels", "masked_lm_positions",
                                                                 "masked_lm_ids", "masked_lm_weights"], shuffle=False)
     type_cast_op = C.TypeCast(mstype.int32)
+    new_repeat_count = repeat_count
+    if sink_mode:
+        repeat_count = 30
+        sink_steps = 100
+        ori_dataaet_size = ds.get_dataset_size()
+        new_size = sink_steps * batch_size
+        ds.set_dataset_size(new_size)
+        new_repeat_count = int(repeat_count * ori_dataaet_size // ds.get_dataset_size())
     ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
     ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
     ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
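
The `sink_mode` branch above resizes the dataset so that one sink "epoch" drains exactly `sink_steps` batches, then rescales the repeat count so the total amount of data consumed stays the same. A minimal sketch of that arithmetic, assuming a hypothetical original dataset size of 25,600 samples (the real TFRecord size is not shown in the diff):

```python
# Sketch of the sink-mode sizing arithmetic from me_de_train_dataset().
# ori_dataset_size is a made-up illustrative value, not the test's real data size.
repeat_count = 30                    # epochs requested in sink mode
sink_steps = 100                     # batches drained per sink epoch
batch_size = 16

ori_dataset_size = 25600             # hypothetical sample count of the TFRecord
new_size = sink_steps * batch_size   # 1600 samples per resized epoch
new_repeat_count = int(repeat_count * ori_dataset_size // new_size)

print(new_repeat_count)              # 480 sink epochs of 100 steps each:
                                     # 480 * 1600 == 30 * 25600 samples overall
```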
@@ -100,10 +110,11 @@ def me_de_train_dataset():
     ds = ds.map(input_columns="input_mask", operations=type_cast_op)
     ds = ds.map(input_columns="input_ids", operations=type_cast_op)
     # apply batch operations
-    batch_size = int(os.getenv('BATCH_SIZE', '16'))
     ds = ds.batch(batch_size, drop_remainder=True)
     ds = ds.repeat(repeat_count)
-    return ds
+    logger.info("data size: {}".format(ds.get_dataset_size()))
+    logger.info("repeat_count: {}".format(ds.get_repeat_count()))
+    return ds, new_repeat_count

 def weight_variable(shape):
@@ -127,20 +138,34 @@ class ModelCallback(Callback):
         self.lossscale_list.append(cb_params.net_outputs[2].asnumpy())
         print("epoch: {}, outputs are: {}".format(cb_params.cur_epoch_num, str(cb_params.net_outputs)))

+class TimeMonitor(Callback):
+    """Time Monitor."""
+    def __init__(self, data_size):
+        super(TimeMonitor, self).__init__()
+        self.data_size = data_size
+        self.epoch_mseconds_list = []
+        self.per_step_mseconds_list = []
+
+    def epoch_begin(self, run_context):
+        self.epoch_time = time.time()
+
+    def epoch_end(self, run_context):
+        epoch_mseconds = (time.time() - self.epoch_time) * 1000
+        self.epoch_mseconds_list.append(epoch_mseconds)
+        self.per_step_mseconds_list.append(epoch_mseconds / self.data_size)
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
 @pytest.mark.platform_x86_ascend_training
 @pytest.mark.env_onecard
-def test_bert_tdt():
-    """test bert tdt"""
+def test_bert_percision():
+    """test bert percision"""
     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", reserve_class_name_in_scope=False)
-    ds = me_de_train_dataset()
+    ds, new_repeat_count = me_de_train_dataset()
     version = os.getenv('VERSION', 'large')
-    batch_size = int(os.getenv('BATCH_SIZE', '16'))
+    batch_size = 16
     config = get_config(version=version, batch_size=batch_size)
     netwithloss = BertNetworkWithLoss(config, True)
-    optimizer = Lamb(netwithloss.trainable_params(), decay_steps=ds.get_dataset_size() * ds.get_repeat_count(),
+    optimizer = Lamb(netwithloss.trainable_params(), decay_steps=ds.get_dataset_size() * new_repeat_count,
                      start_learning_rate=5e-5, end_learning_rate=1e-9,
                      power=10.0, warmup_steps=0, weight_decay=0.01)
     scale_window = 3
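
The new `TimeMonitor` callback only brackets each epoch with wall-clock timestamps and divides by the known step count; nothing in the bookkeeping itself is MindSpore-specific. A framework-free sketch of the same logic (`TimeMonitorSketch` and the `time.sleep` stand-in are illustrative, not part of the commit):

```python
import time

class TimeMonitorSketch:
    """Framework-free sketch of the TimeMonitor bookkeeping above."""
    def __init__(self, data_size):
        self.data_size = data_size           # steps per epoch
        self.epoch_mseconds_list = []
        self.per_step_mseconds_list = []

    def epoch_begin(self):
        self.epoch_time = time.time()        # stamp the epoch start

    def epoch_end(self):
        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        self.epoch_mseconds_list.append(epoch_mseconds)
        self.per_step_mseconds_list.append(epoch_mseconds / self.data_size)

monitor = TimeMonitorSketch(data_size=100)
monitor.epoch_begin()
time.sleep(0.1)                              # stand-in for 100 training steps
monitor.epoch_end()
print(monitor.epoch_mseconds_list)           # roughly [100.0]
print(monitor.per_step_mseconds_list)        # roughly [1.0]
```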
@@ -169,10 +194,12 @@ def test_bert_tdt():
             else:
                 logger.info("***************** BERT param name is 3 {}".format(name))
                 param.default_input = weight_variable(value.asnumpy().shape)
-    model.train(ds.get_repeat_count(), ds, callbacks=callback, dataset_sink_mode=False)
+    model.train(new_repeat_count, ds, callbacks=callback, dataset_sink_mode=False)

     # assertion occurs while the loss value, overflow state or loss_scale value is wrong
     loss_value = np.array(callback.loss_list)
-    assert np.allclose(loss_value[0], 12.207198, 0, 0.000001)
+    expect_loss_value = [12.207198, 11.980881, 11.984844, 11.879381, 11.832978, 12.411333, 12.009284,
+                         12.621277, 12.223178, 12.427385]
     print("loss value: {}".format(loss_value))
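
Note the positional arguments in these assertions: `np.allclose(a, b, rtol, atol)` takes the relative tolerance before the absolute one, so passing `0` for `rtol` turns the check into a pure absolute-tolerance comparison. A quick demonstration:

```python
import numpy as np

# np.allclose(a, b, rtol, atol) passes when |a - b| <= atol + rtol * |b|.
# With rtol=0, as in the test, only the absolute tolerance matters.
expected = 12.207198
print(np.allclose(12.2071985, expected, 0, 0.000001))  # True:  off by 5e-7
print(np.allclose(12.207200, expected, 0, 0.000001))   # False: off by 2e-6
```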
@@ -188,6 +215,73 @@ def test_bert_tdt():
     print("loss scale: {}".format(loss_scale))
     assert np.allclose(loss_scale, expect_loss_scale, 0, 0)

+def test_bert_performance():
+    """test bert performance"""
+    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", reserve_class_name_in_scope=False)
+    ds, new_repeat_count = me_de_train_dataset(sink_mode=True)
+    version = os.getenv('VERSION', 'large')
+    batch_size = 16
+    config = get_config(version=version, batch_size=batch_size)
+    netwithloss = BertNetworkWithLoss(config, True)
+    optimizer = Lamb(netwithloss.trainable_params(), decay_steps=ds.get_dataset_size() * new_repeat_count,
+                     start_learning_rate=5e-5, end_learning_rate=1e-9,
+                     power=10.0, warmup_steps=0, weight_decay=0.01)
+    scale_window = 3
+    scale_manager = DynamicLossScaleManager(2 ** 16, 2, scale_window)
+    netwithgrads = BertTrainOneStepWithLossScaleCell(netwithloss, optimizer=optimizer,
+                                                     scale_update_cell=scale_manager.get_update_cell())
+    netwithgrads.set_train(True)
+    model = Model(netwithgrads)
+    callback = ModelCallback()
+    params = netwithloss.trainable_params()
+    for param in params:
+        param.init_data()
+        value = param.default_input
+        name = param.name
+        if isinstance(value, Tensor):
+            if name.split('.')[-1] in ['weight']:
+                if name.split('.')[-3] in ['cls2']:
+                    logger.info("***************** BERT param name is 1 {}".format(name))
+                    param.default_input = weight_variable(value.asnumpy().shape)
+                else:
+                    logger.info("***************** BERT param name is 2 {}".format(name))
+                    tempshape = value.asnumpy().shape
+                    shape = (tempshape[1], tempshape[0])
+                    weight_value = weight_variable(shape).asnumpy()
+                    param.default_input = Tensor(np.transpose(weight_value, [1, 0]))
+            else:
+                logger.info("***************** BERT param name is 3 {}".format(name))
+                param.default_input = weight_variable(value.asnumpy().shape)
+    time_monitor_callback = TimeMonitor(ds.get_dataset_size())
+    model.train(new_repeat_count, ds, callbacks=[time_monitor_callback, callback], dataset_sink_mode=True)
+
+    # assertion occurs while the loss value, overflow state or loss_scale value is wrong
+    loss_value = np.array(callback.loss_list)
+    expect_loss_value = [10.237753, 10.213153, 10.212972]
+    print("loss value: {}".format(loss_value))
+    assert np.allclose(loss_value, expect_loss_value, 0, 0.0005)
+
+    overflow = np.array(callback.overflow_list)
+    expect_overflow = [False, False, False]
+    print("overflow: {}".format(overflow))
+    assert (overflow == expect_overflow).all()
+
+    loss_scale = np.array(callback.lossscale_list)
+    expect_loss_scale = [16384.0, 16384.0, 16384.0]
+    print("loss scale: {}".format(loss_scale))
+    assert np.allclose(loss_scale, expect_loss_scale, 0, 0)
+
+    epoch_mseconds = np.array(time_monitor_callback.epoch_mseconds_list)[2]
+    expect_epoch_mseconds = 1726
+    print("epoch mseconds: {}".format(epoch_mseconds))
+    assert epoch_mseconds <= expect_epoch_mseconds + 5
+
+    per_step_mseconds = np.array(time_monitor_callback.per_step_mseconds_list)[2]
+    expect_per_step_mseconds = 17
+    print("per step mseconds: {}".format(per_step_mseconds))
+    assert per_step_mseconds <= expect_per_step_mseconds + 1
+
 if __name__ == '__main__':
-    test_bert_tdt()
+    test_bert_percision()
+    test_bert_performance()
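
The two performance bounds are consistent with each other if, as the sink-mode setup suggests, each epoch covers 100 steps after the resize and re-batching (an assumption; the diff does not print the value): 1726 ms per epoch over 100 steps is about 17 ms per step, which matches the second threshold. A quick arithmetic check:

```python
# Consistency check for the performance thresholds used above.
# steps_per_epoch = 100 is an assumption based on sink_steps in me_de_train_dataset().
steps_per_epoch = 100
expect_epoch_mseconds = 1726
expect_per_step_mseconds = 17

print(expect_epoch_mseconds / steps_per_epoch)  # 17.26 ms/step, matching the
                                                # per-step bound of 17 (+1 slack)
```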