Commit eb46d5cc in mindspore (正统之独孤求败 / mindspore, a fork of MindSpore / mindspore)
Authored Aug 20, 2020 by yoonlee666

split ci cases

Parent commit: 0b3ab6b7
Showing 2 changed files with 257 additions and 75 deletions:

  tests/st/networks/models/bert/bert_performance/test_bert_tdt_lossscale.py   +6    -75
  tests/st/networks/models/bert/bert_precision/test_bert_tdt_lossscale.py     +251  -0
tests/st/networks/models/bert/test_bert_tdt_lossscale.py → tests/st/networks/models/bert/bert_performance/test_bert_tdt_lossscale.py
@@ -17,12 +17,8 @@
 import os
 import time
 import numpy as np
 import pytest
-from src.bert_for_pre_training import BertNetworkWithLoss, BertTrainOneStepWithLossScaleCell
-from src.bert_model import BertConfig
 import mindspore.common.dtype as mstype
 import mindspore.dataset.engine.datasets as de
 import mindspore.dataset.transforms.c_transforms as C
@@ -35,6 +31,10 @@ from mindspore.train.callback import Callback
 from mindspore.train.loss_scale_manager import DynamicLossScaleManager
 from mindspore.train.model import Model
 import mindspore.nn.learning_rate_schedule as lr_schedules
+from model_zoo.official.nlp.bert.src.bert_for_pre_training import BertNetworkWithLoss
+from model_zoo.official.nlp.bert.src.bert_for_pre_training import BertTrainOneStepWithLossScaleCell
+from model_zoo.official.nlp.bert.src.bert_model import BertConfig
 
 _current_dir = os.path.dirname(os.path.realpath(__file__))
 DATA_DIR = ["/home/workspace/mindspore_dataset/bert/example/examples.tfrecord"]
@@ -177,74 +177,6 @@ class TimeMonitor(Callback):
         self.epoch_mseconds_list.append(epoch_mseconds)
         self.per_step_mseconds_list.append(epoch_mseconds / self.data_size)
 
 
-@pytest.mark.level0
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
-def test_bert_percision():
-    """test bert percision"""
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", reserve_class_name_in_scope=False)
-    ds, new_repeat_count, _ = me_de_train_dataset()
-    version = os.getenv('VERSION', 'large')
-    batch_size = 16
-    config = get_config(version=version, batch_size=batch_size)
-    netwithloss = BertNetworkWithLoss(config, True)
-    lr = BertLearningRate(decay_steps=ds.get_dataset_size() * new_repeat_count, learning_rate=5e-5,
-                          end_learning_rate=1e-9, power=10.0, warmup_steps=0)
-    decay_filter = lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()
-    no_decay_filter = lambda x: 'layernorm' in x.name.lower() or 'bias' in x.name.lower()
-    decay_params = list(filter(decay_filter, netwithloss.trainable_params()))
-    other_params = list(filter(no_decay_filter, netwithloss.trainable_params()))
-    group_params = [{'params': decay_params, 'weight_decay': 0.01},
-                    {'params': other_params},
-                    {'order_params': netwithloss.trainable_params()}]
-    optimizer = Lamb(group_params, lr)
-    scale_window = 3
-    scale_manager = DynamicLossScaleManager(2 ** 16, 2, scale_window)
-    netwithgrads = BertTrainOneStepWithLossScaleCell(netwithloss, optimizer=optimizer,
-                                                     scale_update_cell=scale_manager.get_update_cell())
-    netwithgrads.set_train(True)
-    model = Model(netwithgrads)
-    callback = ModelCallback()
-    params = netwithloss.trainable_params()
-    for param in params:
-        value = param.default_input
-        name = param.name
-        if isinstance(value, Tensor):
-            if name.split('.')[-1] in ['weight']:
-                if name.split('.')[-3] in ['cls2']:
-                    logger.info("***************** BERT param name is 1 {}".format(name))
-                    param.default_input = weight_variable(value.asnumpy().shape)
-                else:
-                    logger.info("***************** BERT param name is 2 {}".format(name))
-                    tempshape = value.asnumpy().shape
-                    shape = (tempshape[1], tempshape[0])
-                    weight_value = weight_variable(shape).asnumpy()
-                    param.default_input = Tensor(np.transpose(weight_value, [1, 0]))
-            else:
-                logger.info("***************** BERT param name is 3 {}".format(name))
-                param.default_input = weight_variable(value.asnumpy().shape)
-    model.train(new_repeat_count, ds, callbacks=callback, dataset_sink_mode=False)
-    # assertion occurs while the loss value, overflow state or loss_scale value is wrong
-    loss_value = np.array(callback.loss_list)
-    assert np.allclose(loss_value[0], 12.206575, 0, 0.000001)
-    expect_loss_value = [12.206575, 11.865044, 11.828129, 11.826707, 11.82108, 12.407423,
-                         12.005459, 12.621225, 12.222903, 12.427446]
-    print("loss value: {}".format(loss_value))
-    assert np.allclose(loss_value, expect_loss_value, 0, 0.0005)
-    overflow = np.array(callback.overflow_list)
-    expect_overflow = [False, False, False, True, False, False, False, True, False, False]
-    print("overflow: {}".format(overflow))
-    assert (overflow == expect_overflow).all()
-    loss_scale = np.array(callback.lossscale_list)
-    expect_loss_scale = [65536.0, 65536.0, 131072.0, 65536.0, 65536.0, 65536.0, 131072.0,
-                         65536.0, 65536.0, 65536.0]
-    print("loss scale: {}".format(loss_scale))
-    assert np.allclose(loss_scale, expect_loss_scale, 0, 0)
-
-
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
@@ -317,15 +249,14 @@ def test_bert_performance():
     assert np.allclose(loss_scale, expect_loss_scale, 0, 0)
     epoch_mseconds = np.array(time_monitor_callback.epoch_mseconds_list)[2]
-    expect_epoch_mseconds = 1600
+    expect_epoch_mseconds = 1400
     print("epoch mseconds: {}".format(epoch_mseconds))
     assert epoch_mseconds <= expect_epoch_mseconds + 5
     per_step_mseconds = np.array(time_monitor_callback.per_step_mseconds_list)[2]
-    expect_per_step_mseconds = 16
+    expect_per_step_mseconds = 14
     print("per step mseconds: {}".format(per_step_mseconds))
     assert per_step_mseconds <= expect_per_step_mseconds + 1
 
 
 if __name__ == '__main__':
-    test_bert_percision()
     test_bert_performance()
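
Note on the loss-scale assertions: both copies of the test build DynamicLossScaleManager(2 ** 16, 2, scale_window) with scale_window = 3, and the expected sequence [65536, 65536, 131072, ...] follows from the usual dynamic loss-scaling rule: halve the scale when a step overflows, multiply it by the scale factor after scale_window consecutive clean steps. The following self-contained sketch is my paraphrase of that rule, not MindSpore code (the helper name simulate_loss_scale is illustrative); it reproduces the expected scale values from the expected overflow flags:

def simulate_loss_scale(overflow_flags, init_scale=2 ** 16, factor=2, window=3):
    """Sketch of the dynamic loss-scaling rule (assumed semantics, not MindSpore code)."""
    scale, good_steps, history = float(init_scale), 0, []
    for overflow in overflow_flags:
        if overflow:
            scale = max(scale / factor, 1.0)  # overflow: back the scale off
            good_steps = 0
        else:
            good_steps += 1
            if good_steps == window:          # `window` clean steps in a row: grow
                scale *= factor
                good_steps = 0
        history.append(scale)                 # value recorded after the update
    return history

expect_overflow = [False, False, False, True, False, False, False, True, False, False]
assert simulate_loss_scale(expect_overflow) == [65536.0, 65536.0, 131072.0, 65536.0, 65536.0,
                                                65536.0, 131072.0, 65536.0, 65536.0, 65536.0]

Checking expect_loss_scale against this rule is what lets the test catch a wrong overflow state as well as a wrong scale update.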
tests/st/networks/models/bert/bert_precision/test_bert_tdt_lossscale.py (new file, mode 100644)
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""train bert network without lossscale"""
import os
import time
import numpy as np
import pytest
import mindspore.common.dtype as mstype
import mindspore.dataset.engine.datasets as de
import mindspore.dataset.transforms.c_transforms as C
from mindspore import context
from mindspore import log as logger
from mindspore.ops import operations as P
from mindspore.common.tensor import Tensor
from mindspore.nn.optim import Lamb
from mindspore.train.callback import Callback
from mindspore.train.loss_scale_manager import DynamicLossScaleManager
from mindspore.train.model import Model
import mindspore.nn.learning_rate_schedule as lr_schedules
from model_zoo.official.nlp.bert.src.bert_for_pre_training import BertNetworkWithLoss
from model_zoo.official.nlp.bert.src.bert_for_pre_training import BertTrainOneStepWithLossScaleCell
from model_zoo.official.nlp.bert.src.bert_model import BertConfig

_current_dir = os.path.dirname(os.path.realpath(__file__))
DATA_DIR = ["/home/workspace/mindspore_dataset/bert/example/examples.tfrecord"]
SCHEMA_DIR = "/home/workspace/mindspore_dataset/bert/example/datasetSchema.json"


def get_config(version='base', batch_size=1):
    """get config"""
    if version == 'base':
        bert_config = BertConfig(
            batch_size=batch_size,
            seq_length=128,
            vocab_size=21136,
            hidden_size=768,
            num_hidden_layers=2,
            num_attention_heads=12,
            intermediate_size=3072,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=2,
            initializer_range=0.02,
            use_relative_positions=True,
            input_mask_from_dataset=True,
            token_type_ids_from_dataset=True,
            dtype=mstype.float32,
            compute_type=mstype.float32)
    elif version == 'large':
        bert_config = BertConfig(
            batch_size=batch_size,
            seq_length=128,
            vocab_size=21136,
            hidden_size=1024,
            num_hidden_layers=2,
            num_attention_heads=16,
            intermediate_size=4096,
            hidden_act="gelu",
            hidden_dropout_prob=0.0,
            attention_probs_dropout_prob=0.0,
            max_position_embeddings=512,
            type_vocab_size=2,
            initializer_range=0.02,
            use_relative_positions=False,
            input_mask_from_dataset=True,
            token_type_ids_from_dataset=True,
            dtype=mstype.float32,
            compute_type=mstype.float16,
            enable_fused_layernorm=False)
    else:
        bert_config = BertConfig(batch_size=batch_size)
    return bert_config


def me_de_train_dataset(sink_mode=False):
    """test me de train dataset"""
    # apply repeat operations
    repeat_count = 1
    sink_size = -1
    batch_size = 16
    ds = de.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["input_ids", "input_mask", "segment_ids",
                                                                "next_sentence_labels", "masked_lm_positions",
                                                                "masked_lm_ids", "masked_lm_weights"], shuffle=False)
    type_cast_op = C.TypeCast(mstype.int32)
    new_repeat_count = repeat_count
    if sink_mode:
        sink_size = 100
        new_repeat_count = 3
    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
    ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)
    logger.info("data size: {}".format(ds.get_dataset_size()))
    logger.info("repeat_count: {}".format(ds.get_repeat_count()))
    return ds, new_repeat_count, sink_size


def weight_variable(shape):
    """weight variable"""
    np.random.seed(1)
    ones = np.random.uniform(-0.1, 0.1, size=shape).astype(np.float32)
    return Tensor(ones)


class BertLearningRate(lr_schedules.LearningRateSchedule):
    def __init__(self, learning_rate, end_learning_rate, warmup_steps, decay_steps, power):
        super(BertLearningRate, self).__init__()
        self.warmup_flag = False
        if warmup_steps > 0:
            self.warmup_flag = True
            self.warmup_lr = lr_schedules.WarmUpLR(learning_rate, warmup_steps)
        self.decay_lr = lr_schedules.PolynomialDecayLR(learning_rate, end_learning_rate, decay_steps, power)
        self.warmup_steps = Tensor(np.array([warmup_steps]).astype(np.float32))

        self.greater = P.Greater()
        self.one = Tensor(np.array([1.0]).astype(np.float32))
        self.cast = P.Cast()

    def construct(self, global_step):
        decay_lr = self.decay_lr(global_step)
        if self.warmup_flag:
            is_warmup = self.cast(self.greater(self.warmup_steps, global_step), mstype.float32)
            warmup_lr = self.warmup_lr(global_step)
            lr = (self.one - is_warmup) * decay_lr + is_warmup * warmup_lr
        else:
            lr = decay_lr
        return lr


class ModelCallback(Callback):
    def __init__(self):
        super(ModelCallback, self).__init__()
        self.loss_list = []
        self.overflow_list = []
        self.lossscale_list = []

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        self.loss_list.append(cb_params.net_outputs[0].asnumpy()[0])
        self.overflow_list.append(cb_params.net_outputs[1].asnumpy())
        self.lossscale_list.append(cb_params.net_outputs[2].asnumpy())
        print("epoch: {}, outputs are: {}".format(cb_params.cur_epoch_num, str(cb_params.net_outputs)))


class TimeMonitor(Callback):
    """Time Monitor."""

    def __init__(self, data_size):
        super(TimeMonitor, self).__init__()
        self.data_size = data_size
        self.epoch_mseconds_list = []
        self.per_step_mseconds_list = []

    def epoch_begin(self, run_context):
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        self.epoch_mseconds_list.append(epoch_mseconds)
        self.per_step_mseconds_list.append(epoch_mseconds / self.data_size)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_bert_percision():
    """test bert percision"""
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", reserve_class_name_in_scope=False)
    ds, new_repeat_count, _ = me_de_train_dataset()
    version = os.getenv('VERSION', 'large')
    batch_size = 16
    config = get_config(version=version, batch_size=batch_size)
    netwithloss = BertNetworkWithLoss(config, True)
    lr = BertLearningRate(decay_steps=ds.get_dataset_size() * new_repeat_count, learning_rate=5e-5,
                          end_learning_rate=1e-9, power=10.0, warmup_steps=0)
    decay_filter = lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()
    no_decay_filter = lambda x: 'layernorm' in x.name.lower() or 'bias' in x.name.lower()
    decay_params = list(filter(decay_filter, netwithloss.trainable_params()))
    other_params = list(filter(no_decay_filter, netwithloss.trainable_params()))
    group_params = [{'params': decay_params, 'weight_decay': 0.01},
                    {'params': other_params},
                    {'order_params': netwithloss.trainable_params()}]
    optimizer = Lamb(group_params, lr)
    scale_window = 3
    scale_manager = DynamicLossScaleManager(2 ** 16, 2, scale_window)
    netwithgrads = BertTrainOneStepWithLossScaleCell(netwithloss, optimizer=optimizer,
                                                     scale_update_cell=scale_manager.get_update_cell())
    netwithgrads.set_train(True)
    model = Model(netwithgrads)
    callback = ModelCallback()
    params = netwithloss.trainable_params()
    for param in params:
        value = param.default_input
        name = param.name
        if isinstance(value, Tensor):
            if name.split('.')[-1] in ['weight']:
                if name.split('.')[-3] in ['cls2']:
                    logger.info("***************** BERT param name is 1 {}".format(name))
                    param.default_input = weight_variable(value.asnumpy().shape)
                else:
                    logger.info("***************** BERT param name is 2 {}".format(name))
                    tempshape = value.asnumpy().shape
                    shape = (tempshape[1], tempshape[0])
                    weight_value = weight_variable(shape).asnumpy()
                    param.default_input = Tensor(np.transpose(weight_value, [1, 0]))
            else:
                logger.info("***************** BERT param name is 3 {}".format(name))
                param.default_input = weight_variable(value.asnumpy().shape)
    model.train(new_repeat_count, ds, callbacks=callback, dataset_sink_mode=False)

    # assertion occurs while the loss value, overflow state or loss_scale value is wrong
    loss_value = np.array(callback.loss_list)
    assert np.allclose(loss_value[0], 12.2065868, 0, 0.000001)
    expect_loss_value = [12.2065868, 11.8651543, 11.8282356, 11.8266964, 11.8210478, 12.4073524,
                         12.0055466, 12.6212320, 12.2229223, 12.4272099]
    print("loss value: {}".format(loss_value))
    assert np.allclose(loss_value, expect_loss_value, 0, 0.0005)

    overflow = np.array(callback.overflow_list)
    expect_overflow = [False, False, False, True, False, False, False, True, False, False]
    print("overflow: {}".format(overflow))
    assert (overflow == expect_overflow).all()

    loss_scale = np.array(callback.lossscale_list)
    expect_loss_scale = [65536.0, 65536.0, 131072.0, 65536.0, 65536.0, 65536.0, 131072.0,
                         65536.0, 65536.0, 65536.0]
    print("loss scale: {}".format(loss_scale))
    assert np.allclose(loss_scale, expect_loss_scale, 0, 0)


if __name__ == '__main__':
    test_bert_percision()
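
For readers unfamiliar with the schedule defined by BertLearningRate above: it wraps lr_schedules.WarmUpLR and lr_schedules.PolynomialDecayLR and selects between them per step with a Greater/Cast mask. Below is a rough pure-Python sketch of the resulting curve, assuming the standard polynomial-decay formula; the exact MindSpore kernels may differ in small details such as step offsets, and bert_lr_sketch is an illustrative name, not repo code:

def bert_lr_sketch(global_step, learning_rate=5e-5, end_learning_rate=1e-9,
                   warmup_steps=0, decay_steps=1000, power=10.0):
    """Approximate BertLearningRate.construct for a single step (sketch)."""
    if warmup_steps > 0 and global_step < warmup_steps:
        # WarmUpLR: linear ramp from 0 up to learning_rate over warmup_steps
        return learning_rate * global_step / warmup_steps
    # PolynomialDecayLR: decay from learning_rate down to end_learning_rate
    step = min(float(global_step), float(decay_steps))
    return (learning_rate - end_learning_rate) * (1.0 - step / decay_steps) ** power + end_learning_rate

print(bert_lr_sketch(0))     # 5e-05 at the first step
print(bert_lr_sketch(500))   # range scaled by (1 - 0.5) ** 10, i.e. about 1/1024
print(bert_lr_sketch(1000))  # floors at end_learning_rate = 1e-09

With warmup_steps=0 and power=10.0, as in test_bert_percision, the schedule is pure polynomial decay that falls off steeply toward end_learning_rate.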