Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
69cbf585
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
69cbf585
编写于
6月 08, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
6月 08, 2020
浏览文件
操作
浏览文件
下载
差异文件
!1902 Fix bert scripts.
Merge pull request !1902 from chenhaozhe/fix-bert-scripts
上级
31ecc13b
1be7ad52
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
10 additions
and
7 deletions
+10
-7
model_zoo/bert/run_pretrain.py
model_zoo/bert/run_pretrain.py
+2
-0
model_zoo/bert/src/dataset.py
model_zoo/bert/src/dataset.py
+2
-1
tests/st/networks/models/bert/src/bert_for_pre_training.py
tests/st/networks/models/bert/src/bert_for_pre_training.py
+1
-2
tests/st/networks/models/bert/src/config.py
tests/st/networks/models/bert/src/config.py
+3
-3
tests/st/networks/models/bert/src/dataset.py
tests/st/networks/models/bert/src/dataset.py
+2
-1
未找到文件。
model_zoo/bert/run_pretrain.py
浏览文件 @
69cbf585
...
...
@@ -19,6 +19,7 @@ python run_pretrain.py
import
os
import
argparse
import
numpy
import
mindspore.communication.management
as
D
from
mindspore
import
context
from
mindspore.train.model
import
Model
...
...
@@ -142,4 +143,5 @@ def run_pretrain():
model
=
Model
(
netwithgrads
)
model
.
train
(
new_repeat_count
,
ds
,
callbacks
=
callback
,
dataset_sink_mode
=
(
args_opt
.
enable_data_sink
==
"true"
))
if
__name__
==
'__main__'
:
numpy
.
random
.
seed
(
0
)
run_pretrain
()
model_zoo/bert/src/dataset.py
浏览文件 @
69cbf585
...
...
@@ -39,6 +39,7 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", e
shuffle
=
(
do_shuffle
==
"true"
),
num_shards
=
device_num
,
shard_id
=
rank
,
shard_equal_rows
=
True
)
ori_dataset_size
=
ds
.
get_dataset_size
()
print
(
'origin dataset size: '
,
ori_dataset_size
)
new_size
=
ori_dataset_size
if
enable_data_sink
==
"true"
:
new_size
=
data_sink_steps
*
bert_net_cfg
.
batch_size
...
...
@@ -53,7 +54,7 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", e
ds
=
ds
.
map
(
input_columns
=
"input_ids"
,
operations
=
type_cast_op
)
# apply batch operations
ds
=
ds
.
batch
(
bert_net_cfg
.
batch_size
,
drop_remainder
=
True
)
ds
=
ds
.
repeat
(
new_repeat_count
)
ds
=
ds
.
repeat
(
max
(
new_repeat_count
,
repeat_count
)
)
logger
.
info
(
"data size: {}"
.
format
(
ds
.
get_dataset_size
()))
logger
.
info
(
"repeatcount: {}"
.
format
(
ds
.
get_repeat_count
()))
return
ds
,
new_repeat_count
tests/st/networks/models/bert/src/bert_for_pre_training.py
浏览文件 @
69cbf585
...
...
@@ -32,7 +32,6 @@ from .bert_model import BertModel
GRADIENT_CLIP_TYPE
=
1
GRADIENT_CLIP_VALUE
=
1.0
_nn_clip_by_norm
=
nn
.
ClipByNorm
()
clip_grad
=
C
.
MultitypeFuncGraph
(
"clip_grad"
)
...
...
@@ -57,7 +56,7 @@ def _clip_grad(clip_type, clip_value, grad):
new_grad
=
C
.
clip_by_value
(
grad
,
F
.
cast
(
F
.
tuple_to_array
((
-
clip_value
,)),
dt
),
F
.
cast
(
F
.
tuple_to_array
((
clip_value
,)),
dt
))
else
:
new_grad
=
_nn_clip_by_norm
(
grad
,
F
.
cast
(
F
.
tuple_to_array
((
clip_value
,)),
dt
))
new_grad
=
nn
.
ClipByNorm
()
(
grad
,
F
.
cast
(
F
.
tuple_to_array
((
clip_value
,)),
dt
))
return
new_grad
...
...
tests/st/networks/models/bert/src/config.py
浏览文件 @
69cbf585
...
...
@@ -56,7 +56,7 @@ if cfg.bert_network == 'base':
bert_net_cfg
=
BertConfig
(
batch_size
=
32
,
seq_length
=
128
,
vocab_size
=
21136
,
vocab_size
=
21128
,
hidden_size
=
768
,
num_hidden_layers
=
12
,
num_attention_heads
=
12
,
...
...
@@ -77,7 +77,7 @@ if cfg.bert_network == 'nezha':
bert_net_cfg
=
BertConfig
(
batch_size
=
32
,
seq_length
=
128
,
vocab_size
=
21136
,
vocab_size
=
21128
,
hidden_size
=
1024
,
num_hidden_layers
=
24
,
num_attention_heads
=
16
,
...
...
@@ -98,7 +98,7 @@ if cfg.bert_network == 'large':
bert_net_cfg
=
BertConfig
(
batch_size
=
16
,
seq_length
=
512
,
vocab_size
=
30528
,
vocab_size
=
30522
,
hidden_size
=
1024
,
num_hidden_layers
=
24
,
num_attention_heads
=
16
,
...
...
tests/st/networks/models/bert/src/dataset.py
浏览文件 @
69cbf585
...
...
@@ -39,6 +39,7 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", e
shuffle
=
(
do_shuffle
==
"true"
),
num_shards
=
device_num
,
shard_id
=
rank
,
shard_equal_rows
=
True
)
ori_dataset_size
=
ds
.
get_dataset_size
()
print
(
'origin dataset size: '
,
ori_dataset_size
)
new_size
=
ori_dataset_size
if
enable_data_sink
==
"true"
:
new_size
=
data_sink_steps
*
bert_net_cfg
.
batch_size
...
...
@@ -53,7 +54,7 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", e
ds
=
ds
.
map
(
input_columns
=
"input_ids"
,
operations
=
type_cast_op
)
# apply batch operations
ds
=
ds
.
batch
(
bert_net_cfg
.
batch_size
,
drop_remainder
=
True
)
ds
=
ds
.
repeat
(
new_repeat_count
)
ds
=
ds
.
repeat
(
max
(
new_repeat_count
,
repeat_count
)
)
logger
.
info
(
"data size: {}"
.
format
(
ds
.
get_dataset_size
()))
logger
.
info
(
"repeatcount: {}"
.
format
(
ds
.
get_repeat_count
()))
return
ds
,
new_repeat_count
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录