magicwindyyd / mindspore (forked from MindSpore / mindspore)
Commit af492312
Authored on Jul 22, 2020 by shibeiji

script update for bert

Parent: ca6da675
Showing 4 changed files with 18 additions and 14 deletions.
  model_zoo/official/nlp/bert/run_pretrain.py                +2  -4
  model_zoo/official/nlp/bert/src/bert_for_finetune.py       +3  -3
  model_zoo/official/nlp/bert/src/bert_for_pre_training.py   +3  -3
  model_zoo/official/nlp/bert/src/utils.py                  +10  -4
model_zoo/official/nlp/bert/run_pretrain.py

@@ -117,8 +117,7 @@ def run_pretrain():
         decay_params = list(filter(cfg.Lamb.decay_filter, params))
         other_params = list(filter(lambda x: x not in decay_params, params))
         group_params = [{'params': decay_params, 'weight_decay': cfg.Lamb.weight_decay},
-                        {'params': other_params},
-                        {'order_params': params}]
+                        {'params': other_params}]
         optimizer = Lamb(group_params, learning_rate=lr_schedule, eps=cfg.Lamb.eps)
     elif cfg.optimizer == 'Momentum':
         optimizer = Momentum(net_with_loss.trainable_params(), learning_rate=cfg.Momentum.learning_rate,
@@ -133,8 +132,7 @@ def run_pretrain():
         decay_params = list(filter(cfg.AdamWeightDecay.decay_filter, params))
         other_params = list(filter(lambda x: x not in decay_params, params))
         group_params = [{'params': decay_params, 'weight_decay': cfg.AdamWeightDecay.weight_decay},
-                        {'params': other_params, 'weight_decay': 0.0},
-                        {'order_params': params}]
+                        {'params': other_params, 'weight_decay': 0.0}]
         optimizer = AdamWeightDecay(group_params, learning_rate=lr_schedule, eps=cfg.AdamWeightDecay.eps)
     else:
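For context, the sketch below is an illustration only, not part of the commit: the tiny Dense network, the simplified bias filter, and the hyper-parameter values are stand-ins for the BERT configuration. It shows the resulting pattern: grouped parameters are handed to the optimizer without an 'order_params' entry, because the train cells now take their weight list from optimizer.parameters, so the optimizer's own ordering is authoritative.

# Sketch only (illustrative assumptions, not the repo's code).
import mindspore.nn as nn

net_with_loss = nn.Dense(8, 2)            # stand-in for the BERT network with loss
params = net_with_loss.trainable_params()

decay_params = list(filter(lambda x: 'bias' not in x.name.lower(), params))
other_params = list(filter(lambda x: x not in decay_params, params))

# No {'order_params': params} entry any more: the train cells read their
# weight list from optimizer.parameters, so no extra reordering is needed.
group_params = [{'params': decay_params, 'weight_decay': 0.01},
                {'params': other_params, 'weight_decay': 0.0}]

optimizer = nn.AdamWeightDecay(group_params, learning_rate=1e-4, eps=1e-6)
print(len(optimizer.parameters))          # ParameterTuple covering both groups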
model_zoo/official/nlp/bert/src/bert_for_finetune.py

@@ -22,7 +22,7 @@ from mindspore.ops import operations as P
 from mindspore.ops import functional as F
 from mindspore.ops import composite as C
 from mindspore.common.tensor import Tensor
-from mindspore.common.parameter import Parameter, ParameterTuple
+from mindspore.common.parameter import Parameter
 from mindspore.common import dtype as mstype
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.train.parallel_utils import ParallelMode
@@ -55,7 +55,7 @@ class BertFinetuneCell(nn.Cell):
         super(BertFinetuneCell, self).__init__(auto_prefix=False)
         self.network = network
-        self.weights = ParameterTuple(network.trainable_params())
+        self.weights = optimizer.parameters
         self.optimizer = optimizer
         self.grad = C.GradOperation('grad',
                                     get_by_list=True,
@@ -158,7 +158,7 @@ class BertSquadCell(nn.Cell):
     def __init__(self, network, optimizer, scale_update_cell=None):
         super(BertSquadCell, self).__init__(auto_prefix=False)
         self.network = network
-        self.weights = ParameterTuple(network.trainable_params())
+        self.weights = optimizer.parameters
         self.optimizer = optimizer
         self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
         self.reducer_flag = False
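The pattern this change relies on can be sketched as follows. This is an assumption for illustration: TrainOneStep is a hypothetical simplified cell, not one of the repo's classes, and C.GradOperation('grad', ...) follows the older signature used by the MindSpore version this repository targeted at the time. The point is that the weight list handed to GradOperation comes from the optimizer itself, so gradient i always corresponds to the i-th parameter the optimizer updates, and the separate ParameterTuple import is no longer needed.

# Illustrative sketch, not the commit's code.
import mindspore.nn as nn
from mindspore.common import dtype as mstype
from mindspore.ops import composite as C
from mindspore.ops import functional as F
from mindspore.ops import operations as P

class TrainOneStep(nn.Cell):
    """Hypothetical, simplified analogue of the repo's train cells."""
    def __init__(self, network, optimizer, sens=1.0):
        super(TrainOneStep, self).__init__(auto_prefix=False)
        self.network = network
        self.weights = optimizer.parameters   # ordering owned by the optimizer
        self.optimizer = optimizer
        self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
        self.cast = P.Cast()
        self.sens = sens

    def construct(self, data, label):
        loss = self.network(data, label)
        # build the sensitivity scalar the same way the repo's cells do
        sens = self.cast(F.tuple_to_array((self.sens,)), mstype.float32)
        grads = self.grad(self.network, self.weights)(data, label, sens)
        return F.depend(loss, self.optimizer(grads))

# usage sketch: TrainOneStep(nn.WithLossCell(backbone, nn.MSELoss()),
#                            nn.Momentum(backbone.trainable_params(), 0.01, 0.9))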
model_zoo/official/nlp/bert/src/bert_for_pre_training.py

@@ -21,7 +21,7 @@ from mindspore.ops import operations as P
 from mindspore.ops import functional as F
 from mindspore.ops import composite as C
 from mindspore.common.tensor import Tensor
-from mindspore.common.parameter import Parameter, ParameterTuple
+from mindspore.common.parameter import Parameter
 from mindspore.common import dtype as mstype
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.train.parallel_utils import ParallelMode
@@ -270,7 +270,7 @@ class BertTrainOneStepCell(nn.Cell):
     def __init__(self, network, optimizer, sens=1.0):
         super(BertTrainOneStepCell, self).__init__(auto_prefix=False)
         self.network = network
-        self.weights = ParameterTuple(network.trainable_params())
+        self.weights = optimizer.parameters
         self.optimizer = optimizer
         self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
         self.sens = sens
@@ -349,7 +349,7 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
     def __init__(self, network, optimizer, scale_update_cell=None):
         super(BertTrainOneStepWithLossScaleCell, self).__init__(auto_prefix=False)
         self.network = network
-        self.weights = ParameterTuple(network.trainable_params())
+        self.weights = optimizer.parameters
         self.optimizer = optimizer
         self.grad = C.GradOperation('grad',
                                     get_by_list=True,
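Why the source of the weight list matters can be seen in a small sketch (an illustration with assumed names and values, not taken from the commit): with grouped parameters, the optimizer may hold its parameters in a different order than network.trainable_params(), and GradOperation returns gradients in the order of whatever weight list it is given.

# Illustration only: compare the network's parameter order with the order the
# optimizer updates in when grouped parameters are used.
import mindspore.nn as nn
from mindspore.common.parameter import ParameterTuple

net = nn.SequentialCell([nn.Dense(4, 4), nn.Dense(4, 2)])
params = net.trainable_params()

decay_params = [p for p in params if 'bias' not in p.name]   # weights, first group
other_params = [p for p in params if 'bias' in p.name]       # biases, second group
group_params = [{'params': decay_params, 'weight_decay': 0.01},
                {'params': other_params, 'weight_decay': 0.0}]
optimizer = nn.Momentum(group_params, learning_rate=0.01, momentum=0.9)

print([p.name for p in ParameterTuple(params)])   # network order: weight, bias, weight, bias
print([p.name for p in optimizer.parameters])     # group order: all weights, then all biases
# Using optimizer.parameters as GradOperation's weight list keeps gradient i
# aligned with the i-th parameter the optimizer updates, so neither
# ParameterTuple(network.trainable_params()) nor the 'order_params' entry is needed.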
model_zoo/official/nlp/bert/src/utils.py

@@ -133,7 +133,10 @@ class BertLearningRate(LearningRateSchedule):
     """
     def __init__(self, learning_rate, end_learning_rate, warmup_steps, decay_steps, power):
         super(BertLearningRate, self).__init__()
-        self.warmup_lr = WarmUpLR(learning_rate, warmup_steps)
+        self.warmup_flag = False
+        if warmup_steps > 0:
+            self.warmup_flag = True
+            self.warmup_lr = WarmUpLR(learning_rate, warmup_steps)
         self.decay_lr = PolynomialDecayLR(learning_rate, end_learning_rate, decay_steps, power)
         self.warmup_steps = Tensor(np.array([warmup_steps]).astype(np.float32))
@@ -142,8 +145,11 @@ class BertLearningRate(LearningRateSchedule):
         self.cast = P.Cast()

     def construct(self, global_step):
-        is_warmup = self.cast(self.greater(self.warmup_steps, global_step), mstype.float32)
-        warmup_lr = self.warmup_lr(global_step)
         decay_lr = self.decay_lr(global_step)
-        lr = (self.one - is_warmup) * decay_lr + is_warmup * warmup_lr
+        if self.warmup_flag:
+            is_warmup = self.cast(self.greater(self.warmup_steps, global_step), mstype.float32)
+            warmup_lr = self.warmup_lr(global_step)
+            lr = (self.one - is_warmup) * decay_lr + is_warmup * warmup_lr
+        else:
+            lr = decay_lr
         return lr
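To make the effect of the new warmup_flag concrete, here is a plain-Python emulation (an assumption for illustration; it mirrors the WarmUpLR and PolynomialDecayLR formulas rather than calling the MindSpore classes). With warmup_steps == 0 the schedule now skips warmup entirely instead of constructing a WarmUpLR with zero warmup steps.

# Plain-Python emulation of the patched schedule; not the MindSpore classes.
def bert_lr(global_step, learning_rate, end_learning_rate,
            warmup_steps, decay_steps, power):
    # PolynomialDecayLR: decay towards end_learning_rate over decay_steps
    step = min(global_step, decay_steps)
    decay_lr = (learning_rate - end_learning_rate) * \
               (1.0 - step / decay_steps) ** power + end_learning_rate
    if warmup_steps > 0:                      # mirrors self.warmup_flag
        # WarmUpLR: linear ramp from 0 to learning_rate over warmup_steps
        warmup_lr = learning_rate * min(global_step, warmup_steps) / warmup_steps
        is_warmup = 1.0 if global_step < warmup_steps else 0.0
        return (1.0 - is_warmup) * decay_lr + is_warmup * warmup_lr
    return decay_lr                           # no warmup branch at all

print(bert_lr(0, 3e-5, 0.0, 0, 10000, 1.0))      # warmup disabled: pure decay value
print(bert_lr(50, 3e-5, 0.0, 100, 10000, 1.0))   # inside the warmup ramp
print(bert_lr(500, 3e-5, 0.0, 100, 10000, 1.0))  # past warmup: decay value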