Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
4b8d4391
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
4b8d4391
编写于
8月 07, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 07, 2020
浏览文件
操作
浏览文件
下载
差异文件
!4092 [bug] Fix BERT scripts' use of `in` on parameters, and fix the MatMul ATC conversion error in quant export
Merge pull request !4092 from vlne-v1/quant-matmul-fix
上级
7280d317
776d094c
变更
9
隐藏空白更改
内联
并排
Showing
9 changed files
with
20 additions
and
13 deletions
+20
-13
mindspore/common/parameter.py
mindspore/common/parameter.py
+1
-0
mindspore/train/quant/quant.py
mindspore/train/quant/quant.py
+9
-3
model_zoo/official/nlp/bert/run_classifier.py
model_zoo/official/nlp/bert/run_classifier.py
+1
-1
model_zoo/official/nlp/bert/run_ner.py
model_zoo/official/nlp/bert/run_ner.py
+1
-1
model_zoo/official/nlp/bert/run_pretrain.py
model_zoo/official/nlp/bert/run_pretrain.py
+2
-2
model_zoo/official/nlp/bert/run_squad.py
model_zoo/official/nlp/bert/run_squad.py
+1
-1
model_zoo/official/nlp/bert_thor/run_pretrain.py
model_zoo/official/nlp/bert_thor/run_pretrain.py
+2
-2
model_zoo/official/nlp/tinybert/run_general_distill.py
model_zoo/official/nlp/tinybert/run_general_distill.py
+1
-1
model_zoo/official/nlp/tinybert/run_task_distill.py
model_zoo/official/nlp/tinybert/run_task_distill.py
+2
-2
未找到文件。
mindspore/common/parameter.py
浏览文件 @
4b8d4391
...
@@ -270,6 +270,7 @@ class Parameter(MetaTensor):
...
@@ -270,6 +270,7 @@ class Parameter(MetaTensor):
"Update the parameter by a Tensor."
"Update the parameter by a Tensor."
if
isinstance
(
self
,
Tensor
):
if
isinstance
(
self
,
Tensor
):
# for Tensor same shape:
# for Tensor same shape:
self
.
init_flag
=
False
return
self
.
assign_value
(
data
)
return
self
.
assign_value
(
data
)
# create a new tensor
# create a new tensor
return
Parameter
(
data
,
self
.
name
,
self
.
requires_grad
)
return
Parameter
(
data
,
self
.
name
,
self
.
requires_grad
)
...
...
mindspore/train/quant/quant.py
浏览文件 @
4b8d4391
...
@@ -29,6 +29,7 @@ from ...common import dtype as mstype
...
@@ -29,6 +29,7 @@ from ...common import dtype as mstype
from
...common.api
import
_executor
from
...common.api
import
_executor
from
...nn.layer
import
quant
from
...nn.layer
import
quant
from
...ops
import
functional
as
F
from
...ops
import
functional
as
F
from
...ops
import
operations
as
P
from
...ops.operations
import
_inner_ops
as
inner
from
...ops.operations
import
_inner_ops
as
inner
from
...train
import
serialization
from
...train
import
serialization
from
.
import
quant_utils
from
.
import
quant_utils
...
@@ -366,8 +367,6 @@ class ExportToQuantInferNetwork:
...
@@ -366,8 +367,6 @@ class ExportToQuantInferNetwork:
sqrt_mode
=
True
sqrt_mode
=
True
dequant_op
=
inner
.
Dequant
(
sqrt_mode
)
dequant_op
=
inner
.
Dequant
(
sqrt_mode
)
# get op
op_core
=
cell_core
.
matmul
if
isinstance
(
cell_core
,
quant
.
DenseQuant
)
else
cell_core
.
conv
if
isinstance
(
activation
,
_AddFakeQuantAfterSubCell
):
if
isinstance
(
activation
,
_AddFakeQuantAfterSubCell
):
activation
=
activation
.
subcell
activation
=
activation
.
subcell
elif
hasattr
(
activation
,
"get_origin"
):
elif
hasattr
(
activation
,
"get_origin"
):
...
@@ -383,10 +382,17 @@ class ExportToQuantInferNetwork:
...
@@ -383,10 +382,17 @@ class ExportToQuantInferNetwork:
weight
,
bias
=
quant_utils
.
fold_batchnorm
(
weight
,
cell_core
)
weight
,
bias
=
quant_utils
.
fold_batchnorm
(
weight
,
cell_core
)
# apply the quant
# apply the quant
weight
=
Tensor
(
quant_utils
.
weight2int
(
weight
,
scale_w
,
zp_w
),
self
.
data_type
)
weight
=
quant_utils
.
weight2int
(
weight
,
scale_w
,
zp_w
)
if
bias
is
not
None
:
if
bias
is
not
None
:
bias
=
Tensor
(
scale_a_in
*
scale_w
*
bias
,
mstype
.
int32
)
bias
=
Tensor
(
scale_a_in
*
scale_w
*
bias
,
mstype
.
int32
)
scale_deq
=
Tensor
(
scale_deq
,
mstype
.
float16
)
scale_deq
=
Tensor
(
scale_deq
,
mstype
.
float16
)
# get op
if
isinstance
(
cell_core
,
quant
.
DenseQuant
):
op_core
=
P
.
MatMul
()
weight
=
np
.
transpose
(
weight
)
else
:
op_core
=
cell_core
.
conv
weight
=
Tensor
(
weight
,
self
.
data_type
)
block
=
quant
.
QuantBlock
(
op_core
,
weight
,
quant_op
,
dequant_op
,
scale_deq
,
bias
,
activation
)
block
=
quant
.
QuantBlock
(
op_core
,
weight
,
quant_op
,
dequant_op
,
scale_deq
,
bias
,
activation
)
return
block
return
block
...
...
model_zoo/official/nlp/bert/run_classifier.py
浏览文件 @
4b8d4391
...
@@ -50,7 +50,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin
...
@@ -50,7 +50,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin
power
=
optimizer_cfg
.
AdamWeightDecay
.
power
)
power
=
optimizer_cfg
.
AdamWeightDecay
.
power
)
params
=
net_with_loss
.
trainable_params
()
params
=
net_with_loss
.
trainable_params
()
decay_params
=
list
(
filter
(
optimizer_cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
decay_params
=
list
(
filter
(
optimizer_cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
x
not
in
decay_params
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
not
cfg
.
AdamWeightDecay
.
decay_filter
(
x
)
,
params
))
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
optimizer_cfg
.
AdamWeightDecay
.
weight_decay
},
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
optimizer_cfg
.
AdamWeightDecay
.
weight_decay
},
{
'params'
:
other_params
,
'weight_decay'
:
0.0
}]
{
'params'
:
other_params
,
'weight_decay'
:
0.0
}]
...
...
model_zoo/official/nlp/bert/run_ner.py
浏览文件 @
4b8d4391
...
@@ -52,7 +52,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin
...
@@ -52,7 +52,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin
power
=
optimizer_cfg
.
AdamWeightDecay
.
power
)
power
=
optimizer_cfg
.
AdamWeightDecay
.
power
)
params
=
network
.
trainable_params
()
params
=
network
.
trainable_params
()
decay_params
=
list
(
filter
(
optimizer_cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
decay_params
=
list
(
filter
(
optimizer_cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
x
not
in
decay_params
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
not
cfg
.
AdamWeightDecay
.
decay_filter
(
x
)
,
params
))
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
optimizer_cfg
.
AdamWeightDecay
.
weight_decay
},
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
optimizer_cfg
.
AdamWeightDecay
.
weight_decay
},
{
'params'
:
other_params
,
'weight_decay'
:
0.0
}]
{
'params'
:
other_params
,
'weight_decay'
:
0.0
}]
optimizer
=
AdamWeightDecay
(
group_params
,
lr_schedule
,
eps
=
optimizer_cfg
.
AdamWeightDecay
.
eps
)
optimizer
=
AdamWeightDecay
(
group_params
,
lr_schedule
,
eps
=
optimizer_cfg
.
AdamWeightDecay
.
eps
)
...
...
model_zoo/official/nlp/bert/run_pretrain.py
浏览文件 @
4b8d4391
...
@@ -116,7 +116,7 @@ def run_pretrain():
...
@@ -116,7 +116,7 @@ def run_pretrain():
power
=
cfg
.
Lamb
.
power
)
power
=
cfg
.
Lamb
.
power
)
params
=
net_with_loss
.
trainable_params
()
params
=
net_with_loss
.
trainable_params
()
decay_params
=
list
(
filter
(
cfg
.
Lamb
.
decay_filter
,
params
))
decay_params
=
list
(
filter
(
cfg
.
Lamb
.
decay_filter
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
x
not
in
decay_params
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
not
cfg
.
Lamb
.
decay_filter
(
x
)
,
params
))
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
cfg
.
Lamb
.
weight_decay
},
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
cfg
.
Lamb
.
weight_decay
},
{
'params'
:
other_params
},
{
'params'
:
other_params
},
{
'order_params'
:
params
}]
{
'order_params'
:
params
}]
...
@@ -132,7 +132,7 @@ def run_pretrain():
...
@@ -132,7 +132,7 @@ def run_pretrain():
power
=
cfg
.
AdamWeightDecay
.
power
)
power
=
cfg
.
AdamWeightDecay
.
power
)
params
=
net_with_loss
.
trainable_params
()
params
=
net_with_loss
.
trainable_params
()
decay_params
=
list
(
filter
(
cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
decay_params
=
list
(
filter
(
cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
x
not
in
decay_params
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
not
cfg
.
AdamWeightDecay
.
decay_filter
(
x
)
,
params
))
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
cfg
.
AdamWeightDecay
.
weight_decay
},
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
cfg
.
AdamWeightDecay
.
weight_decay
},
{
'params'
:
other_params
,
'weight_decay'
:
0.0
},
{
'params'
:
other_params
,
'weight_decay'
:
0.0
},
{
'order_params'
:
params
}]
{
'order_params'
:
params
}]
...
...
model_zoo/official/nlp/bert/run_squad.py
浏览文件 @
4b8d4391
...
@@ -52,7 +52,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin
...
@@ -52,7 +52,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin
power
=
optimizer_cfg
.
AdamWeightDecay
.
power
)
power
=
optimizer_cfg
.
AdamWeightDecay
.
power
)
params
=
network
.
trainable_params
()
params
=
network
.
trainable_params
()
decay_params
=
list
(
filter
(
optimizer_cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
decay_params
=
list
(
filter
(
optimizer_cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
x
not
in
decay_params
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
not
cfg
.
AdamWeightDecay
.
decay_filter
(
x
)
,
params
))
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
optimizer_cfg
.
AdamWeightDecay
.
weight_decay
},
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
optimizer_cfg
.
AdamWeightDecay
.
weight_decay
},
{
'params'
:
other_params
,
'weight_decay'
:
0.0
}]
{
'params'
:
other_params
,
'weight_decay'
:
0.0
}]
...
...
model_zoo/official/nlp/bert_thor/run_pretrain.py
浏览文件 @
4b8d4391
...
@@ -137,7 +137,7 @@ def run_pretrain():
...
@@ -137,7 +137,7 @@ def run_pretrain():
power
=
cfg
.
Lamb
.
power
)
power
=
cfg
.
Lamb
.
power
)
params
=
net_with_loss
.
trainable_params
()
params
=
net_with_loss
.
trainable_params
()
decay_params
=
list
(
filter
(
cfg
.
Lamb
.
decay_filter
,
params
))
decay_params
=
list
(
filter
(
cfg
.
Lamb
.
decay_filter
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
x
not
in
decay_params
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
not
cfg
.
Lamb
.
decay_filter
(
x
)
,
params
))
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
cfg
.
Lamb
.
weight_decay
},
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
cfg
.
Lamb
.
weight_decay
},
{
'params'
:
other_params
},
{
'params'
:
other_params
},
{
'order_params'
:
params
}]
{
'order_params'
:
params
}]
...
@@ -153,7 +153,7 @@ def run_pretrain():
...
@@ -153,7 +153,7 @@ def run_pretrain():
power
=
cfg
.
AdamWeightDecay
.
power
)
power
=
cfg
.
AdamWeightDecay
.
power
)
params
=
net_with_loss
.
trainable_params
()
params
=
net_with_loss
.
trainable_params
()
decay_params
=
list
(
filter
(
cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
decay_params
=
list
(
filter
(
cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
x
not
in
decay_params
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
not
cfg
.
AdamWeightDecay
.
decay_filter
(
x
)
,
params
))
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
cfg
.
AdamWeightDecay
.
weight_decay
},
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
cfg
.
AdamWeightDecay
.
weight_decay
},
{
'params'
:
other_params
,
'weight_decay'
:
0.0
},
{
'params'
:
other_params
,
'weight_decay'
:
0.0
},
{
'order_params'
:
params
}]
{
'order_params'
:
params
}]
...
...
model_zoo/official/nlp/tinybert/run_general_distill.py
浏览文件 @
4b8d4391
...
@@ -99,7 +99,7 @@ def run_general_distill():
...
@@ -99,7 +99,7 @@ def run_general_distill():
power
=
common_cfg
.
AdamWeightDecay
.
power
)
power
=
common_cfg
.
AdamWeightDecay
.
power
)
params
=
netwithloss
.
trainable_params
()
params
=
netwithloss
.
trainable_params
()
decay_params
=
list
(
filter
(
common_cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
decay_params
=
list
(
filter
(
common_cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
x
not
in
decay_params
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
not
cfg
.
AdamWeightDecay
.
decay_filter
(
x
)
,
params
))
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
common_cfg
.
AdamWeightDecay
.
weight_decay
},
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
common_cfg
.
AdamWeightDecay
.
weight_decay
},
{
'params'
:
other_params
,
'weight_decay'
:
0.0
},
{
'params'
:
other_params
,
'weight_decay'
:
0.0
},
{
'order_params'
:
params
}]
{
'order_params'
:
params
}]
...
...
model_zoo/official/nlp/tinybert/run_task_distill.py
浏览文件 @
4b8d4391
...
@@ -107,7 +107,7 @@ def run_predistill():
...
@@ -107,7 +107,7 @@ def run_predistill():
power
=
optimizer_cfg
.
AdamWeightDecay
.
power
)
power
=
optimizer_cfg
.
AdamWeightDecay
.
power
)
params
=
netwithloss
.
trainable_params
()
params
=
netwithloss
.
trainable_params
()
decay_params
=
list
(
filter
(
optimizer_cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
decay_params
=
list
(
filter
(
optimizer_cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
x
not
in
decay_params
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
not
cfg
.
AdamWeightDecay
.
decay_filter
(
x
)
,
params
))
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
optimizer_cfg
.
AdamWeightDecay
.
weight_decay
},
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
optimizer_cfg
.
AdamWeightDecay
.
weight_decay
},
{
'params'
:
other_params
,
'weight_decay'
:
0.0
},
{
'params'
:
other_params
,
'weight_decay'
:
0.0
},
{
'order_params'
:
params
}]
{
'order_params'
:
params
}]
...
@@ -165,7 +165,7 @@ def run_task_distill(ckpt_file):
...
@@ -165,7 +165,7 @@ def run_task_distill(ckpt_file):
power
=
optimizer_cfg
.
AdamWeightDecay
.
power
)
power
=
optimizer_cfg
.
AdamWeightDecay
.
power
)
params
=
netwithloss
.
trainable_params
()
params
=
netwithloss
.
trainable_params
()
decay_params
=
list
(
filter
(
optimizer_cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
decay_params
=
list
(
filter
(
optimizer_cfg
.
AdamWeightDecay
.
decay_filter
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
x
not
in
decay_params
,
params
))
other_params
=
list
(
filter
(
lambda
x
:
not
cfg
.
AdamWeightDecay
.
decay_filter
(
x
)
,
params
))
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
optimizer_cfg
.
AdamWeightDecay
.
weight_decay
},
group_params
=
[{
'params'
:
decay_params
,
'weight_decay'
:
optimizer_cfg
.
AdamWeightDecay
.
weight_decay
},
{
'params'
:
other_params
,
'weight_decay'
:
0.0
},
{
'params'
:
other_params
,
'weight_decay'
:
0.0
},
{
'order_params'
:
params
}]
{
'order_params'
:
params
}]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录