提交 4b8d4391 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!4092 [bug] Fix the BERT scripts' parameter grouping that used `in`, and fix the MatMul ATC conversion error in quant export

Merge pull request !4092 from vlne-v1/quant-matmul-fix
...@@ -270,6 +270,7 @@ class Parameter(MetaTensor): ...@@ -270,6 +270,7 @@ class Parameter(MetaTensor):
"Update the parameter by a Tensor." "Update the parameter by a Tensor."
if isinstance(self, Tensor): if isinstance(self, Tensor):
# for Tensor same shape: # for Tensor same shape:
self.init_flag = False
return self.assign_value(data) return self.assign_value(data)
# create a new tensor # create a new tensor
return Parameter(data, self.name, self.requires_grad) return Parameter(data, self.name, self.requires_grad)
......
...@@ -29,6 +29,7 @@ from ...common import dtype as mstype ...@@ -29,6 +29,7 @@ from ...common import dtype as mstype
from ...common.api import _executor from ...common.api import _executor
from ...nn.layer import quant from ...nn.layer import quant
from ...ops import functional as F from ...ops import functional as F
from ...ops import operations as P
from ...ops.operations import _inner_ops as inner from ...ops.operations import _inner_ops as inner
from ...train import serialization from ...train import serialization
from . import quant_utils from . import quant_utils
...@@ -366,8 +367,6 @@ class ExportToQuantInferNetwork: ...@@ -366,8 +367,6 @@ class ExportToQuantInferNetwork:
sqrt_mode = True sqrt_mode = True
dequant_op = inner.Dequant(sqrt_mode) dequant_op = inner.Dequant(sqrt_mode)
# get op
op_core = cell_core.matmul if isinstance(cell_core, quant.DenseQuant) else cell_core.conv
if isinstance(activation, _AddFakeQuantAfterSubCell): if isinstance(activation, _AddFakeQuantAfterSubCell):
activation = activation.subcell activation = activation.subcell
elif hasattr(activation, "get_origin"): elif hasattr(activation, "get_origin"):
...@@ -383,10 +382,17 @@ class ExportToQuantInferNetwork: ...@@ -383,10 +382,17 @@ class ExportToQuantInferNetwork:
weight, bias = quant_utils.fold_batchnorm(weight, cell_core) weight, bias = quant_utils.fold_batchnorm(weight, cell_core)
# apply the quant # apply the quant
weight = Tensor(quant_utils.weight2int(weight, scale_w, zp_w), self.data_type) weight = quant_utils.weight2int(weight, scale_w, zp_w)
if bias is not None: if bias is not None:
bias = Tensor(scale_a_in * scale_w * bias, mstype.int32) bias = Tensor(scale_a_in * scale_w * bias, mstype.int32)
scale_deq = Tensor(scale_deq, mstype.float16) scale_deq = Tensor(scale_deq, mstype.float16)
# get op
if isinstance(cell_core, quant.DenseQuant):
op_core = P.MatMul()
weight = np.transpose(weight)
else:
op_core = cell_core.conv
weight = Tensor(weight, self.data_type)
block = quant.QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation) block = quant.QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation)
return block return block
......
...@@ -50,7 +50,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin ...@@ -50,7 +50,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin
power=optimizer_cfg.AdamWeightDecay.power) power=optimizer_cfg.AdamWeightDecay.power)
params = net_with_loss.trainable_params() params = net_with_loss.trainable_params()
decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params))
other_params = list(filter(lambda x: x not in decay_params, params)) other_params = list(filter(lambda x: not cfg.AdamWeightDecay.decay_filter(x), params))
group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay},
{'params': other_params, 'weight_decay': 0.0}] {'params': other_params, 'weight_decay': 0.0}]
......
...@@ -52,7 +52,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin ...@@ -52,7 +52,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin
power=optimizer_cfg.AdamWeightDecay.power) power=optimizer_cfg.AdamWeightDecay.power)
params = network.trainable_params() params = network.trainable_params()
decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params))
other_params = list(filter(lambda x: x not in decay_params, params)) other_params = list(filter(lambda x: not cfg.AdamWeightDecay.decay_filter(x), params))
group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay},
{'params': other_params, 'weight_decay': 0.0}] {'params': other_params, 'weight_decay': 0.0}]
optimizer = AdamWeightDecay(group_params, lr_schedule, eps=optimizer_cfg.AdamWeightDecay.eps) optimizer = AdamWeightDecay(group_params, lr_schedule, eps=optimizer_cfg.AdamWeightDecay.eps)
......
...@@ -116,7 +116,7 @@ def run_pretrain(): ...@@ -116,7 +116,7 @@ def run_pretrain():
power=cfg.Lamb.power) power=cfg.Lamb.power)
params = net_with_loss.trainable_params() params = net_with_loss.trainable_params()
decay_params = list(filter(cfg.Lamb.decay_filter, params)) decay_params = list(filter(cfg.Lamb.decay_filter, params))
other_params = list(filter(lambda x: x not in decay_params, params)) other_params = list(filter(lambda x: not cfg.Lamb.decay_filter(x), params))
group_params = [{'params': decay_params, 'weight_decay': cfg.Lamb.weight_decay}, group_params = [{'params': decay_params, 'weight_decay': cfg.Lamb.weight_decay},
{'params': other_params}, {'params': other_params},
{'order_params': params}] {'order_params': params}]
...@@ -132,7 +132,7 @@ def run_pretrain(): ...@@ -132,7 +132,7 @@ def run_pretrain():
power=cfg.AdamWeightDecay.power) power=cfg.AdamWeightDecay.power)
params = net_with_loss.trainable_params() params = net_with_loss.trainable_params()
decay_params = list(filter(cfg.AdamWeightDecay.decay_filter, params)) decay_params = list(filter(cfg.AdamWeightDecay.decay_filter, params))
other_params = list(filter(lambda x: x not in decay_params, params)) other_params = list(filter(lambda x: not cfg.AdamWeightDecay.decay_filter(x), params))
group_params = [{'params': decay_params, 'weight_decay': cfg.AdamWeightDecay.weight_decay}, group_params = [{'params': decay_params, 'weight_decay': cfg.AdamWeightDecay.weight_decay},
{'params': other_params, 'weight_decay': 0.0}, {'params': other_params, 'weight_decay': 0.0},
{'order_params': params}] {'order_params': params}]
......
...@@ -52,7 +52,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin ...@@ -52,7 +52,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin
power=optimizer_cfg.AdamWeightDecay.power) power=optimizer_cfg.AdamWeightDecay.power)
params = network.trainable_params() params = network.trainable_params()
decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params))
other_params = list(filter(lambda x: x not in decay_params, params)) other_params = list(filter(lambda x: not cfg.AdamWeightDecay.decay_filter(x), params))
group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay},
{'params': other_params, 'weight_decay': 0.0}] {'params': other_params, 'weight_decay': 0.0}]
......
...@@ -137,7 +137,7 @@ def run_pretrain(): ...@@ -137,7 +137,7 @@ def run_pretrain():
power=cfg.Lamb.power) power=cfg.Lamb.power)
params = net_with_loss.trainable_params() params = net_with_loss.trainable_params()
decay_params = list(filter(cfg.Lamb.decay_filter, params)) decay_params = list(filter(cfg.Lamb.decay_filter, params))
other_params = list(filter(lambda x: x not in decay_params, params)) other_params = list(filter(lambda x: not cfg.Lamb.decay_filter(x), params))
group_params = [{'params': decay_params, 'weight_decay': cfg.Lamb.weight_decay}, group_params = [{'params': decay_params, 'weight_decay': cfg.Lamb.weight_decay},
{'params': other_params}, {'params': other_params},
{'order_params': params}] {'order_params': params}]
...@@ -153,7 +153,7 @@ def run_pretrain(): ...@@ -153,7 +153,7 @@ def run_pretrain():
power=cfg.AdamWeightDecay.power) power=cfg.AdamWeightDecay.power)
params = net_with_loss.trainable_params() params = net_with_loss.trainable_params()
decay_params = list(filter(cfg.AdamWeightDecay.decay_filter, params)) decay_params = list(filter(cfg.AdamWeightDecay.decay_filter, params))
other_params = list(filter(lambda x: x not in decay_params, params)) other_params = list(filter(lambda x: not cfg.AdamWeightDecay.decay_filter(x), params))
group_params = [{'params': decay_params, 'weight_decay': cfg.AdamWeightDecay.weight_decay}, group_params = [{'params': decay_params, 'weight_decay': cfg.AdamWeightDecay.weight_decay},
{'params': other_params, 'weight_decay': 0.0}, {'params': other_params, 'weight_decay': 0.0},
{'order_params': params}] {'order_params': params}]
......
...@@ -99,7 +99,7 @@ def run_general_distill(): ...@@ -99,7 +99,7 @@ def run_general_distill():
power=common_cfg.AdamWeightDecay.power) power=common_cfg.AdamWeightDecay.power)
params = netwithloss.trainable_params() params = netwithloss.trainable_params()
decay_params = list(filter(common_cfg.AdamWeightDecay.decay_filter, params)) decay_params = list(filter(common_cfg.AdamWeightDecay.decay_filter, params))
other_params = list(filter(lambda x: x not in decay_params, params)) other_params = list(filter(lambda x: not cfg.AdamWeightDecay.decay_filter(x), params))
group_params = [{'params': decay_params, 'weight_decay': common_cfg.AdamWeightDecay.weight_decay}, group_params = [{'params': decay_params, 'weight_decay': common_cfg.AdamWeightDecay.weight_decay},
{'params': other_params, 'weight_decay': 0.0}, {'params': other_params, 'weight_decay': 0.0},
{'order_params': params}] {'order_params': params}]
......
...@@ -107,7 +107,7 @@ def run_predistill(): ...@@ -107,7 +107,7 @@ def run_predistill():
power=optimizer_cfg.AdamWeightDecay.power) power=optimizer_cfg.AdamWeightDecay.power)
params = netwithloss.trainable_params() params = netwithloss.trainable_params()
decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params))
other_params = list(filter(lambda x: x not in decay_params, params)) other_params = list(filter(lambda x: not cfg.AdamWeightDecay.decay_filter(x), params))
group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay},
{'params': other_params, 'weight_decay': 0.0}, {'params': other_params, 'weight_decay': 0.0},
{'order_params': params}] {'order_params': params}]
...@@ -165,7 +165,7 @@ def run_task_distill(ckpt_file): ...@@ -165,7 +165,7 @@ def run_task_distill(ckpt_file):
power=optimizer_cfg.AdamWeightDecay.power) power=optimizer_cfg.AdamWeightDecay.power)
params = netwithloss.trainable_params() params = netwithloss.trainable_params()
decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params))
other_params = list(filter(lambda x: x not in decay_params, params)) other_params = list(filter(lambda x: not cfg.AdamWeightDecay.decay_filter(x), params))
group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay},
{'params': other_params, 'weight_decay': 0.0}, {'params': other_params, 'weight_decay': 0.0},
{'order_params': params}] {'order_params': params}]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册