diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py index b31aae727ec090772f56ac759cff31ffbda1d1df..091f4bc967380825012a7405aa32512fe724771e 100644 --- a/mindspore/common/parameter.py +++ b/mindspore/common/parameter.py @@ -270,6 +270,7 @@ class Parameter(MetaTensor): "Update the parameter by a Tensor." if isinstance(self, Tensor): # for Tensor same shape: + self.init_flag = False return self.assign_value(data) # create a new tensor return Parameter(data, self.name, self.requires_grad) diff --git a/mindspore/train/quant/quant.py b/mindspore/train/quant/quant.py index 87eb0ec43b30359cb12355f910ef9ccbac71d562..b94781103029246e71c0bb1635d72d598d5098ad 100644 --- a/mindspore/train/quant/quant.py +++ b/mindspore/train/quant/quant.py @@ -29,6 +29,7 @@ from ...common import dtype as mstype from ...common.api import _executor from ...nn.layer import quant from ...ops import functional as F +from ...ops import operations as P from ...ops.operations import _inner_ops as inner from ...train import serialization from . 
import quant_utils @@ -366,8 +367,6 @@ class ExportToQuantInferNetwork: sqrt_mode = True dequant_op = inner.Dequant(sqrt_mode) - # get op - op_core = cell_core.matmul if isinstance(cell_core, quant.DenseQuant) else cell_core.conv if isinstance(activation, _AddFakeQuantAfterSubCell): activation = activation.subcell elif hasattr(activation, "get_origin"): @@ -383,10 +382,17 @@ class ExportToQuantInferNetwork: weight, bias = quant_utils.fold_batchnorm(weight, cell_core) # apply the quant - weight = Tensor(quant_utils.weight2int(weight, scale_w, zp_w), self.data_type) + weight = quant_utils.weight2int(weight, scale_w, zp_w) if bias is not None: bias = Tensor(scale_a_in * scale_w * bias, mstype.int32) scale_deq = Tensor(scale_deq, mstype.float16) + # get op + if isinstance(cell_core, quant.DenseQuant): + op_core = P.MatMul() + weight = np.transpose(weight) + else: + op_core = cell_core.conv + weight = Tensor(weight, self.data_type) block = quant.QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation) return block diff --git a/model_zoo/official/nlp/bert/run_classifier.py b/model_zoo/official/nlp/bert/run_classifier.py index c3663a57271783bf5ad616ba3dacb036d1ff94ab..d2278bbc3c9913512f40b23cc7cee0b11276a17e 100644 --- a/model_zoo/official/nlp/bert/run_classifier.py +++ b/model_zoo/official/nlp/bert/run_classifier.py @@ -50,7 +50,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin power=optimizer_cfg.AdamWeightDecay.power) params = net_with_loss.trainable_params() decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not optimizer_cfg.AdamWeightDecay.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay': 0.0}] diff --git a/model_zoo/official/nlp/bert/run_ner.py
b/model_zoo/official/nlp/bert/run_ner.py index 1ea689394517e911cd38a51fd0276e6e7861b9a5..b3119503153fa72f471d35771c12515c68349c7e 100644 --- a/model_zoo/official/nlp/bert/run_ner.py +++ b/model_zoo/official/nlp/bert/run_ner.py @@ -52,7 +52,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin power=optimizer_cfg.AdamWeightDecay.power) params = network.trainable_params() decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not optimizer_cfg.AdamWeightDecay.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay': 0.0}] optimizer = AdamWeightDecay(group_params, lr_schedule, eps=optimizer_cfg.AdamWeightDecay.eps) diff --git a/model_zoo/official/nlp/bert/run_pretrain.py b/model_zoo/official/nlp/bert/run_pretrain.py index 1f31ff4015b55a163aea5a246a4aa4c693246834..6b4cb1548a63a700777a40c7dd95bde1559bbac1 100644 --- a/model_zoo/official/nlp/bert/run_pretrain.py +++ b/model_zoo/official/nlp/bert/run_pretrain.py @@ -116,7 +116,7 @@ def run_pretrain(): power=cfg.Lamb.power) params = net_with_loss.trainable_params() decay_params = list(filter(cfg.Lamb.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not cfg.Lamb.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': cfg.Lamb.weight_decay}, {'params': other_params}, {'order_params': params}] @@ -132,7 +132,7 @@ def run_pretrain(): power=cfg.AdamWeightDecay.power) params = net_with_loss.trainable_params() decay_params = list(filter(cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not cfg.AdamWeightDecay.decay_filter(x), params)) group_params =
[{'params': decay_params, 'weight_decay': cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay': 0.0}, {'order_params': params}] diff --git a/model_zoo/official/nlp/bert/run_squad.py b/model_zoo/official/nlp/bert/run_squad.py index 972f9dcdfc37d55be900ba86ec72010755244d7d..a026408e7c99731480bf30b9d5313fe722890ed2 100644 --- a/model_zoo/official/nlp/bert/run_squad.py +++ b/model_zoo/official/nlp/bert/run_squad.py @@ -52,7 +52,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin power=optimizer_cfg.AdamWeightDecay.power) params = network.trainable_params() decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not optimizer_cfg.AdamWeightDecay.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay': 0.0}] diff --git a/model_zoo/official/nlp/bert_thor/run_pretrain.py b/model_zoo/official/nlp/bert_thor/run_pretrain.py index 08161c7a13e701b24d4e818f546090d0b6a9c247..0ec84545db9dd81b98bfb1a50a03ef04a13b3fc7 100644 --- a/model_zoo/official/nlp/bert_thor/run_pretrain.py +++ b/model_zoo/official/nlp/bert_thor/run_pretrain.py @@ -137,7 +137,7 @@ def run_pretrain(): power=cfg.Lamb.power) params = net_with_loss.trainable_params() decay_params = list(filter(cfg.Lamb.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not cfg.Lamb.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': cfg.Lamb.weight_decay}, {'params': other_params}, {'order_params': params}] @@ -153,7 +153,7 @@ def run_pretrain(): power=cfg.AdamWeightDecay.power) params = net_with_loss.trainable_params() decay_params = list(filter(cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not
in decay_params, params)) + other_params = list(filter(lambda x: not cfg.AdamWeightDecay.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay': 0.0}, {'order_params': params}] diff --git a/model_zoo/official/nlp/tinybert/run_general_distill.py b/model_zoo/official/nlp/tinybert/run_general_distill.py index c0e104477362dee009ac042338d87721205962b3..50e586f0af50836260347f612c52dc3271827adc 100644 --- a/model_zoo/official/nlp/tinybert/run_general_distill.py +++ b/model_zoo/official/nlp/tinybert/run_general_distill.py @@ -99,7 +99,7 @@ def run_general_distill(): power=common_cfg.AdamWeightDecay.power) params = netwithloss.trainable_params() decay_params = list(filter(common_cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not common_cfg.AdamWeightDecay.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': common_cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay': 0.0}, {'order_params': params}] diff --git a/model_zoo/official/nlp/tinybert/run_task_distill.py b/model_zoo/official/nlp/tinybert/run_task_distill.py index 12a3acda4865192daa8c2ee05f8e42a1d589bafd..9469c475d2e716b9e6b9f5014fc7073b51f25f07 100644 --- a/model_zoo/official/nlp/tinybert/run_task_distill.py +++ b/model_zoo/official/nlp/tinybert/run_task_distill.py @@ -107,7 +107,7 @@ def run_predistill(): power=optimizer_cfg.AdamWeightDecay.power) params = netwithloss.trainable_params() decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not optimizer_cfg.AdamWeightDecay.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay':
0.0}, {'order_params': params}] @@ -165,7 +165,7 @@ def run_task_distill(ckpt_file): power=optimizer_cfg.AdamWeightDecay.power) params = netwithloss.trainable_params() decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not optimizer_cfg.AdamWeightDecay.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay': 0.0}, {'order_params': params}]