diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py index b31aae727ec090772f56ac759cff31ffbda1d1df..091f4bc967380825012a7405aa32512fe724771e 100644 --- a/mindspore/common/parameter.py +++ b/mindspore/common/parameter.py @@ -270,6 +270,7 @@ class Parameter(MetaTensor): "Update the parameter by a Tensor." if isinstance(self, Tensor): # for Tensor same shape: + self.init_flag = False return self.assign_value(data) # create a new tensor return Parameter(data, self.name, self.requires_grad) diff --git a/mindspore/train/quant/quant.py b/mindspore/train/quant/quant.py index 87eb0ec43b30359cb12355f910ef9ccbac71d562..b94781103029246e71c0bb1635d72d598d5098ad 100644 --- a/mindspore/train/quant/quant.py +++ b/mindspore/train/quant/quant.py @@ -29,6 +29,7 @@ from ...common import dtype as mstype from ...common.api import _executor from ...nn.layer import quant from ...ops import functional as F +from ...ops import operations as P from ...ops.operations import _inner_ops as inner from ...train import serialization from . 
import quant_utils @@ -366,8 +367,6 @@ class ExportToQuantInferNetwork: sqrt_mode = True dequant_op = inner.Dequant(sqrt_mode) - # get op - op_core = cell_core.matmul if isinstance(cell_core, quant.DenseQuant) else cell_core.conv if isinstance(activation, _AddFakeQuantAfterSubCell): activation = activation.subcell elif hasattr(activation, "get_origin"): @@ -383,10 +382,17 @@ class ExportToQuantInferNetwork: weight, bias = quant_utils.fold_batchnorm(weight, cell_core) # apply the quant - weight = Tensor(quant_utils.weight2int(weight, scale_w, zp_w), self.data_type) + weight = quant_utils.weight2int(weight, scale_w, zp_w) if bias is not None: bias = Tensor(scale_a_in * scale_w * bias, mstype.int32) scale_deq = Tensor(scale_deq, mstype.float16) + # get op + if isinstance(cell_core, quant.DenseQuant): + op_core = P.MatMul() + weight = np.transpose(weight) + else: + op_core = cell_core.conv + weight = Tensor(weight, self.data_type) block = quant.QuantBlock(op_core, weight, quant_op, dequant_op, scale_deq, bias, activation) return block diff --git a/model_zoo/official/nlp/bert/run_classifier.py b/model_zoo/official/nlp/bert/run_classifier.py index c3663a57271783bf5ad616ba3dacb036d1ff94ab..d2278bbc3c9913512f40b23cc7cee0b11276a17e 100644 --- a/model_zoo/official/nlp/bert/run_classifier.py +++ b/model_zoo/official/nlp/bert/run_classifier.py @@ -50,7 +50,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin power=optimizer_cfg.AdamWeightDecay.power) params = net_with_loss.trainable_params() decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not optimizer_cfg.AdamWeightDecay.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay': 0.0}] diff --git a/model_zoo/official/nlp/bert/run_ner.py
b/model_zoo/official/nlp/bert/run_ner.py index 1ea689394517e911cd38a51fd0276e6e7861b9a5..b3119503153fa72f471d35771c12515c68349c7e 100644 --- a/model_zoo/official/nlp/bert/run_ner.py +++ b/model_zoo/official/nlp/bert/run_ner.py @@ -52,7 +52,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin power=optimizer_cfg.AdamWeightDecay.power) params = network.trainable_params() decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not optimizer_cfg.AdamWeightDecay.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay': 0.0}] optimizer = AdamWeightDecay(group_params, lr_schedule, eps=optimizer_cfg.AdamWeightDecay.eps) diff --git a/model_zoo/official/nlp/bert/run_pretrain.py b/model_zoo/official/nlp/bert/run_pretrain.py index 1f31ff4015b55a163aea5a246a4aa4c693246834..6b4cb1548a63a700777a40c7dd95bde1559bbac1 100644 --- a/model_zoo/official/nlp/bert/run_pretrain.py +++ b/model_zoo/official/nlp/bert/run_pretrain.py @@ -116,7 +116,7 @@ def run_pretrain(): power=cfg.Lamb.power) params = net_with_loss.trainable_params() decay_params = list(filter(cfg.Lamb.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not cfg.Lamb.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': cfg.Lamb.weight_decay}, {'params': other_params}, {'order_params': params}] @@ -132,7 +132,7 @@ def run_pretrain(): power=cfg.AdamWeightDecay.power) params = net_with_loss.trainable_params() decay_params = list(filter(cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not cfg.AdamWeightDecay.decay_filter(x), params)) group_params =
[{'params': decay_params, 'weight_decay': cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay': 0.0}, {'order_params': params}] diff --git a/model_zoo/official/nlp/bert/run_squad.py b/model_zoo/official/nlp/bert/run_squad.py index 972f9dcdfc37d55be900ba86ec72010755244d7d..a026408e7c99731480bf30b9d5313fe722890ed2 100644 --- a/model_zoo/official/nlp/bert/run_squad.py +++ b/model_zoo/official/nlp/bert/run_squad.py @@ -52,7 +52,7 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin power=optimizer_cfg.AdamWeightDecay.power) params = network.trainable_params() decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not optimizer_cfg.AdamWeightDecay.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay': 0.0}] diff --git a/model_zoo/official/nlp/bert_thor/run_pretrain.py b/model_zoo/official/nlp/bert_thor/run_pretrain.py index 08161c7a13e701b24d4e818f546090d0b6a9c247..0ec84545db9dd81b98bfb1a50a03ef04a13b3fc7 100644 --- a/model_zoo/official/nlp/bert_thor/run_pretrain.py +++ b/model_zoo/official/nlp/bert_thor/run_pretrain.py @@ -137,7 +137,7 @@ def run_pretrain(): power=cfg.Lamb.power) params = net_with_loss.trainable_params() decay_params = list(filter(cfg.Lamb.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not cfg.Lamb.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': cfg.Lamb.weight_decay}, {'params': other_params}, {'order_params': params}] @@ -153,7 +153,7 @@ def run_pretrain(): power=cfg.AdamWeightDecay.power) params = net_with_loss.trainable_params() decay_params = list(filter(cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not
in decay_params, params)) + other_params = list(filter(lambda x: not cfg.AdamWeightDecay.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay': 0.0}, {'order_params': params}] diff --git a/model_zoo/official/nlp/tinybert/run_general_distill.py b/model_zoo/official/nlp/tinybert/run_general_distill.py index c0e104477362dee009ac042338d87721205962b3..50e586f0af50836260347f612c52dc3271827adc 100644 --- a/model_zoo/official/nlp/tinybert/run_general_distill.py +++ b/model_zoo/official/nlp/tinybert/run_general_distill.py @@ -99,7 +99,7 @@ def run_general_distill(): power=common_cfg.AdamWeightDecay.power) params = netwithloss.trainable_params() decay_params = list(filter(common_cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not common_cfg.AdamWeightDecay.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': common_cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay': 0.0}, {'order_params': params}] diff --git a/model_zoo/official/nlp/tinybert/run_task_distill.py b/model_zoo/official/nlp/tinybert/run_task_distill.py index 12a3acda4865192daa8c2ee05f8e42a1d589bafd..9469c475d2e716b9e6b9f5014fc7073b51f25f07 100644 --- a/model_zoo/official/nlp/tinybert/run_task_distill.py +++ b/model_zoo/official/nlp/tinybert/run_task_distill.py @@ -107,7 +107,7 @@ def run_predistill(): power=optimizer_cfg.AdamWeightDecay.power) params = netwithloss.trainable_params() decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not optimizer_cfg.AdamWeightDecay.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay':
0.0}, {'order_params': params}] @@ -165,7 +165,7 @@ def run_task_distill(ckpt_file): power=optimizer_cfg.AdamWeightDecay.power) params = netwithloss.trainable_params() decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) - other_params = list(filter(lambda x: x not in decay_params, params)) + other_params = list(filter(lambda x: not optimizer_cfg.AdamWeightDecay.decay_filter(x), params)) group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, {'params': other_params, 'weight_decay': 0.0}, {'order_params': params}]