diff --git a/dygraph/models/architectures/mobilenetv3.py b/dygraph/models/architectures/mobilenetv3.py
index cc7043db8ab50aff3fbb54afa0f80de84c1aa593..2899e3f76567cee638b07c86174896a19f51bd2f 100644
--- a/dygraph/models/architectures/mobilenetv3.py
+++ b/dygraph/models/architectures/mobilenetv3.py
@@ -16,16 +16,17 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import math
 import numpy as np
+
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.layer_helper import LayerHelper
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
-from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm
-
-import math
+from paddle.nn import SyncBatchNorm as BatchNorm
 
+from dygraph.models.architectures import layer_utils
 from dygraph.cvlibs import manager
 
 __all__ = [
@@ -252,19 +253,18 @@ class ConvBNLayer(fluid.dygraph.Layer):
             bias_attr=False,
             use_cudnn=use_cudnn,
             act=None)
-        self.bn = fluid.dygraph.BatchNorm(
-            num_channels=out_c,
-            act=None,
-            param_attr=ParamAttr(
+        self.bn = BatchNorm(
+            num_features=out_c,
+            weight_attr=ParamAttr(
                 name=name + "_bn_scale",
                 regularizer=fluid.regularizer.L2DecayRegularizer(
                     regularization_coeff=0.0)),
             bias_attr=ParamAttr(
                 name=name + "_bn_offset",
                 regularizer=fluid.regularizer.L2DecayRegularizer(
-                    regularization_coeff=0.0)),
-            moving_mean_name=name + "_bn_mean",
-            moving_variance_name=name + "_bn_variance")
+                    regularization_coeff=0.0)))
+        
+        self._act_op = layer_utils.Activation(act=None)
 
     def forward(self, x):
         x = self.conv(x)
diff --git a/dygraph/models/architectures/resnet_vd.py b/dygraph/models/architectures/resnet_vd.py
index 2fcf8376199639d6148fc5f910df5d9dfdce918e..c27c810c46c0bbdc06053e747c7a7eaeb22be6e1 100644
--- a/dygraph/models/architectures/resnet_vd.py
+++ b/dygraph/models/architectures/resnet_vd.py
@@ -25,10 +25,10 @@ import paddle.fluid as fluid
 from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.layer_helper import LayerHelper
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
-from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm
+from paddle.nn import SyncBatchNorm as BatchNorm
 
 from dygraph.utils import utils
-
+from dygraph.models.architectures import layer_utils
 from dygraph.cvlibs import manager
 
 __all__ = [
@@ -70,17 +70,17 @@ class ConvBNLayer(fluid.dygraph.Layer):
             bn_name = "bn" + name[3:]
         self._batch_norm = BatchNorm(
             num_filters,
-            act=act,
-            param_attr=ParamAttr(name=bn_name + '_scale'),
-            bias_attr=ParamAttr(bn_name + '_offset'),
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
+            weight_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'))
+        self._act_op = layer_utils.Activation(act=act)
 
     def forward(self, inputs):
         if self.is_vd_mode:
             inputs = self._pool2d_avg(inputs)
         y = self._conv(inputs)
         y = self._batch_norm(y)
+        y = self._act_op(y)
+
         return y
 
 
diff --git a/dygraph/models/architectures/xception_deeplab.py b/dygraph/models/architectures/xception_deeplab.py
index f856ffdedc487c9ebc6f824b835e39860f2a0ce3..f96dcb6936e25444c1d79b2461b941634fbb4c2f 100644
--- a/dygraph/models/architectures/xception_deeplab.py
+++ b/dygraph/models/architectures/xception_deeplab.py
@@ -3,8 +3,9 @@ import paddle.fluid as fluid
 from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.layer_helper import LayerHelper
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, Dropout
-from paddle.fluid.dygraph import SyncBatchNorm as BatchNorm
+from paddle.nn import SyncBatchNorm as BatchNorm
 
+from dygraph.models.architectures import layer_utils
 from dygraph.cvlibs import manager
 
 __all__ = ["Xception41_deeplab", "Xception65_deeplab", "Xception71_deeplab"]
@@ -80,17 +81,17 @@ class ConvBNLayer(fluid.dygraph.Layer):
             param_attr=ParamAttr(name=name + "/weights"),
             bias_attr=False)
         self._bn = BatchNorm(
-            num_channels=output_channels,
-            act=act,
+            num_features=output_channels,
             epsilon=1e-3,
             momentum=0.99,
-            param_attr=ParamAttr(name=name + "/BatchNorm/gamma"),
-            bias_attr=ParamAttr(name=name + "/BatchNorm/beta"),
-            moving_mean_name=name + "/BatchNorm/moving_mean",
-            moving_variance_name=name + "/BatchNorm/moving_variance")
+            weight_attr=ParamAttr(name=name + "/BatchNorm/gamma"),
+            bias_attr=ParamAttr(name=name + "/BatchNorm/beta"))
+        
+        self._act_op = layer_utils.Activation(act=act)
 
     def forward(self, inputs):
-        return self._bn(self._conv(inputs))
+        
+        return self._act_op(self._bn(self._conv(inputs)))
 
 
 class Seperate_Conv(fluid.dygraph.Layer):
@@ -116,13 +117,13 @@ class Seperate_Conv(fluid.dygraph.Layer):
             bias_attr=False)
         self._bn1 = BatchNorm(
             input_channels,
-            act=act,
             epsilon=1e-3,
             momentum=0.99,
-            param_attr=ParamAttr(name=name + "/depthwise/BatchNorm/gamma"),
-            bias_attr=ParamAttr(name=name + "/depthwise/BatchNorm/beta"),
-            moving_mean_name=name + "/depthwise/BatchNorm/moving_mean",
-            moving_variance_name=name + "/depthwise/BatchNorm/moving_variance")
+            weight_attr=ParamAttr(name=name + "/depthwise/BatchNorm/gamma"),
+            bias_attr=ParamAttr(name=name + "/depthwise/BatchNorm/beta"))
+        
+        self._act_op1 = layer_utils.Activation(act=act)
+
         self._conv2 = Conv2D(
             input_channels,
             output_channels,
@@ -134,19 +135,21 @@ class Seperate_Conv(fluid.dygraph.Layer):
             bias_attr=False)
         self._bn2 = BatchNorm(
             output_channels,
-            act=act,
             epsilon=1e-3,
             momentum=0.99,
-            param_attr=ParamAttr(name=name + "/pointwise/BatchNorm/gamma"),
-            bias_attr=ParamAttr(name=name + "/pointwise/BatchNorm/beta"),
-            moving_mean_name=name + "/pointwise/BatchNorm/moving_mean",
-            moving_variance_name=name + "/pointwise/BatchNorm/moving_variance")
+            weight_attr=ParamAttr(name=name + "/pointwise/BatchNorm/gamma"),
+            bias_attr=ParamAttr(name=name + "/pointwise/BatchNorm/beta"))
+        
+        self._act_op2 = layer_utils.Activation(act=act)
+        
 
     def forward(self, inputs):
         x = self._conv1(inputs)
         x = self._bn1(x)
+        x = self._act_op1(x)
         x = self._conv2(x)
         x = self._bn2(x)
+        x = self._act_op2(x)
         return x