diff --git a/ppcls/modeling/architectures/__init__.py b/ppcls/modeling/architectures/__init__.py
index e959e02ad5fae995ff9eecf8185887596d548ff6..bc7d7593f2eca994fc1d0133697a9f428b27a29a 100644
--- a/ppcls/modeling/architectures/__init__.py
+++ b/ppcls/modeling/architectures/__init__.py
@@ -47,4 +47,4 @@ from .ghostnet import GhostNet_x0_5, GhostNet_x1_0, GhostNet_x1_3
 # distillation model
 from .distillation_models import ResNet50_vd_distill_MobileNetV3_large_x1_0, ResNeXt101_32x16d_wsl_distill_ResNet50_vd
 
-from .csp_resnet import CSPResNet50_leaky
\ No newline at end of file
+from .csp_resnet import CSPResNet50_leaky
diff --git a/ppcls/modeling/architectures/ghostnet.py b/ppcls/modeling/architectures/ghostnet.py
index d68552b9dc213d6c41a80a09e6062e3500e4a9ac..d2c0b63e25770d284201e62f97d2fcdf5f16546a 100644
--- a/ppcls/modeling/architectures/ghostnet.py
+++ b/ppcls/modeling/architectures/ghostnet.py
@@ -37,65 +37,55 @@ class GhostNet():
     def net(self, input, class_dim=1000):
         # build first layer:
         output_channel = int(self._make_divisible(16 * self.scale, 4))
-        x = self.conv_bn_layer(
-            input=input,
-            num_filters=output_channel,
-            filter_size=3,
-            stride=2,
-            groups=1,
-            act="relu",
-            name="conv1")
+        x = self.conv_bn_layer(input=input,  # stem: 3x3 stride-2 conv -> batch norm -> ReLU
+                            num_filters=output_channel,  # _make_divisible(16 * scale, 4), computed above
+                            filter_size=3,
+                            stride=2,
+                            groups=1,
+                            act="relu",
+                            name="conv1")  # prefix for this layer's parameter names
         # build inverted residual blocks
         idx = 0
         for k, exp_size, c, use_se, s in self.cfgs:
             output_channel = int(self._make_divisible(c * self.scale, 4))
-            hidden_channel = int(
-                self._make_divisible(exp_size * self.scale, 4))
-            x = self.ghost_bottleneck(
-                inp=x,
-                hidden_dim=hidden_channel,
-                oup=output_channel,
-                kernel_size=k,
-                stride=s,
-                use_se=use_se,
-                name="ghost_bottle_" + str(idx))
+            hidden_channel = int(self._make_divisible(exp_size * self.scale, 4))  # passed as hidden_dim: width after the block's first ghost module
+            x = self.ghost_bottleneck(input=x,
+                                    hidden_dim=hidden_channel,
+                                    output=output_channel,
+                                    kernel_size=k,
+                                    stride=s,
+                                    use_se=use_se,
+                                    name="_ghostbottleneck_" + str(idx))  # idx counts the rows of self.cfgs from 0
             idx += 1
         # build last several layers
-        output_channel = int(
-            self._make_divisible(exp_size * self.scale, 4))
-        x = self.conv_bn_layer(
-            input=x,
-            num_filters=output_channel,
-            filter_size=1,
-            stride=1,
-            groups=1,
-            act="relu",
-            name="conv2")
-        x = fluid.layers.pool2d(
-            input=x, pool_type='avg', global_pooling=True)
+        output_channel = int(self._make_divisible(exp_size * self.scale, 4))  # exp_size is left over from the last self.cfgs row; assumes self.cfgs is non-empty
+        x = self.conv_bn_layer(input=x,  # 1x1 conv -> batch norm -> ReLU
+                            num_filters=output_channel,
+                            filter_size=1,
+                            stride=1,
+                            groups=1,
+                            act="relu",
+                            name="conv_last")
+        x = fluid.layers.pool2d(input=x, pool_type='avg', global_pooling=True)  # global average pooling
         output_channel = 1280
 
         stdv = 1.0 / math.sqrt(x.shape[1] * 1.0)
-        out = self.conv_bn_layer(
-            input=x,
-            num_filters=output_channel,
-            filter_size=1,
-            stride=1,
-            groups=1,
-            act="relu",
-            name="fc_0")
+        out = self.conv_bn_layer(input=x,  # 1x1 conv -> batch norm -> ReLU on the pooled features
+                          num_filters=output_channel,  # 1280, assigned just above
+                          filter_size=1,
+                          stride=1,  # groups is not passed, so conv_bn_layer's default (groups=1) applies
+                          act="relu",
+                          name="fc_0")
         out = fluid.layers.dropout(x=out, dropout_prob=0.2)
         stdv = 1.0 / math.sqrt(out.shape[1] * 1.0)
-        out = fluid.layers.fc(
-            input=out,
-            size=class_dim,
-            param_attr=ParamAttr(
-                name="fc_1_weight",
-                initializer=fluid.initializer.Uniform(-stdv, stdv)),
-            bias_attr=ParamAttr(name="fc_1_offset"))
-
-        return out
+        out = fluid.layers.fc(input=out,  # final fully connected layer with class_dim outputs, no activation
+                            size=class_dim,
+                            param_attr=ParamAttr(name="fc_1_weights",
+                                initializer=fluid.initializer.Uniform(-stdv, stdv)),  # stdv = 1 / sqrt(out.shape[1]), computed above
+                            bias_attr=ParamAttr(name="fc_1_offset"))
 
+        return out  # output of the final fc layer
+
     def _make_divisible(self, v, divisor, min_value=None):
         """
         This function is taken from the original tf repo.
@@ -119,160 +109,145 @@ class GhostNet():
                       groups=1,
                       act=None,
                       name=None):
-        x = fluid.layers.conv2d(
-            input=input,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=stride,
-            padding=(filter_size - 1) // 2,
-            groups=groups,
-            act=None,
-            param_attr=ParamAttr(
-                initializer=fluid.initializer.MSRA(), name=name + "_weights"),
-            bias_attr=False)
-
-        x = fluid.layers.batch_norm(
-            input=x,
-            act=act,
-            param_attr=ParamAttr(
-                name=name + "_bn_scale",
-                regularizer=fluid.regularizer.L2DecayRegularizer(
-                    regularization_coeff=0.0)),
-            bias_attr=ParamAttr(
-                name=name + "_bn_offset",
-                regularizer=fluid.regularizer.L2DecayRegularizer(
-                    regularization_coeff=0.0)),
-            moving_mean_name=name + "_bn_mean",
-            moving_variance_name=name + "_bn_variance")
+        x = fluid.layers.conv2d(input=input,  # convolution only: no bias and no activation here
+                                num_filters=num_filters,
+                                filter_size=filter_size,
+                                stride=stride,
+                                padding=(filter_size - 1) // 2,
+                                groups=groups,
+                                act=None,
+                                param_attr=ParamAttr(
+                                    initializer=fluid.initializer.MSRA(), name=name + "_weights"),
+                                bias_attr=False)
+        bn_name = name + "_bn"  # shared prefix for all four batch-norm parameter names
+        x = fluid.layers.batch_norm(input=x,
+                                    act=act,  # the optional activation is handled by the batch_norm layer
+                                    param_attr=ParamAttr(
+                                        name=bn_name + "_scale",
+                                        regularizer=fluid.regularizer.L2DecayRegularizer(
+                                        regularization_coeff=0.0)),
+                                    bias_attr=ParamAttr(
+                                        name=bn_name + "_offset",
+                                        regularizer=fluid.regularizer.L2DecayRegularizer(
+                                        regularization_coeff=0.0)),
+                                    moving_mean_name=bn_name + "_mean",
+                                    moving_variance_name=bn_name + "_variance")  # same "<name>_bn" prefix as scale/offset/mean; NOTE(review): confirm pretrained checkpoints use this name
         return x
 
-    def se_layer(self, input, num_channels, reduction_ratio=4, name=None):
-        pool = fluid.layers.pool2d(
-            input=input, pool_size=0, pool_type='avg', global_pooling=True)
+    def se_block(self, input, num_channels, reduction_ratio=4, name=None):  # gating: pool -> fc (relu) -> fc -> clip to [0, 1] -> multiply with input
+        pool = fluid.layers.pool2d(input=input, pool_type='avg', global_pooling=True, use_cudnn=False)  # global average pooling
         stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
-        squeeze = fluid.layers.fc(
-            input=pool,
-            size=num_channels // reduction_ratio,
-            act='relu',
-            param_attr=fluid.param_attr.ParamAttr(
-                initializer=fluid.initializer.Uniform(-stdv, stdv),
-                name=name + '_sqz_weights'),
-            bias_attr=ParamAttr(name=name + '_sqz_offset'))
+        squeeze = fluid.layers.fc(input=pool,  # fc down to num_channels // reduction_ratio units, with ReLU
+                                size=num_channels // reduction_ratio,
+                                act='relu',
+                                param_attr=fluid.param_attr.ParamAttr(
+                                    initializer=fluid.initializer.Uniform(-stdv, stdv),  # stdv = 1 / sqrt(pool.shape[1])
+                                    name=name + '_1_weights'),
+                                bias_attr=ParamAttr(name=name + '_1_offset'))
         stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
-        excitation = fluid.layers.fc(
-            input=squeeze,
-            size=num_channels,
-            act=None,
-            param_attr=fluid.param_attr.ParamAttr(
-                initializer=fluid.initializer.Uniform(-stdv, stdv),
-                name=name + '_exc_weights'),
-            bias_attr=ParamAttr(name=name + '_exc_offset'))
-        excitation = fluid.layers.clip(
-            x=excitation, min=0, max=1)
+        excitation = fluid.layers.fc(input=squeeze,  # fc back up to num_channels units, no activation
+                                    size=num_channels,
+                                    act=None,
+                                    param_attr=fluid.param_attr.ParamAttr(
+                                        initializer=fluid.initializer.Uniform(-stdv, stdv),  # stdv = 1 / sqrt(squeeze.shape[1])
+                                        name=name + '_2_weights'),
+                                    bias_attr=ParamAttr(name=name + '_2_offset'))
+        excitation = fluid.layers.clip(x=excitation, min=0, max=1)  # bound the gate values to [0, 1]
         se_scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
         return se_scale
 
     def depthwise_conv(self,
-                       inp,
-                       oup,
+                       input,
+                       output,
                        kernel_size,
                        stride=1,
                        relu=False,
                        name=None):
-        return self.conv_bn_layer(
-            input=inp,
-            num_filters=oup,
-            filter_size=kernel_size,
-            stride=stride,
-            groups=inp.shape[1],
-            act="relu" if relu else None,
-            name=name + "_dw")
+        return self.conv_bn_layer(input=input,  # conv -> batch norm, with ReLU only when relu is true
+                                num_filters=output,
+                                filter_size=kernel_size,
+                                stride=stride,
+                                groups=input.shape[1],  # one group per entry of axis 1 (the channel axis, assuming NCHW input)
+                                act="relu" if relu else None,
+                                name=name + "_depthwise")
 
     def ghost_module(self,
-                    inp,
-                    oup,
+                    input,
+                    output,
                     kernel_size=1,
                     ratio=2,
                     dw_size=3,
                     stride=1,
                     relu=True,
                     name=None):
-        self.oup = oup
-        init_channels = int(math.ceil(oup / ratio))
+        self.output = output  # NOTE(review): not read anywhere in the visible diff; confirm this attribute is needed
+        init_channels = int(math.ceil(output / ratio))  # number of filters for the primary conv below
         new_channels = int(init_channels * (ratio - 1))
-        primary_conv = self.conv_bn_layer(
-            input=inp,
-            num_filters=init_channels,
-            filter_size=kernel_size,
-            stride=stride,
-            groups=1,
-            act="relu" if relu else None,
-            name=name + "_primary_conv")
-        cheap_operation = self.conv_bn_layer(
-            input=primary_conv,
-            num_filters=new_channels,
-            filter_size=dw_size,
-            stride=1,
-            groups=init_channels,
-            act="relu" if relu else None,
-            name=name + "_cheap_operation")
-        out = fluid.layers.concat(
-            [primary_conv, cheap_operation], axis=1)
+        primary_conv = self.conv_bn_layer(input=input,  # ungrouped conv producing init_channels feature maps
+                                        num_filters=init_channels,
+                                        filter_size=kernel_size,
+                                        stride=stride,
+                                        groups=1,
+                                        act="relu" if relu else None,
+                                        name=name + "_primary_conv")
+        cheap_operation = self.conv_bn_layer(input=primary_conv,  # grouped conv over the primary output, one group per primary channel
+                                            num_filters=new_channels,
+                                            filter_size=dw_size,
+                                            stride=1,
+                                            groups=init_channels,
+                                            act="relu" if relu else None,
+                                            name=name + "_cheap_operation")
+        out = fluid.layers.concat([primary_conv, cheap_operation], axis=1)  # init_channels * ratio channels; equals `output` only when output is divisible by ratio
         return out
 
     def ghost_bottleneck(self,
-                        inp,
+                        input,
                         hidden_dim,
-                        oup,
+                        output,
                         kernel_size,
                         stride,
                         use_se,
                         name=None):
-        inp_channels = inp.shape[1]
-        x = self.ghost_module(
-            inp=inp,
-            oup=hidden_dim,
-            kernel_size=1,
-            stride=1,
-            relu=True,
-            name=name + "ghost_module_1")
+        inp_channels = input.shape[1]  # kept for the shortcut decision further down
+        x = self.ghost_module(input=input,  # first ghost module: 1x1, stride 1, hidden_dim channels requested, with ReLU
+                            output=hidden_dim,
+                            kernel_size=1,
+                            stride=1,
+                            relu=True,
+                            name=name + "_ghost_module_1")
         if stride == 2:
-            x = self.depthwise_conv(
-                inp=x,
-                oup=hidden_dim,
-                kernel_size=kernel_size,
-                stride=stride,
-                relu=False,
-                name=name + "_dw2")
+            x = self.depthwise_conv(input=x,  # stride-2 blocks apply a strided depthwise conv here, without activation
+                                    output=hidden_dim,
+                                    kernel_size=kernel_size,
+                                    stride=stride,
+                                    relu=False,
+                                    name=name + "_depthwise")  # depthwise_conv appends "_depthwise" once more to this name
         if use_se:
-            x = self.se_layer(
-                input=x, num_channels=hidden_dim, name=name + "se_layer")
-        x = self.ghost_module(
-            inp=x,
-            oup=oup,
-            kernel_size=1,
-            relu=False,
-            name=name + "ghost_module_2")
-        if stride == 1 and inp_channels == oup:
-            shortcut = inp
+            x = self.se_block(input=x, num_channels=hidden_dim, name=name+"_se")  # optional channel gating
+        x = self.ghost_module(input=x,  # second ghost module: 1x1, `output` channels requested, no ReLU
+                            output=output,
+                            kernel_size=1,
+                            relu=False,
+                            name=name + "_ghost_module_2")
+        if stride == 1 and inp_channels == output:  # no striding and the channel counts agree
+            shortcut = input  # identity shortcut
         else:
-            shortcut = self.depthwise_conv(
-                inp=inp,
-                oup=inp_channels,
-                kernel_size=kernel_size,
-                stride=stride,
-                relu=False,
-                name=name + "shortcut_depthwise_conv")
-            shortcut = self.conv_bn_layer(
-                input=shortcut,
-                num_filters=oup,
-                filter_size=1,
-                stride=1,
-                groups=1,
-                act=None,
-                name=name + "shortcut_conv_bn")
-        return fluid.layers.elementwise_add(
-            x=x, y=shortcut, axis=-1, act=None)
+            shortcut = self.depthwise_conv(input=input,  # projection shortcut, step 1: depthwise conv with the block's stride
+                                        output=inp_channels,
+                                        kernel_size=kernel_size,
+                                        stride=stride,
+                                        relu=False,
+                                        name=name + "_shortcut_depthwise")  # depthwise_conv appends "_depthwise" once more
+            shortcut = self.conv_bn_layer(input=shortcut,  # step 2: 1x1 conv -> batch norm to `output` channels, no activation
+                                        num_filters=output,
+                                        filter_size=1,
+                                        stride=1,
+                                        groups=1,
+                                        act=None,
+                                        name=name + "_shortcut_conv")
+        return fluid.layers.elementwise_add(x=x,  # residual sum of the main branch and the shortcut
+                                            y=shortcut,
+                                            axis=-1)
 
 
 def GhostNet_x0_5():