diff --git a/demo/models/mobilenet.py b/demo/models/mobilenet.py
index 9ae095e6bd54209a8144d73bdee07a45470855ce..f2e86e3f3b697fda699997785cda4b5923507e42 100644
--- a/demo/models/mobilenet.py
+++ b/demo/models/mobilenet.py
@@ -130,7 +130,6 @@ class MobileNet():
         with fluid.name_scope('last_fc'):
             output = fluid.layers.fc(input=input,
                                      size=class_dim,
-                                     act='softmax',
                                      param_attr=ParamAttr(
                                          initializer=MSRA(),
                                          name="fc7_weights"),
diff --git a/demo/models/mobilenet_v2.py b/demo/models/mobilenet_v2.py
index ccfb250b79a5365d28470886624287fbc87be50c..3e97b6c9527ab4a526068c0692fdefe150d67d33 100644
--- a/demo/models/mobilenet_v2.py
+++ b/demo/models/mobilenet_v2.py
@@ -110,7 +110,6 @@ class MobileNetV2():
 
         output = fluid.layers.fc(input=input,
                                  size=class_dim,
-                                 act='softmax',
                                  param_attr=ParamAttr(name='fc10_weights'),
                                  bias_attr=ParamAttr(name='fc10_offset'))
         return output
diff --git a/demo/models/mobilenet_v3.py b/demo/models/mobilenet_v3.py
index 3276b352744a199ee858d193cb46e1b5ce36bca7..eaba6ee3cbdeba3cfafe9f01a8c10060b590cdc8 100644
--- a/demo/models/mobilenet_v3.py
+++ b/demo/models/mobilenet_v3.py
@@ -119,7 +119,6 @@ class MobileNetV3():
         conv = self.hard_swish(conv)
         out = fluid.layers.fc(input=conv,
                               size=class_dim,
-                              act='softmax',
                               param_attr=ParamAttr(name='fc_weights'),
                               bias_attr=ParamAttr(name='fc_offset'))
         return out
@@ -244,8 +243,7 @@ class MobileNetV3():
         if num_in_filter != num_out_filter or stride != 1:
             return conv2
         else:
-            return fluid.layers.elementwise_add(
-                x=input_data, y=conv2, act=None)
+            return fluid.layers.elementwise_add(x=input_data, y=conv2, act=None)
 
 
 def MobileNetV3_small_x0_25():
diff --git a/demo/models/pvanet.py b/demo/models/pvanet.py
index 6f5024c94f33424b7a55474431cbf48d68133093..9ffa6e1804eb9eb98878cfb95b4a666f74f50709 100644
--- a/demo/models/pvanet.py
+++ b/demo/models/pvanet.py
@@ -59,10 +59,8 @@ class PVANet():
             block_configs=[
                 BlockConfig(2, '64 48-96 24-48-48 96 128', True,
                             BLOCK_TYPE_INCEP),
-                BlockConfig(1, '64 64-96 24-48-48 128', True,
-                            BLOCK_TYPE_INCEP),
-                BlockConfig(1, '64 64-96 24-48-48 128', True,
-                            BLOCK_TYPE_INCEP),
+                BlockConfig(1, '64 64-96 24-48-48 128', True, BLOCK_TYPE_INCEP),
+                BlockConfig(1, '64 64-96 24-48-48 128', True, BLOCK_TYPE_INCEP),
                 BlockConfig(1, '64 64-96 24-48-48 128', True, BLOCK_TYPE_INCEP)
             ],
             name='conv4',
@@ -76,9 +74,8 @@ class PVANet():
                 BlockConfig(1, '64 96-128 32-64-64 196', True,
                             BLOCK_TYPE_INCEP),
                 BlockConfig(1, '64 96-128 32-64-64 196', True,
-                            BLOCK_TYPE_INCEP), BlockConfig(
-                                1, '64 96-128 32-64-64 196', True,
-                                BLOCK_TYPE_INCEP)
+                            BLOCK_TYPE_INCEP),
+                BlockConfig(1, '64 96-128 32-64-64 196', True, BLOCK_TYPE_INCEP)
             ],
             name='conv5',
             end_points=end_points)
@@ -89,7 +86,6 @@ class PVANet():
 
         output = fluid.layers.fc(input=input,
                                  size=class_dim,
-                                 act='softmax',
                                  param_attr=ParamAttr(
                                      initializer=MSRA(), name="fc_weights"),
                                  bias_attr=ParamAttr(name="fc_offset"))
@@ -182,9 +178,8 @@ class PVANet():
                     conv_stride = stride
                 else:
                     conv_stride = 1
-                path_net = self._conv_bn_relu(path_net, num_output,
-                                              kernel_size, name + scope,
-                                              conv_stride)
+                path_net = self._conv_bn_relu(path_net, num_output, kernel_size,
+                                              name + scope, conv_stride)
             paths.append(path_net)
 
         if stride > 1:
@@ -359,8 +354,8 @@ class PVANet():
                       name,
                       stride=1,
                       groups=1):
-        return self._conv_bn_relu(input, num_filters, filter_size, name,
-                                  stride, groups)
+        return self._conv_bn_relu(input, num_filters, filter_size, name, stride,
+                                  groups)
 
 
 def Fpn_Fusion(blocks, net):
@@ -433,8 +428,7 @@ def east(input, class_num=31):
                 out[i], k, 1, name='fusion_' + str(len(blocks)))
         elif j <= 4:
             conv = net.deconv_bn_layer(
-                out[i], k, 2 * j, j, j // 2,
-                name='fusion_' + str(len(blocks)))
+                out[i], k, 2 * j, j, j // 2, name='fusion_' + str(len(blocks)))
         else:
             conv = net.deconv_bn_layer(
                 out[i], 32, 8, 4, 2, name='fusion_' + str(len(blocks)) + '_1')
diff --git a/demo/models/resnet.py b/demo/models/resnet.py
index 4ceaef41ecc87d7388ae05d7fcb199de1841ebc2..da5e562a86941c35dae7ae0cb6b15c5386a3eb5a 100644
--- a/demo/models/resnet.py
+++ b/demo/models/resnet.py
@@ -105,7 +105,6 @@ class ResNet():
             out = fluid.layers.fc(
                 input=pool,
                 size=class_dim,
-                act='softmax',
                 name=fc_name,
                 param_attr=fluid.param_attr.ParamAttr(
                     initializer=fluid.initializer.Uniform(-stdv, stdv)))
@@ -138,8 +137,7 @@ class ResNet():
                 bn_name = "bn" + name[3:]
         else:
             if name.split("_")[1] == "conv1":
-                bn_name = name.split("_", 1)[0] + "_bn_" + name.split("_",
-                                                                      1)[1]
+                bn_name = name.split("_", 1)[0] + "_bn_" + name.split("_", 1)[1]
             else:
                 bn_name = name.split("_", 1)[0] + "_bn" + name.split("_",
                                                                      1)[1][3:]
diff --git a/demo/prune/train.py b/demo/prune/train.py
index 9a4239872942fde3de3becc97e118117021c5ace..574f515803c7a4485bae0cf11bfc95babea9eec8 100644
--- a/demo/prune/train.py
+++ b/demo/prune/train.py
@@ -147,7 +147,8 @@ def compress(args):
     # model definition
     model = models.__dict__[args.model]()
     out = model.net(input=image, class_dim=class_dim)
-    avg_cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
+    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
+    avg_cost = paddle.mean(x=cost)
     acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
     acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
     val_program = paddle.static.default_main_program().clone(for_test=True)