diff --git a/x2paddle/convert.py b/x2paddle/convert.py
index b4566d128666a92296b4c243b7f551e80a658f36..a3cace47febe0429c52fcaa119229ee684f7ae90 100644
--- a/x2paddle/convert.py
+++ b/x2paddle/convert.py
@@ -106,8 +106,11 @@ def tf2paddle(model_path,
         # optimizer below is experimental
         optimizer.merge_activation()
         optimizer.merge_bias()
-        optimizer.merge_batch_norm()
-        optimizer.merge_prelu()
+        optimizer.optimize_sub_graph()
+
+
+#        optimizer.merge_batch_norm()
+#        optimizer.merge_prelu()
     else:
         mapper = TFOpMapperNHWC(model)
         optimizer = TFOptimizer(mapper)
diff --git a/x2paddle/optimizer/tf_optimizer.py b/x2paddle/optimizer/tf_optimizer.py
index 1980510147bb46079b6dea0f7f34af48477c2c56..0efbc2f7a166dbc74ecbb897077c891812b3a624 100644
--- a/x2paddle/optimizer/tf_optimizer.py
+++ b/x2paddle/optimizer/tf_optimizer.py
@@ -20,6 +20,15 @@ import numpy
 import copy as cp
 
 
+def exist_act(node):
+    for layer in node.fluid_code.layers:
+        if layer.param_attr is not None:
+            act = layer.param_attr.get("act", None)
+            if act is not None:
+                return True
+    return False
+
+
 class TFOptimizer(object):
     activation_ops = {
         'Relu': 'relu',
@@ -353,6 +362,12 @@ class TFOptimizer(object):
                     node.fluid_code.layers[-2].output = name
                     del node.fluid_code.layers[-1]
 
+    def optimize_sub_graph(self):
+        self.merge_batch_norm()
+        self.merge_prelu()
+        self.merge_scale()
+        self.merge_affine_channel()
+
     def merge_batch_norm(self):
         for i, name in enumerate(self.graph.topo_sort):
             node = self.graph.get_node(name)
@@ -368,6 +383,10 @@ class TFOptimizer(object):
                     is_batch_norm = False
                     continue
 
+                if exist_act(in_nodes0[0]) or exist_act(in_nodes0[1]):
+                    is_batch_norm = False
+                    continue
+
                 in_nodes1 = [
                     self.graph.get_node(in_name)
                     for in_name in in_nodes0[0].inputs
@@ -382,11 +401,17 @@ class TFOptimizer(object):
                 if in_nodes1[1].layer_type != "Mul":
                     is_batch_norm = False
                     continue
+                if exist_act(in_nodes1[1]):
+                    is_batch_norm = False
+                    continue
 
                 if in_nodes2[0].layer_type != "Const" or in_nodes2[
                         1].layer_type != "Mul":
                     is_batch_norm = False
                     continue
+                if exist_act(in_nodes2[1]):
+                    is_batch_norm = False
+                    continue
 
                 in_nodes3 = [
                     self.graph.get_node(in_name)
@@ -410,6 +435,9 @@ class TFOptimizer(object):
                 if in_nodes5.layer_type != "Add":
                     is_batch_norm = False
                     continue
+                if exist_act(in_nodes5):
+                    is_batch_norm = False
+                    continue
 
                 in_nodes6 = [
                     self.graph.get_node(in_name) for in_name in in_nodes5.inputs
@@ -485,10 +513,9 @@ class TFOptimizer(object):
 
                 if is_batch_norm:
                     index = in_nodes1[0].outputs.index(in_nodes0[0].layer_name)
-                    del in_nodes1[0].outputs[index]
+                    in_nodes1[0].outputs[index] = node.layer_name
                     node.layer_type = "FusedBatchNorm"
                     node.inputs = [in_nodes1[0].layer_name]
-                    node.outputs = node.outputs
                     act = node.fluid_code.layers[-1].param_attr.get("act", None)
                     node.fluid_code.clear()
                     attr = {
@@ -522,6 +549,9 @@ class TFOptimizer(object):
                 continue
             is_prelu = True
             if node.layer_type == "Add":
+                if exist_act(node):
+                    is_prelu = False
+                    continue
                 in_nodes0 = [
                     self.graph.get_node(in_name) for in_name in node.inputs
                 ]
@@ -529,6 +559,10 @@ class TFOptimizer(object):
                         1].layer_type != "Mul":
                     is_prelu = False
                     continue
+                if exist_act(in_nodes0[1]):
+                    is_prelu = False
+                    continue
+
                 if len(in_nodes0[0].outputs) != 1 or len(
                         in_nodes0[1].outputs) != 1:
                     is_prelu = False
@@ -546,6 +580,9 @@ class TFOptimizer(object):
                 if in_nodes2[0].layer_type != "Mul":
                     is_prelu = False
                     continue
+                if exist_act(in_nodes2[0]):
+                    is_prelu = False
+                    continue
                 if len(in_nodes2[1].outputs) != 1 or len(
                         in_nodes2[0].outputs) != 1:
                     is_prelu = False
@@ -559,6 +596,9 @@ class TFOptimizer(object):
                         1].layer_type != "Sub":
                     is_prelu = False
                     continue
+                if exist_act(in_nodes3[1]):
+                    is_prelu = False
+                    continue
                 if len(in_nodes3[0].outputs) != 1 or len(
                         in_nodes3[1].outputs) != 1:
                     is_prelu = False
@@ -638,10 +678,10 @@ class TFOptimizer(object):
                     del in_nodes1.outputs[index]
                     index = in_nodes1.outputs.index(in_nodes4[1].layer_name)
                     del in_nodes1.outputs[index]
+                    in_nodes1.outputs.append(node.layer_name)
 
                     node.layer_type = "Prelu"
                     node.inputs = [in_nodes1.layer_name]
-                    node.outputs = node.outputs
                     act = node.fluid_code.layers[-1].param_attr.get("act", None)
                     node.fluid_code.clear()
                     attr = {
@@ -660,3 +700,181 @@ class TFOptimizer(object):
                     del self.graph.node_map[in_nodes2[1].layer_name]
                     del self.graph.node_map[in_nodes3[1].layer_name]
                     del self.graph.node_map[in_nodes4[1].layer_name]
+
+    def merge_scale(self):
+        for i, name in enumerate(self.graph.topo_sort):
+            node = self.graph.get_node(name)
+            if node is None:
+                continue
+            is_scale = True
+            if node.layer_type == "Sub":
+                in_nodes0 = [
+                    self.graph.get_node(in_name) for in_name in node.inputs
+                ]
+                if in_nodes0[0].layer_type != "Mul" or in_nodes0[
+                        1].layer_type != "Const" or in_nodes0[1].value.size != 1:
+                    is_scale = False
+                    continue
+                if exist_act(in_nodes0[0]):
+                    is_scale = False
+                    continue
+                if len(in_nodes0[0].outputs) != 1 or len(
+                        in_nodes0[1].outputs) != 1:
+                    is_scale = False
+                    continue
+
+                in_nodes1 = [
+                    self.graph.get_node(in_name)
+                    for in_name in in_nodes0[0].inputs
+                ]
+                if in_nodes1[0].layer_type != "Const" or in_nodes1[
+                        1].layer_type != "RealDiv" or in_nodes1[
+                            0].value.size != 1:
+                    is_scale = False
+                    continue
+                if exist_act(in_nodes1[1]):
+                    is_scale = False
+                    continue
+                if len(in_nodes1[0].outputs) != 1 or len(
+                        in_nodes1[1].outputs) != 1:
+                    is_scale = False
+                    continue
+
+                in_nodes2 = [
+                    self.graph.get_node(in_name)
+                    for in_name in in_nodes1[1].inputs
+                ]
+                if in_nodes2[1].layer_type != "Const" or in_nodes2[
+                        1].value.size != 1:
+                    is_scale = False
+                    continue
+
+                if is_scale:
+                    in_node = self.graph.get_node(in_nodes1[1].inputs[0])
+                    index = in_node.outputs.index(in_nodes1[1].layer_name)
+                    in_node.outputs[index] = node.layer_name
+                    node.layer_type = "Scale"
+                    node.inputs = [in_node.layer_name]
+                    scale = 1.0 / in_nodes2[1].value * in_nodes1[0].value
+                    act = None
+                    if node.fluid_code.layers[0].param_attr is not None:
+                        act = node.fluid_code.layers[0].param_attr.get(
+                            "act", None)
+                    node.fluid_code.clear()
+
+                    attr = {
+                        "scale": scale,
+                        "bias": in_nodes0[1].value,
+                        "bias_after_scale": True,
+                        "act": act
+                    }
+                    node.fluid_code.add_layer("scale",
+                                              inputs=in_node,
+                                              output=node,
+                                              param_attr=attr)
+
+                    del self.graph.node_map[in_nodes0[0].layer_name]
+                    del self.graph.node_map[in_nodes0[1].layer_name]
+                    del self.graph.node_map[in_nodes1[0].layer_name]
+                    del self.graph.node_map[in_nodes1[1].layer_name]
+                    del self.graph.node_map[in_nodes2[1].layer_name]
+
+    def merge_affine_channel(self):
+        for i, name in enumerate(self.graph.topo_sort):
+            node = self.graph.get_node(name)
+            if node is None:
+                continue
+            is_affine_channel = True
+            if node.layer_type == "RealDiv":
+                in_nodes0 = [
+                    self.graph.get_node(in_name) for in_name in node.inputs
+                ]
+                bias_add = True
+                if (in_nodes0[0].layer_type != "Sub" and in_nodes0[0].layer_type
+                        != "Add") or in_nodes0[1].layer_type != "Const" or len(
+                            in_nodes0[1].value.shape) != 3:
+                    is_affine_channel = False
+                    continue
+                if in_nodes0[0].layer_type == "Sub":
+                    bias_add = False
+                if exist_act(in_nodes0[0]):
+                    is_affine_channel = False
+                    continue
+                if len(in_nodes0[0].outputs) != 1 or len(
+                        in_nodes0[1].outputs) != 1:
+                    is_affine_channel = False
+                    continue
+                in_nodes1 = [
+                    self.graph.get_node(in_name)
+                    for in_name in in_nodes0[0].inputs
+                ]
+                if len(in_nodes1[0].out_shapes[0]
+                       ) != 4 or in_nodes1[1].layer_type != "Const" or len(
+                           in_nodes1[1].value.shape) != 3:
+                    is_affine_channel = False
+                    continue
+                if len(in_nodes1[1].outputs) != 1:
+                    is_affine_channel = False
+                    continue
+                channel = in_nodes1[0].out_shapes[0][-1]
+                if channel < 0 or channel != in_nodes0[
+                        1].value.size or channel != in_nodes1[1].value.size:
+                    is_affine_channel = False
+                    continue
+                if in_nodes0[1].out_shapes[0][-1] != in_nodes0[
+                        1].value.size or in_nodes1[1].out_shapes[0][
+                            -1] != in_nodes1[1].value.size:
+                    is_affine_channel = False
+                    continue
+                if is_affine_channel:
+                    in_node = in_nodes1[0]
+                    index = in_node.outputs.index(in_nodes0[0].layer_name)
+                    in_node.outputs[index] = node.layer_name
+                    node.layer_type = "AffineChannel"
+                    node.inputs = [in_node.layer_name]
+                    scale = 1.0 / in_nodes0[1].value.flatten()
+                    bias = in_nodes1[1].value.flatten(
+                    ) / in_nodes0[1].value.flatten()
+                    if not bias_add:
+                        bias *= -1.0
+                    self.op_mapper.weights[node.layer_name + "_scale"] = scale
+                    self.op_mapper.weights[node.layer_name + "_bias"] = bias
+
+                    act = None
+                    if node.fluid_code.layers[0].param_attr is not None:
+                        act = node.fluid_code.layers[0].param_attr.get(
+                            "act", None)
+                    node.fluid_code.clear()
+
+                    attr = {
+                        "dtype": string(scale.dtype),
+                        "shape": [channel],
+                        "name": string(node.layer_name + "_scale")
+                    }
+                    node.fluid_code.add_layer("create_parameter",
+                                              inputs=None,
+                                              output=node.layer_name + "_scale",
+                                              param_attr=attr)
+                    attr = {
+                        "dtype": string(scale.dtype),
+                        "shape": [channel],
+                        "name": string(node.layer_name + "_bias")
+                    }
+                    node.fluid_code.add_layer("create_parameter",
+                                              inputs=None,
+                                              output=node.layer_name + "_bias",
+                                              param_attr=attr)
+                    inputs = {
+                        "x": in_node,
+                        "scale": node.layer_name + "_scale",
+                        "bias": node.layer_name + "_bias"
+                    }
+                    attr = {"act": act}
+                    node.fluid_code.add_layer("affine_channel",
+                                              inputs=inputs,
+                                              output=node,
+                                              param_attr=attr)
+
+                    del self.graph.node_map[in_nodes0[0].layer_name]
+                    del self.graph.node_map[in_nodes0[1].layer_name]
+                    del self.graph.node_map[in_nodes1[1].layer_name]
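
A note on the shared guard: the exist_act helper added at the top of tf_optimizer.py is what all four fusion passes now consult before rewriting a subgraph. If any fluid layer already emitted for a node carries a fused activation in its param_attr, folding that node into a FusedBatchNorm/Prelu/Scale/AffineChannel op would silently drop the activation, so the pass bails out instead. A minimal, self-contained sketch of that logic (Layer, FluidCode, and Node below are illustrative mocks, not the real x2paddle.core.fluid_code types):

    class Layer(object):
        def __init__(self, param_attr=None):
            self.param_attr = param_attr

    class FluidCode(object):
        def __init__(self, layers):
            self.layers = layers

    class Node(object):
        def __init__(self, layers):
            self.fluid_code = FluidCode(layers)

    def exist_act(node):
        # Same logic as the helper in this patch: report True as soon as
        # any emitted layer carries a non-None "act" attribute.
        for layer in node.fluid_code.layers:
            if layer.param_attr is not None:
                act = layer.param_attr.get("act", None)
                if act is not None:
                    return True
        return False

    # A node whose emitted layer has a fused activation must not be merged.
    assert exist_act(Node([Layer({"act": "relu"})]))
    assert not exist_act(Node([Layer({"act": None}), Layer()]))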
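
On the arithmetic behind merge_affine_channel: the matched TF subgraph computes (x + b) / d, or (x - b) / d when the inner op is Sub, with per-channel constants b and d, and the pass folds it into the single transform x * (1 / d) + b / d that fluid's affine_channel op applies, negating the bias in the Sub case (bias *= -1.0). A standalone numpy check of that folding (the shapes and variable names are illustrative, not taken from x2paddle):

    import numpy as np

    # NHWC feature map with C = 4 channels; b and d stand in for the 1x1xC
    # constants matched as in_nodes1[1] and in_nodes0[1] in the pass.
    x = np.random.rand(1, 8, 8, 4)
    b = np.random.rand(1, 1, 4)
    d = np.random.rand(1, 1, 4) + 0.5  # keep the divisor away from zero

    original = (x + b) / d            # RealDiv(Add(x, b), d)

    scale = 1.0 / d.flatten()         # scale = 1.0 / in_nodes0[1].value.flatten()
    bias = b.flatten() / d.flatten()  # bias = in_nodes1[1].value.flatten() / ...
    fused = x * scale + bias          # broadcasts over the trailing channel axis

    assert np.allclose(original, fused)

    # Sub variant: (x - b) / d. The pass flips the bias sign, which is the
    # same as subtracting the bias computed above.
    assert np.allclose((x - b) / d, x * scale - bias)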