Merge pull request #68 from jiangjiajun/develop

add optimizer for tf2fluid 1. 增加逻辑，处理NHWC格式模型 2. bias和激活函数合入前置layer

Merge pull request #68 from jiangjiajun/develop
add optimizer for tf2fluid 1. 增加逻辑，处理NHWC格式模型 2. bias和激活函数合入前置layer
8fecf767 · Jason · GitHub · 43f788cb · d2340215 · 8fecf767
6 changed files
--- a/x2paddle/convert.py
+++ b/x2paddle/convert.py
@@ -67,10 +67,17 @@ def tf2paddle(model_path, save_dir):

    from x2paddle.decoder.tf_decoder import TFDecoder
    from x2paddle.op_mapper.tf_op_mapper import TFOpMapper
+    from x2paddle.optimizer.tf_optimizer import TFOptimizer

    print("Now translating model from tensorflow to paddle.")
    model = TFDecoder(model_path)
    mapper = TFOpMapper(model)
+    optimizer = TFOptimizer(mapper)
+    # necessary optimization
+    optimizer.delete_redundance_code()
+    # optimizer below is experimental
+    optimizer.merge_activation()
+    optimizer.merge_bias()
    mapper.save_inference_model(save_dir)



--- a/x2paddle/core/graph.py
+++ b/x2paddle/core/graph.py
@@ -99,29 +99,6 @@ class Graph(object):
        self.node_map[dst].inputs.append(src)
        self.node_map[src].outputs.append(dst)

-    def remove_node(self, node_name):
-        if node_name not in self.node_map:
-            raise Exception("Node[{}] not in graph".format(node_name))
-        inputs = self.node_map[node_name].inputs
-        outputs = self.node_map[node_name].outputs
-        for input in inputs:
-            idx = self.node_map[input].outputs.index(node_name)
-            del self.node_map[input].outputs[idx]
-        for output in outputs:
-            idx = self.node_map[input].inputs.index(node_name)
-            del self.node_map[input].inputs[idx]
-        del self.node_map[node_name]
-
-        idx = self.topo_sort.index(node_name)
-        del self.topo_sort[idx]
-
-        if node_name in self.input_nodes:
-            idx = self.input_nodes.index(node_name)
-            del self.input_nodes[idx]
-        if node_name in self.output_nodes:
-            idx = self.output_nodes.index(node_name)
-            del self.output_nodes[idx]
-
    def print(self):
        for i, tmp in enumerate(self.topo_sort):
            print(tmp, self.node_map[tmp].layer_type, self.node_map[tmp].inputs,

--- a/x2paddle/core/op_mapper.py
+++ b/x2paddle/core/op_mapper.py
@@ -116,7 +116,7 @@ class OpMapper(object):
                                          feeded_var_names=input_names,
                                          target_vars=outputs,
                                          executor=exe,
-                                          params_filename="__params__")
+                                          params_filename=None)
        except:
            raise Exception(
                "Paddle code was saved in {}/model.py, but seems there's wrong exist, please check model.py manually."
@@ -142,9 +142,9 @@ class OpMapper(object):
        self.add_codes("\ndef x2paddle_net():", 0)
        for i in range(len(self.graph.topo_sort)):
            node_name = self.graph.topo_sort[i]
-            if hasattr(self, "omit_nodes") and node_name in self.omit_nodes:
-                continue
            node = self.graph.get_node(node_name)
+            if len(node.fluid_code.layers) == 0:
+                continue
            self.add_codes(node.fluid_code.gen_codes(), 1)

        self.add_codes("", 0)

--- a/x2paddle/decoder/tf_decoder.py
+++ b/x2paddle/decoder/tf_decoder.py
@@ -24,7 +24,7 @@ import sys


 class TFGraphNode(GraphNode):
-    def __init__(self, layer, layer_name=None):
+    def __init__(self, layer, layer_name=None, data_format="NHWC"):
        if layer_name is None:
            super(TFGraphNode,
                  self).__init__(layer,
@@ -35,6 +35,8 @@ class TFGraphNode(GraphNode):
                                 layer_name.replace('/', '_').replace('-', '_'))

        self.layer_type = layer.op
+        self.tf_data_format = data_format
+        self.pd_data_format = "NCHW"
        self.fluid_code = FluidCode()

        self.dtype_map = {1: "float32", 3: "int32", 4: "int8", 9: "int64"}
@@ -86,15 +88,16 @@ class TFGraphNode(GraphNode):


 class TFGraph(Graph):
-    def __init__(self, model):
+    def __init__(self, model, data_format="NHWC"):
        super(TFGraph, self).__init__(model)
        self.identity_map = dict()
        self.multi_out_ops = ['Split', 'SplitV']
+        self.tf_data_format = data_format

    def build(self):
        for layer in self.model.node:
            self.node_map[layer.name.replace('/', '_').replace(
-                '-', '_')] = TFGraphNode(layer)
+                '-', '_')] = TFGraphNode(layer, data_format=self.tf_data_format)

        for layer_name, node in self.node_map.items():
            for in_node in node.layer.input:
@@ -126,6 +129,26 @@ class TFGraph(Graph):
            node.index = 0
        return node

+    def remove_node(self, node_name):
+        """Drop a single-input node and connect its input to its outputs.
+
+        Raises Exception if node_name is not in node_map; asserts that the
+        node has exactly one input.
+        """
+        if node_name not in self.node_map:
+            raise Exception("Node[{}] not in graph".format(node_name))
+        removed = self.node_map[node_name]
+        assert len(removed.inputs) == 1
+        parent_name = removed.inputs[0]
+        parent = self.node_map[parent_name]
+        del parent.outputs[parent.outputs.index(node_name)]
+        for child_name in removed.outputs:
+            child = self.node_map[child_name]
+            child.inputs[child.inputs.index(node_name)] = parent_name
+            parent.outputs.append(child_name)
+        del self.node_map[node_name]
+        del self.topo_sort[self.topo_sort.index(node_name)]
+
    def _remove_isolated_node(self):
        # delete isolated nodes
        isolated_nodes = list()
@@ -135,7 +158,15 @@ class TFGraph(Graph):
                isolated_nodes.append(node_name)

        for node_name in isolated_nodes:
-            self.remove_node(node_name)
+            del self.node_map[node_name]
+            if node_name in self.input_nodes:
+                idx = self.input_nodes.index(node_name)
+                del self.input_nodes[idx]
+            if node_name in self.output_nodes:
+                idx = self.output_nodes.index(node_name)
+                del self.output_nodes[idx]
+            idx = self.topo_sort.index(node_name)
+            del self.topo_sort[idx]

    def _remove_identity_node(self):
        identity_node = list()
@@ -145,30 +176,47 @@ class TFGraph(Graph):

        for node_name in identity_node:
            node = self.get_node(node_name)
-            # Remind: Only 1 input for Identity node
            input_node = self.get_node(node.inputs[0])
+            self.remove_node(node_name)

-            # remove identity node from graph
            self.identity_map[node_name] = input_node.layer_name
-            idx = input_node.outputs.index(node_name)
-            del input_node.outputs[idx]

-            output_names = node.outputs
-            for output_name in output_names:
-                output_node = self.get_node(output_name)
-                idx = output_node.inputs.index(node_name)
-                output_node.inputs[idx] = input_node.layer_name
-
-            idx = self.topo_sort.index(node_name)
-            del self.topo_sort[idx]
+            #            node = self.get_node(node_name)
+            #            # Remind: Only 1 input for Identity node
+            #            input_node = self.get_node(node.inputs[0])
+            #
+            #            # remove identity node from graph
+            #            self.identity_map[node_name] = input_node.layer_name
+            #            idx = input_node.outputs.index(node_name)
+            #            del input_node.outputs[idx]
+            #
+            #            output_names = node.outputs
+            #            for output_name in output_names:
+            #                output_node = self.get_node(output_name)
+            #                idx = output_node.inputs.index(node_name)
+            #                output_node.inputs[idx] = input_node.layer_name
+            #
+            #            idx = self.topo_sort.index(node_name)
+            #            del self.topo_sort[idx]

            if node_name in self.output_nodes:
                idx = self.output_nodes.index(node_name)
                self.output_nodes[idx] = input_node.layer_name

+    def data_format_propagation(self, node):
+        """Copy node.tf_data_format onto its graph entry and all descendants."""
+        # Look the node up by name so the instance stored in node_map is updated.
+        current_node = self.node_map[node.layer_name]
+        # Fixed: this used to rebind current_node to the format string itself.
+        current_node.tf_data_format = node.tf_data_format
+        for out in current_node.outputs:
+            next_node = self.node_map[out]
+            next_node.tf_data_format = node.tf_data_format
+            self.data_format_propagation(next_node)
+

 class TFDecoder(object):
-    def __init__(self, pb_model):
+    def __init__(self, pb_model, data_format="NHWC"):
        self.sess = tf.Session()
        self.input_info = dict()
        with gfile.FastGFile(pb_model, 'rb') as f:
@@ -186,7 +234,7 @@ class TFDecoder(object):
        self.sess.run(tf.global_variables_initializer())

        self.tf_graph = TFGraph(
-            self.sess.graph._as_graph_def(add_shapes=True)[0])
+            self.sess.graph._as_graph_def(add_shapes=True)[0], data_format)
        self.tf_graph.build()

    def _fix_output_shape(self, graph):

--- a/x2paddle/op_mapper/tf_op_mapper.py
+++ b/x2paddle/op_mapper/tf_op_mapper.py
@@ -28,6 +28,25 @@ def get_same_padding(in_size, kernel_size, stride):
    return [pad0, pad1]


+def nhwc_dim_to_nchw(node, dim):
+    """Map axis index(es) from node.tf_data_format to node.pd_data_format.
+
+    Each index is resolved to its layout letter in tf_data_format and then
+    to that letter's position in pd_data_format. A list is rewritten in
+    place and returned; a scalar yields the converted index.
+    """
+    tf_data_format = list(node.tf_data_format)
+    pd_data_format = list(node.pd_data_format)
+    if isinstance(dim, list):
+        # Rewrite the caller's list in place (original behaviour).
+        for i, axis in enumerate(dim):
+            dim[i] = pd_data_format.index(tf_data_format[axis])
+        return dim
+    # Scalar axis: the arithmetic fallback that used to follow the return
+    # was unreachable and has been removed.
+    return pd_data_format.index(tf_data_format[dim])
+
+
 class TFOpMapper(OpMapper):
    directly_map_ops = {
        'Relu': ['relu'],
@@ -37,17 +56,11 @@ class TFOpMapper(OpMapper):
        'Sigmoid': ['sigmoid'],
        'Exp': ['exp'],
        'Rsqrt': ['rsqrt'],
-        'Squeeze': ['squeeze', {
-            'squeeze_dims': 'axes'
-        }],
-        'Softmax': ['softmax', {
-            'axis': 'axis'
-        }],
+        'swish_f32': ['swish']
    }
    elementwise_ops = {
        'Add': 'elementwise_add',
        'RealDiv': 'elementwise_div',
-        'BiasAdd': 'elementwise_add',
        'Sub': 'elementwise_sub',
        'Maximum': 'elementwise_max',
        'Mul': 'elementwise_mul'
@@ -121,6 +134,19 @@ class TFOpMapper(OpMapper):
            else:
                raise Exception("Unexpected situation happend")

+        if len(x_shape) == 4 and len(y_shape) == 1:
+            if x_input.tf_data_format == "NHWC":
+                axis = 1
+            else:
+                axis = -1
+            attr = {"axis": axis}
+            inputs = {"x": x_input, "y": y_input}
+            node.fluid_code.add_layer(op_type,
+                                      inputs=inputs,
+                                      output=node,
+                                      param_attr=attr)
+            return
+
        is_sub_seq = True
        for i in range(len(y_shape)):
            index = -1 * i - 1
@@ -143,6 +169,10 @@ class TFOpMapper(OpMapper):
                    else:
                        raise Exception("Unexpected situation happend")
            if x_need_expand:
+                if len(x_expand_times) == 3 and x.tf_data_format == "NHWC":
+                    x_expand_times = [x_expand_times[i] for i in [2, 0, 1]]
+                if len(x_expand_times) == 4 and x.tf_data_format == "NHWC":
+                    x_expand_times = [x_expand_times[i] for i in [0, 3, 1, 2]]
                attr = {"expand_times": x_expand_times}
                node.fluid_code.add_layer("expand",
                                          inputs=x_input,
@@ -150,6 +180,10 @@ class TFOpMapper(OpMapper):
                                          param_attr=attr)
                x_input = "x_tmp"
            if y_need_expand:
+                if len(y_expand_times) == 3 and y.tf_data_format == "NHWC":
+                    y_expand_times = [y_expand_times[i] for i in [2, 0, 1]]
+                if len(y_expand_times) == 4 and y.tf_data_format == "NHWC":
+                    y_expand_times = [y_expand_times[i] for i in [0, 3, 1, 2]]
                attr = {"expand_times": y_expand_times}
                node.fluid_code.add_layer("expand",
                                          inputs=y_input,
@@ -166,6 +200,10 @@ class TFOpMapper(OpMapper):
        shape = node.out_shapes[0]
        assert len(shape) != 0, "Unknown shape of input nodes[{}].".format(
            node.layer_name)
+        if node.tf_data_format == "NHWC" and len(shape) == 4:
+            shape = [shape[i] for i in [0, 3, 1, 2]]
+        elif node.tf_data_format == "NCHW" and len(shape) == 4:
+            self.graph.data_format_propagation(node)
        dtype = node.dtype
        attr = {
            'dtype': string(dtype),
@@ -188,6 +226,19 @@ class TFOpMapper(OpMapper):
            shape = [1]
            initializer = "Constant({})".format(value)

+        self.weights[node.layer_name] = node.value
+
+        if node.tf_data_format == "NHWC":
+            if len(shape) == 4:
+                shape = [shape[i] for i in [0, 3, 1, 2]]
+            if len(shape) == 3:
+                shape = [shape[i] for i in [2, 0, 1]]
+                self.weights[node.layer_name] = numpy.transpose(
+                    node.value, (2, 0, 1))
+        elif node.tf_data_format == "NCHW":
+            if len(shape) == 4:
+                self.graph.data_format_propagation(node)
+
        attr = {
            'dtype': string(dtype),
            'shape': shape,
@@ -198,7 +249,6 @@ class TFOpMapper(OpMapper):
                                  inputs=None,
                                  output=node,
                                  param_attr=attr)
-        self.weights[node.layer_name.replace('/', '_')] = node.value

    def Transpose(self, node):
        input = self.graph.get_node(node.layer.input[0], copy=True)
@@ -208,11 +258,46 @@ class TFOpMapper(OpMapper):
        perm.fluid_code.clear()
        perm = perm.value.tolist()

-        attr = {'perm': perm}
-        node.fluid_code.add_layer("transpose",
-                                  inputs=input,
-                                  output=node,
-                                  param_attr=attr)
+        if perm == [0, 3, 1, 2] and input.data_format == "NHWC":
+            node.fluid_code.add_layer("assign",
+                                      inputs=input,
+                                      output=node,
+                                      param_attr=None)
+            node.tf_data_format = "NCHW"
+            self.graph.data_format_propagation(node)
+        elif perm == [0, 2, 3, 1] and input.tf_data_format == "NCHW":
+            node.fluid_code.add_layer("assign",
+                                      inputs=input,
+                                      output=node,
+                                      param_attr=None)
+            node.tf_data_format = "NHWC"
+            self.graph.data_format_propagation(node)
+        elif len(input.out_shapes[0]) > 4:
+            print(input.layer_name, input.tf_data_format, input.pd_data_format)
+            tf_data_format = list(input.tf_data_format)
+            pd_data_format = list(input.pd_data_format)
+            new_perm = [i for i in range(len(perm))]
+            for i in range(len(perm)):
+                char0 = tf_data_format[i]
+                char1 = tf_data_format[perm[i]]
+                index0 = pd_data_format.index(char0)
+                index1 = pd_data_format.index(char1)
+                new_perm[index0] = index1
+            node.tf_data_format = [tf_data_format[i] for i in perm]
+            node.pd_data_format = [pd_data_format[i] for i in perm]
+            attr = {'perm': new_perm}
+            node.fluid_code.add_layer("transpose",
+                                      inputs=input,
+                                      output=node,
+                                      param_attr=attr)
+        elif len(node.out_shapes[0]) != 4:
+            attr = {'perm': perm}
+            node.fluid_code.add_layer("transpose",
+                                      inputs=input,
+                                      output=node,
+                                      param_attr=attr)
+        else:
+            raise Exception("Unexpected situation happend in Transpose OP")

    def MaxPool(self, node):
        input = self.graph.get_node(node.layer.input[0], copy=True)
@@ -226,16 +311,14 @@ class TFOpMapper(OpMapper):
        data_format = node.get_attr("data_format").decode()
        pad_mode = node.get_attr("padding").decode()
        channel_first = data_format == "NCHW"
+        padding = 0

        if not channel_first:
-            attr = {"perm": [0, 3, 1, 2]}
-            node.fluid_code.add_layer("transpose",
-                                      inputs=input,
-                                      output=node,
-                                      param_attr=attr)
            in_shape = [in_shape[i] for i in [0, 3, 1, 2]]
            strides = [strides[i] for i in [0, 3, 1, 2]]
            k_size = [k_size[i] for i in [0, 3, 1, 2]]
+        else:
+            self.graph.data_format_propagation(node)

        if pad_mode == "SAME":
            pad_h = get_same_padding(in_shape[2], k_size[2], strides[2])
@@ -243,29 +326,21 @@ class TFOpMapper(OpMapper):
            pad_h = pad_h[0] + pad_h[1]
            pad_w = pad_w[0] + pad_w[1]
            attr = {"paddings": [0, pad_h, 0, pad_w], "pad_value": -10000.0}
-            if pad_h + pad_w != 0:
-                node.fluid_code.add_layer(
-                    "pad2d",
-                    inputs=input if channel_first else node,
-                    output=node,
-                    param_attr=attr)
+            node.fluid_code.add_layer("pad2d",
+                                      inputs=input,
+                                      output=node,
+                                      param_attr=attr)
+            input = node
        attr = {
            "pool_size": k_size[2:4],
            "pool_type": string("max"),
+            "pool_padding": padding,
            "pool_stride": strides[2:4]
        }
-        node.fluid_code.add_layer(
-            "pool2d",
-            inputs=input if channel_first and pad_mode != "SAME" else node,
-            output=node,
-            param_attr=attr)
-
-        if not channel_first:
-            attr = {"perm": [0, 2, 3, 1]}
-            node.fluid_code.add_layer("transpose",
-                                      inputs=node,
-                                      output=node,
-                                      param_attr=attr)
+        node.fluid_code.add_layer("pool2d",
+                                  inputs=input,
+                                  output=node,
+                                  param_attr=attr)

    def Conv2D(self, node):
        input = self.graph.get_node(node.layer.input[0], copy=True)
@@ -288,49 +363,56 @@ class TFOpMapper(OpMapper):
        data_format = node.get_attr("data_format").decode()
        pad_mode = node.get_attr("padding").decode()
        channel_first = data_format == "NCHW"
+        padding = 0
+
+        self.weights[kernel.layer_name.replace('/', '_')] = numpy.transpose(
+            kernel.value, (3, 2, 0, 1))

        if not channel_first:
-            self.weights[kernel.layer_name.replace('/', '_')] = numpy.transpose(
-                kernel.value, (3, 2, 0, 1))
-            attr = {"perm": [0, 3, 1, 2]}
-            node.fluid_code.add_layer("transpose",
-                                      inputs=input,
-                                      output=node,
-                                      param_attr=attr)
            in_shape = [in_shape[i] for i in [0, 3, 1, 2]]
            strides = [strides[i] for i in [0, 3, 1, 2]]
            dilations = [dilations[i] for i in [0, 3, 1, 2]]
+        else:
+            self.graph.data_format_propagation(node)

        if pad_mode == "SAME":
            pad_h = get_same_padding(in_shape[2], k_size[0], strides[2])
            pad_w = get_same_padding(in_shape[3], k_size[1], strides[3])
-            attr = {"paddings": pad_h + pad_w, "pad_value": 0.0}
-            if pad_h[0] + pad_h[1] + pad_w[0] + pad_w[1] != 0:
-                node.fluid_code.add_layer(
-                    "pad2d",
-                    inputs=input if channel_first else node,
-                    output=node,
-                    param_attr=attr)
+            if pad_h[0] == pad_h[1] and pad_w[0] == pad_w[1]:
+                padding = [pad_h[0], pad_w[0]]
+            else:
+                attr = {"paddings": pad_h + pad_w, "pad_value": 0.0}
+                node.fluid_code.add_layer("pad2d",
+                                          inputs=input,
+                                          output=node,
+                                          param_attr=attr)
+                input = node
        attr = {
            "bias_attr": False,
            "param_attr": string(kernel.layer_name),
            "num_filters": k_size[3],
            "filter_size": k_size[0:2],
            "stride": strides[2:4],
-            "dilation": dilations[2:4]
+            "dilation": dilations[2:4],
+            "padding": padding
        }
-        node.fluid_code.add_layer(
-            "conv2d",
-            inputs=input if channel_first and pad_mode != "SAME" else node,
-            output=node,
-            param_attr=attr)
+        node.fluid_code.add_layer("conv2d",
+                                  inputs=input,
+                                  output=node,
+                                  param_attr=attr)

-        if not channel_first:
-            attr = {"perm": [0, 2, 3, 1]}
-            node.fluid_code.add_layer("transpose",
-                                      inputs=node,
-                                      output=node,
-                                      param_attr=attr)
+    def BiasAdd(self, node):
+        """Map BiasAdd to elementwise_add (axis 1 for 4-D NHWC, else -1)."""
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        bias = self.graph.get_node(node.layer.input[1], copy=True)
+        is_nhwc_4d = (input.tf_data_format == "NHWC"
+                      and len(input.out_shapes[0]) == 4)
+        # 4-D inputs tagged NHWC get axis 1; everything else uses the last axis.
+        attr = {"axis": 1 if is_nhwc_4d else -1}
+        node.fluid_code.add_layer("elementwise_add",
+                                  inputs={"x": input, "y": bias},
+                                  output=node,
+                                  param_attr=attr)

    def FusedBatchNorm(self, node):
        input = self.graph.get_node(node.layer.input[0], copy=True)
@@ -350,17 +432,12 @@ class TFOpMapper(OpMapper):
        self.omit_nodes.append(moving_mean.layer_name)
        self.omit_nodes.append(moving_var.layer_name)

-        if not channel_first:
-            attr = {"perm": [0, 3, 1, 2]}
-            node.fluid_code.add_layer("transpose",
-                                      inputs=input,
-                                      output=node,
-                                      param_attr=attr)
+        if channel_first:
+            self.data_format_propagation(node)

        attr = {
            "epsilon": node.get_attr("epsilon"),
            "param_attr": string(gamma.layer_name),
-            #            "data_layout": string(node.get_attr("data_format").decode()),
            "bias_attr": string(beta.layer_name),
            "moving_mean_name": string(moving_mean.layer_name),
            "moving_variance_name": string(moving_var.layer_name),
@@ -368,17 +445,10 @@ class TFOpMapper(OpMapper):
        }

        node.fluid_code.add_layer("batch_norm",
-                                  inputs=input if channel_first else node,
+                                  inputs=input,
                                  output=node,
                                  param_attr=attr)

-        if not channel_first:
-            attr = {"perm": [0, 2, 3, 1]}
-            node.fluid_code.add_layer("transpose",
-                                      inputs=node,
-                                      output=node,
-                                      param_attr=attr)
-
    def DepthwiseConv2dNative(self, node):
        input = self.graph.get_node(node.layer.input[0], copy=True)
        kernel = self.graph.get_node(node.layer.input[1], copy=True)
@@ -400,29 +470,31 @@ class TFOpMapper(OpMapper):
        data_format = node.get_attr("data_format").decode()
        pad_mode = node.get_attr("padding").decode()
        channel_first = data_format == "NCHW"
+        padding = 0
+
+        self.weights[kernel.layer_name.replace('/', '_')] = numpy.transpose(
+            kernel.value, (2, 3, 0, 1))

        if not channel_first:
-            self.weights[kernel.layer_name.replace('/', '_')] = numpy.transpose(
-                kernel.value, (2, 3, 0, 1))
-            attr = {"perm": [0, 3, 1, 2]}
-            node.fluid_code.add_layer("transpose",
-                                      inputs=input,
-                                      output=node,
-                                      param_attr=attr)
            in_shape = [in_shape[i] for i in [0, 3, 1, 2]]
            strides = [strides[i] for i in [0, 3, 1, 2]]
            dilations = [dilations[i] for i in [0, 3, 1, 2]]
+        else:
+            self.data_format_propagation(node)

        if pad_mode == "SAME":
            pad_h = get_same_padding(in_shape[2], k_size[0], strides[2])
            pad_w = get_same_padding(in_shape[3], k_size[1], strides[3])
-            attr = {"paddings": pad_h + pad_w, "pad_value": 0.0}
-            if pad_h[0] + pad_h[1] + pad_w[0] + pad_w[1] != 0:
+            if pad_h[0] == pad_h[1] and pad_w[0] == pad_w[1]:
+                padding = [pad_h[0], pad_w[0]]
+            else:
+                attr = {"paddings": pad_h + pad_w, "pad_value": 0.0}
                node.fluid_code.add_layer("pad2d",
-                                          inputs=input if channel_first
-                                          and pad_mode != "SAME" else node,
+                                          inputs=input,
                                          output=node,
                                          param_attr=attr)
+                input = node
+
        attr = {
            "bias_attr": False,
            "param_attr": string(kernel.layer_name),
@@ -430,20 +502,14 @@ class TFOpMapper(OpMapper):
            "filter_size": k_size[0:2],
            "stride": strides[2:4],
            "dilation": dilations[2:4],
-            "groups": k_size[3] * in_shape[1]
+            "groups": k_size[3] * in_shape[1],
+            "padding": padding
        }
        node.fluid_code.add_layer("conv2d",
-                                  inputs=input if channel_first else node,
+                                  inputs=input,
                                  output=node,
                                  param_attr=attr)

-        if not channel_first:
-            attr = {"perm": [0, 2, 3, 1]}
-            node.fluid_code.add_layer("transpose",
-                                      inputs=node,
-                                      output=node,
-                                      param_attr=attr)
-
    def Reshape(self, node):
        input = self.graph.get_node(node.layer.input[0], copy=True)
        param = self.graph.get_node(node.layer.input[1], copy=True)
@@ -474,6 +540,8 @@ class TFOpMapper(OpMapper):
                    new_param += (node.layer_name + "[{}]".format(i) + ", ")
                new_param = new_param.strip(", ") + "]"
                attr = {"shape": new_param}
+        if len(attr["shape"]) == 4 and node.tf_data_format == "NHWC":
+            attr["shape"] = [attr["shape"][i] for i in [0, 3, 1, 2]]
        node.fluid_code.add_layer("reshape",
                                  inputs=input,
                                  output=node,
@@ -493,14 +561,11 @@ class TFOpMapper(OpMapper):
        channel_first = data_format == "NCHW"

        if not channel_first:
-            attr = {"perm": [0, 3, 1, 2]}
-            node.fluid_code.add_layer("transpose",
-                                      inputs=input,
-                                      output=node,
-                                      param_attr=attr)
            in_shape = [in_shape[i] for i in [0, 3, 1, 2]]
            strides = [strides[i] for i in [0, 3, 1, 2]]
            k_size = [k_size[i] for i in [0, 3, 1, 2]]
+        else:
+            self.graph.data_format_propagation(node)

        attr = {
            "pool_size": k_size[2:4],
@@ -514,17 +579,10 @@ class TFOpMapper(OpMapper):
                1], "Cannot map AvgPool"
            attr["pool_padding"] = [pad_h[0], pad_w[0]]
        node.fluid_code.add_layer("pool2d",
-                                  inputs=input if channel_first else node,
+                                  inputs=input,
                                  output=node,
                                  param_attr=attr)

-        if not channel_first:
-            attr = {"perm": [0, 2, 3, 1]}
-            node.fluid_code.add_layer("transpose",
-                                      inputs=node,
-                                      output=node,
-                                      param_attr=attr)
-
    def SplitV(self, node):
        input = self.graph.get_node(node.layer.input[0], copy=True)
        num_sections = self.graph.get_node(node.layer.input[1], copy=True)
@@ -533,6 +591,9 @@ class TFOpMapper(OpMapper):
        assert dim.layer_type == "Const"
        self.omit_nodes.append(num_sections.layer_name)
        self.omit_nodes.append(dim.layer_name)
+        dim = dim.value
+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            dim = nhwc_dim_to_nchw(input, dim)
        attr = {
            "num_or_sections": num_sections.value.tolist(),
            "dim": dim.value
@@ -550,7 +611,11 @@ class TFOpMapper(OpMapper):
        axis = self.graph.get_node(node.layer.input[-1], copy=True)
        assert axis.layer_type == "Const"
        self.omit_nodes.append(axis.layer_name)
-        attr = {"axis": axis.value}
+        axis = axis.value
+        if inputs[0].tf_data_format == "NHWC" and len(
+                inputs[0].out_shapes[0]) == 4:
+            axis = nhwc_dim_to_nchw(inputs[0], axis)
+        attr = {"axis": axis}
        node.fluid_code.add_layer("concat",
                                  inputs=inputs,
                                  output=node,
@@ -561,7 +626,13 @@ class TFOpMapper(OpMapper):
        expand_times = self.graph.get_node(node.layer.input[1], copy=True)
        assert expand_times.layer_type == "Const"
        self.omit_nodes.append(expand_times.layer_name)
-        attr = {"expand_times": expand_times.value.tolist()}
+        expand_times = expand_times.value.tolist()
+        if input.tf_data_format == "NHWC":
+            if len(input.out_shapes[0]) == 4:
+                expand_times = [expand_times[i] for i in [0, 3, 1, 2]]
+            elif len(input.out_shape[0]) == 3:
+                expand_times = [expand_times[i] for i in [2, 0, 1]]
+        attr = {"expand_times": expand_times}
        node.fluid_code.add_layer("expand",
                                  inputs=input,
                                  output=node,
@@ -571,7 +642,18 @@ class TFOpMapper(OpMapper):
        inputs = [
            self.graph.get_node(name, copy=True) for name in node.layer.input
        ]
-        attr = {"axis": node.get_attr("axis")}
+        axis = node.get_attr("axis")
+        if inputs[0].tf_data_format == "NHWC" and len(
+                inputs[0].out_shapes[0]) == 4:
+            tf_data_format = list(inputs[0].tf_data_format)
+            tf_data_format.insert(axis, str(len(tf_data_format)))
+            axis = nhwc_dim_to_nchw(inputs[0], axis)
+            pd_data_format = list(inputs[0].pd_data_format)
+            pd_data_format.insert(axis, str(len(pd_data_format)))
+            node.tf_data_format = "".join(tf_data_format)
+            node.pd_data_format = "".join(pd_data_format)
+
+        attr = {"axis": axis}
        node.fluid_code.add_layer("stack",
                                  inputs=inputs,
                                  output=node,
@@ -582,7 +664,10 @@ class TFOpMapper(OpMapper):
        paddings = self.graph.get_node(node.layer.input[1], copy=True)
        assert paddings.layer_type == "Const", "Padding should be Const"
        self.omit_nodes.append(paddings.layer_name)
-        attr = {"paddings": paddings.value.flatten().tolist()}
+        paddings = paddings.value.flatten().tolist()
+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            paddings = [paddings[i] for i in [0, 1, 6, 7, 2, 3, 4, 5]]
+        attr = {"paddings": paddings}
        node.fluid_code.add_layer("pad",
                                  inputs=input,
                                  output=node,
@@ -608,24 +693,18 @@ class TFOpMapper(OpMapper):
                               output=node,
                               param_attr=None)

-    def swish_f32(self, node):
-        input = self.graph.get_node(node.layer.input[0], copy=True)
-        node.fluid_code.add_layer("sigmoid",
-                                  inputs=input,
-                                  output=node,
-                                  param_attr=None)
-        inputs = {"x": input, "y": node}
-        node.fluid_code.add_layer("elementwise_mul",
-                                  inputs=inputs,
-                                  output=node,
-                                  param_attr=None)
-
    def Mean(self, node):
        input = self.graph.get_node(node.layer.input[0], copy=True)
        reduce_idx = self.graph.get_node(node.layer.input[1], copy=True)
        assert reduce_idx.layer_type == "Const", "Only support Const parameter[reduce_idx]"
+        dims = reduce_idx.value.tolist()
        keep_dims = node.get_attr("keep_dims")
-        attr = {"dim": reduce_idx.value.tolist(), "keep_dim": keep_dims}
+
+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            for i in range(len(dims)):
+                dims[i] = nhwc_dim_to_nchw(input, dims[i])
+
+        attr = {"dim": dims, "keep_dim": keep_dims}
        node.fluid_code.add_layer("reduce_mean",
                                  inputs=input,
                                  output=node,
@@ -658,7 +737,10 @@ class TFOpMapper(OpMapper):
        axis = self.graph.get_node(node.layer.input[1], copy=True)
        assert axis.layer_type == "Const", "ArgMax only support Const parameter"
        self.omit_nodes.append(axis.layer_name)
-        attr = {"axis": axis.value}
+        axis = axis.value
+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            axis = nhwc_dim_to_nchw(input, axis)
+        attr = {"axis": axis}
        node.fluid_code.add_layer("argmax",
                                  inputs=input,
                                  output=node,
@@ -678,11 +760,13 @@ class TFOpMapper(OpMapper):
        strides = strides.value.tolist()
        assert len(set(strides)) == 1 and strides[0] == 1

-        attr = {
-            "axes": range(len(strides)),
-            "starts": begin.value.tolist(),
-            "ends": end.value.tolist()
-        }
+        begin = begin.value.tolist()
+        end = end.value.tolist()
+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            begin = [begin[i] for i in [0, 3, 1, 2]]
+            end = [end[i] for i in [0, 3, 1, 2]]
+
+        attr = {"axes": range(len(strides)), "starts": begin, "ends": end}
        node.fluid_code.add_layer("slice",
                                  inputs=input,
                                  output=node,
@@ -705,6 +789,10 @@ class TFOpMapper(OpMapper):
        else:
            size = self.decoder.infer_tensor(size).tolist()

+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            size = [size[i] for i in [0, 3, 1, 2]]
+            begin = [begin[i] for i in [0, 3, 1, 2]]
+
        attr = {"shape": size, "offsets": begin}
        node.fluid_code.add_layer("crop",
                                  inputs=input,
@@ -732,36 +820,37 @@ class TFOpMapper(OpMapper):
        data_format = node.get_attr("data_format").decode()
        pad_mode = node.get_attr("padding").decode()
        channel_first = data_format == "NCHW"
+        self.weights[kernel.layer_name.replace('/', '_')] = numpy.transpose(
+            kernel.value, (3, 2, 0, 1))

        if not channel_first:
-            self.weights[kernel.layer_name.replace('/', '_')] = numpy.transpose(
-                kernel.value, (3, 2, 0, 1))
-            attr = {"perm": [0, 3, 1, 2]}
-            node.fluid_code.add_layer("transpose",
-                                      inputs=input,
-                                      output=node,
-                                      param_attr=attr)
            in_shape = [in_shape[i] for i in [0, 3, 1, 2]]
            strides = [strides[i] for i in [0, 3, 1, 2]]
            dilations = [dilations[i] for i in [0, 3, 1, 2]]
+        else:
+            self.data_format_propagation(node)

+        padding = 0
        if pad_mode == "SAME":
            pad_h = get_same_padding(in_shape[2], k_size[0], strides[2])
            pad_w = get_same_padding(in_shape[3], k_size[1], strides[3])
-            attr = {"paddings": pad_h + pad_w, "pad_value": 0.0}
-            if pad_h[0] + pad_h[1] + pad_w[0] + pad_w[1] != 0:
-                node.fluid_code.add_layer(
-                    "pad2d",
-                    inputs=input if channel_first else node,
-                    output=node,
-                    param_attr=attr)
+            if pad_h[0] == pad_h[1] and pad_w[0] == pad_w[1]:
+                padding = [pad_h[0], pad_w[0]]
+            else:
+                attr = {"paddings": pad_h + pad_w, "pad_value": 0.0}
+                node.fluid_code.add_layer("pad2d",
+                                          inputs=input,
+                                          output=node,
+                                          param_attr=attr)
+                input = node
        attr = {
            "bias_attr": False,
            "param_attr": string(kernel.layer_name),
            "num_filters": k_size[3],
            "filter_size": k_size[0:2],
            "stride": strides[2:4],
-            "dilation": dilations[2:4]
+            "dilation": dilations[2:4],
+            "padding": padding
        }
        node.fluid_code.add_layer(
            "conv2d_transpose",
@@ -769,19 +858,16 @@ class TFOpMapper(OpMapper):
            output=node,
            param_attr=attr)

-        if not channel_first:
-            attr = {"perm": [0, 2, 3, 1]}
-            node.fluid_code.add_layer("transpose",
-                                      inputs=node,
-                                      output=node,
-                                      param_attr=attr)
-
    def Max(self, node):
        input = self.graph.get_node(node.layer.input[0], copy=True)
        reduce_idx = self.graph.get_node(node.layer.input[1], copy=True)
        assert reduce_idx.layer_type == "Const", "Only support Const parameter[reduce_idx]"
        keep_dims = node.get_attr("keep_dims")
-        attr = {"dim": reduce_idx.value.tolist(), "keep_dim": keep_dims}
+        dim = reduce_idx.value.tolist()
+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            dim = nhwc_dim_to_nchw(input, dim)
+
+        attr = {"dim": dim, "keep_dim": keep_dims}
        node.fluid_code.add_layer("reduce_max",
                                  inputs=input,
                                  output=node,
@@ -792,7 +878,11 @@ class TFOpMapper(OpMapper):
        reduce_idx = self.graph.get_node(node.layer.input[1], copy=True)
        assert reduce_idx.layer_type == "Const", "Only support Const parameter[reduce_idx]"
        keep_dims = node.get_attr("keep_dims")
-        attr = {"dim": reduce_idx.value.tolist(), "keep_dim": keep_dims}
+        dim = reduce_idx.value.tolist()
+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            dim = nhwc_dim_to_nchw(input, dim)
+
+        attr = {"dim": dim, "keep_dim": keep_dims}
        node.fluid_code.add_layer("reduce_sum",
                                  inputs=input,
                                  output=node,
@@ -826,8 +916,35 @@ class TFOpMapper(OpMapper):
        assert dim.layer_type == "Const"
        self.omit_nodes.append(dim.layer_name)
        num_split = node.get_attr('num_split')
-        attr = {"num_or_sections": num_split, "dim": dim.value}
+        dim = dim.value
+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            dim = nhwc_dim_to_nchw(input, dim)
+
+        attr = {"num_or_sections": num_split, "dim": dim}
        node.fluid_code.add_layer("split",
                                  inputs=input,
                                  output=node,
                                  param_attr=attr)
+
+    def Squeeze(self, node):
+        """Emit a fluid ``squeeze`` layer for ``node``.
+
+        The axes come from the node's ``squeeze_dims`` attribute. When the
+        first input is tagged "NHWC" and its first output shape has four
+        dimensions, every axis is passed through ``nhwc_dim_to_nchw`` first.
+        The remapping is written back into the list returned by
+        ``node.get_attr`` (in place).
+        """
+        src = self.graph.get_node(node.layer.input[0], copy=True)
+        axes = node.get_attr('squeeze_dims')
+        needs_remap = (src.tf_data_format == "NHWC"
+                       and len(src.out_shapes[0]) == 4)
+        if needs_remap:
+            for pos, tf_axis in enumerate(axes):
+                axes[pos] = nhwc_dim_to_nchw(src, tf_axis)
+        node.fluid_code.add_layer("squeeze",
+                                  inputs=src,
+                                  output=node,
+                                  param_attr={"axes": axes})
+
+    def Softmax(self, node):
+        """Emit a fluid ``softmax`` layer for ``node``.
+
+        The axis is read from the node's ``axis`` attribute. When the first
+        input is tagged "NHWC" and its first output shape has four
+        dimensions, the axis is passed through ``nhwc_dim_to_nchw`` first.
+        """
+        src = self.graph.get_node(node.layer.input[0], copy=True)
+        axis = node.get_attr("axis")
+        if src.tf_data_format == "NHWC":
+            if len(src.out_shapes[0]) == 4:
+                axis = nhwc_dim_to_nchw(src, axis)
+        node.fluid_code.add_layer("softmax",
+                                  inputs=src,
+                                  output=node,
+                                  param_attr={"axis": axis})
--- a/x2paddle/optimizer/tf_optimizer.py
+++ b/x2paddle/optimizer/tf_optimizer.py
@@ -13,10 +13,95 @@
 # limitations under the License.

 # TODO useless node remove
-from x2paddle.decoder.tf_decoder import TFGraph
+from x2paddle.op_mapper.tf_op_mapper import TFOpMapper
+from x2paddle.core.util import *

-# TODO bn merge

-# TODO activation merge
+class TFOptimizer(object):
+    """Post-processing passes over the graph held by an op mapper.
+
+    Each pass edits the ``fluid_code`` already attached to graph nodes:
+    clearing the code of omitted nodes, and folding activation / BiasAdd
+    nodes into the last layer emitted by the node that feeds them.
+    """
+
+    # TF op type -> activation name written into a layer's 'act' attribute.
+    activation_ops = {
+        'Relu': 'relu',
+        'Sigmoid': 'sigmoid',
+        'Relu6': 'relu6',
+        'swish_f32': 'swish'
+    }
+    # Node types whose last emitted layer may receive a merged activation.
+    layers_with_act = [
+        'Conv2D',
+        'BiasAdd',
+        'DepthwiseConv2dNative',
+        'Conv2DBackpropInput',
+        'FusedBatchNorm',
+    ]
+    # Node types whose last emitted layer may receive a merged bias.
+    layers_with_bias = [
+        'Conv2D',
+        'DepthwiseConv2dNative',
+        'Conv2DBackpropInput',
+    ]

-# TODO biasadd merge
+    def __init__(self, op_mapper):
+        """Keep the op mapper and a shortcut to its graph."""
+        self.op_mapper = op_mapper
+        self.graph = op_mapper.graph
+
+    def delete_redundance_code(self):
+        """Clear the fluid code of nodes that were omitted by every consumer.
+
+        A node's code is cleared when its name occurs in
+        ``op_mapper.omit_nodes`` at least as many times as it has outputs.
+        """
+        for name in self.graph.topo_sort:
+            if name not in self.op_mapper.omit_nodes:
+                continue
+            candidate = self.graph.get_node(name)
+            times_omitted = self.op_mapper.omit_nodes.count(name)
+            if len(candidate.outputs) <= times_omitted:
+                candidate.fluid_code.clear()
+
+    def merge_activation(self):
+        """Fold activation nodes into the layer of the node feeding them.
+
+        An activation node is merged only when its first input's type is in
+        ``layers_with_act``, that input has emitted code, the input's last
+        layer carries no non-None 'act' yet, and the input has exactly one
+        output. A merged activation node is removed from the graph.
+        """
+        # Collect the names up front; remove_node() is called while merging.
+        act_names = [
+            name for name in self.graph.topo_sort
+            if self.graph.get_node(name).layer_type in self.activation_ops
+        ]
+
+        for act_name in act_names:
+            act_node = self.graph.get_node(act_name)
+            producer = self.graph.get_node(act_node.inputs[0])
+            if producer.layer_type not in self.layers_with_act:
+                continue
+            if len(producer.fluid_code.layers) == 0:
+                continue
+            last_layer = producer.fluid_code.layers[-1]
+            already_activated = ('act' in last_layer.param_attr
+                                 and last_layer.param_attr['act'] is not None)
+            if already_activated:
+                continue
+            if len(producer.outputs) != 1:
+                continue
+            last_layer.param_attr['act'] = string(
+                self.activation_ops[act_node.layer_type])
+            # The fused layer takes over the activation's output.
+            last_layer.output = act_node.fluid_code.layers[0].output
+            self.graph.remove_node(act_name)
+
+    def merge_bias(self):
+        """Fold BiasAdd nodes into the layer of the node feeding them.
+
+        A BiasAdd node is merged only when its first input's type is in
+        ``layers_with_bias``, that input has exactly one output and has
+        emitted code, the two layers do not both carry an activation, and
+        the input's last layer has a falsy 'bias_attr'. After merging, the
+        BiasAdd node's own fluid code is cleared (the node stays in the
+        graph).
+        """
+        for name in self.graph.topo_sort:
+            bias_node = self.graph.get_node(name)
+            if bias_node.layer_type != "BiasAdd":
+                continue
+            producer = self.graph.get_node(bias_node.inputs[0])
+            if producer.layer_type not in self.layers_with_bias:
+                continue
+            if len(producer.outputs) != 1:
+                continue
+            if len(producer.fluid_code.layers) == 0:
+                continue
+
+            bias_attrs = bias_node.fluid_code.layers[-1].param_attr
+            # Mere presence of the key counts for the bias layer.
+            bias_with_act = 'act' in bias_attrs
+            target = producer.fluid_code.layers[-1]
+            layer_with_act = ('act' in target.param_attr
+                              and target.param_attr['act'] is not None)
+            if bias_with_act and layer_with_act:
+                continue
+            if target.param_attr['bias_attr']:
+                continue
+
+            target.param_attr['bias_attr'] = string(bias_node.inputs[1])
+            # The fused layer takes over the BiasAdd's output.
+            target.output = bias_node.fluid_code.layers[0].output
+            if bias_with_act:
+                target.param_attr['act'] = bias_attrs['act']
+            bias_node.fluid_code.clear()