add optimizer for tf2fluid

d2340215 · jiangjiajun · fef5149c · d2340215 · d2340215 · d2340215
6 changed files
--- a/x2paddle/convert.py
+++ b/x2paddle/convert.py
@@ -67,10 +67,17 @@ def tf2paddle(model_path, save_dir):

    from x2paddle.decoder.tf_decoder import TFDecoder
    from x2paddle.op_mapper.tf_op_mapper import TFOpMapper
+    from x2paddle.optimizer.tf_optimizer import TFOptimizer

    print("Now translating model from tensorflow to paddle.")
    model = TFDecoder(model_path)
    mapper = TFOpMapper(model)
+    optimizer = TFOptimizer(mapper)
+    # necessary optimization
+    optimizer.delete_redundance_code()
+    # the optimizations below are experimental
+    optimizer.merge_activation()
+    optimizer.merge_bias()
    mapper.save_inference_model(save_dir)



--- a/x2paddle/core/graph.py
+++ b/x2paddle/core/graph.py
@@ -97,29 +97,6 @@ class Graph(object):
        self.node_map[dst].inputs.append(src)
        self.node_map[src].outputs.append(dst)

-    def remove_node(self, node_name):
-        if node_name not in self.node_map:
-            raise Exception("Node[{}] not in graph".format(node_name))
-        inputs = self.node_map[node_name].inputs
-        outputs = self.node_map[node_name].outputs
-        for input in inputs:
-            idx = self.node_map[input].outputs.index(node_name)
-            del self.node_map[input].outputs[idx]
-        for output in outputs:
-            idx = self.node_map[input].inputs.index(node_name)
-            del self.node_map[input].inputs[idx]
-        del self.node_map[node_name]
-
-        idx = self.topo_sort.index(node_name)
-        del self.topo_sort[idx]
-
-        if node_name in self.input_nodes:
-            idx = self.input_nodes.index(node_name)
-            del self.input_nodes[idx]
-        if node_name in self.output_nodes:
-            idx = self.output_nodes.index(node_name)
-            del self.output_nodes[idx]
-
    def print(self):
        for i, tmp in enumerate(self.topo_sort):
            print(tmp, self.node_map[tmp].layer_type, self.node_map[tmp].inputs,

--- a/x2paddle/core/op_mapper.py
+++ b/x2paddle/core/op_mapper.py
@@ -142,9 +142,9 @@ class OpMapper(object):
        self.add_codes("\ndef x2paddle_net():", 0)
        for i in range(len(self.graph.topo_sort)):
            node_name = self.graph.topo_sort[i]
-            if hasattr(self, "omit_nodes") and node_name in self.omit_nodes:
-                continue
            node = self.graph.get_node(node_name)
+            if len(node.fluid_code.layers) == 0:
+                continue
            self.add_codes(node.fluid_code.gen_codes(), 1)

        self.add_codes("", 0)

--- a/x2paddle/decoder/tf_decoder.py
+++ b/x2paddle/decoder/tf_decoder.py
@@ -129,6 +129,26 @@ class TFGraph(Graph):
            node.index = 0
        return node

+    def remove_node(self, node_name):
+        if node_name not in self.node_map:
+            raise Exception("Node[{}] not in graph".format(node_name))
+        inputs = self.node_map[node_name].inputs
+        outputs = self.node_map[node_name].outputs
+        assert len(inputs) == 1
+        input_node = self.node_map[inputs[0]]
+        idx = input_node.outputs.index(node_name)
+        del input_node.outputs[idx]
+        for output in outputs:
+            node = self.node_map[output]
+            idx = node.inputs.index(node_name)
+            node.inputs[idx] = inputs[0]
+            input_node.outputs.append(output)
+
+        del self.node_map[node_name]
+
+        idx = self.topo_sort.index(node_name)
+        del self.topo_sort[idx]
+
    def _remove_isolated_node(self):
        # delete isolated nodes
        isolated_nodes = list()
@@ -138,7 +158,15 @@ class TFGraph(Graph):
                isolated_nodes.append(node_name)

        for node_name in isolated_nodes:
-            self.remove_node(node_name)
+            del self.node_map[node_name]
+            if node_name in self.input_nodes:
+                idx = self.input_nodes.index(node_name)
+                del self.input_nodes[idx]
+            if node_name in self.output_nodes:
+                idx = self.output_nodes.index(node_name)
+                del self.output_nodes[idx]
+            idx = self.topo_sort.index(node_name)
+            del self.topo_sort[idx]

    def _remove_identity_node(self):
        identity_node = list()
@@ -148,22 +176,28 @@ class TFGraph(Graph):

        for node_name in identity_node:
            node = self.get_node(node_name)
-            # Remind: Only 1 input for Identity node
            input_node = self.get_node(node.inputs[0])
+            self.remove_node(node_name)

-            # remove identity node from graph
            self.identity_map[node_name] = input_node.layer_name
-            idx = input_node.outputs.index(node_name)
-            del input_node.outputs[idx]
-
-            output_names = node.outputs
-            for output_name in output_names:
-                output_node = self.get_node(output_name)
-                idx = output_node.inputs.index(node_name)
-                output_node.inputs[idx] = input_node.layer_name

-            idx = self.topo_sort.index(node_name)
-            del self.topo_sort[idx]
+            #            node = self.get_node(node_name)
+            #            # Remind: Only 1 input for Identity node
+            #            input_node = self.get_node(node.inputs[0])
+            #
+            #            # remove identity node from graph
+            #            self.identity_map[node_name] = input_node.layer_name
+            #            idx = input_node.outputs.index(node_name)
+            #            del input_node.outputs[idx]
+            #
+            #            output_names = node.outputs
+            #            for output_name in output_names:
+            #                output_node = self.get_node(output_name)
+            #                idx = output_node.inputs.index(node_name)
+            #                output_node.inputs[idx] = input_node.layer_name
+            #
+            #            idx = self.topo_sort.index(node_name)
+            #            del self.topo_sort[idx]

            if node_name in self.output_nodes:
                idx = self.output_nodes.index(node_name)

--- a/x2paddle/op_mapper/tf_op_mapper.py
+++ b/x2paddle/op_mapper/tf_op_mapper.py
@@ -55,7 +55,8 @@ class TFOpMapper(OpMapper):
        'Abs': ['abs'],
        'Sigmoid': ['sigmoid'],
        'Exp': ['exp'],
-        'Rsqrt': ['rsqrt']
+        'Rsqrt': ['rsqrt'],
+        'swish_f32': ['swish']
    }
    elementwise_ops = {
        'Add': 'elementwise_add',
@@ -692,18 +693,6 @@ class TFOpMapper(OpMapper):
                               output=node,
                               param_attr=None)

-    def swish_f32(self, node):
-        input = self.graph.get_node(node.layer.input[0], copy=True)
-        node.fluid_code.add_layer("sigmoid",
-                                  inputs=input,
-                                  output=node,
-                                  param_attr=None)
-        inputs = {"x": input, "y": node}
-        node.fluid_code.add_layer("elementwise_mul",
-                                  inputs=inputs,
-                                  output=node,
-                                  param_attr=None)
-
    def Mean(self, node):
        input = self.graph.get_node(node.layer.input[0], copy=True)
        reduce_idx = self.graph.get_node(node.layer.input[1], copy=True)

--- a/x2paddle/optimizer/tf_optimizer.py
+++ b/x2paddle/optimizer/tf_optimizer.py
@@ -13,10 +13,95 @@
 # limitations under the License.

 # TODO useless node remove
-from x2paddle.decoder.tf_decoder import TFGraph
+from x2paddle.op_mapper.tf_op_mapper import TFOpMapper
+from x2paddle.core.util import *

-# TODO bn merge

-# TODO activation merge
+class TFOptimizer(object):
+    activation_ops = {
+        'Relu': 'relu',
+        'Sigmoid': 'sigmoid',
+        'Relu6': 'relu6',
+        'swish_f32': 'swish'
+    }
+    layers_with_act = [
+        'Conv2D', 'BiasAdd', 'DepthwiseConv2dNative', 'Conv2DBackpropInput',
+        'FusedBatchNorm'
+    ]
+    layers_with_bias = [
+        'Conv2D', 'DepthwiseConv2dNative', 'Conv2DBackpropInput'
+    ]

-# TODO biasadd merge
+    def __init__(self, op_mapper):
+        self.op_mapper = op_mapper
+        self.graph = op_mapper.graph
+
+    def delete_redundance_code(self):
+        for node_name in self.graph.topo_sort:
+            if node_name in self.op_mapper.omit_nodes:
+                node = self.graph.get_node(node_name)
+                omit_freq = self.op_mapper.omit_nodes.count(node_name)
+                if len(node.outputs) <= omit_freq:
+                    node.fluid_code.clear()
+
+    # Fuse an activation op into the 'act' attribute of its single-output producer layer
+    def merge_activation(self):
+        act_nodes = list()
+        for node_name in self.graph.topo_sort:
+            node = self.graph.get_node(node_name)
+            if node.layer_type in self.activation_ops:
+                act_nodes.append(node_name)
+
+        for act_node_name in act_nodes:
+            node = self.graph.get_node(act_node_name)
+            input = self.graph.get_node(node.inputs[0])
+            if input.layer_type not in self.layers_with_act:
+                continue
+            if len(input.fluid_code.layers) == 0:
+                continue
+            if 'act' in input.fluid_code.layers[
+                    -1].param_attr and input.fluid_code.layers[-1].param_attr[
+                        'act'] is not None:
+                continue
+            if len(input.outputs) != 1:
+                continue
+            input.fluid_code.layers[-1].param_attr['act'] = string(
+                self.activation_ops[node.layer_type])
+            input.fluid_code.layers[-1].output = node.fluid_code.layers[
+                0].output
+            self.graph.remove_node(act_node_name)
+
+    # Fold a BiasAdd into the 'bias_attr' of its single-output producer conv layer
+    def merge_bias(self):
+        for node_name in self.graph.topo_sort:
+            node = self.graph.get_node(node_name)
+            if node.layer_type == "BiasAdd":
+                input = self.graph.get_node(node.inputs[0])
+                if input.layer_type not in self.layers_with_bias:
+                    continue
+                if len(input.outputs) != 1:
+                    continue
+                if len(input.fluid_code.layers) == 0:
+                    continue
+                bias_with_act = False
+                if 'act' in node.fluid_code.layers[-1].param_attr:
+                    bias_with_act = True
+                layer_with_act = False
+                if 'act' in input.fluid_code.layers[
+                        -1].param_attr and input.fluid_code.layers[
+                            -1].param_attr['act'] is not None:
+                    layer_with_act = True
+
+                if bias_with_act and layer_with_act:
+                    continue
+                if not input.fluid_code.layers[-1].param_attr['bias_attr']:
+                    bias_name = node.inputs[1]
+                    input.fluid_code.layers[-1].param_attr[
+                        'bias_attr'] = string(bias_name)
+                    input.fluid_code.layers[-1].output = node.fluid_code.layers[
+                        0].output
+                    if bias_with_act:
+                        input.fluid_code.layers[-1].param_attr[
+                            'act'] = node.fluid_code.layers[-1].param_attr[
+                                'act']
+                    node.fluid_code.clear()