Merge pull request #149 from jiangjiajun/develop

add optimization for scale

Merge pull request #149 from jiangjiajun/develop
add optimization for scale
4c29d6df · Jason · GitHub · a3275d80 · dd9e3172 · 4c29d6df
显示空白变更内容
内联并排

Showing with 226 addition and 5 deletion

x2paddle/convert.py x2paddle/convert.py +5 -2

x2paddle/optimizer/tf_optimizer.py x2paddle/optimizer/tf_optimizer.py +221 -3

未找到文件。
--- a/x2paddle/convert.py
+++ b/x2paddle/convert.py
@@ -106,8 +106,11 @@ def tf2paddle(model_path,
        # optimizer below is experimental
        optimizer.merge_activation()
        optimizer.merge_bias()
-        optimizer.merge_batch_norm()
+        optimizer.optimize_sub_graph()
-        optimizer.merge_prelu()
+#        optimizer.merge_batch_norm()
+#        optimizer.merge_prelu()
    else:
        mapper = TFOpMapperNHWC(model)
        optimizer = TFOptimizer(mapper)

--- a/x2paddle/optimizer/tf_optimizer.py
+++ b/x2paddle/optimizer/tf_optimizer.py
@@ -20,6 +20,15 @@ import numpy
 import copy as cp
+def exist_act(node):
+    for layer in node.fluid_code.layers:
+        if layer.param_attr is not None:
+            act = layer.param_attr.get("act", None)
+            if act is not None:
+                return True
+    return False
 class TFOptimizer(object):
    activation_ops = {
        'Relu': 'relu',
@@ -353,6 +362,12 @@ class TFOptimizer(object):
                    node.fluid_code.layers[-2].output = name
                    del node.fluid_code.layers[-1]
+    def optimize_sub_graph(self):
+        self.merge_batch_norm()
+        self.merge_prelu()
+        self.merge_scale()
+        self.merge_affine_channel()
    def merge_batch_norm(self):
        for i, name in enumerate(self.graph.topo_sort):
            node = self.graph.get_node(name)
@@ -368,6 +383,10 @@ class TFOptimizer(object):
                    is_batch_norm = False
                    continue
+                if exist_act(in_nodes0[0]) or exist_act(in_nodes0[1]):
+                    is_batch_norm = False
+                    continue
                in_nodes1 = [
                    self.graph.get_node(in_name)
                    for in_name in in_nodes0[0].inputs
@@ -382,11 +401,17 @@ class TFOptimizer(object):
                if in_nodes1[1].layer_type != "Mul":
                    is_batch_norm = False
                    continue
+                if exist_act(in_nodes1[1]):
+                    is_batch_norm = False
+                    continue
                if in_nodes2[0].layer_type != "Const" or in_nodes2[
                        1].layer_type != "Mul":
                    is_batch_norm = False
                    continue
+                if exist_act(in_nodes2[1]):
+                    is_batch_norm = False
+                    continue
                in_nodes3 = [
                    self.graph.get_node(in_name)
@@ -410,6 +435,9 @@ class TFOptimizer(object):
                if in_nodes5.layer_type != "Add":
                    is_batch_norm = False
                    continue
+                if exist_act(in_nodes5):
+                    is_batch_norm = False
+                    continue
                in_nodes6 = [
                    self.graph.get_node(in_name) for in_name in in_nodes5.inputs
@@ -485,10 +513,9 @@ class TFOptimizer(object):
                if is_batch_norm:
                    index = in_nodes1[0].outputs.index(in_nodes0[0].layer_name)
-                    del in_nodes1[0].outputs[index]
+                    in_nodes1[0].outputs[index] = node.layer_name
                    node.layer_type = "FusedBatchNorm"
                    node.inputs = [in_nodes1[0].layer_name]
-                    node.outputs = node.outputs
                    act = node.fluid_code.layers[-1].param_attr.get("act", None)
                    node.fluid_code.clear()
                    attr = {
@@ -522,6 +549,9 @@ class TFOptimizer(object):
                continue
            is_prelu = True
            if node.layer_type == "Add":
+                if exist_act(node):
+                    is_prelu = False
+                    continue
                in_nodes0 = [
                    self.graph.get_node(in_name) for in_name in node.inputs
                ]
@@ -529,6 +559,10 @@ class TFOptimizer(object):
                        1].layer_type != "Mul":
                    is_prelu = False
                    continue
+                if exist_act(in_nodes0[1]):
+                    is_prelu = False
+                    continue
                if len(in_nodes0[0].outputs) != 1 or len(
                        in_nodes0[1].outputs) != 1:
                    is_prelu = False
@@ -546,6 +580,9 @@ class TFOptimizer(object):
                if in_nodes2[0].layer_type != "Mul":
                    is_prelu = False
                    continue
+                if exist_act(in_nodes2[0]):
+                    is_prelu = False
+                    continue
                if len(in_nodes2[1].outputs) != 1 or len(
                        in_nodes2[0].outputs) != 1:
                    is_prelu = False
@@ -559,6 +596,9 @@ class TFOptimizer(object):
                        1].layer_type != "Sub":
                    is_prelu = False
                    continue
+                if exist_act(in_nodes3[1]):
+                    is_prelu = False
+                    continue
                if len(in_nodes3[0].outputs) != 1 or len(
                        in_nodes3[1].outputs) != 1:
                    is_prelu = False
@@ -638,10 +678,10 @@ class TFOptimizer(object):
                    del in_nodes1.outputs[index]
                    index = in_nodes1.outputs.index(in_nodes4[1].layer_name)
                    del in_nodes1.outputs[index]
+                    in_nodes1.outputs.append(node.layer_name)
                    node.layer_type = "Prelu"
                    node.inputs = [in_nodes1.layer_name]
-                    node.outputs = node.outputs
                    act = node.fluid_code.layers[-1].param_attr.get("act", None)
                    node.fluid_code.clear()
                    attr = {
@@ -660,3 +700,181 @@ class TFOptimizer(object):
                del self.graph.node_map[in_nodes2[1].layer_name]
                del self.graph.node_map[in_nodes3[1].layer_name]
                del self.graph.node_map[in_nodes4[1].layer_name]
+    def merge_scale(self):
+        for i, name in enumerate(self.graph.topo_sort):
+            node = self.graph.get_node(name)
+            if node is None:
+                continue
+            is_scale = True
+            if node.layer_type == "Sub":
+                in_nodes0 = [
+                    self.graph.get_node(in_name) for in_name in node.inputs
+                ]
+                if in_nodes0[0].layer_type != "Mul" or in_nodes0[
+                        1].layer_type != "Const" or in_nodes0[1].value.size != 1:
+                    is_scale = False
+                    continue
+                if exist_act(in_nodes0[0]):
+                    is_scale = False
+                    continue
+                if len(in_nodes0[0].outputs) != 1 or len(
+                        in_nodes0[1].outputs) != 1:
+                    is_scale = False
+                    continue
+                in_nodes1 = [
+                    self.graph.get_node(in_name)
+                    for in_name in in_nodes0[0].inputs
+                ]
+                if in_nodes1[0].layer_type != "Const" or in_nodes1[
+                        1].layer_type != "RealDiv" or in_nodes1[
+                            0].value.size != 1:
+                    is_scale = False
+                    continue
+                if exist_act(in_nodes1[1]):
+                    is_scale = False
+                    continue
+                if len(in_nodes1[0].outputs) != 1 or len(
+                        in_nodes1[1].outputs) != 1:
+                    is_scale = False
+                    continue
+                in_nodes2 = [
+                    self.graph.get_node(in_name)
+                    for in_name in in_nodes1[1].inputs
+                ]
+                if in_nodes2[1].layer_type != "Const" or in_nodes2[
+                        1].value.size != 1:
+                    is_scale = False
+                    continue
+                if is_scale:
+                    in_node = self.graph.get_node(in_nodes1[1].inputs[0])
+                    index = in_node.outputs.index(in_nodes1[1].layer_name)
+                    in_node.outputs[index] = node.layer_name
+                    node.layer_type = "Scale"
+                    node.inputs = [in_node.layer_name]
+                    scale = 1.0 / in_nodes2[1].value * in_nodes1[0].value
+                    act = None
+                    if node.fluid_code.layers[0].param_attr is not None:
+                        act = node.fluid_code.layers[0].param_attr.get(
+                            "act", None)
+                    node.fluid_code.clear()
+                    attr = {
+                        "scale": scale,
+                        "bias": in_nodes0[1].value,
+                        "bias_after_scale": True,
+                        "act": act
+                    }
+                    node.fluid_code.add_layer("scale",
+                                              inputs=in_node,
+                                              output=node,
+                                              param_attr=attr)
+                    del self.graph.node_map[in_nodes0[0].layer_name]
+                    del self.graph.node_map[in_nodes0[1].layer_name]
+                    del self.graph.node_map[in_nodes1[0].layer_name]
+                    del self.graph.node_map[in_nodes1[1].layer_name]
+                    del self.graph.node_map[in_nodes2[1].layer_name]
+    def merge_affine_channel(self):
+        for i, name in enumerate(self.graph.topo_sort):
+            node = self.graph.get_node(name)
+            if node is None:
+                continue
+            is_affine_channel = True
+            if node.layer_type == "RealDiv":
+                in_nodes0 = [
+                    self.graph.get_node(in_name) for in_name in node.inputs
+                ]
+                bias_add = True
+                if (in_nodes0[0].layer_type != "Sub" and in_nodes0[0].layer_type
+                        != "Add") or in_nodes0[1].layer_type != "Const" or len(
+                            in_nodes0[1].value.shape) != 3:
+                    is_affine_channel = False
+                    continue
+                if in_nodes0[0].layer_type == "Sub":
+                    bias_add = False
+                if exist_act(in_nodes0[0]):
+                    is_affine_channel = False
+                    continue
+                if len(in_nodes0[0].outputs) != 1 or len(
+                        in_nodes0[1].outputs) != 1:
+                    is_affine_channel = False
+                    continue
+                in_nodes1 = [
+                    self.graph.get_node(in_name)
+                    for in_name in in_nodes0[0].inputs
+                ]
+                if len(in_nodes1[0].out_shapes[0]
+                       ) != 4 or in_nodes1[1].layer_type != "Const" or len(
+                           in_nodes1[1].value.shape) != 3:
+                    is_affine_channel = False
+                    continue
+                if len(in_nodes1[1].outputs) != 1:
+                    is_affine_channel = False
+                    continue
+                channel = in_nodes1[0].out_shapes[0][-1]
+                if channel < 0 or channel != in_nodes0[
+                        1].value.size or channel != in_nodes1[1].value.size:
+                    is_affine_channel = False
+                    continue
+                if in_nodes0[1].out_shapes[0][-1] != in_nodes0[
+                        1].value.size or in_nodes1[1].out_shapes[0][
+                            -1] != in_nodes1[1].value.size:
+                    is_affine_channel = False
+                    continue
+                if is_affine_channel:
+                    in_node = in_nodes1[0]
+                    index = in_node.outputs.index(in_nodes0[0].layer_name)
+                    in_node.outputs[index] = node.layer_name
+                    node.layer_type = "AffineChannel"
+                    node.inputs = [in_node.layer_name]
+                    scale = 1.0 / in_nodes0[1].value.flatten()
+                    bias = in_nodes1[1].value.flatten(
+                    ) / in_nodes0[1].value.flatten()
+                    if not bias_add:
+                        bias *= -1.0
+                    self.op_mapper.weights[node.layer_name + "_scale"] = scale
+                    self.op_mapper.weights[node.layer_name + "_bias"] = bias
+                    act = None
+                    if node.fluid_code.layers[0].param_attr is not None:
+                        act = node.fluid_code.layers[0].param_attr.get(
+                            "act", None)
+                    node.fluid_code.clear()
+                    attr = {
+                        "dtype": string(scale.dtype),
+                        "shape": [channel],
+                        "name": string(node.layer_name + "_scale")
+                    }
+                    node.fluid_code.add_layer("create_parameter",
+                                              inputs=None,
+                                              output=node.layer_name + "_scale",
+                                              param_attr=attr)
+                    attr = {
+                        "dtype": string(scale.dtype),
+                        "shape": [channel],
+                        "name": string(node.layer_name + "_bias")
+                    }
+                    node.fluid_code.add_layer("create_parameter",
+                                              inputs=None,
+                                              output=node.layer_name + "_bias",
+                                              param_attr=attr)
+                    inputs = {
+                        "x": in_node,
+                        "scale": node.layer_name + "_scale",
+                        "bias": node.layer_name + "_bias"
+                    }
+                    attr = {"act": act}
+                    node.fluid_code.add_layer("affine_channel",
+                                              inputs=inputs,
+                                              output=node,
+                                              param_attr=attr)
+                    del self.graph.node_map[in_nodes0[0].layer_name]
+                    del self.graph.node_map[in_nodes0[1].layer_name]
+                    del self.graph.node_map[in_nodes1[1].layer_name]