From 6da21ebe41a64f8774e40f39eb92a984a8b8f8b3 Mon Sep 17 00:00:00 2001
From: jiangjiajun
Date: Thu, 27 Aug 2020 08:25:54 +0000
Subject: [PATCH] temporary support

---
 x2paddle/convert.py                     |   8 +
 x2paddle/core/program.py                | 106 ++++++++++-
 x2paddle/decoder/tf_decoder.py          |  52 ++++--
 x2paddle/op_mapper/tf_op_mapper_nhwc.py | 139 +++++++++++++--
 x2paddle/optimizer/batch_norm.py        |  22 +++
 x2paddle/optimizer/bias.py              |  61 +++++++
 x2paddle/optimizer/transpose.py         | 226 ++++++++++++++++++++++++
 7 files changed, 583 insertions(+), 31 deletions(-)
 create mode 100644 x2paddle/optimizer/batch_norm.py
 create mode 100644 x2paddle/optimizer/bias.py
 create mode 100644 x2paddle/optimizer/transpose.py

diff --git a/x2paddle/convert.py b/x2paddle/convert.py
index 3589869..2a543a6 100644
--- a/x2paddle/convert.py
+++ b/x2paddle/convert.py
@@ -118,13 +118,21 @@ def tf2paddle(model_path,
     from x2paddle.op_mapper.tf_op_mapper import TFOpMapper
     from x2paddle.op_mapper.tf_op_mapper_nhwc import TFOpMapperNHWC
     from x2paddle.optimizer.tf_optimizer import TFOptimizer
+    from x2paddle.optimizer.transpose import TransposeOpt
+    from x2paddle.optimizer.bias import BiasOpt
 
     print("Now translating model from tensorflow to paddle.")
     model = TFDecoder(model_path, define_input_shape=define_input_shape)
     mapper = TFOpMapperNHWC(model)
     program.build()
+    opt = BiasOpt()
+    opt.run(program)
+    opt = TransposeOpt()
+    opt.run(program)
+
     program.gen_model(save_dir)
+    program.visualize(save_dir)
 
 
 def caffe2paddle(proto, weight, save_dir, caffe_proto, params_merge=False):
diff --git a/x2paddle/core/program.py b/x2paddle/core/program.py
index ef8d205..a51df3d 100644
--- a/x2paddle/core/program.py
+++ b/x2paddle/core/program.py
@@ -15,8 +15,11 @@ from __future__ import print_function
 from __future__ import division
 import paddle.fluid as fluid
+from paddle.fluid.initializer import Constant
+from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.proto import framework_pb2
 from collections import OrderedDict
+import copy
 import numpy
 import time
 import collections
@@ -57,6 +60,29 @@ class PaddleLayer(object):
         block.father_layer = self
         self.blocks.append(block)
 
+    def get_code(self, with_outputs=True):
+        code = ""
+
+        # if len(self.outputs) == 1:
+        #     code = self.outputs[0]
+        # else:
+        #     for output in self.outputs:
+        #         code += "{}, ".format(output)
+        #     code = code.strip(", ")
+        # code += " = "
+
+        code += "{}(".format(self.kernel)
+        for k, v in self.inputs.items():
+            if isinstance(v, list):
+                code += "{}=[{}], ".format(k, ", ".join(v))
+            else:
+                code += "{}={}, ".format(k, v)
+        for k, v in self.attrs.items():
+            code += "{}={}, ".format(k, v)
+        code = code.strip(", ")
+        code += ")"
+        return code
+
 
 class PaddleProgram(object):
     def __init__(self):
@@ -80,10 +106,59 @@ class PaddleProgram(object):
         layer = PaddleLayer(kernel, inputs, outputs, **kwargs)
         layer_id = str(len(self.layers))
         if self.father_layer is not None:
-            layer_id = "{}.{}.{}".format(layer_id, len(self.father_layer.blocks()), self.father_layer.id)
+            layer_id = "{}.{}.{}".format(layer_id,
+                                         len(self.father_layer.blocks()),
+                                         self.father_layer.id)
         self.layers[layer_id] = layer
         return layer_id
 
+    def del_layer(self, layer_id):
+        layer = self.layers[layer_id]
+        outputs = self.edges_out.get(layer_id, [])
+        inputs = self.edges_in.get(layer_id, [])
+
+        assert len(
+            inputs) <= 1, "There should be 0 or 1 input for deleted layer."
+
+        if len(inputs) == 0:
+            for out in outputs:
+                while layer_id in self.edges_in[out]:
+                    index = self.edges_in[out].index(layer_id)
+                    del self.edges_in[out][index]
+
+                input_keys = list(self.layers[out].inputs.keys())
+                for k in input_keys:
+                    if self.layers[out].inputs[k] == layer.outputs[0]:
+                        del self.layers[out].inputs[k]
+
+            del self.layers[layer_id]
+            if layer_id in self.edges_in:
+                del self.edges_in[layer_id]
+            if layer_id in self.edges_out:
+                del self.edges_out[layer_id]
+            return
+
+        # Replace this layer with its input layer in the inputs of all output layers
+        for out in outputs:
+            for i in range(len(self.edges_in[out])):
+                if self.edges_in[out][i] == layer_id:
+                    self.edges_in[out][i] = inputs[0]
+
+        # Hand this layer's outputs over to the outputs of its input layer
+        replace_index = self.edges_out[inputs[0]].index(layer_id)
+        del self.edges_out[inputs[0]][replace_index]
+        for i, out in enumerate(outputs):
+            self.edges_out[inputs[0]].insert(replace_index + i, out)
+            for k, v in self.layers[out].inputs.items():
+                if v == layer.outputs[0]:
+                    self.layers[out].inputs[k] = list(layer.inputs.values())[0]
+
+        del self.layers[layer_id]
+        if layer_id in self.edges_out:
+            del self.edges_out[layer_id]
+        if layer_id in self.edges_in:
+            del self.edges_in[layer_id]
+
     def build(self):
         outputs_from_nodes = dict()
         for layer_id, layer in self.layers.items():
@@ -105,6 +180,12 @@ class PaddleProgram(object):
             for output in layer.outputs:
                 outputs_from_nodes[output] = layer_id
 
+        layer_ids = copy.deepcopy(list(self.layers.keys()))
+        for layer_id in layer_ids:
+            if len(self.edges_in.get(layer_id, [])) == 0 and len(
+                    self.edges_out.get(layer_id, [])) == 0:
+                del self.layers[layer_id]
+
     def gen_code(self, code_dir):
         def write_code(f, code_list, indent=0):
             indent_blank = "    " * indent
@@ -193,6 +274,13 @@ class PaddleProgram(object):
             feeded_var_names=[i.name for i in inputs],
             target_vars=outputs,
             executor=exe)
+        print("Model has been converted, saved in {}".format(save_dir))
+        print("=====Model inputs info=====")
+        for ipt in self.inputs:
+            print("Tensor: {}".format(ipt))
+        print("=====Model outputs info====")
+        for out in self.outputs:
+            print("Tensor: {}".format(out))
 
     def dump_parameter(self, param_name, param, save_dir):
         if not os.path.exists(save_dir):
@@ -227,3 +315,19 @@ class PaddleProgram(object):
             fp.write(tensor_desc.SerializeToString())
             param.tofile(fp)
         fp.close()
+
+    def visualize(self, save_dir):
+        from graphviz import Digraph
+        dot = Digraph("PaddleGraph", "Generated by X2Paddle")
+        for layer_id, layer in self.layers.items():
+            dot.node(layer_id, layer.kernel)
+
+        for layer_id, outputs in self.edges_out.items():
+            for out in outputs:
+                dot.edge(layer_id, out)
+
+        with open(os.path.join(save_dir, 'graph.dot'), 'w') as f:
+            f.write(dot.source)
+
+        dot.format = 'svg'
+        dot.render(filename='graph', directory=save_dir)
diff --git a/x2paddle/decoder/tf_decoder.py b/x2paddle/decoder/tf_decoder.py
index dc04172..394ae91 100644
--- a/x2paddle/decoder/tf_decoder.py
+++ b/x2paddle/decoder/tf_decoder.py
@@ -60,7 +60,7 @@ class TFGraphNode(GraphNode):
 
     @property
     def dtype(self):
-        keys = ['dtype', 'Tidx', 'T', 'DstT']
+        keys = ['dtype', 'T', 'DstT', 'Tidx']
         for k in keys:
             dtype = self.layer.attr[k].type
             if dtype > 0:
@@ -74,7 +74,7 @@ class TFGraphNode(GraphNode):
 
     @property
     def raw_dtype(self):
-        keys = ['dtype', 'Tidx', 'T', 'DstT']
+        keys = ['dtype', 'T', 'DstT', 'Tidx']
         for k in keys:
             dtype = self.layer.attr[k].type
             if dtype > 0:
@@ -121,7 +121,7 @@ class TFGraph(Graph):
     def __init__(self, model, data_format="NHWC"):
         super(TFGraph, self).__init__(model)
         self.identity_map = dict()
-        self.multi_out_ops = ['Split', 'SplitV', 'IteratorV2']
+        self.multi_out_ops = ['Split', 'SplitV', 'IteratorV2', 'Unpack']
         self.tf_data_format = data_format
 
     def build(self):
@@ -159,6 +159,7 @@ class TFGraph(Graph):
             del self.output_nodes[idx]
 
         # tensorflow graph optimize
+        self._get_inputs_outputs()
         self._remove_isolated_node()
         self._optimize_dialiation_conv()
         self._remove_identity_node()
@@ -167,9 +168,11 @@ class TFGraph(Graph):
     def get_node(self, node_name, copy=False):
         items = node_name.strip().split(':')
         items[0] = items[0].replace('/', '_').replace('-', '_')
+
         if items[0] in self.identity_map:
-            items[0] = self.identity_map[items[0]]
-        new_node_name = ":".join(items)
+            new_node_name = self.identity_map[items[0]]
+        else:
+            new_node_name = ":".join(items)
         node = super(TFGraph, self).get_node(new_node_name, copy)
         if node is None:
             return None
@@ -200,6 +203,27 @@ class TFGraph(Graph):
             idx = self.topo_sort.index(node_name)
             del self.topo_sort[idx]
 
+    def _get_inputs_outputs(self):
+        node_inputs_info = dict()
+        node_outputs_info = dict()
+        self.input_nodes = list()
+        self.output_nodes = list()
+        for node in self.model.node:
+            inputs = [ipt.split(':')[0].replace('^', '') for ipt in node.input]
+            node_inputs_info[node.name] = inputs
+            for ipt in inputs:
+                if ipt not in node_outputs_info:
+                    node_outputs_info[ipt] = list()
+                node_outputs_info[ipt].append(node.name)
+        for node in self.model.node:
+            if node.op == "Placeholder":
+                self.input_nodes.append(
+                    node.name.replace('/', '_').replace('-', '_'))
+            if len(node_inputs_info.get(node.name, [])) > 0 and len(
+                    node_outputs_info.get(node.name, [])) == 0:
+                self.output_nodes.append(
+                    node.name.replace('/', '_').replace('-', '_'))
+
     def _optimize_dialiation_conv(self):
         for name in list(self.node_map.keys()):
             node = self.node_map[name]
@@ -268,6 +292,14 @@ class TFGraph(Graph):
                 idx = self.output_nodes.index(node_name)
                 self.output_nodes[idx] = input_node.layer_name
 
+        for i, out in enumerate(cp.deepcopy(self.output_nodes)):
+            if out not in self.node_map:
+                index = self.output_nodes.index(out)
+                del self.output_nodes[index]
+            elif len(self.node_map[out].layer.input) == 0:
+                index = self.output_nodes.index(out)
+                del self.output_nodes[index]
+
     def _remove_cast_node(self):
         cast_node = list()
         for node_name, node in self.node_map.items():
@@ -289,16 +321,6 @@ class TFGraph(Graph):
                 idx = self.output_nodes.index(node_name)
                 self.output_nodes[idx] = input_node.layer_name
 
-    def data_format_propagation(self, node):
-        current_node = self.node_map[node.layer_name]
-        outputs = current_node.outputs
-        if len(outputs) == 0:
-            return
-        for out in outputs:
-            next_node = self.node_map[out]
-            next_node.tf_data_format = node.tf_data_format
-            self.data_format_propagation(next_node)
-
 
 class TFDecoder(object):
     def __init__(self, pb_model, data_format="NHWC", define_input_shape=False):
diff --git a/x2paddle/op_mapper/tf_op_mapper_nhwc.py b/x2paddle/op_mapper/tf_op_mapper_nhwc.py
index 6b00c5c..70ef1d5 100644
--- a/x2paddle/op_mapper/tf_op_mapper_nhwc.py
+++ b/x2paddle/op_mapper/tf_op_mapper_nhwc.py
@@ -51,7 +51,8 @@ class TFOpMapperNHWC(OpMapper):
             'alpha': 'alpha'
         }],
         'Floor': ['floor'],
-        'Erf': ['erf']
+        'Erf': ['erf'],
+        'Square': ['square']
     }
     elementwise_ops = {
         'Add': 'elementwise_add',
@@ -145,12 +146,23 @@ class TFOpMapperNHWC(OpMapper):
         op_type = self.elementwise_ops[node.layer_type]
         x = self.graph.get_node(node.layer.input[0])
         y = self.graph.get_node(node.layer.input[1])
+
         program.add_layer(
             kernel="fluid.layers.{}".format(op_type),
             inputs={"x": x.name,
                     "y": y.name},
             outputs=[node.name])
 
+    def NotEqual(self, node):
+        x = self.graph.get_node(node.layer.input[0])
+        y = self.graph.get_node(node.layer.input[1])
+
+        program.add_layer(
+            kernel="fluid.layers.not_equal",
+            inputs={"x": x.name,
+                    "y": y.name},
+            outputs=[node.name])
+
     def Placeholder(self, node):
         shape = node.out_shapes[0]
         assert len(shape) != 0, "Unknown shape of input nodes[{}].".format(
@@ -172,6 +184,8 @@
         if len(shape) == 0:
             assert value.size == 1, "Unexpected situation happend"
             shape = [1]
+        if value == float('inf'):
+            value = "float('inf')"
 
         initializer = "Constant({})".format(value)
         program.parameters[node.name] = node.value
@@ -441,17 +455,28 @@
     def Reshape(self, node):
         input = self.graph.get_node(node.layer.input[0])
         param = self.graph.get_node(node.layer.input[1])
+
+        input_name = input.name
+        if input.dtype == 'bool':
+            cast_name = gen_name('reshape', 'cast')
+            program.add_layer(
+                kernel="fluid.layers.cast",
+                inputs={"x": input_name},
+                outputs=[cast_name],
+                dtype="'int32'")
+            input_name = cast_name
+
         if param.layer_type == "Const":
             shape = param.value.tolist()
             program.add_layer(
                 kernel="fluid.layers.reshape",
-                inputs={"x": input.name},
+                inputs={"x": input_name},
                 outputs=[node.name],
                 shape=shape)
         else:
             program.add_layer(
                 kernel="fluid.layers.reshape",
-                inputs={"x": input.name,
+                inputs={"x": input_name,
                         "shape": param.name},
                 outputs=[node.name])
         if param.layer_type != "Const":
@@ -464,6 +489,13 @@
                 outputs=[node.name],
                 shape=out_shape.tolist())
 
+        if input.dtype == 'bool':
+            program.add_layer(
+                kernel="fluid.layers.cast",
+                inputs={"x": node.name},
+                outputs=[node.name],
+                dtype="'bool'")
+
     def Pad(self, node):
         input = self.graph.get_node(node.layer.input[0])
         paddings = self.graph.get_node(node.layer.input[1])
@@ -517,9 +549,18 @@
     def Shape(self, node):
         input = self.graph.get_node(node.layer.input[0])
+        input_name = input.name
+        if input.dtype == 'bool':
+            cast_name = gen_name('shape', 'cast')
+            program.add_layer(
+                kernel="fluid.layers.cast",
+                inputs={"x": input.name},
+                outputs=[cast_name],
+                dtype="'int32'")
+            input_name = cast_name
         program.add_layer(
             kernel="fluid.layers.shape",
-            inputs={"input": input.name},
+            inputs={"input": input_name},
            outputs=[node.name])
 
     def ArgMax(self, node):
@@ -642,12 +683,43 @@
     def Pack(self, node):
         inputs = [self.graph.get_node(name) for name in node.layer.input]
+        input_names = [i.name for i in inputs]
         axis = node.get_attr("axis")
         program.add_layer(
             kernel="fluid.layers.stack",
-            inputs={"x": [i.name for i in inputs]},
+            inputs={"x": input_names},
             outputs=[node.name],
             axis=axis)
+        if len(node.out_shapes[0]) == 1:
+            program.add_layer(
+                kernel="fluid.layers.reshape",
+                inputs={"x": node.name},
+                outputs=[node.name],
+                shape=[-1])
+
+    def Unpack(self, node):
+        input = self.graph.get_node(node.layer.input[0])
+        axis = node.get_attr("axis")
+        num = node.get_attr("num")
+        shape = input.out_shapes[0]
+        input_name = input.name
+        if len(shape) == 1:
+            if shape[0] > 0 and num == shape[0]:
+                program.add_layer(
+                    kernel="fluid.layers.unsqueeze",
+                    inputs={"input": input.name},
+                    outputs=[node.name],
+                    axes=[0])
+                input_name = node.name
+                axis = 1
+            else:
+                raise Exception("Unexpected situation happened in Unpack OP")
+        program.add_layer(
+            kernel="fluid.layers.unstack",
+            inputs={"x": input_name},
+            outputs=["{}_p{}".format(node.layer_name, i) for i in range(num)],
+            axis=axis,
+            num=num)
 
     def ConcatV2(self, node):
         inputs = [self.graph.get_node(name) for name in node.layer.input[:-1]]
@@ -656,27 +728,55 @@
         axis = axis.value
         if axis < 0:
             axis += len(inputs[0].out_shapes[0])
+
+        input_names = [i.name for i in inputs]
+        for i, ipt in enumerate(inputs):
+            if node.dtype == 'bool':
+                cast_name = gen_name('concat', 'cast')
+                program.add_layer(
+                    kernel="fluid.layers.cast",
+                    inputs={"x": ipt.name},
+                    outputs=[cast_name],
+                    dtype="'int32'")
+                input_names[i] = cast_name
         program.add_layer(
             kernel="fluid.layers.concat",
-            inputs={"input": [i.name for i in inputs]},
+            inputs={"input": input_names},
             outputs=[node.name],
             axis=axis)
+        if node.dtype == 'bool':
+            program.add_layer(
+                kernel="fluid.layers.cast",
+                inputs={"x": node.name},
+                outputs=[node.name],
+                dtype="'bool'")
 
     def StridedSlice(self, node):
         input = self.graph.get_node(node.layer.input[0])
         begin = self.graph.get_node(node.layer.input[1])
         end = self.graph.get_node(node.layer.input[2])
         strides = self.graph.get_node(node.layer.input[3])
-        assert begin.layer_type == "Const"
-        assert end.layer_type == "Const"
-        assert strides.layer_type == "Const"
-        strides = strides.value.tolist()
+
+        if strides.layer_type == "Const":
+            strides = strides.value.tolist()
+        else:
+            strides = self.decoder.infer_shape_tensor(strides)
+        if begin.layer_type == "Const":
+            begin = begin.value.tolist()
+        else:
+            begin = self.decoder.infer_shape_tensor(begin)
+        if end.layer_type == "Const":
+            end = end.value.tolist()
+        else:
+            end = self.decoder.infer_shape_tensor(end)
+
         assert len(set(strides)) == 1 and strides[
             0] == 1, "Only support strides be 1 in StridedSlice OP"
-        begin = begin.value.tolist()
-        end = end.value.tolist()
-
+        if len(begin) < len(input.out_shapes[0]):
+            begin = begin + [0] * (len(input.out_shapes[0]) - len(begin))
+        if len(end) < len(input.out_shapes[0]):
+            end = end + [0] * (len(input.out_shapes[0]) - len(end))
         for i in range(len(end)):
             if end[i] == 0:
                 end[i] = 999999
@@ -736,10 +836,10 @@
             pass
         else:
             program.add_layer(
-                kernel="fluid.layers.unsqueeze",
+                kernel="fluid.layers.squeeze",
                 inputs={"input": node.name},
                 outputs=[node.name],
-                axes=new_axes)
+                axes=shrink_axes)
 
     def Split(self, node):
         dim = self.graph.get_node(node.layer.input[0])
@@ -1099,6 +1199,8 @@
             outputs=[node.name],
             **attr)
 
+        node.layer.attr['dtype'].type = 10
+
     def GatherV2(self, node):
         embeddings = self.graph.get_node(node.layer.input[0])
         index = self.graph.get_node(node.layer.input[1])
@@ -1121,6 +1223,13 @@
             inputs=inputs,
             outputs=[node.name],
             overwrite=False)
+        if len(index.out_shapes[0]) != 1:
+            out_shape = node.out_shapes[0]
+            program.add_layer(
+                kernel="fluid.layers.reshape",
+                inputs={"x": node.name},
+                outputs=[node.name],
+                shape=out_shape)
 
     def ExpandDims(self, node):
         x = self.graph.get_node(node.layer.input[0], copy=True)
diff --git a/x2paddle/optimizer/batch_norm.py b/x2paddle/optimizer/batch_norm.py
new file mode 100644
index 0000000..ae95400
--- /dev/null
+++ b/x2paddle/optimizer/batch_norm.py
@@ -0,0 +1,22 @@
+import copy
+
+
+class BiasOpt:
+    def __init__(self):
+        self.conv_layers = [
+            'fluid.layers.conv2d', 'fluid.layers.conv2d_transpose'
+        ]
+        self.act_layers = [
+            'fluid.layers.relu', 'fluid.layers.relu6', 'fluid.layers.sigmoid',
+            'fluid.layers.exp', 'fluid.layers.tanh', 'fluid.layers.softplus',
+            'fluid.layers.leaky_relu'
+        ]
+
+    def run(self, graph):
+        layers = copy.deepcopy(graph.layers)
+        for layer_id, layer in layers.items():
+            can_be_optimized = True
+            if layer.kernel != "fluid.layers.elementwise_mul":
+                can_be_optimized = False
+                continue
+            input_ids = graph.edges_in[layer_id]
diff --git a/x2paddle/optimizer/bias.py b/x2paddle/optimizer/bias.py
new file mode 100644
index 0000000..8c9e069
--- /dev/null
+++ b/x2paddle/optimizer/bias.py
@@ -0,0 +1,61 @@
+import copy
+
+
+class BiasOpt:
+    def __init__(self):
+        self.conv_layers = [
+            'fluid.layers.conv2d', 'fluid.layers.conv2d_transpose'
+        ]
+        self.act_layers = [
+            'fluid.layers.relu', 'fluid.layers.relu6', 'fluid.layers.sigmoid',
+            'fluid.layers.exp', 'fluid.layers.tanh', 'fluid.layers.softplus',
+            'fluid.layers.leaky_relu'
+        ]
+
+    def run(self, graph):
+        layers = copy.deepcopy(graph.layers)
+        for layer_id, layer in layers.items():
+            if layer.kernel in self.conv_layers or layer.kernel == "fluid.layers.transpose":
+                if len(graph.edges_out[layer_id]) != 1:
+                    continue
+
+                out_layer_id = graph.edges_out[layer_id][0]
+                if graph.layers[
+                        out_layer_id].kernel != "fluid.layers.elementwise_add":
+                    continue
+                if graph.layers[out_layer_id].attrs.get('axis', -1) != -1:
+                    continue
+
+                in_layer_id = graph.edges_in[out_layer_id]
+                bias_layer_id = in_layer_id[1 - in_layer_id.index(layer_id)]
+                if graph.layers[
+                        bias_layer_id].kernel != "fluid.layers.create_parameter":
+                    continue
+
+                bias_layer = graph.layers[bias_layer_id]
+                if len(bias_layer.attrs['shape']) != 1:
+                    continue
+                if len(graph.edges_out[bias_layer_id]) != 1:
+                    continue
+                if bias_layer.outputs[0] in graph.outputs:
+                    continue
+
+                if layer.kernel == "fluid.layers.transpose":
+                    if layer.attrs['perm'] != [0, 2, 3, 1]:
+                        continue
+                    in_layer_id = graph.edges_in[layer_id][0]
+                    if graph.layers[in_layer_id].kernel not in self.conv_layers:
+                        continue
+                    if graph.layers[in_layer_id].attrs['bias_attr'] != False:
+                        continue
+                    if len(graph.edges_out[in_layer_id]) != 1:
+                        continue
+                    graph.layers[in_layer_id].attrs[
+                        'bias_attr'] = bias_layer.attrs['name']
+                    graph.del_layer(bias_layer_id)
+                    graph.del_layer(out_layer_id)
+                else:
+                    graph.layers[layer_id].attrs[
+                        'bias_attr'] = bias_layer.attrs['name']
+                    graph.del_layer(bias_layer_id)
+                    graph.del_layer(out_layer_id)
diff --git a/x2paddle/optimizer/transpose.py b/x2paddle/optimizer/transpose.py
new file mode 100644
index 0000000..4775867
--- /dev/null
+++ b/x2paddle/optimizer/transpose.py
@@ -0,0 +1,226 @@
+import copy
+import sys
+import numpy as np
+
+
+class TransposeOpt:
+    def __init__(self):
+        self.image_layers = [
+            'fluid.layers.conv2d', 'fluid.layers.batch_norm',
+            'fluid.layers.conv2d_transpose', 'fluid.layers.resize_nearest',
+            'fluid.layers.resize_bilinear', 'fluid.layers.pool2d',
+            'fluid.layers.pad2d'
+        ]
+        self.direct_layers = [
+            'fluid.layers.relu', 'fluid.layers.relu6', 'fluid.layers.abs',
+            'fluid.layers.sigmoid', 'fluid.layers.exp', 'fluid.layers.rsqrt',
+            'fluid.layers.swish_f32', 'fluid.layers.tanh',
+            'fluid.layers.softplus', 'fluid.layers.leaky_relu',
+            'fluid.layers.floor', 'fluid.layers.erf'
+        ]
+        self.elementwise_layers = [
+            'fluid.layers.elementwise_add', 'fluid.layers.elementwise_sub',
+            'fluid.layers.elementwise_mul', 'fluid.layers.elementwise_div'
+        ]
+
+    def get_transpose_num(self, graph):
+        count = 0
+        for layer_id, layer in graph.layers.items():
+            if layer.kernel == "fluid.layers.transpose":
+                count += 1
+        return count
+
+    def strip_direct_layers(self, graph):
+        # Build the working copy opt_graph
+        # Remove all direct_layers from it to make the transpose optimization easier
+        opt_graph = copy.deepcopy(graph)
+
+        remove_layer_ids = set()
+        for layer_id, layer in opt_graph.layers.items():
+            if layer.kernel in self.direct_layers:
+                layer_out = opt_graph.edges_out[layer_id]
+                layer_in = opt_graph.edges_in[layer_id]
+                if len(layer_out) == 0 or len(layer_in) == 0:
+                    continue
+
+                assert len(
+                    layer_in
+                ) == 1, "There should be only 1 input for direct layers."
+
+                remove_layer_ids.add(layer_id)
+
+        for layer_id in remove_layer_ids:
+            opt_graph.del_layer(layer_id)
+        return opt_graph
+
+    def run(self, graph):
+        optimized_transpose_layers = list()
+        modified_layer_attrs = dict()
+        modified_parameters = dict()
+        scanned_layers = set()
+        total_layer_num = len(graph.layers)
+
+        def strip_transpose(_graph):
+            layers = copy.deepcopy(_graph.layers)
+            for layer_id, layer in layers.items():
+                if layer_id in scanned_layers:
+                    continue
+                scanned_layers.add(layer_id)
+                percent = round(len(scanned_layers) / total_layer_num * 100, 2)
+                sys.stderr.write("\rOptimize Transpose Layers...{}%".format(
+                    percent))
+
+                if layer.kernel != "fluid.layers.transpose":
+                    continue
+                if layer.attrs["perm"] != [0, 2, 3, 1]:
+                    continue
+
+                transpose_layer_ids = list()
+                elementwise_layer_ids = list()
+                concat_layer_ids = list()
+                can_be_optimized = True
+                modified_attrs = dict()
+                parameter_layers = list()
+                parameters = dict()
+
+                for out in _graph.edges_out[layer_id]:
+                    if _graph.layers[out].kernel == "fluid.layers.transpose":
+                        if _graph.layers[out].attrs["perm"] != [0, 3, 1, 2]:
+                            can_be_optimized = False
+                            continue
+                        transpose_layer_ids.append(out)
+                    elif _graph.layers[out].kernel in self.elementwise_layers:
+                        elementwise_layer_ids.append(out)
+                    elif _graph.layers[out].kernel == "fluid.layers.concat":
+                        elementwise_layer_ids.append(out)
+                        concat_layer_ids.append(out)
+                    else:
+                        can_be_optimized = False
+                        break
+
+                visited_layers = set()
+                while len(elementwise_layer_ids) > 0 and can_be_optimized:
+                    current_id = elementwise_layer_ids.pop(0)
+                    visited_layers.add(current_id)
+                    for out in _graph.edges_out[current_id]:
+                        if _graph.layers[
+                                out].kernel == "fluid.layers.transpose":
+                            if _graph.layers[out].attrs["perm"] != [0, 3, 1, 2]:
+                                can_be_optimized = False
+                                break
+                            if out not in visited_layers:
+                                transpose_layer_ids.append(out)
+                        elif _graph.layers[
+                                out].kernel in self.elementwise_layers:
+                            if out not in visited_layers:
+                                elementwise_layer_ids.append(out)
+                        elif _graph.layers[out].kernel == "fluid.layers.concat":
+                            if out not in visited_layers:
+                                elementwise_layer_ids.append(out)
+                                concat_layer_ids.append(out)
+                        else:
+                            can_be_optimized = False
+                            break
+
+                    all_create_parameter = True
+                    for ipt in _graph.edges_in.get(current_id, []):
+                        if _graph.layers[
+                                ipt].kernel == "fluid.layers.transpose":
+                            all_create_parameter = False
+                            if _graph.layers[ipt].attrs["perm"] != [0, 2, 3, 1]:
+                                can_be_optimized = False
+                                break
+                            if ipt not in visited_layers:
+                                transpose_layer_ids.append(ipt)
+                        elif _graph.layers[
+                                ipt].kernel in self.elementwise_layers:
+                            all_create_parameter = False
+                            if ipt not in visited_layers:
+                                elementwise_layer_ids.append(ipt)
+                        elif _graph.layers[ipt].kernel == "fluid.layers.concat":
+                            all_create_parameter = False
+                            if ipt not in visited_layers:
+                                elementwise_layer_ids.append(ipt)
+                                concat_layer_ids.append(ipt)
+                        elif _graph.layers[
+                                ipt].kernel == "fluid.layers.create_parameter":
+                            if ipt not in visited_layers:
+                                elementwise_layer_ids.append(ipt)
+                                parameter_layers.append(ipt)
+                        else:
+                            can_be_optimized = False
+                            break
+                    if all_create_parameter:
+                        can_be_optimized = False
+                        break
+
+                    if not can_be_optimized:
+                        break
+                if not can_be_optimized:
+                    continue
+
+                concat_layer_ids = list(set(concat_layer_ids))
+                for l in concat_layer_ids:
+                    axis = _graph.layers[l].attrs.get('axis', 0)
+                    _graph.layers[l].attrs['axis'] = [0, 2, 3, 1][axis]
+                    modified_attrs[l] = _graph.layers[l].attrs
+
+                parameter_layers = list(set(parameter_layers))
+                for l in parameter_layers:
+                    for o in _graph.edges_out[l]:
+                        if _graph.layers[o].kernel in self.elementwise_layers:
+                            axis = _graph.layers[o].attrs.get('axis', -1)
+                            _graph.layers[o].attrs['axis'] = [0, 3, 1, 2][axis]
+                            modified_attrs[o] = _graph.layers[o].attrs
+                        else:
+                            can_be_optimized = False
+                            break
+                    if not can_be_optimized:
+                        break
+                    s = _graph.layers[l].attrs['shape']
+                    p = _graph.parameters[_graph.layers[l].outputs[0]]
+                    if len(s) == 4:
+                        _graph.layers[l].attrs[
+                            'shape'] = [s[0], s[3], s[1], s[2]]
+                        modified_attrs[l] = _graph.layers[l].attrs
+                        parameters[_graph.layers[l].outputs[0]] = np.transpose(
+                            p, (0, 3, 1, 2))
+                    elif len(s) == 3:
+                        _graph.layers[l].attrs['shape'] = [s[2], s[0], s[1]]
+                        modified_attrs[l] = _graph.layers[l].attrs
+                        parameters[_graph.layers[l].outputs[0]] = np.transpose(
+                            p, (2, 0, 1))
+
+                if not can_be_optimized:
+                    continue
+
+                transpose_layer_ids.append(layer_id)
+                transpose_layer_ids = list(set(transpose_layer_ids))
+                for transpose_layer_id in transpose_layer_ids:
+                    _graph.del_layer(transpose_layer_id)
+                optimized_transpose_layers.extend(transpose_layer_ids)
+                modified_layer_attrs.update(modified_attrs)
+                modified_parameters.update(parameters)
+                return True
+            return False
+
+        before_transpose_num = self.get_transpose_num(graph)
+
+        opt_graph = self.strip_direct_layers(graph)
+        total_layer_num = len(opt_graph.layers)
+        while strip_transpose(opt_graph):
+            pass
+
+        for layer_id in optimized_transpose_layers:
+            graph.del_layer(layer_id)
+
+        for layer_id, attrs in modified_layer_attrs.items():
+            graph.layers[layer_id].attrs = attrs
+
+        for name, parameter in modified_parameters.items():
+            graph.parameters[name] = parameter
+
+        current_transpose_num = self.get_transpose_num(graph)
+        print(
+            "\nTranspose layers optimized, before: transpose_num={}, after: transpose_num={}".
+            format(before_transpose_num, current_transpose_num))
-- 
GitLab