From a623556962b06e2667f9632eb16a4d8d7e75693a Mon Sep 17 00:00:00 2001
From: mamingjie-China
Date: Tue, 14 Jul 2020 14:38:44 +0800
Subject: [PATCH] support for bert

---
 x2paddle/core/op_mapper.py              |  38 +++++++-
 x2paddle/decoder/tf_decoder.py          |  21 ++++-
 x2paddle/op_mapper/tf_op_mapper_nhwc.py | 118 +++++++++++++++++++++++-
 3 files changed, 169 insertions(+), 8 deletions(-)

diff --git a/x2paddle/core/op_mapper.py b/x2paddle/core/op_mapper.py
index 33c2f66..9e6d241 100644
--- a/x2paddle/core/op_mapper.py
+++ b/x2paddle/core/op_mapper.py
@@ -55,6 +55,24 @@ def export_paddle_param(param, param_name, dir):
 def run_net(param_dir="./"):
     import os
     inputs, outputs = x2paddle_net()
+
+    ops = fluid.default_main_program().global_block().ops
+    used_vars = list()
+    for op in ops:
+        used_vars += op.input_arg_names
+
+    tmp = list()
+    for input in inputs:
+        if isinstance(input, list):
+            for ipt in input:
+                if ipt.name not in used_vars:
+                    continue
+                tmp.append(ipt)
+        else:
+            if input.name not in used_vars:
+                continue
+            tmp.append(input)
+    inputs = tmp
     for i, out in enumerate(outputs):
         if isinstance(out, list):
             for out_part in out:
@@ -122,12 +140,30 @@ class OpMapper(object):
         import model
         try:
             inputs, outputs = model.x2paddle_net()
+
+            ops = fluid.default_main_program().global_block().ops
+            used_vars = list()
+            for op in ops:
+                used_vars += op.input_arg_names
+
             for i, out in enumerate(outputs):
                 if isinstance(out, list):
                     for out_part in out:
                         outputs.append(out_part)
                     del outputs[i]
-            input_names = [input.name for input in inputs]
+
+            input_names = list()
+            for input in inputs:
+                if isinstance(input, list):
+                    for ipt in input:
+                        if ipt.name not in used_vars:
+                            continue
+                        input_names.append(ipt.name)
+                else:
+                    if input.name not in used_vars:
+                        continue
+                    input_names.append(input.name)
+
             exe = fluid.Executor(fluid.CPUPlace())
             exe.run(fluid.default_startup_program())
 
diff --git a/x2paddle/decoder/tf_decoder.py b/x2paddle/decoder/tf_decoder.py
index dcc5342..8ffb369 100644
--- a/x2paddle/decoder/tf_decoder.py
+++ b/x2paddle/decoder/tf_decoder.py
@@ -48,7 +48,7 @@ class TFGraphNode(GraphNode):
 
     @property
     def out_shapes(self):
-        if self.layer_type == "OneShotIterator":
+        if self.layer_type == "OneShotIterator" or self.layer_type == "IteratorV2":
             values = self.layer.attr["output_shapes"].list.shape
         else:
             values = self.layer.attr["_output_shapes"].list.shape
@@ -68,7 +68,8 @@
         if dtype == 0:
             dtype = self.layer.attr['output_types'].list.type[0]
         if dtype not in self.dtype_map:
-            raise Exception("Dtype[{}] not in dtype_map".format(dtype))
+            raise Exception("Dtype[{}] of node({}) not in dtype_map".format(
+                dtype, self.layer.name))
         return self.dtype_map[dtype]
 
     @property
@@ -114,16 +115,20 @@
     def __init__(self, model, data_format="NHWC"):
         super(TFGraph, self).__init__(model)
         self.identity_map = dict()
-        self.multi_out_ops = ['Split', 'SplitV']
+        self.multi_out_ops = ['Split', 'SplitV', 'IteratorV2']
         self.tf_data_format = data_format
 
     def build(self):
         for layer in self.model.node:
+            if layer.op == 'Assert':
+                continue
             self.node_map[layer.name.replace('/', '_').replace(
                 '-', '_')] = TFGraphNode(
                     layer, data_format=self.tf_data_format)
 
         for layer_name, node in self.node_map.items():
+            if node.layer_type == 'Const':
+                continue
             for in_node in node.layer.input:
                 in_node = in_node.replace('/', '_').replace('-', '_').replace(
                     '^', '')
@@ -139,6 +144,14 @@
 
         super(TFGraph, self).build()
 
+        for layer in self.model.node:
+            if layer.op == 'Assert':
+                for ipt in layer.input:
+                    ipt_name = ipt.replace('-', '_').replace('/', '_')
+                    if ipt_name in self.output_nodes:
+                        idx = self.output_nodes.index(ipt_name)
+                        del self.output_nodes[idx]
+
         # tensorflow graph optimize
         self._remove_isolated_node()
         self._optimize_dialiation_conv()
@@ -322,7 +335,7 @@ class TFDecoder(object):
         graph_def = cp.deepcopy(graph_def)
         input_map = dict()
         for layer in graph_def.node:
-            if layer.op != "Placeholder" and layer.op != "OneShotIterator":
+            if layer.op != "Placeholder" and layer.op != "OneShotIterator" and layer.op != "IteratorV2":
                 continue
             graph_node = TFGraphNode(layer)
             dtype = graph_node.layer.attr['dtype'].type
diff --git a/x2paddle/op_mapper/tf_op_mapper_nhwc.py b/x2paddle/op_mapper/tf_op_mapper_nhwc.py
index 5184a9f..102d439 100644
--- a/x2paddle/op_mapper/tf_op_mapper_nhwc.py
+++ b/x2paddle/op_mapper/tf_op_mapper_nhwc.py
@@ -15,6 +15,7 @@
 from x2paddle.decoder.tf_decoder import TFGraph
 from x2paddle.core.op_mapper import OpMapper
 from x2paddle.core.util import *
+import math
 import inspect
 import numpy
 import sys
@@ -47,7 +48,8 @@ class TFOpMapperNHWC(OpMapper):
         'LeakyRelu': ['leaky_relu', {
             'alpha': 'alpha'
         }],
-        'Floor': ['floor']
+        'Floor': ['floor'],
+        'Erf': ['erf']
     }
     elementwise_ops = {
         'Add': 'elementwise_add',
@@ -56,6 +58,7 @@
         'Sub': 'elementwise_sub',
         'Maximum': 'elementwise_max',
         'Minimum': 'elementwise_min',
+        'LessEqual': 'less_equal',
         'Mul': 'elementwise_mul',
         'FloorDiv': 'elementwise_floordiv'
     }
@@ -71,7 +74,11 @@
 
         not_placeholder = list()
         for name in self.graph.input_nodes:
-            if self.graph.get_node(name).layer_type != "Placeholder":
+            if self.graph.get_node(
+                    name).layer_type != "Placeholder" and self.graph.get_node(
+                        name
+                    ).layer_type != "OneShotIterator" and self.graph.get_node(
+                        name).layer_type != "IteratorV2":
                 not_placeholder.append(name)
         for name in not_placeholder:
             idx = self.graph.input_nodes.index(name)
@@ -81,6 +88,7 @@
         sys.stderr.write("Total nodes: {}\n".format(len(self.graph.topo_sort)))
         for i, node_name in enumerate(self.graph.topo_sort):
            sys.stderr.write("\rConverting node {} ... ".format(i + 1))
+            # sys.stderr.write("\rConverting node {} name: {:50}... ".format(i + 1, node_name))
            node = self.graph.get_node(node_name)
            op = node.layer_type
            if op in self.directly_map_ops:
@@ -631,10 +639,20 @@
             attr = {"shape": shape}
             node.fluid_code.add_layer(
                 "reshape", inputs=x, output=x, param_attr=attr)
+        if transpose_a is None:
+            transpose_a = node.get_attr('adj_x')
+        if transpose_b is None:
+            transpose_b = node.get_attr('adj_y')
         attr = {"transpose_x": transpose_a, "transpose_y": transpose_b}
         node.fluid_code.add_layer(
             "matmul", inputs=inputs, output=node, param_attr=attr)
 
+    def BatchMatMul(self, node):
+        return self.MatMul(node)
+
+    def BatchMatMulV2(self, node):
+        return self.MatMul(node)
+
     def ArgMax(self, node):
         input = self.graph.get_node(node.layer.input[0], copy=True)
         axis = self.graph.get_node(node.layer.input[1], copy=True)
@@ -957,7 +975,9 @@
         if y.layer_type == 'Const':
             self.add_omit_nodes(y.layer_name, node.layer_name)
             dim = y.value.tolist()
-            attr = {'axes': [dim]}
+            if not isinstance(dim, list):
+                dim = [dim]
+            attr = {'axes': dim}
         else:
             attr = {'axes': y}
         node.fluid_code.add_layer(
@@ -980,3 +1000,95 @@
                 "=", inputs=x, output=node, param_attr=None)
         else:
             raise Exception("SpaceToBatchND is not supported")
+
+    def OneHot(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        depth = self.graph.get_node(node.layer.input[1], copy=True)
+        on_value = self.graph.get_node(node.layer.input[2], copy=True)
+        off_value = self.graph.get_node(node.layer.input[3], copy=True)
+        assert depth.layer_type == 'Const', 'Parameter depth should be Const in OneHot'
+        assert on_value.layer_type == 'Const', 'Parameter on_value should be Const in OneHot'
+        assert off_value.layer_type == 'Const', 'Parameter off_value should be Const in OneHot'
+        self.add_omit_nodes(depth.layer_name, node.layer_name)
+        self.add_omit_nodes(on_value.layer_name, node.layer_name)
+        self.add_omit_nodes(off_value.layer_name, node.layer_name)
+        depth = depth.value
+        on_value = on_value.value
+        off_value = off_value.value
+        assert math.fabs(on_value -
+                         1.0) < 1e-06, "on_value should be 1 in OneHot"
+        assert math.fabs(off_value -
+                         0.0) < 1e-06, "off_value should be 0 in OneHot"
+        attr = {'depth': depth}
+        node.fluid_code.add_layer(
+            "one_hot",
+            inputs=input,
+            output=node,
+            param_attr=attr,
+            use_fluid=True)
+
+    def Pow(self, node):
+        x = self.graph.get_node(node.layer.input[0], copy=True)
+        factor = self.graph.get_node(node.layer.input[1], copy=True)
+        self.add_omit_nodes(factor.layer_name, node.layer_name)
+        if factor.layer_type == 'Const':
+            factor = factor.value.tolist()
+        else:
+            factor = self.decoder.infer_tensor(factor)
+        attr = {'factor': factor}
+        node.fluid_code.add_layer("pow", inputs=x, output=node, param_attr=attr)
+
+    def All(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        reduce_idx = self.graph.get_node(node.layer.input[1], copy=True)
+        self.add_omit_nodes(reduce_idx.layer_name, node.layer_name)
+        assert reduce_idx.layer_type == "Const", "Only support Const parameter[reduce_idx]"
+        dims = reduce_idx.value.tolist()
+        keep_dims = node.get_attr("keep_dims")
+
+        attr = {"dim": dims, "keep_dim": keep_dims}
+        node.fluid_code.add_layer(
+            "reduce_all", inputs=input, output=node, param_attr=attr)
+
+    def GatherV2(self, node):
+        embeddings = self.graph.get_node(node.layer.input[0], copy=True)
+        index = self.graph.get_node(node.layer.input[1], copy=True)
+        axis = self.graph.get_node(node.layer.input[2], copy=True)
+        self.add_omit_nodes(axis.layer_name, node.layer_name)
+        assert axis.layer_type == 'Const', "Only support Const parameter[axis]"
+        axis = axis.value.tolist()
+        assert axis == 0, "Only support axis=0 in GatherV2 OP"
+        attr = {'overwrite': False}
+        if len(index.out_shapes[0]) != 1:
+            reshape_attr = {"shape": [-1]}
+            node.fluid_code.add_layer(
+                "reshape", inputs=index, output=index, param_attr=reshape_attr)
+        inputs = {'input': embeddings, 'index': index}
+        node.fluid_code.add_layer(
+            "gather", inputs=inputs, output=node, param_attr=attr)
+
+    def OneShotIterator(self, node):
+        return self.Placeholder(node)
+
+    def IteratorV2(self, node):
+        dtype_map = {
+            1: "float32",
+            3: "int32",
+            4: "uint8",
+            9: "int64",
+            10: "bool"
+        }
+        shapes = node.out_shapes
+        dtypes = node.layer.attr['output_types'].list.type
+        node.fluid_code.add_note("{} = [0] * {}".format(node.layer_name,
+                                                        len(shapes)))
+        for i, shape in enumerate(shapes):
+            attr = {
+                'dtype': string(dtype_map[dtypes[i]]),
+                'shape': shape,
+                'name': string("{}_{}".format(node.layer_name, i)),
+                'append_batch_size': False
+            }
+            output = "{}[{}]".format(node.layer_name, i)
+            node.fluid_code.add_layer(
+                "data", inputs=None, output=output, param_attr=attr)
--
GitLab
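
Usage note: the code paths touched by this patch (IteratorV2 inputs, OneHot,
Pow, All, GatherV2, BatchMatMul/BatchMatMulV2, Erf) are exercised when a frozen
TensorFlow BERT graph is converted through the NHWC mapper. Below is a minimal
sketch of that flow, assuming the decoder/mapper/optimizer entry points as they
exist around this commit (method names and signatures may differ in other
x2paddle versions; "frozen_bert.pb" and "pd_model" are placeholder paths):

    # Sketch only: drives the classes modified in this patch.
    from x2paddle.decoder.tf_decoder import TFDecoder
    from x2paddle.op_mapper.tf_op_mapper_nhwc import TFOpMapperNHWC
    from x2paddle.optimizer.tf_optimizer import TFOptimizer

    # Parse the frozen GraphDef; with this patch, IteratorV2/OneShotIterator
    # feeds are treated as graph inputs and Assert nodes are dropped.
    model = TFDecoder("frozen_bert.pb")

    # Map TF ops to paddle.fluid layers in NHWC order (no layout rewrite);
    # the mapper runs the conversion loop during construction.
    mapper = TFOpMapperNHWC(model)

    # Prune layers whose outputs are never consumed, then emit the model.
    optimizer = TFOptimizer(mapper)
    optimizer.delete_redundance_code()
    mapper.save_inference_model("pd_model")

The same path should be reachable from the command line (flags as of this era
of x2paddle): x2paddle --framework=tensorflow --model=frozen_bert.pb
--save_dir=pd_model --without_data_format_optimization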