From d9f33f456b2d15b50de9520a5f4d766c7518c004 Mon Sep 17 00:00:00 2001 From: SunAhong1993 Date: Thu, 24 Dec 2020 21:02:44 +0800 Subject: [PATCH] add lstm --- x2paddle/core/program.py | 10 ++- .../op_mapper/dygraph/pytorch2paddle/aten.py | 83 +++++++++++++++++++ .../op_mapper/dygraph/pytorch2paddle/prim.py | 30 ++++++- .../pytorch2paddle/pytorch_op_mapper.py | 12 ++- .../code_optimizer/hierachical_tree.py | 33 ++++++-- .../code_optimizer/layer_code_generator.py | 8 +- 6 files changed, 159 insertions(+), 17 deletions(-) diff --git a/x2paddle/core/program.py b/x2paddle/core/program.py index 27a26eb..27f23ca 100644 --- a/x2paddle/core/program.py +++ b/x2paddle/core/program.py @@ -210,6 +210,8 @@ class PaddleGraph(object): layer_id, 0) == 0 and layer.kernel != "prim.assert" \ and layer.kernel != "prim.exception" \ and layer.kernel != "prim.warnings": + if layer.kernel == "paddle.to_tensor": + self.inputs_info.pop(layer.outputs[0]) invalid_list.append(layer_id) for layer_id in invalid_list: self.layers.pop(layer_id) @@ -425,8 +427,7 @@ class PaddleGraph(object): continue if layer.kernel == "paddle.to_tensor": data = layer.attrs["data"] - if not data.startswith("params["): - self.inputs.append(data) + self.inputs.append(data) if len(layer.blocks) > 0: for block in layer.blocks: block.get_dygraph_inputs() @@ -578,7 +579,10 @@ class PaddleGraph(object): elif len(layer.outputs) == 2: line = layer.outputs[1] else: - line = ','.join(layer.outputs[1:]) + if layer.kernel == "paddle.nn.LSTM": + line = "{}, ({})".format(layer.outputs[1], ', '.join(layer.outputs[-2:])) + else: + line = ','.join(layer.outputs[1:]) if layer.kernel == "paddle.to_tensor" and layer.attrs[ "data"].startswith("params["): line += " = self.{}".format(layer.outputs[0]) diff --git a/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py b/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py index 8f443f1..b5ca630 100644 --- a/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py +++ b/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py @@ -2515,6 +2515,89 @@ def aten_log(mapper, graph, node): return current_inputs, current_outputs +def aten_lstm(mapper, graph, node): + """ 构造长短期记忆网络(LSTM)的PaddleLayer。 + + TorchScript示例: + %input.96, %551, %552 = aten::lstm(%input.95, %734, %549, %526, %525, %524, %526, %526, %526) + 参数含义: + %input.96 (Tensor): 输出,由前向和后向cell的输出拼接得到。 + %551 (Tensor): cell state。 + %552 (Tensor): hidden state。 + %input.95 (Tensor): 网络输入。 + %734 (Tensor): 网络的初始状态。 + %549 (list): 所有权重组合成的list。 + %526 (bool): 是否使用bias。 + %525 (int): 网络层数。 + %524 (float): dropout概率。 + %526 (bool): 是否为训练阶段。 + %526 (bool): 是否使用双向LSTM。 + %526 (bool): 第一个维度是否为batch size。 + """ + scope_name = mapper.normalize_scope_name(node) + op_name = name_generator("lstm", mapper.nn_name2id) + output_names = mapper._get_outputs_name(node) + layer_outputs = [op_name] + layer_outputs.extend(output_names) + layer_inputs = {} + layer_attrs = {} + inputs_name, inputs_node = mapper._get_inputs_name(node) + # 获取当前节点输出的list + current_outputs = output_names + # 处理输入0,即%input.95 + mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs, scope_name) + layer_inputs["input0"] = inputs_name[0] + # 处理输入1,即%734 + mapper._check_input(graph, inputs_node[1], inputs_name[1], current_outputs, scope_name) + layer_inputs["input1"] = inputs_name[1] + # 获取当前节点输入、输出的list + current_inputs = list(layer_inputs.values()) + # 处理输入2,即%734 + mapper._check_input(graph, inputs_node[2], inputs_name[2], current_outputs, scope_name) + graph.layers.pop(mapper.output2id[inputs_name[2]]) + param_inputs_name, _ = mapper._get_inputs_name(inputs_node[2]) + new_param_inputs_name = list() + for i, param_name in enumerate(param_inputs_name): + if i == 0: + layer_attrs["hidden_size"] = int(mapper.paddle_params[param_name].shape[0] / 4) + layer_attrs["input_size"] = int(mapper.paddle_params[param_name].shape[1]) + if len(mapper.paddle_params[param_name].shape) > 1: + part_name = param_name.split("_weight_")[-1] + mapper.paddle_params["{}.weight_{}".format(op_name, part_name)] = mapper.paddle_params[param_name] + new_param_inputs_name.append("{}.weight_{}".format(op_name, part_name)) + else: + part_name = param_name.split("_bias_")[-1] + mapper.paddle_params["{}.bias_{}".format(op_name, part_name)] = mapper.paddle_params[param_name] + mapper.paddle_params.pop(param_name) + + # 处理输入3,即%526 + is_bias = mapper.attrs[inputs_name[3]] + if not is_bias: + for param_name in new_param_inputs_name: + bias_name = param_name.replace("weight", "bias") + bias_shape= mapper.paddle_params[param_name].shape[:1] + mapper.paddle_params[bias_name] = np.zeros(bias_shape).astype("float32") + # 处理输入4,即%525 + layer_attrs["num_layers"] = mapper.attrs[inputs_name[4]] + # 处理输入5,即%524 + layer_attrs["dropout"] = mapper.attrs[inputs_name[5]] + # 处理输入7,即%526 + is_bidirectional = mapper.attrs[inputs_name[7]] + if is_bidirectional: + layer_attrs["direction"] = string("bidirectional") + # 处理输入8,即%526 + batch_first = mapper.attrs[inputs_name[8]] + if not batch_first: + layer_attrs["time_major"] = True + graph.add_layer( + "paddle.nn.LSTM", + inputs=layer_inputs, + outputs=layer_outputs, + scope_name=scope_name, + **layer_attrs) + return current_inputs, current_outputs + + def aten_lt(mapper, graph, node): """ 构造对比大小的PaddleLayer。 diff --git a/x2paddle/op_mapper/dygraph/pytorch2paddle/prim.py b/x2paddle/op_mapper/dygraph/pytorch2paddle/prim.py index fb47a31..9aaed67 100644 --- a/x2paddle/op_mapper/dygraph/pytorch2paddle/prim.py +++ b/x2paddle/op_mapper/dygraph/pytorch2paddle/prim.py @@ -33,11 +33,33 @@ def prim_Constant(mapper, graph, node): output_type = output.type() if isinstance(value, str): value = string(value) - if str(output_type) == "Tensor": + if "Tensor" in str(output_type): tensor_value = value value = "{}".format(value) if "tensor" in value: - mapper.pytorch_params[output_name] = tensor_value.cpu().detach().numpy() + if isinstance(tensor_value, list) or isinstance(tensor_value, tuple): + name_dict = dict() + for i, tv in enumerate(tensor_value): + output_name_i = "{}_p{}".format(output_name,i) + key_i = "input{}".format(i) + mapper.paddle_params[output_name_i] = tv.cpu().detach().numpy() + graph.add_layer( + "self.create_parameter", + inputs={}, + outputs=[output_name_i], + scope_name=scope_name, + dtype=string(str(mapper.paddle_params[output_name_i].dtype)), + shape = mapper.paddle_params[output_name_i].shape, + default_initializer="paddle.nn.initializer.Constant(value=0.0)") + name_dict[key_i] = output_name_i + graph.add_layer( + "prim.list", + inputs=name_dict, + outputs=[output_name], + scope_name=scope_name) + return [], [output_name] + else: + mapper.pytorch_params[output_name] = tensor_value.cpu().detach().numpy() if "inf" in str(value): t = str(type(value)).split("'")[1] @@ -218,11 +240,13 @@ def prim_ListConstruct(mapper, graph, node): current_outputs = [output_name] # 处理每个输入 for i, input_name in enumerate(inputs_name): + mapper._check_input(graph, inputs_node[i], input_name, current_outputs, scope_name) layer_inputs["input{}".format(i)] = input_name # 获取当前节点输入的list current_inputs = list(layer_inputs.values()) - graph.add_layer("prim.list", inputs=layer_inputs, outputs=layer_outputs, scope_name=scope_name) + layer_id = graph.add_layer("prim.list", inputs=layer_inputs, outputs=layer_outputs, scope_name=scope_name) + mapper.output2id[output_name] = layer_id return current_inputs, current_outputs diff --git a/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_op_mapper.py b/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_op_mapper.py index 0df7cb7..5f43e33 100644 --- a/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_op_mapper.py +++ b/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_op_mapper.py @@ -37,6 +37,7 @@ class PyTorchOpMapper(OpMapper): self.scope_name_list = list() self.scope_name2id = dict() self.inputs_info = dict() + self.output2id = dict() # output名字和layer_id的关系,用于lstm去除前面的node # 转换 if not self.op_checker(decoder.graph): raise Exception("Model is not supported yet.") @@ -175,7 +176,7 @@ class PyTorchOpMapper(OpMapper): if add_dim: param = param[np.newaxis, :] self.paddle_params[output_name] = param - graph.add_layer( + layer_id = graph.add_layer( "self.create_parameter", inputs={}, outputs=[output_name], @@ -183,6 +184,7 @@ class PyTorchOpMapper(OpMapper): dtype=string(str(param.dtype)), shape = param.shape, default_initializer="paddle.nn.initializer.Constant(value=0.0)") + self.output2id[output_name] = layer_id else: if isinstance(param, dict) and "Tensor" in param and \ "parent_layer_id" in param: @@ -202,7 +204,7 @@ class PyTorchOpMapper(OpMapper): if add_dim: param = param[np.newaxis, :] self.paddle_params[output_name] = param - graph.add_layer( + layer_id = graph.add_layer( "self.create_parameter", inputs={}, outputs=[output_name], @@ -211,6 +213,7 @@ class PyTorchOpMapper(OpMapper): shape = param.shape, default_initializer="paddle.nn.initializer.Constant(value=0.0)") node_outputs.append(output_name) + self.output2id[output_name] = layer_id return # 若if-else外,则可直接引用if-else中的赋值结果 graph.add_layer( @@ -231,14 +234,15 @@ class PyTorchOpMapper(OpMapper): elif node.kind() == "prim::Constant" and output_name in self.pytorch_params: param = self.pytorch_params[output_name] self.paddle_params[output_name] = param - graph.add_layer( + layer_id = graph.add_layer( "self.create_parameter", inputs={}, outputs=[output_name], scope_name=scope_name, dtype=string(str(param.dtype)), shape = param.shape, - default_initializer="paddle.nn.initializer.Constant(value=0.0)") + default_initializer="paddle.nn.initializer.Constant(value=0.0)") + self.output2id[output_name] = layer_id def _get_inputs_name(self, node): diff --git a/x2paddle/optimizer/code_optimizer/hierachical_tree.py b/x2paddle/optimizer/code_optimizer/hierachical_tree.py index 774bad6..46ddbe8 100644 --- a/x2paddle/optimizer/code_optimizer/hierachical_tree.py +++ b/x2paddle/optimizer/code_optimizer/hierachical_tree.py @@ -39,6 +39,7 @@ class HierarchicalTree(Tree): self.identifier_idx = dict() self.param_tree = PamareterTree() self.module_name2count = dict() + self.scope_name_list = list() def insert(self, layer): """ 往层次树中插入节点。 @@ -47,6 +48,7 @@ class HierarchicalTree(Tree): layer (PaddleLayer): 需要插入的节点。 """ scope_name = layer.scope_name + self.scope_name_list.append(scope_name) if scope_name == "": if layer.kernel == "prim.tuple" or layer.kernel == "prim.tuple_unpack": layer_id = layer.id @@ -55,12 +57,33 @@ class HierarchicalTree(Tree): layer_id_list.append(int(input_layer_id)) layer_id_list = list(set(layer_id_list)) layer_id_list.sort(reverse=True) - for input_layer_id in layer_id_list: - input_layer_id_str = str(input_layer_id) - if self.pd_graph.layers[input_layer_id_str].scope_name != "": + + if layer.kernel == "prim.tuple": + for i, input_layer_id in enumerate(layer_id_list): + input_layer_id_str = str(input_layer_id) scope_name = self.pd_graph.layers[input_layer_id_str].scope_name - break - layer.scope_name = scope_name + if i == 0: + min_scope_name = scope_name + else: + len1 = len(min_scope_name.split("/")) + len2 = len(scope_name.split("/")) + if len1 > len2 and scope_name in self.scope_name_list: + min_scope_name = scope_name + if min_scope_name == "": + self.create_node(tag=layer.id, + identifier="no_scope_" + layer.id, + parent=self.pd_graph.name, + data=layer) + return + layer.scope_name = min_scope_name + scope_name = min_scope_name + else: + for input_layer_id in layer_id_list: + input_layer_id_str = str(input_layer_id) + if self.pd_graph.layers[input_layer_id_str].scope_name != "": + scope_name = self.pd_graph.layers[input_layer_id_str].scope_name + break + layer.scope_name = scope_name else: self.create_node(tag=layer.id, identifier="no_scope_" + layer.id, diff --git a/x2paddle/optimizer/code_optimizer/layer_code_generator.py b/x2paddle/optimizer/code_optimizer/layer_code_generator.py index 3c5589a..b7393df 100644 --- a/x2paddle/optimizer/code_optimizer/layer_code_generator.py +++ b/x2paddle/optimizer/code_optimizer/layer_code_generator.py @@ -22,6 +22,7 @@ NN_KERNEL_NAME = {"paddle.nn.BatchNorm": "bn", "paddle.nn.Embedding": "embedding", "paddle.nn.Linear": "linear", "paddle.nn.Conv2DTranspose": "conv", + "paddle.nn.LSTM": "lstm", "paddle.nn.ReLU": "relu", "paddle.nn.ReLU6": "relu", "paddle.nn.Softmax": "softmax", @@ -36,7 +37,7 @@ NN_KERNEL_NAME = {"paddle.nn.BatchNorm": "bn", "paddle.nn.GELU": "gelu", "paddle.nn.Hardtanh": "tanh", "paddle.nn.LeakyReLU": "leakly_relu"} -NN_KERNEL_WITH_PARAMS = list(NN_KERNEL_NAME.keys())[:6] +NN_KERNEL_WITH_PARAMS = list(NN_KERNEL_NAME.keys())[:7] def rename_layers(layers, param_tree=None, is_rename_module=False): """ 对子模块的输入输出等进行重命名。 @@ -235,7 +236,10 @@ def gen_layer_code(graph, sub_layers, sub_layers_name, different_attrs=list()): elif len(layer.outputs) == 2: line = layer.outputs[1] else: - line = ','.join(layer.outputs[1:]) + if layer.kernel == "paddle.nn.LSTM": + line = "{}, ({})".format(layer.outputs[1], ', '.join(layer.outputs[-2:])) + else: + line = ','.join(layer.outputs[1:]) line += " = self.{}(".format(layer.outputs[0]) for k, v in layer.inputs.items(): -- GitLab