Commit d9f33f45 authored by SunAhong1993

add lstm

Parent 4b7bc28c
@@ -210,6 +210,8 @@ class PaddleGraph(object):
                     layer_id, 0) == 0 and layer.kernel != "prim.assert" \
                     and layer.kernel != "prim.exception" \
                     and layer.kernel != "prim.warnings":
+                if layer.kernel == "paddle.to_tensor":
+                    self.inputs_info.pop(layer.outputs[0])
                 invalid_list.append(layer_id)
         for layer_id in invalid_list:
             self.layers.pop(layer_id)
@@ -425,7 +427,6 @@ class PaddleGraph(object):
                 continue
             if layer.kernel == "paddle.to_tensor":
                 data = layer.attrs["data"]
-                if not data.startswith("params["):
-                    self.inputs.append(data)
+                self.inputs.append(data)
             if len(layer.blocks) > 0:
                 for block in layer.blocks:
@@ -577,6 +578,9 @@ class PaddleGraph(object):
                 line = layer.outputs[0]
             elif len(layer.outputs) == 2:
                 line = layer.outputs[1]
+            else:
+                if layer.kernel == "paddle.nn.LSTM":
+                    line = "{}, ({})".format(layer.outputs[1], ', '.join(layer.outputs[-2:]))
                 else:
                     line = ','.join(layer.outputs[1:])
             if layer.kernel == "paddle.to_tensor" and layer.attrs[
......
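The three-output branch added above exists because `paddle.nn.LSTM` returns `output, (h, c)` rather than a flat tuple, so the generated assignment has to unpack it the same way. A minimal sketch of the formatting, with hypothetical output names:

```python
# A paddle.nn.LSTM layer carries four outputs: [module_name, output, hidden, cell];
# the generated line must match paddle.nn.LSTM's return convention of `output, (h, c)`.
outputs = ["lstm0", "x2paddle_input_96", "x2paddle_551", "x2paddle_552"]
line = "{}, ({})".format(outputs[1], ', '.join(outputs[-2:]))
print(line)  # x2paddle_input_96, (x2paddle_551, x2paddle_552)
```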
@@ -2515,6 +2515,89 @@ def aten_log(mapper, graph, node):
     return current_inputs, current_outputs
+
+def aten_lstm(mapper, graph, node):
+    """ Construct a PaddleLayer for a long short-term memory (LSTM) network.
+    TorchScript example:
+        %input.96, %551, %552 = aten::lstm(%input.95, %734, %549, %526, %525, %524, %526, %526, %526)
+        Argument meanings:
+        %input.96 (Tensor): output, formed by concatenating the outputs of the forward and backward cells.
+        %551 (Tensor): hidden state.
+        %552 (Tensor): cell state.
+        %input.95 (Tensor): network input.
+        %734 (Tensor): initial states of the network.
+        %549 (list): list combining all the weights.
+        %526 (bool): whether to use bias.
+        %525 (int): number of layers.
+        %524 (float): dropout probability.
+        %526 (bool): whether in training mode.
+        %526 (bool): whether the LSTM is bidirectional.
+        %526 (bool): whether the first dimension is the batch size.
+    """
+    scope_name = mapper.normalize_scope_name(node)
+    op_name = name_generator("lstm", mapper.nn_name2id)
+    output_names = mapper._get_outputs_name(node)
+    layer_outputs = [op_name]
+    layer_outputs.extend(output_names)
+    layer_inputs = {}
+    layer_attrs = {}
+    inputs_name, inputs_node = mapper._get_inputs_name(node)
+    # Get the list of outputs of the current node
+    current_outputs = output_names
+    # Process input 0, i.e. %input.95
+    mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs, scope_name)
+    layer_inputs["input0"] = inputs_name[0]
+    # Process input 1, i.e. %734
+    mapper._check_input(graph, inputs_node[1], inputs_name[1], current_outputs, scope_name)
+    layer_inputs["input1"] = inputs_name[1]
+    # Get the list of inputs of the current node
+    current_inputs = list(layer_inputs.values())
+    # Process input 2, i.e. %549 (the weight list)
+    mapper._check_input(graph, inputs_node[2], inputs_name[2], current_outputs, scope_name)
+    graph.layers.pop(mapper.output2id[inputs_name[2]])
+    param_inputs_name, _ = mapper._get_inputs_name(inputs_node[2])
+    new_param_inputs_name = list()
+    for i, param_name in enumerate(param_inputs_name):
+        if i == 0:
+            layer_attrs["hidden_size"] = int(mapper.paddle_params[param_name].shape[0] / 4)
+            layer_attrs["input_size"] = int(mapper.paddle_params[param_name].shape[1])
+        if len(mapper.paddle_params[param_name].shape) > 1:
+            part_name = param_name.split("_weight_")[-1]
+            mapper.paddle_params["{}.weight_{}".format(op_name, part_name)] = mapper.paddle_params[param_name]
+            new_param_inputs_name.append("{}.weight_{}".format(op_name, part_name))
+        else:
+            part_name = param_name.split("_bias_")[-1]
+            mapper.paddle_params["{}.bias_{}".format(op_name, part_name)] = mapper.paddle_params[param_name]
+        mapper.paddle_params.pop(param_name)
+    # Process input 3, i.e. %526 (whether biases are used)
+    is_bias = mapper.attrs[inputs_name[3]]
+    if not is_bias:
+        for param_name in new_param_inputs_name:
+            bias_name = param_name.replace("weight", "bias")
+            bias_shape = mapper.paddle_params[param_name].shape[:1]
+            mapper.paddle_params[bias_name] = np.zeros(bias_shape).astype("float32")
+    # Process input 4, i.e. %525 (num_layers)
+    layer_attrs["num_layers"] = mapper.attrs[inputs_name[4]]
+    # Process input 5, i.e. %524 (dropout)
+    layer_attrs["dropout"] = mapper.attrs[inputs_name[5]]
+    # Process input 7, i.e. %526 (bidirectional)
+    is_bidirectional = mapper.attrs[inputs_name[7]]
+    if is_bidirectional:
+        layer_attrs["direction"] = string("bidirectional")
+    # Process input 8, i.e. %526 (batch_first)
+    batch_first = mapper.attrs[inputs_name[8]]
+    if not batch_first:
+        layer_attrs["time_major"] = True
+    graph.add_layer(
+        "paddle.nn.LSTM",
+        inputs=layer_inputs,
+        outputs=layer_outputs,
+        scope_name=scope_name,
+        **layer_attrs)
+    return current_inputs, current_outputs
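For orientation, the layer emitted by aten_lstm corresponds roughly to the hand-written Paddle module below; the shapes, names, and flag values are assumptions for illustration, not taken from the commit:

```python
import paddle

# hidden_size and input_size are recovered from the first weight tensor,
# whose shape is [4 * hidden_size, input_size] in PyTorch's LSTM layout.
lstm0 = paddle.nn.LSTM(
    input_size=128,
    hidden_size=64,
    num_layers=2,
    dropout=0.0,
    direction="bidirectional",  # set when the bidirectional flag is true
    time_major=True)            # set when batch_first is false

x = paddle.randn([10, 4, 128])  # [time, batch, feature] because time_major=True
output, (h, c) = lstm0(x)       # matches the generated unpacking line
```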
def aten_lt(mapper, graph, node):
    """ Construct a PaddleLayer that compares values.
......
@@ -33,10 +33,32 @@ def prim_Constant(mapper, graph, node):
     output_type = output.type()
     if isinstance(value, str):
         value = string(value)
-    if str(output_type) == "Tensor":
+    if "Tensor" in str(output_type):
         tensor_value = value
         value = "{}".format(value)
         if "tensor" in value:
+            if isinstance(tensor_value, list) or isinstance(tensor_value, tuple):
+                name_dict = dict()
+                for i, tv in enumerate(tensor_value):
+                    output_name_i = "{}_p{}".format(output_name, i)
+                    key_i = "input{}".format(i)
+                    mapper.paddle_params[output_name_i] = tv.cpu().detach().numpy()
+                    graph.add_layer(
+                        "self.create_parameter",
+                        inputs={},
+                        outputs=[output_name_i],
+                        scope_name=scope_name,
+                        dtype=string(str(mapper.paddle_params[output_name_i].dtype)),
+                        shape=mapper.paddle_params[output_name_i].shape,
+                        default_initializer="paddle.nn.initializer.Constant(value=0.0)")
+                    name_dict[key_i] = output_name_i
+                graph.add_layer(
+                    "prim.list",
+                    inputs=name_dict,
+                    outputs=[output_name],
+                    scope_name=scope_name)
+                return [], [output_name]
+            else:
                 mapper.pytorch_params[output_name] = tensor_value.cpu().detach().numpy()
     if "inf" in str(value):
@@ -218,11 +240,13 @@ def prim_ListConstruct(mapper, graph, node):
     current_outputs = [output_name]
     # Process each input
     for i, input_name in enumerate(inputs_name):
+        mapper._check_input(graph, inputs_node[i], input_name, current_outputs, scope_name)
         layer_inputs["input{}".format(i)] = input_name
     # Get the list of inputs of the current node
     current_inputs = list(layer_inputs.values())
-    graph.add_layer("prim.list", inputs=layer_inputs, outputs=layer_outputs, scope_name=scope_name)
+    layer_id = graph.add_layer("prim.list", inputs=layer_inputs, outputs=layer_outputs, scope_name=scope_name)
+    mapper.output2id[output_name] = layer_id
     return current_inputs, current_outputs
......
@@ -37,6 +37,7 @@ class PyTorchOpMapper(OpMapper):
         self.scope_name_list = list()
         self.scope_name2id = dict()
         self.inputs_info = dict()
+        self.output2id = dict()  # maps output names to layer ids; used by the LSTM converter to remove preceding nodes
         # Conversion
         if not self.op_checker(decoder.graph):
             raise Exception("Model is not supported yet.")
@@ -175,7 +176,7 @@ class PyTorchOpMapper(OpMapper):
                 if add_dim:
                     param = param[np.newaxis, :]
                 self.paddle_params[output_name] = param
-                graph.add_layer(
+                layer_id = graph.add_layer(
                     "self.create_parameter",
                     inputs={},
                     outputs=[output_name],
@@ -183,6 +184,7 @@ class PyTorchOpMapper(OpMapper):
                     dtype=string(str(param.dtype)),
                     shape=param.shape,
                     default_initializer="paddle.nn.initializer.Constant(value=0.0)")
+                self.output2id[output_name] = layer_id
             else:
                 if isinstance(param, dict) and "Tensor" in param and \
                         "parent_layer_id" in param:
@@ -202,7 +204,7 @@ class PyTorchOpMapper(OpMapper):
                         if add_dim:
                             param = param[np.newaxis, :]
                         self.paddle_params[output_name] = param
-                        graph.add_layer(
+                        layer_id = graph.add_layer(
                             "self.create_parameter",
                             inputs={},
                             outputs=[output_name],
@@ -211,6 +213,7 @@ class PyTorchOpMapper(OpMapper):
                             shape=param.shape,
                             default_initializer="paddle.nn.initializer.Constant(value=0.0)")
                         node_outputs.append(output_name)
+                        self.output2id[output_name] = layer_id
                         return
                 # Outside the if-else, the assignment result from inside the if-else can be referenced directly
                 graph.add_layer(
@@ -231,7 +234,7 @@ class PyTorchOpMapper(OpMapper):
         elif node.kind() == "prim::Constant" and output_name in self.pytorch_params:
             param = self.pytorch_params[output_name]
             self.paddle_params[output_name] = param
-            graph.add_layer(
+            layer_id = graph.add_layer(
                 "self.create_parameter",
                 inputs={},
                 outputs=[output_name],
@@ -239,6 +242,7 @@ class PyTorchOpMapper(OpMapper):
                 dtype=string(str(param.dtype)),
                 shape=param.shape,
                 default_initializer="paddle.nn.initializer.Constant(value=0.0)")
+            self.output2id[output_name] = layer_id

     def _get_inputs_name(self, node):
......
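The output2id bookkeeping added throughout this file has a single consumer: aten_lstm receives its weights as a prim.list of created parameters, and since paddle.nn.LSTM owns its weights directly, the list layer is redundant and gets popped via its recorded id. A minimal sketch of the mechanism, with hypothetical identifiers:

```python
# Simplified stand-ins for graph.layers and mapper.output2id.
layers = {}
output2id = {}

def add_layer(layer_id, kernel, output_name):
    layers[layer_id] = kernel
    output2id[output_name] = layer_id
    return layer_id

# prim_ListConstruct records the id of the weight list it creates ...
add_layer("12", "prim.list", "x2paddle_549")
# ... and aten_lstm later removes that layer by output name:
layers.pop(output2id["x2paddle_549"])
print(layers)  # {}
```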
@@ -39,6 +39,7 @@ class HierarchicalTree(Tree):
         self.identifier_idx = dict()
         self.param_tree = PamareterTree()
         self.module_name2count = dict()
+        self.scope_name_list = list()

     def insert(self, layer):
         """ Insert a node into the hierarchical tree.
@@ -47,6 +48,7 @@ class HierarchicalTree(Tree):
             layer (PaddleLayer): the node to insert.
         """
         scope_name = layer.scope_name
+        self.scope_name_list.append(scope_name)
         if scope_name == "":
             if layer.kernel == "prim.tuple" or layer.kernel == "prim.tuple_unpack":
                 layer_id = layer.id
@@ -55,6 +57,27 @@ class HierarchicalTree(Tree):
                     layer_id_list.append(int(input_layer_id))
                 layer_id_list = list(set(layer_id_list))
                 layer_id_list.sort(reverse=True)
+                if layer.kernel == "prim.tuple":
+                    for i, input_layer_id in enumerate(layer_id_list):
+                        input_layer_id_str = str(input_layer_id)
+                        scope_name = self.pd_graph.layers[input_layer_id_str].scope_name
+                        if i == 0:
+                            min_scope_name = scope_name
+                        else:
+                            len1 = len(min_scope_name.split("/"))
+                            len2 = len(scope_name.split("/"))
+                            if len1 > len2 and scope_name in self.scope_name_list:
+                                min_scope_name = scope_name
+                    if min_scope_name == "":
+                        self.create_node(tag=layer.id,
+                                         identifier="no_scope_" + layer.id,
+                                         parent=self.pd_graph.name,
+                                         data=layer)
+                        return
+                    layer.scope_name = min_scope_name
+                    scope_name = min_scope_name
+                else:
                     for input_layer_id in layer_id_list:
                         input_layer_id_str = str(input_layer_id)
                         if self.pd_graph.layers[input_layer_id_str].scope_name != "":
......
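The new prim.tuple branch picks, among the scope names of the tuple's inputs, the shallowest one that has actually been seen, so the tuple node attaches to the enclosing module rather than to one of its children. A sketch of the comparison, with hypothetical scope names:

```python
# Scope depth is measured by the number of "/"-separated segments.
scope_name_list = ["MyNet", "MyNet/LSTM[lstm0]"]   # scopes seen so far
input_scopes = ["MyNet/LSTM[lstm0]", "MyNet"]      # scopes of the tuple's inputs
min_scope_name = input_scopes[0]
for scope_name in input_scopes[1:]:
    if len(min_scope_name.split("/")) > len(scope_name.split("/")) \
            and scope_name in scope_name_list:
        min_scope_name = scope_name
print(min_scope_name)  # MyNet
```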
@@ -22,6 +22,7 @@ NN_KERNEL_NAME = {"paddle.nn.BatchNorm": "bn",
                   "paddle.nn.Embedding": "embedding",
                   "paddle.nn.Linear": "linear",
                   "paddle.nn.Conv2DTranspose": "conv",
+                  "paddle.nn.LSTM": "lstm",
                   "paddle.nn.ReLU": "relu",
                   "paddle.nn.ReLU6": "relu",
                   "paddle.nn.Softmax": "softmax",
@@ -36,7 +37,7 @@ NN_KERNEL_NAME = {"paddle.nn.BatchNorm": "bn",
                   "paddle.nn.GELU": "gelu",
                   "paddle.nn.Hardtanh": "tanh",
                   "paddle.nn.LeakyReLU": "leakly_relu"}
-NN_KERNEL_WITH_PARAMS = list(NN_KERNEL_NAME.keys())[:6]
+NN_KERNEL_WITH_PARAMS = list(NN_KERNEL_NAME.keys())[:7]

def rename_layers(layers, param_tree=None, is_rename_module=False):
    """ Rename the inputs, outputs, etc. of the sublayers.
@@ -234,6 +235,9 @@ def gen_layer_code(graph, sub_layers, sub_layers_name, different_attrs=list()):
                 line = layer.outputs[0]
             elif len(layer.outputs) == 2:
                 line = layer.outputs[1]
+            else:
+                if layer.kernel == "paddle.nn.LSTM":
+                    line = "{}, ({})".format(layer.outputs[1], ', '.join(layer.outputs[-2:]))
                 else:
                     line = ','.join(layer.outputs[1:])
......
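The slice bound moves from 6 to 7 because NN_KERNEL_NAME is an insertion-ordered dict whose leading keys are exactly the kernels that carry learnable parameters; inserting paddle.nn.LSTM among them grows that prefix by one. A toy illustration (the abbreviated dict below is an assumption, not the full table):

```python
# The bound must equal the number of leading parameterized kernels;
# dicts preserve insertion order in Python 3.7+.
NN_KERNEL_NAME = {"paddle.nn.BatchNorm": "bn",  # parameterized kernels first
                  "paddle.nn.Conv2D": "conv",   # (toy subset of the table)
                  "paddle.nn.LSTM": "lstm",     # newly inserted
                  "paddle.nn.ReLU": "relu"}     # parameter-free kernels follow
NN_KERNEL_WITH_PARAMS = list(NN_KERNEL_NAME.keys())[:3]  # 3 in this toy; 7 upstream
print(NN_KERNEL_WITH_PARAMS)
# ['paddle.nn.BatchNorm', 'paddle.nn.Conv2D', 'paddle.nn.LSTM']
```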