Unverified commit a761b8fc, authored by J Jason, committed by GitHub

Merge pull request #489 from Channingss/lstm

Add LSTM & map weights by renaming them to Paddle's naming rule
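
In short, the converter now stores each converted ONNX weight under the structured parameter name of the generated Paddle layer (e.g. conv0.weight, bn0._mean), so the saved model.pdparams can later be restored via set_dict with structured names. A minimal sketch of the idea, using hypothetical layer and tensor names ('onnx::Conv_W', 'conv0'), not the actual converter API:

    import numpy as np

    # weight dict as filled by the ONNX converter, keyed by ONNX tensor names
    weights = {'onnx::Conv_W': np.ones((8, 3, 3, 3)), 'onnx::Conv_B': np.zeros(8)}

    def rename_weight(weights, origin_name, target_name):
        # re-key the array under the structured name Paddle's layer will look up
        weights[target_name] = weights.pop(origin_name)

    # 'conv0' is the name generated for the corresponding paddle.nn.Conv2D layer
    rename_weight(weights, 'onnx::Conv_W', 'conv0.weight')
    rename_weight(weights, 'onnx::Conv_B', 'conv0.bias')
    print(sorted(weights))  # ['conv0.bias', 'conv0.weight']
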
......@@ -18,7 +18,7 @@ from __future__ import division
import paddle.fluid as fluid
import paddle
from paddle.fluid.proto import framework_pb2
from collections import OrderedDict
import collections
import numpy
import sys
import os
......@@ -38,7 +38,7 @@ class PaddleLayer(object):
outputs,
list), "parameter 'outputs' for PaddleLayer should be type of list"
for k, v in inputs.items():
if isinstance(v, list):
if isinstance(v, (list, tuple)):
for i in v:
assert isinstance(
i, six.string_types
......@@ -66,7 +66,7 @@ class PaddleLayer(object):
class PaddleGraph(object):
def __init__(self, source_type=None, parent_layer=None, graph_type="static"):
self.layers = OrderedDict()
self.layers = collections.OrderedDict()
self.edges_out = dict()
self.edges_in = dict()
self.inputs = list()
......@@ -94,7 +94,7 @@ class PaddleGraph(object):
self.script = script
def clear(self):
self.layers = OrderedDict()
self.layers = collections.OrderedDict()
self.edges_out = dict()
self.edges_in = dict()
self.inputs = list()
......@@ -168,7 +168,7 @@ class PaddleGraph(object):
for layer_id, layer in self.layers.items():
for input_key, input_var in layer.inputs.items():
vs = input_var
if not isinstance(vs, list):
if not isinstance(vs, (list, tuple)):
vs = [vs]
for v in vs:
assert v in outputs_from_nodes or (
......@@ -521,7 +521,7 @@ class PaddleGraph(object):
gen_codes(
comment_list,
indent=1))
use_structured_name = False if self.source_type in ["tf", "onnx"] else True
use_structured_name = False if self.source_type in ["tf"] else True
self.run_func.extend(
gen_codes(["paddle.disable_static()",
"params = paddle.load('{}/model.pdparams')".format(osp.abspath(code_dir)),
......@@ -590,7 +590,7 @@ class PaddleGraph(object):
elif len(layer.outputs) == 2:
line = layer.outputs[1]
else:
if layer.kernel == "paddle.nn.LSTM":
if layer.kernel in ["paddle.nn.LSTM"]:
line = "{}, ({})".format(layer.outputs[1], ', '.join(layer.outputs[-2:]))
else:
line = ','.join(layer.outputs[1:])
......@@ -599,8 +599,13 @@ class PaddleGraph(object):
line += " = self.{}".format(layer.outputs[0])
else:
line += " = self.{}(".format(layer.outputs[0])
for k, v in layer.inputs.items():
line += "{}, ".format(v)
for v in layer.inputs.values():
if isinstance(v, list):
line += "[{}], ".format(", ".join(v))
elif isinstance(v, tuple):
line += "({}), ".format(", ".join(v))
else:
line += "{}, ".format(v)
line = line.strip(", ")
line += ")"
self.forward_func.extend(gen_codes([line], indent=indent))
......@@ -627,6 +632,8 @@ class PaddleGraph(object):
for k, v in layer.inputs.items():
if isinstance(v, list):
line += "{}=[{}], ".format(k, ", ".join(v))
elif isinstance(v, tuple):
line += "{}=({}), ".format(k, ", ".join(v))
else:
if k == "args":
line += v
......@@ -666,7 +673,7 @@ class PaddleGraph(object):
paddle.disable_static()
restore = paddle.load(osp.join(save_dir, "model.pdparams"))
model = getattr(x2paddle_code, self.name)()
if self.source_type in ["tf", "onnx"]:
if self.source_type in ["tf"]:
model.set_dict(restore, use_structured_name=False)
else:
model.set_dict(restore)
......
......@@ -96,6 +96,11 @@ class ONNXGraphNode(GraphNode):
return default
return self.attr_map[name]
def output(self, index=0):
if index > 0 and len(self.layer.output) <= index:
raise IndexError('Number of outputs of node {} is {}, which is <= index {}'.format(self.layer_name, len(self.layer.output), index))
return self.layer.output[index]
class ONNXGraphDataNode(GraphNode):
def __init__(self, layer, layer_name=None, is_global_input=False):
......@@ -246,12 +251,7 @@ class ONNXGraph(Graph):
"""
generate output_nodes node of ONNX model
"""
output_nodes = [value.name for value in self.graph.output]
for opt_data in output_nodes:
n = super(ONNXGraph, self).get_node(opt_data)
if n is None:
self.topo_sort.append(self.node_map[opt_data])
self.output_nodes.append(opt_data)
self.output_nodes = [value.name for value in self.graph.output]
def is_place_holder_nodes(self, layer):
"""
......
......@@ -17,4 +17,4 @@ from .one_hot import OneHot
from .pad_two_input import PadWithTwoInput
from .pad_all_dim2 import PadAllDim2
from .pad_all_dim4 import PadAllDim4
from .pad_all_dim4_one_input import PadAllDim4WithOneInput
\ No newline at end of file
from .pad_all_dim4_one_input import PadAllDim4WithOneInput
......@@ -42,6 +42,31 @@ def _const_weight_or_none(node, necessary=False):
return None
def _rename_or_remove_weight(weights, origin_name, target_name=None, is_remove=True):
'''
Rename or remove a parameter following Paddle's parameter naming rule.
Args:
weights(dict[String:np.ndarray]): Dict storing parameters, keyed by parameter name.
origin_name(String): Name of the parameter to rename or remove.
target_name(String, optional): If target_name is not None, add a new key-value pair
{target_name: weights[origin_name]} to weights; target_name must follow Paddle's
parameter naming rule. Default: None.
is_remove(bool, optional): If True, remove the original key-value pair. Default: True.
Returns:
None
'''
if origin_name not in weights:
raise KeyError('{} not a key in {}'.format(origin_name, weights))
if is_remove:
# remove weight
data = weights.pop(origin_name)
else:
data = weights[origin_name]
if target_name is not None:
# rename weight
weights[target_name] = data
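# Usage sketch (illustration only, not part of the converted source): given the
# converter's name->ndarray dict, a rename re-keys the entry under the Paddle
# structured name, while the default call just drops a weight that was folded
# into a layer attribute, e.g.
#   weights = {'onnx_scale': scale_arr, 'unused_const': c}
#   _rename_or_remove_weight(weights, 'onnx_scale', 'bn0.weight')  # re-keyed as 'bn0.weight'
#   _rename_or_remove_weight(weights, 'unused_const')              # removed from weights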
def _is_static_shape(shape):
negtive_dims = 0
error_dims = 0
......@@ -125,6 +150,9 @@ class OpSet9():
dict(threshold='threshold'),
dict(threshold=float(sys.maxsize))],
'Exp': ['paddle.exp'],
'LogSoftmax': ['paddle.nn.functional.log_softmax',
dict(axis='axis'),
dict(axis=1)],
'Softmax': ['paddle.nn.Softmax',
dict(axis='axis'),
dict(axis=1)],
......@@ -164,11 +192,12 @@ class OpSet9():
layer_attrs[pd_attr_name] = onnx_attrs[onnx_attr_name]
else:
layer_attrs[pd_attr_name] = op_info[2][onnx_attr_name]
if paddle_op.startswith("paddle.nn"):
if paddle_op.startswith("paddle.nn") and 'functional' not in paddle_op:
op_name = paddle_op[10:].lower()
op_name = name_generator(op_name, self.nn_name2id)
output_name = node.name
layer_outputs = [op_name, output_name]
self.paddle_graph.add_layer(
kernel=paddle_op,
inputs={"x": input.name},
......@@ -258,14 +287,12 @@ class OpSet9():
val_scales = self.graph.get_input_node(node, idx=1, copy=True)
# TODO(syf): paddle.nn.functional.interpolate will support the length
# which is the same as the rank of input.
# inputs['scale_factor'] = val_scales.name
attrs['scale_factor'] = self.weights[val_scales.name].tolist()[2:]
elif len(node.layer.input) == 3:
# opset 11
val_scales = self.graph.get_input_node(node, idx=2, copy=True)
# TODO(syf): paddle.nn.functional.interpolate will support the length
# which is the same as the rank of input.
# inputs['scale_factor'] = val_scales.name
attrs['scale_factor'] = self.weights[val_scales.name].tolist()[2:]
elif len(node.layer.input) == 4:
# opset 11
......@@ -602,11 +629,11 @@ class OpSet9():
val_scale = self.graph.get_input_node(node, idx=1, copy=True)
val_b = self.graph.get_input_node(node, idx=2, copy=True)
epsilon = node.get_attr('epsilon', 1e-5)
self.weights[op_name+'.scale'] = self.weights[val_scale.name]
self.weights[op_name+'.bias'] = self.weights[val_b.name]
layer_attrs = {
'num_features': node.out_shapes[0][1],
'epsilon': epsilon,
'weight_attr': string(val_scale.name),
'bias_attr': string(val_b.name)
}
dim = len(val_x.out_shapes[0])
if dim == 3:
......@@ -717,11 +744,11 @@ class OpSet9():
op_name = name_generator("embedding", self.nn_name2id)
output_name = node.name
layer_outputs = [op_name, output_name]
self.weights[op_name + '.weight'] = _const_weight_or_none(val_x)
self.paddle_graph.add_layer(
'paddle.nn.Embedding',
inputs={"x": indices_cast},
outputs=layer_outputs,
weight_attr=string(val_x.name),
num_embeddings=val_x.out_shapes[0][0],
embedding_dim=val_x.out_shapes[0][1])
else:
......@@ -918,10 +945,6 @@ class OpSet9():
if starts_value is not None and ends_value is not None and axes is not None:
starts_value = starts_value.copy()
ends_value = ends_value.copy()
#for idx in range(len(ends_value)):
# if ends_value[idx] > 2**31 - 1:
# ends_value[idx] = 2**31 - 1
#print(val_x.out_shapes)
for idx in range(len(ends_value)):
if starts_value[idx] >= val_x.out_shapes[0][axes[idx]]:
starts_value[idx] = val_x.out_shapes[0][axes[idx]] - 1
......@@ -1316,6 +1339,11 @@ class OpSet9():
epsilon = node.get_attr('epsilon', 1e-5)
c = val_x.out_shapes[0][1]
_rename_or_remove_weight(self.weights, val_scale.name, op_name+'.weight')
_rename_or_remove_weight(self.weights, val_b.name, op_name+'.bias')
_rename_or_remove_weight(self.weights, val_var.name, op_name+'._variance')
_rename_or_remove_weight(self.weights, val_mean.name, op_name+'._mean')
# Attribute: spatial is used in BatchNormalization-1,6,7
spatial = bool(node.get_attr('spatial'))
layer_attrs = {
......@@ -1323,10 +1351,6 @@ class OpSet9():
"momentum": momentum,
"epsilon": epsilon,
"is_test": True,
"param_attr": string(val_scale.name),
"bias_attr": string(val_b.name),
"moving_mean_name": string(val_mean.name),
"moving_variance_name": string(val_var.name),
"use_global_stats": False,
}
self.paddle_graph.add_layer(
......@@ -1358,7 +1382,7 @@ class OpSet9():
mode = 'channel'
shape_slope = val_slope.out_shapes[0]
if shape_slope == [1]:
if shape_slope == [1] * len(shape_slope):
mode = 'all'
if mode == "element":
......@@ -1391,17 +1415,19 @@ class OpSet9():
else:
if mode == 'channel':
slope_data = _const_weight_or_none(val_slope)
_rename_or_remove_weight(self.weights, val_slope.name)
if len(shape_slope) > 1:
self.weights[val_slope.name] = np.reshape(slope_data, shape_slope[0])
self.weights[op_name+'._weight'] = np.reshape(slope_data, shape_slope[0])
num_parameters = val_x.out_shapes[0][1]
else:
num_parameters = 1
_rename_or_remove_weight(self.weights, val_slope.name)
self.weights[op_name+'._weight'] = np.reshape(self.weights[val_slope.name], [1])
self.paddle_graph.add_layer(
"paddle.nn.PReLU",
inputs={"x": val_x.name},
outputs=layer_outputs,
num_parameters=num_parameters,
weight_attr=string(val_slope.name))
num_parameters=num_parameters)
@print_mapping_info
def Squeeze(self, node):
......@@ -1679,19 +1705,15 @@ class OpSet9():
"dilation": dilations,
"groups": num_groups,
}
val_w_name = val_w.name
while val_w_name in self.done_weight_list:
val_w_name += "__repeat"
self.done_weight_list.append(val_w_name)
layer_attrs["weight_attr"] = string(val_w_name)
self.weights[val_w_name] = self.weights[val_w.name]
remove_weight = True if val_w.name in self.done_weight_list else False
if remove_weight:
self.done_weight_list.append(val_w.name)
_rename_or_remove_weight(self.weights, val_w.name, op_name+'.weight', remove_weight)
if has_bias:
val_b_name = val_b.name
while val_b_name in self.done_weight_list:
val_b_name += "__repeat"
self.done_weight_list.append(val_b_name)
layer_attrs["bias_attr"] = string(val_b_name)
self.weights[val_b_name] = self.weights[val_b.name]
remove_bias = True if val_b.name in self.done_weight_list else False
if remove_bias:
self.done_weight_list.append(val_b_name)
_rename_or_remove_weight(self.weights, val_b.name, op_name+'.bias', remove_bias)
else:
layer_attrs["bias_attr"] = False
input_shape = val_x.out_shapes[0]
......@@ -1712,6 +1734,9 @@ class OpSet9():
@print_mapping_info
def ConvTranspose(self, node):
op_name = name_generator("conv_trans", self.nn_name2id)
output_name = node.name
layer_outputs = [op_name, output_name]
val_x = self.graph.get_input_node(node, idx=0, copy=True)
val_w = self.graph.get_input_node(node, idx=1, copy=True)
val_b = None
......@@ -1725,7 +1750,7 @@ class OpSet9():
assert 2 <= convnd <= 3, 'only Conv2DTranspose and Conv3DTranspose supported'
num_in_channels = val_w.out_shapes[0][0]
num_out_channels = val_w.out_shapes[0][1]
paddle_op = 'paddle.nn.functional.conv{}d_transpose'.format(convnd)
paddle_op = 'paddle.nn.Conv{}DTranspose'.format(convnd)
num_groups = node.get_attr('group', 1)
strides = node.get_attr('strides', [1] * convnd)
......@@ -1743,23 +1768,26 @@ class OpSet9():
output_size[1] = (val_x.out_shapes[0][3] - 1
) * strides[1] - 2 * paddings[1] + dilations[1] * (
kernel_shape[1] - 1) + 1 + out_padding[1]
# Conv2DTranspose lacks output_size; it can only be passed in at forward time
inputs_dict = {'x': val_x if isinstance(val_x, str) else val_x.name,
"weight": val_w.name}
inputs_dict = {'x': val_x if isinstance(val_x, str) else val_x.name}
layer_attrs = {
"in_channels": num_in_channels,
"out_channels": num_out_channels,
"kernel_size": kernel_shape,
"stride": strides,
"dilation": dilations,
"padding": paddings,
"groups": num_groups,
"output_size": node.out_shapes[0][2:]}
"output_padding":out_padding}
_rename_or_remove_weight(self.weights, val_w.name, op_name+'.weight',)
if val_b is not None:
inputs_dict["bias"] = val_b.name
else:
layer_attrs["bias"] = None
_rename_or_remove_weight(self.weights, val_b.name, op_name+'.bias')
self.paddle_graph.add_layer(
kernel="paddle.nn.functional.conv2d_transpose",
kernel=paddle_op,
inputs=inputs_dict,
outputs=[node.name],
outputs=layer_outputs,
**layer_attrs)
@print_mapping_info
......@@ -1774,6 +1802,7 @@ class OpSet9():
inputs={"x": val_x.name},
outputs=[node.name],
**layer_attrs)
@print_mapping_info
def Size(self, node):
......@@ -1836,3 +1865,115 @@ class OpSet9():
"paddle.reciprocal",
inputs={"x": val_x.name},
outputs=[node.name])
@print_mapping_info
def LSTM(self, node):
x = self.graph.get_input_node(node, idx=0, copy=True)
input_weight = self.graph.get_input_node(node, idx=1, copy=True)
hidden_weight = self.graph.get_input_node(node, idx=2, copy=True)
input_nums = len(node.layer.input)
exist_input_nums = 3
have_bias = False
if input_nums > 3 and node.layer.input[3] != '':
bias = self.graph.get_input_node(node, idx=exist_input_nums, copy=True)
have_bias = True
exist_input_nums += 1
if input_nums > 4 and node.layer.input[4] != '':
sequence_lens = self.graph.get_input_node(node, idx=exist_input_nums, copy=True)
exist_input_nums += 1
if input_nums > 5 and node.layer.input[5] != '':
init_h = self.graph.get_input_node(node, idx=exist_input_nums, copy=True)
self.paddle_graph.add_layer(
'paddle.reshape',
inputs={"x": init_h.name},
outputs=[init_h.name],
shape=init_h.out_shapes[0]
)
exist_input_nums += 1
if input_nums > 6 and node.layer.input[6] != '':
init_c = self.graph.get_input_node(node, idx=exist_input_nums, copy=True)
self.paddle_graph.add_layer(
'paddle.reshape',
inputs={"x": init_c.name},
outputs=[init_c.name],
shape=init_c.out_shapes[0]
)
input_weight_np = _const_weight_or_none(input_weight)
_rename_or_remove_weight(self.weights, input_weight.name)
hidden_size = node.get_attr('hidden_size', input_weight_np.shape[1]//4)
input_size = input_weight_np.shape[2]
hidden_weight_np = _const_weight_or_none(hidden_weight)
_rename_or_remove_weight(self.weights, hidden_weight.name)
bias_np = _const_weight_or_none(bias)
_rename_or_remove_weight(self.weights, bias.name)
input_bias_np = bias_np[:, :4*hidden_size]
hidden_bias_np = bias_np[:, 4*hidden_size:]
# Parameter order in paddle.nn.LSTM:
# 1. gate order in Paddle is: input, forget, cell, output.
# 2. gate order in ONNX is: input, output, forget, cell.
def reform_weights(w, n, intervals):
slices = [w[:,x * n: y * n] for x, y in intervals]
return np.concatenate(slices, axis=1)
def transform_weight_with_bias(weights, n, intervals):
return [reform_weights(w, n, intervals) for w in weights]
reform_permutation = [(0, 1), (2, 4), (1, 2)]
weights = transform_weight_with_bias(
[input_weight_np, hidden_weight_np, input_bias_np, hidden_bias_np],
hidden_size, reform_permutation)
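# Worked check of the reorder above (illustration, hidden_size n = 1): ONNX packs the
# gates as [i, o, f, c] blocks of width n while Paddle expects [i, f, c, o], so the
# permutation [(0, 1), (2, 4), (1, 2)] picks columns [0:n], [2n:4n], [n:2n] in turn:
#   reform_weights(np.array([[10, 20, 30, 40]]), 1, reform_permutation)
#   -> array([[10, 30, 40, 20]])   # i, o, f, c  ->  i, f, c, o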
op_name = name_generator("lstm", self.nn_name2id)
y_out = node.output(0)
yh_out = node.output(1)
yc_out = node.output(2)
direction = node.get_attr('direction', 'forward')
def generate_paddle_param_names(op_name, suffix=''):
param_names = []
param_names.extend(['{}.weight_ih_l0{}', '{}.weight_hh_l0{}'])
if have_bias != False: param_names.append('{}.bias_ih_l0{}')
if have_bias != False: param_names.append('{}.bias_hh_l0{}')
param_names = [x.format(op_name, suffix) for x in param_names]
return param_names
def assign_params(op_name, weights, weight_idx=0, suffix=''):
param_names = generate_paddle_param_names(op_name, suffix)
print(param_names)
for param_name, weight in zip(param_names, weights):
self.weights[param_name] = weight[weight_idx]
if direction == 'backward':
raise Exception("LSTM supports 'forward' or 'bidirectional', but got '{}'.".format(direction))
else:
assign_params(op_name, weights)
if direction == 'bidirectional':
assign_params(op_name, weights, 1, '_reverse')
self.paddle_graph.add_layer(
'paddle.nn.LSTM',
inputs={'input': x.name, 'initial_states': (init_h.name, init_c.name)},
outputs=[op_name, y_out, yh_out, yc_out],
input_size=input_size,
hidden_size=hidden_size,
num_layers=1,
direction=string(direction),
time_major=True)
self.paddle_graph.add_layer(
'paddle.reshape',
inputs={"x": y_out},
outputs=[y_out],
shape=[0, 0, -1, hidden_size]
)
self.paddle_graph.add_layer(
'paddle.transpose',
inputs={"x": y_out},
outputs=[y_out],
perm=[0,2,1,3]
)
......@@ -1757,4 +1757,4 @@ class OpSet9():
self.paddle_graph.add_layer(
"paddle.reciprocal",
inputs={"x": val_x.name},
outputs=[node.name])
\ No newline at end of file
outputs=[node.name])