Commit e1dda433 authored by SunAhong1993

add roberta aten

Parent 52c6ea75
@@ -92,7 +92,7 @@ def arg_parser():
"--input_shapes",
"-is",
action='append',
default=[],
default=None,
help="define the inputs' shape")
return parser
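
Changing the default from [] to None lets the converter tell "flag never passed" apart from "passed but empty", which the updated pytorch2paddle logic below relies on. A minimal sketch of the resulting argparse behavior (illustrative, not part of the commit):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--input_shapes", "-is", action='append', default=None)

args = parser.parse_args([])
assert args.input_shapes is None  # flag omitted: no shapes were given
args = parser.parse_args(["-is", "[-1,3,224,224]"])
assert args.input_shapes == ["[-1,3,224,224]"]  # each -is appends one string
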
@@ -207,12 +207,15 @@ def pytorch2paddle(model_path, save_dir, input_shapes):
graph_opt = GraphOptimizer()
graph_opt.optimize(mapper.graph)
print("Model optimized.")
real_input_shapes = list()
for shape in input_shapes:
    sp = shape[1:-1].split(",")
    for i, s in enumerate(sp):
        sp[i] = int(s)
    real_input_shapes.append(sp)
if input_shapes is not None:
    real_input_shapes = list()
    for shape in input_shapes:
        sp = shape[1:-1].split(",")
        for i, s in enumerate(sp):
            sp[i] = int(s)
        real_input_shapes.append(sp)
else:
    real_input_shapes = None
mapper.graph.gen_model(save_dir, real_input_shapes)
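
Each appended string is expected to look like "[-1,3,224,224]"; the loop strips the brackets and casts every element to int. A standalone illustration of that parsing (the shape string is assumed):

shape = "[-1,3,224,224]"
sp = shape[1:-1].split(",")  # drop '[' and ']', split on commas
sp = [int(s) for s in sp]
print(sp)  # [-1, 3, 224, 224]
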
......
@@ -226,7 +226,7 @@ class PaddleGraph(object):
indent=1)
f.close()
def gen_model(self, save_dir, input_shapes):
def gen_model(self, save_dir, input_shapes=None):
if not os.path.exists(save_dir):
os.makedirs(save_dir)
if self.graph_type == "static":
@@ -264,8 +264,9 @@ class PaddleGraph(object):
else:
self.gen_dygraph_code(save_dir)
self.dump_dygraph_parameter(save_dir)
# self.dygraph2static(save_dir, input_shapes) #[[None, 3, 224, 224]]
if input_shapes is not None:
    # If input_shapes is not None, export an inference model;
    # its value looks like [[None, 3, 224, 224]]
    self.dygraph2static(save_dir, input_shapes)
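
With the new default, a hypothetical caller can skip the static export entirely, or opt in by supplying shapes (usage sketch, names taken from the diff above):

graph.gen_model("inference_dir")                         # dump code + params only
graph.gen_model("inference_dir", [[None, 3, 224, 224]])  # also run dygraph2static
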
def dump_parameter(self, param_name, param, save_dir):
if not os.path.exists(save_dir):
@@ -318,6 +319,8 @@ class PaddleGraph(object):
update(self.layers)
self.inputs = list(set(self.inputs))
if self.inputs is not None:
    self.inputs.sort()
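
Sorting here makes the generated code's input order deterministic: set() iteration can differ between interpreter runs because str hashing is randomized. A quick illustration:

names = list(set(["x2", "x0", "x1"]))  # order depends on the hash seed
names.sort()
print(names)  # always ['x0', 'x1', 'x2']
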
def get_dygraph_outputs(self):
for layer_id, layer in self.layers.items():
@@ -389,7 +392,8 @@ class PaddleGraph(object):
for layer_id, layer in self.layers.items():
if ("paddle.nn" in layer.kernel and "functional" not in layer.kernel
) or layer.kernel == "fluid.dygraph.base.to_variable":
) or layer.kernel == "fluid.dygraph.base.to_variable" or \
"paddle.fluid.dygraph" in layer.kernel:
line = "{}".format(
layer.outputs[0]
) if layer.kernel == "fluid.dygraph.base.to_variable" and not layer.attrs[
......
@@ -765,6 +765,51 @@ def aten_cos(mapper, graph, node):
    return current_inputs, current_outputs
def aten_cumsum(mapper, graph, node):
    """ Construct a PaddleLayer that computes the cumulative sum over an axis.
    TorchScript example:
        %56 : Tensor = aten::cumsum(%mask.1, %46, %48)
    Parameter meanings:
        %56 (Tensor): output, the accumulated result.
        %mask.1 (Tensor): input, the Tensor to be accumulated.
        %46 (int): the axis along which to accumulate.
        %48 (int/None): the dtype of the Tensor.
    """
    output_name = mapper._get_outputs_name(node)[0]
    layer_outputs = [output_name]
    layer_inputs = {}
    layer_attrs = {}
    inputs_name, inputs_node = mapper._get_inputs_name(node)
    # Get the list of outputs of the current node
    current_outputs = [output_name]
    # Handle input 0, i.e. %mask.1
    mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs)
    layer_inputs["x"] = inputs_name[0]
    # Get the lists of inputs and outputs of the current node
    current_inputs = list(layer_inputs.values())
    # Handle input 1, i.e. %46
    if inputs_name[1] in mapper.attrs:
        layer_attrs["axis"] = mapper.attrs[inputs_name[1]]
    else:
        mapper._check_input(graph, inputs_node[1], inputs_name[1],
                            current_outputs)
        layer_inputs["axis"] = inputs_name[1]
        current_inputs.append(inputs_name[1])
    # Handle input 2, i.e. %48, which gives the dtype
    if mapper.attrs[inputs_name[2]] is None:
        layer_attrs["dtype"] = None
    else:
        layer_attrs["dtype"] = dtype_dict[mapper.attrs[inputs_name[2]]]
    graph.add_layer(
        "paddle.cumsum",
        inputs=layer_inputs,
        outputs=layer_outputs,
        **layer_attrs)
    return current_inputs, current_outputs
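
The new op maps onto paddle.cumsum, which mirrors torch.cumsum; a quick sanity check, assuming a Paddle 2.x runtime:

import paddle

x = paddle.to_tensor([[1, 2, 3], [4, 5, 6]])
print(paddle.cumsum(x, axis=1))  # [[1, 3, 6], [4, 9, 15]]
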
def aten_detach(mapper, graph, node):
""" 构造返回一个新的Tensor,从当前计算图中分离下来的,但是仍指向原变量的存放位置的PaddleLayer。
......@@ -990,6 +1035,8 @@ def aten_embedding(mapper, graph, node):
# 处理输入0,即%57
weights = mapper.pytorch_params[inputs_name[0]]
mapper.paddle_params[embedding_name + ".weight"] = weights
# layer_attrs["num_embeddings"] = weights.shape[0]
# layer_attrs["embedding_dim"] = weights.shape[1]
layer_attrs["size"] = weights.shape
    # Handle input 1, i.e. %input_ids.1
mapper._check_input(graph, inputs_node[1], inputs_name[1], current_outputs)
@@ -999,10 +1046,11 @@ def aten_embedding(mapper, graph, node):
    # Handle input 2, i.e. %45
    layer_attrs["padding_idx"] = mapper.attrs[inputs_name[2]]
    # Handle input 4, i.e. %46
# layer_attrs["sparse"] = mapper.attrs[inputs_name[4]]
layer_attrs["is_sparse"] = mapper.attrs[inputs_name[4]]
graph.add_layer(
"paddle.nn.Embedding",
"paddle.fluid.dygraph.Embedding",
inputs=layer_inputs,
outputs=layer_outputs,
**layer_attrs)
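
The switch from paddle.nn.Embedding to paddle.fluid.dygraph.Embedding explains the commented-out attributes: the fluid layer bundles num_embeddings/embedding_dim into a single size=[vocab, dim] argument and names the sparse flag is_sparse. A hedged sketch of the target call, assuming the Paddle 1.8 fluid dygraph API (the RoBERTa-like sizes are illustrative):

import numpy as np
from paddle import fluid

with fluid.dygraph.guard():
    emb = fluid.dygraph.Embedding(
        size=[30522, 768],  # [num_embeddings, embedding_dim] in one attribute
        padding_idx=0,
        is_sparse=False)
    ids = fluid.dygraph.to_variable(np.array([[1, 2, 3]], dtype="int64"))
    out = emb(ids)  # shape [1, 3, 768]
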
@@ -2885,6 +2933,44 @@ def aten_softplus(mapper, graph, node):
    return current_inputs, current_outputs
def aten_stack(mapper, graph, node):
    """ Construct a PaddleLayer that stacks Tensors.
    TorchScript example:
        %x.222 : Tensor = aten::stack(%32, %7)
    Parameter meanings:
        %x.222 (Tensor): output, the stacked result.
        %32 (list of Tensors): the Tensors to be stacked.
        %7 (int): the axis along which to stack.
    """
    output_name = mapper._get_outputs_name(node)[0]
    layer_outputs = [output_name]
    layer_inputs = {}
    layer_attrs = {}
    inputs_name, inputs_node = mapper._get_inputs_name(node)
    # Get the list of outputs of the current node
    current_outputs = [output_name]
    # Handle input 0, i.e. %32
    mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs)
    layer_inputs["x"] = inputs_name[0]
    # Get the list of inputs of the current node
    current_inputs = list(layer_inputs.values())
    # Handle input 1, i.e. %7
    if inputs_name[1] in mapper.attrs:
        layer_attrs["axis"] = mapper.attrs[inputs_name[1]]
    else:
        mapper._check_input(graph, inputs_node[1], inputs_name[1],
                            current_outputs)
        layer_inputs["axis"] = inputs_name[1]
        current_inputs.append(inputs_name[1])
    graph.add_layer(
        "paddle.stack",
        inputs=layer_inputs,
        outputs=layer_outputs,
        **layer_attrs)
    return current_inputs, current_outputs
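
paddle.stack joins a list of Tensors along a new axis; a short check, again assuming Paddle 2.x:

import paddle

a = paddle.to_tensor([1.0, 2.0])
b = paddle.to_tensor([3.0, 4.0])
print(paddle.stack([a, b], axis=0))  # shape [2, 2]: [[1, 2], [3, 4]]
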
def aten_sub(mapper, graph, node):
    """ Construct a PaddleLayer that subtracts values.
@@ -3017,44 +3103,6 @@ def aten_split(mapper, graph, node):
    return current_inputs, current_outputs
def aten_stack(mapper, graph, node):
    """ Construct a PaddleLayer that stacks Tensors.
    TorchScript example:
        %x.222 : Tensor = aten::stack(%32, %7)
    Parameter meanings:
        %x.222 (Tensor): output, the stacked result.
        %32 (list of Tensors): the Tensors to be stacked.
        %7 (int): the axis along which to stack.
    """
    output_name = mapper._get_outputs_name(node)[0]
    layer_outputs = [output_name]
    layer_inputs = {}
    layer_attrs = {}
    inputs_name, inputs_node = mapper._get_inputs_name(node)
    # Get the list of outputs of the current node
    current_outputs = [output_name]
    # Handle input 0, i.e. %32
    mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs)
    layer_inputs["x"] = inputs_name[0]
    # Get the list of inputs of the current node
    current_inputs = list(layer_inputs.values())
    # Handle input 1, i.e. %7
    if inputs_name[1] in mapper.attrs:
        layer_attrs["axis"] = mapper.attrs[inputs_name[1]]
    else:
        mapper._check_input(graph, inputs_node[1], inputs_name[1],
                            current_outputs)
        layer_inputs["axis"] = inputs_name[1]
        current_inputs.append(inputs_name[1])
    graph.add_layer(
        "paddle.stack",
        inputs=layer_inputs,
        outputs=layer_outputs,
        **layer_attrs)
    return current_inputs, current_outputs
def aten_transpose(mapper, graph, node):
    """ Construct a PaddleLayer that transposes a matrix.
@@ -3152,14 +3200,16 @@ def aten_to(mapper, graph, node):
    inputs_name, inputs_node = mapper._get_inputs_name(node)
    # Get the list of outputs of the current node
    current_outputs = [output_name]
    assert len(inputs_name) == 5, "Paddle only support converting the dtype!"
    # Handle input 0, i.e. %13
    mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs)
    layer_inputs["x"] = inputs_name[0]
    # Get the list of inputs of the current node
    current_inputs = list(layer_inputs.values())
    # Handle input 1, i.e. %12
    layer_attrs["dtype"] = dtype_dict[mapper.attrs[inputs_name[1]]]
    if len(inputs_name) == 6:
        layer_attrs["dtype"] = dtype_dict[mapper.attrs[inputs_name[2]]]
    else:
        layer_attrs["dtype"] = dtype_dict[mapper.attrs[inputs_name[1]]]
graph.add_layer(
"fluid.layers.cast",
@@ -3169,6 +3219,41 @@ def aten_to(mapper, graph, node):
return current_inputs, current_outputs
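
The removed assert accepted only the five-input overload of aten::to; the new branch also handles the six-input overload, where an extra device argument appears to precede the dtype (hence index 2). The dtype_dict consulted here maps TorchScript's integer ScalarType codes to Paddle dtype strings; a plausible subset, assumed for illustration since the real table is defined elsewhere in the mapper:

# Assumed shape of dtype_dict; keys follow PyTorch's ScalarType enum.
dtype_dict = {
    3: "int32",
    4: "int64",
    6: "float32",
    7: "float64",
    11: "bool",
}
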
def aten_type_as(mapper, graph, node):
    """ Construct a PaddleLayer that converts a Tensor's type.
    TorchScript example:
        %57 : Tensor = aten::type_as(%56, %mask.1)
    Parameter meanings:
        %57 (Tensor): output, the Tensor with the changed type.
        %56 (Tensor): the Tensor whose type is to be changed.
        %mask.1 (Tensor): the Tensor whose type is to be matched.
    """
    output_name = mapper._get_outputs_name(node)[0]
    layer_outputs = [output_name]
    layer_inputs = {}
    inputs_name, inputs_node = mapper._get_inputs_name(node)
    # Get the list of outputs of the current node
    current_outputs = [output_name]
    # Handle input 0, i.e. %56
    mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs)
    layer_inputs["x"] = inputs_name[0]
    # Get the list of inputs of the current node
    current_inputs = list(layer_inputs.values())
    # Handle input 1, i.e. %mask.1
    mapper._check_input(graph, inputs_node[1], inputs_name[1], current_outputs)
    graph.add_layer(
        "prim.type",
        inputs={"input": inputs_name[1]},
        outputs=[inputs_name[1] + "_type"])
    layer_inputs["dtype"] = inputs_name[1] + "_type"
    current_inputs.append(inputs_name[1])
    graph.add_layer(
        "fluid.layers.cast", inputs=layer_inputs, outputs=layer_outputs)
    return current_inputs, current_outputs
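
The two generated layers read the reference Tensor's dtype and then cast to it; roughly the following, as a runnable sketch under the Paddle 1.8 fluid API:

import numpy as np
from paddle import fluid

with fluid.dygraph.guard():
    x = fluid.dygraph.to_variable(np.array([1, 0, 1], dtype="int64"))
    ref = fluid.dygraph.to_variable(np.array([0.5], dtype="float32"))
    out = fluid.layers.cast(x, dtype=ref.dtype)  # prim.type + cast in one line
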
def aten_unsqueeze(mapper, graph, node):
    """ Construct a PaddleLayer that inserts a dimension into a Tensor.
......
@@ -29,7 +29,6 @@ def prim_Constant(mapper, graph, node):
output = list(node.outputs())[0]
value = output.toIValue()
output_type = output.type()
mapper.attrs[output_name] = value
if isinstance(value, str):
value = string(value)
if str(output_type) == "Tensor":
@@ -45,6 +44,7 @@ def prim_Constant(mapper, graph, node):
if "9223372036854775807" in str(value):
import math
value = int(math.pow(2, 31) - 1)
mapper.attrs[output_name] = value
graph.add_layer(
"prim.constant", inputs={}, outputs=[output_name], value=value)
return [], [output_name]
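
Moving the mapper.attrs assignment below the adjustments means the recorded attribute now holds the post-processed value, e.g. PyTorch's int64 sentinel clamped to int32 max, rather than the raw constant. The clamp itself:

import math

value = 9223372036854775807  # int64 "infinity" often used for open-ended slices
if "9223372036854775807" in str(value):
    value = int(math.pow(2, 31) - 1)
print(value)  # 2147483647, safe as an int32 index
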
......
@@ -25,28 +25,25 @@ class AdaptivePool2dFuser(FuseBase):
def build_pattern(self):
""" 描述需要替换的adaptive pool2d图结构。
adaptive pool2d层模式python实现代码示例:
x72 = [6, 6]
x73 = fluid.layers.shape(x71)
x78 = len(x73)
x80 = x78 <= x79
if x80 :
raise RaiseException(x75)
x83 = []
x85 = x73[x84: x76: x77]
x87 = len(x85)
x88 = [x86, x87]
x89 = min(x88)
for _x91 in range(x89):
x92 = x72[_x91]
x83.append(x92)
x93 = fluid.layers.adaptive_pool2d(input=x71, pool_size=x83, pool_type='avg')
x68 = fluid.layers.shape(input=x60)
x69 = len(x68)
x70 = x69 <= 2
if x70 :
raise RaiseException('Exception')
x73 = []
x74 = x68[-2: 2147483647: 1]
x75 = len(x74)
x76 = [2, x75]
x77 = min(x76)
for _x79 in range(x77):
x80 = [6, 6][_x79]
x73.append(x80)
x81 = fluid.layers.adaptive_pool2d(input=x60, pool_size=x73, pool_type='avg')
"""
def gen_name(id):
return "x" + str(id)
self.pattern.add_layer(
"prim.constant", inputs={}, outputs=[gen_name(0)], value=[6, 6])
self.pattern.add_layer(
"fluid.layers.shape",
inputs={'input': "pool-input-0"},
@@ -54,38 +51,33 @@ class AdaptivePool2dFuser(FuseBase):
self.pattern.add_layer(
"prim.len", inputs={"input": gen_name(1)}, outputs=[gen_name(6)])
self.pattern.add_layer(
"prim.le",
inputs={"x": gen_name(6),
"y": "pool-input-1"},
outputs=[gen_name(8)])
"prim.le", inputs={"x": gen_name(6)}, outputs=[gen_name(8)], y=2)
self.pattern.add_layer("prim.if", {'input': gen_name(8)}, [gen_name(9)])
if_layer = self.pattern.layers[list(self.pattern.layers.keys())[-1]]
pattern_block0 = PaddleGraph(if_layer, graph_type="dygraph")
pattern_block0.add_layer(
"prim.exception",
inputs={"input": "pool-input-6"},
outputs=[gen_name(9)])
if_layer.inputs["input-0"] = "pool-input-6"
inputs={},
outputs=[gen_name(9)],
input="Exception")
if_layer.add_block(pattern_block0)
pattern_block1 = PaddleGraph(if_layer, graph_type="dygraph")
if_layer.add_block(pattern_block1)
self.pattern.add_layer("prim.list", inputs={}, outputs=[gen_name(10)])
self.pattern.add_layer(
"prim.slice",
inputs={
"input": gen_name(1),
"start": "pool-input-2",
"end": "pool-input-3",
"step": "pool-input-4"
},
outputs=[gen_name(12)])
inputs={"input": gen_name(1), },
outputs=[gen_name(12)],
start=-1,
end=100,
step=1)
self.pattern.add_layer(
"prim.len", inputs={"input": gen_name(12)}, outputs=[gen_name(14)])
self.pattern.add_layer(
"prim.list",
inputs={"input0": "pool-input-4",
"input1": gen_name(14)},
outputs=[gen_name(15)])
inputs={"input1": gen_name(14)},
outputs=[gen_name(15)],
input0=2)
self.pattern.add_layer(
"prim.min", inputs={"input": gen_name(15)}, outputs=[gen_name(16)])
self.pattern.add_layer("prim.loop", {'input': gen_name(16)},
@@ -94,16 +86,15 @@ class AdaptivePool2dFuser(FuseBase):
pattern_block = PaddleGraph(loop_layer, graph_type="dygraph")
pattern_block.add_layer(
"prim.getitem",
inputs={"list": gen_name(0),
"index": gen_name(18)},
outputs=[gen_name(19)])
inputs={"index": gen_name(18)},
outputs=[gen_name(19)],
list=[6, 6])
pattern_block.add_layer(
"prim.append",
inputs={"list": gen_name(10),
"index": gen_name(19)},
outputs=[gen_name(20)])
loop_layer.inputs["input-0"] = gen_name(0)
loop_layer.inputs["input-2"] = gen_name(10)
loop_layer.inputs["input-0"] = gen_name(10)
loop_layer.add_block(pattern_block)
pool_attrs = {'pool_type': string("avg")}
self.pattern.add_layer(
@@ -112,15 +103,7 @@ class AdaptivePool2dFuser(FuseBase):
"pool_size": gen_name(10)},
outputs=[gen_name(21)],
**pool_attrs)
self.pattern.build(inputs={
"input-0": "pool-input-0",
"input-1": "pool-input-1",
"input-2": "pool-input-2",
"input-3": "pool-input-3",
"input-4": "pool-input-4",
"input-5": "pool-input-5",
"input-6": "pool-input-6"
})
self.pattern.build(inputs={"input-0": "pool-input-0", })
def insert_new_layer(self, graph, parameters, matches):
parameters = graph.parameters
@@ -131,9 +114,9 @@ class AdaptivePool2dFuser(FuseBase):
def gen_new_layer(self, parameters, matches):
layers_id = list(matches.keys())
layer = matches[layers_id[11]]
pool_size = layer.attrs["list"]
layer = matches[layers_id[0]]
pool_size = layer.attrs["value"]
layer = matches[layers_id[1]]
input_name = layer.inputs["input"]
layer = matches[layers_id[-1]]
output_name = layer.outputs[0]
......
@@ -19,14 +19,10 @@ from x2paddle.optimizer.pass_manager import PassManager
class GraphOptimizer(object):
def __init__(self):
self.passes = [
"constant_fuse_pass",
"batchnorm2d_fuse_pass",
"interpolate_bilinear_fuse_pass",
"fc_fuse_pass",
# "interpolate_bilinear_fuse_pass",
# "fc_fuse_pass",
# "adaptive_pool2d_fuse_pass", "batchnorm2d_fuse_pass",
# "constant_fuse_pass", "reshape_fuse_pass", "dropout_fuse_pass"
"constant_fuse_pass", "batchnorm2d_fuse_pass",
"interpolate_bilinear_fuse_pass", "fc_fuse_pass",
"adaptive_pool2d_fuse_pass", "reshape_fuse_pass",
"dropout_fuse_pass"
]
def optimize(self, graph):
......
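
For context, optimize (elided above) presumably applies each pass in list order via the PassManager registry; a sketch under that assumption (the lookup/apply names are assumed, not confirmed by this diff):

def optimize(self, graph):
    # Sketch only: apply each registered fuse pass in the order listed above.
    for pass_name in self.passes:
        fuse_pass = PassManager.lookup(pass_name)()  # assumed registry API
        fuse_pass.apply(graph)
    print("Optimization done.")
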