diff --git a/x2paddle/convert.py b/x2paddle/convert.py
index 97e072425857bf80feb176a026d963159859d9fa..39a8f7a1fb56d9df34dccc0a12a763e8422a9f42 100644
--- a/x2paddle/convert.py
+++ b/x2paddle/convert.py
@@ -92,7 +92,7 @@ def arg_parser():
         "--input_shapes",
         "-is",
         action='append',
-        default=[],
+        default=None,
         help="define the inputs' shape")
 
     return parser
@@ -207,12 +207,15 @@ def pytorch2paddle(model_path, save_dir, input_shapes):
     graph_opt = GraphOptimizer()
     graph_opt.optimize(mapper.graph)
     print("Model optimized.")
-    real_input_shapes = list()
-    for shape in input_shapes:
-        sp = shape[1:-1].split(",")
-        for i, s in enumerate(sp):
-            sp[i] = int(s)
-        real_input_shapes.append(sp)
+    if input_shapes is not None:
+        real_input_shapes = list()
+        for shape in input_shapes:
+            sp = shape[1:-1].split(",")
+            for i, s in enumerate(sp):
+                sp[i] = int(s)
+            real_input_shapes.append(sp)
+    else:
+        real_input_shapes = None
     mapper.graph.gen_model(save_dir, real_input_shapes)
 
 
diff --git a/x2paddle/core/program.py b/x2paddle/core/program.py
index d30a239b4f288d807b7d7b3034b859040a237833..88b9df330a17f98e87b33cb07204925b0df897c5 100644
--- a/x2paddle/core/program.py
+++ b/x2paddle/core/program.py
@@ -226,7 +226,7 @@ class PaddleGraph(object):
             indent=1)
         f.close()
 
-    def gen_model(self, save_dir, input_shapes):
+    def gen_model(self, save_dir, input_shapes=None):
         if not os.path.exists(save_dir):
             os.makedirs(save_dir)
         if self.graph_type == "static":
@@ -264,8 +264,9 @@ class PaddleGraph(object):
         else:
             self.gen_dygraph_code(save_dir)
             self.dump_dygraph_parameter(save_dir)
-
-# self.dygraph2static(save_dir, input_shapes) #[[None, 3, 224, 224]]
+            if input_shapes is not None:
+                # Export the inference model only when input_shapes is given, e.g. [[None, 3, 224, 224]]
+                self.dygraph2static(save_dir, input_shapes)
 
     def dump_parameter(self, param_name, param, save_dir):
         if not os.path.exists(save_dir):
@@ -318,6 +319,8 @@ class PaddleGraph(object):
 
         update(self.layers)
         self.inputs = list(set(self.inputs))
+        # set() has no stable order; sort so the input order is deterministic.
+        self.inputs.sort()
 
     def get_dygraph_outputs(self):
         for layer_id, layer in self.layers.items():
@@ -389,7 +392,8 @@ class PaddleGraph(object):
 
         for layer_id, layer in self.layers.items():
             if ("paddle.nn" in layer.kernel and "functional" not in layer.kernel
-                ) or layer.kernel == "fluid.dygraph.base.to_variable":
+                ) or layer.kernel == "fluid.dygraph.base.to_variable" or \
+               "paddle.fluid.dygraph" in layer.kernel:
                 line = "{}".format(
                     layer.outputs[0]
                 ) if layer.kernel == "fluid.dygraph.base.to_variable" and not layer.attrs[
diff --git a/x2paddle/op_mapper/pytorch2paddle/aten.py b/x2paddle/op_mapper/pytorch2paddle/aten.py
index b23ef3675fdaf8c1010192b23257ed6ace3b49b5..f5289fd5ac2864db8ca96a4c438737209df5ecb4 100644
--- a/x2paddle/op_mapper/pytorch2paddle/aten.py
+++ b/x2paddle/op_mapper/pytorch2paddle/aten.py
@@ -765,6 +765,51 @@ def aten_cos(mapper, graph, node):
     return current_inputs, current_outputs
 
 
+def aten_cumsum(mapper, graph, node):
+    """ Build the PaddleLayer that computes a cumulative sum.
+
+    TorchScript example:
+        %56 : Tensor = aten::cumsum(%mask.1, %46, %48)
+    Arguments:
+        %56 (Tensor): output, the accumulated result.
+        %mask.1 (Tensor): input, the Tensor to accumulate.
+        %46 (int): the axis to accumulate along.
+        %48 (int/None): the dtype of the Tensor.
+    """
+    output_name = mapper._get_outputs_name(node)[0]
+    layer_outputs = [output_name]
+    layer_inputs = {}
+    layer_attrs = {}
+    inputs_name, inputs_node = mapper._get_inputs_name(node)
+    # List of the outputs of the current node
+    current_outputs = [output_name]
+    # Input 0, i.e. %mask.1
+    mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs)
+    layer_inputs["x"] = inputs_name[0]
+    # List of the inputs of the current node
+    current_inputs = list(layer_inputs.values())
+    # Input 1, i.e. %46
+    if inputs_name[1] in mapper.attrs:
+        layer_attrs["axis"] = mapper.attrs[inputs_name[1]]
+    else:
+        mapper._check_input(graph, inputs_node[1], inputs_name[1],
+                            current_outputs)
+        layer_inputs["axis"] = inputs_name[1]
+        current_inputs.append(inputs_name[1])
+    # Input 2, i.e. %48, the dtype
+    if mapper.attrs[inputs_name[2]] is None:
+        layer_attrs["dtype"] = None
+    else:
+        layer_attrs["dtype"] = dtype_dict[mapper.attrs[inputs_name[2]]]
+
+    graph.add_layer(
+        "paddle.cumsum",
+        inputs=layer_inputs,
+        outputs=layer_outputs,
+        **layer_attrs)
+    return current_inputs, current_outputs
+
+
 def aten_detach(mapper, graph, node):
     """ 构造返回一个新的Tensor,从当前计算图中分离下来的,但是仍指向原变量的存放位置的PaddleLayer。
 
@@ -990,6 +1035,8 @@ def aten_embedding(mapper, graph, node):
     # 处理输入0,即%57
     weights = mapper.pytorch_params[inputs_name[0]]
     mapper.paddle_params[embedding_name + ".weight"] = weights
+    # layer_attrs["num_embeddings"] = weights.shape[0]
+    # layer_attrs["embedding_dim"] = weights.shape[1]
     layer_attrs["size"] = weights.shape
     # 处理输入1,即%input_ids.1
     mapper._check_input(graph, inputs_node[1], inputs_name[1], current_outputs)
@@ -999,10 +1046,11 @@ def aten_embedding(mapper, graph, node):
     # 处理输入2,即%45
     layer_attrs["padding_idx"] = mapper.attrs[inputs_name[2]]
     # 处理输入4,即%46
+    # layer_attrs["sparse"] = mapper.attrs[inputs_name[4]]
     layer_attrs["is_sparse"] = mapper.attrs[inputs_name[4]]
 
     graph.add_layer(
-        "paddle.nn.Embedding",
+        "paddle.fluid.dygraph.Embedding",
         inputs=layer_inputs,
         outputs=layer_outputs,
         **layer_attrs)
@@ -2885,6 +2933,44 @@ def aten_softplus(mapper, graph, node):
     return current_inputs, current_outputs
 
 
+def aten_stack(mapper, graph, node):
+    """ Build the PaddleLayer that stacks Tensors.
+
+    TorchScript example:
+        %x.222 : Tensor = aten::stack(%32, %7)
+    Arguments:
+        %x.222 (Tensor): output, the stacked result.
+        %32 (list): the Tensors to be stacked.
+        %7 (int): the axis to stack along.
+    """
+    output_name = mapper._get_outputs_name(node)[0]
+    layer_outputs = [output_name]
+    layer_inputs = {}
+    layer_attrs = {}
+    inputs_name, inputs_node = mapper._get_inputs_name(node)
+    # List of the outputs of the current node
+    current_outputs = [output_name]
+    # Input 0, i.e. %32
+    mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs)
+    layer_inputs["x"] = inputs_name[0]
+    # List of the inputs of the current node
+    current_inputs = list(layer_inputs.values())
+    # Input 1, i.e. %7
+    if inputs_name[1] in mapper.attrs:
+        layer_attrs["axis"] = mapper.attrs[inputs_name[1]]
+    else:
+        mapper._check_input(graph, inputs_node[1], inputs_name[1],
+                            current_outputs)
+        layer_inputs["axis"] = inputs_name[1]
+        current_inputs.append(inputs_name[1])
+    graph.add_layer(
+        "paddle.stack",
+        inputs=layer_inputs,
+        outputs=layer_outputs,
+        **layer_attrs)
+    return current_inputs, current_outputs
+
+
 def aten_sub(mapper, graph, node):
     """ 构造数值相减的PaddleLayer。
 
@@ -3017,44 +3103,6 @@ def aten_split(mapper, graph, node):
     return current_inputs, current_outputs
 
 
-def aten_stack(mapper, graph, node):
-    """ 构造堆叠Tensor的PaddleLayer。
-
-    TorchScript示例:
-        %x.222 : Tensor = aten::stack(%32, %7)
-    参数含义:
-        %x.222 (Tensor): 输出,堆叠后的结果。
-        %i.12 (Tensor): 需要堆叠的Tensor组成的Tensor。
-        %7 (int): 堆叠的轴。
-    """
-    output_name = mapper._get_outputs_name(node)[0]
-    layer_outputs = [output_name]
-    layer_inputs = {}
-    layer_attrs = {}
-    inputs_name, inputs_node = mapper._get_inputs_name(node)
-    # 获取当前节点输出的list
-    current_outputs = [output_name]
-    # 处理输入0,即%13
-    mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs)
-    layer_inputs["x"] = inputs_name[0]
-    # 获取当前节点输入的list
-    current_inputs = list(layer_inputs.values())
-    # 处理输入1,即%12
-    if inputs_name[1] in mapper.attrs:
-        layer_attrs["axis"] = mapper.attrs[inputs_name[1]]
-    else:
-        mapper._check_input(graph, inputs_node[1], inputs_name[1],
-                            current_outputs)
-        layer_inputs["axis"] = inputs_name[1]
-        current_inputs.append(inputs_name[1])
-    graph.add_layer(
-        "paddle.stack",
-        inputs=layer_inputs,
-        outputs=layer_outputs,
-        **layer_attrs)
-    return current_inputs, current_outputs
-
-
 def aten_transpose(mapper, graph, node):
     """ 构造矩阵转置的PaddleLayer。
 
@@ -3152,14 +3200,16 @@ def aten_to(mapper, graph, node):
     inputs_name, inputs_node = mapper._get_inputs_name(node)
     # 获取当前节点输出的list
     current_outputs = [output_name]
-    assert len(inputs_name) == 5, "Paddle only support converting the dtype!"
     # 处理输入0,即%13
     mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs)
     layer_inputs["x"] = inputs_name[0]
     # 获取当前节点输入的list
     current_inputs = list(layer_inputs.values())
     # 处理输入1,即%12
-    layer_attrs["dtype"] = dtype_dict[mapper.attrs[inputs_name[1]]]
+    if len(inputs_name) == 6:
+        layer_attrs["dtype"] = dtype_dict[mapper.attrs[inputs_name[2]]]
+    else:
+        layer_attrs["dtype"] = dtype_dict[mapper.attrs[inputs_name[1]]]
 
     graph.add_layer(
         "fluid.layers.cast",
@@ -3169,6 +3219,41 @@ def aten_to(mapper, graph, node):
     return current_inputs, current_outputs
 
 
+def aten_type_as(mapper, graph, node):
+    """ Build the PaddleLayer that casts a Tensor to the type of another Tensor.
+
+    TorchScript example:
+        %57 : Tensor = aten::type_as(%56, %mask.1)
+    Arguments:
+        %57 (Tensor): output, the Tensor after the type change.
+        %56 (Tensor): the Tensor whose type is changed.
+        %mask.1 (Tensor): the Tensor whose type is used as the target type.
+    """
+    output_name = mapper._get_outputs_name(node)[0]
+    layer_outputs = [output_name]
+    layer_inputs = {}
+    inputs_name, inputs_node = mapper._get_inputs_name(node)
+    # List of the outputs of the current node
+    current_outputs = [output_name]
+    # Input 0, i.e. %56
+    mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs)
+    layer_inputs["x"] = inputs_name[0]
+    # List of the inputs of the current node
+    current_inputs = list(layer_inputs.values())
+    # Input 1, i.e. %mask.1
+    mapper._check_input(graph, inputs_node[1], inputs_name[1], current_outputs)
+    graph.add_layer(
+        "prim.type",
+        inputs={"input": inputs_name[1]},
+        outputs=[inputs_name[1] + "_type"])
+    layer_inputs["dtype"] = inputs_name[1] + "_type"
+    current_inputs.append(inputs_name[1])
+
+    graph.add_layer(
+        "fluid.layers.cast", inputs=layer_inputs, outputs=layer_outputs)
+    return current_inputs, current_outputs
+
+
 def aten_unsqueeze(mapper, graph, node):
     """ 构造插入维度的PaddleLayer。
 
diff --git a/x2paddle/op_mapper/pytorch2paddle/prim.py b/x2paddle/op_mapper/pytorch2paddle/prim.py
index 4a56fe69350c308484610c79d5821aba3528db6f..801a6f923d7449f50ce8a7d63361f676c457ef73 100644
--- a/x2paddle/op_mapper/pytorch2paddle/prim.py
+++ b/x2paddle/op_mapper/pytorch2paddle/prim.py
@@ -29,7 +29,6 @@ def prim_Constant(mapper, graph, node):
     output = list(node.outputs())[0]
     value = output.toIValue()
     output_type = output.type()
-    mapper.attrs[output_name] = value
     if isinstance(value, str):
         value = string(value)
     if str(output_type) == "Tensor":
@@ -45,6 +44,7 @@ def prim_Constant(mapper, graph, node):
     if "9223372036854775807" in str(value):
         import math
         value = int(math.pow(2, 31) - 1)
+    mapper.attrs[output_name] = value
     graph.add_layer(
         "prim.constant", inputs={}, outputs=[output_name], value=value)
     return [], [output_name]
diff --git a/x2paddle/optimizer/fusion/adaptive_pool2d_fuser.py b/x2paddle/optimizer/fusion/adaptive_pool2d_fuser.py
index 39ace52655611362b76bc6adb8da998a657fe852..4c07848381c63e68dc9c0ef7fad425d363e73a01 100644
--- a/x2paddle/optimizer/fusion/adaptive_pool2d_fuser.py
+++ b/x2paddle/optimizer/fusion/adaptive_pool2d_fuser.py
@@ -25,28 +25,25 @@ class AdaptivePool2dFuser(FuseBase):
     def build_pattern(self):
         """ 描述需要替换的adaptive pool2d图结构。
         adaptive pool2d层模式python实现代码示例:
-            x72 = [6, 6]
-            x73 = fluid.layers.shape(x71)
-            x78 = len(x73)
-            x80 = x78 <= x79
-            if x80 :
-                raise RaiseException(x75)
-            x83 = []
-            x85 = x73[x84: x76: x77]
-            x87 = len(x85)
-            x88 = [x86, x87]
-            x89 = min(x88)
-            for _x91 in range(x89):
-                x92 = x72[_x91]
-                x83.append(x92)
-            x93 = fluid.layers.adaptive_pool2d(input=x71, pool_size=x83, pool_type='avg')
+            x68 = fluid.layers.shape(input=x60)
+            x69 = len(x68)
+            x70 = x69 <= 2
+            if x70 :
+                raise RaiseException('Exception')
+            x73 = []
+            x74 = x68[-2: 2147483647: 1]
+            x75 = len(x74)
+            x76 = [2, x75]
+            x77 = min(x76)
+            for _x79 in range(x77):
+                x80 = [6, 6][_x79]
+                x73.append(x80)
+            x81 = fluid.layers.adaptive_pool2d(input=x60, pool_size=x73, pool_type='avg')
         """
 
         def gen_name(id):
             return "x" + str(id)
 
-        self.pattern.add_layer(
-            "prim.constant", inputs={}, outputs=[gen_name(0)], value=[6, 6])
         self.pattern.add_layer(
             "fluid.layers.shape",
             inputs={'input': "pool-input-0"},
@@ -54,38 +51,33 @@ class AdaptivePool2dFuser(FuseBase):
         self.pattern.add_layer(
             "prim.len", inputs={"input": gen_name(1)}, outputs=[gen_name(6)])
         self.pattern.add_layer(
-            "prim.le",
-            inputs={"x": gen_name(6),
-                    "y": "pool-input-1"},
-            outputs=[gen_name(8)])
+            "prim.le", inputs={"x": gen_name(6)}, outputs=[gen_name(8)], y=2)
         self.pattern.add_layer("prim.if", {'input': gen_name(8)}, [gen_name(9)])
         if_layer = self.pattern.layers[list(self.pattern.layers.keys())[-1]]
         pattern_block0 = PaddleGraph(if_layer, graph_type="dygraph")
         pattern_block0.add_layer(
             "prim.exception",
-            inputs={"input": "pool-input-6"},
-            outputs=[gen_name(9)])
-        if_layer.inputs["input-0"] = "pool-input-6"
+            inputs={},
+            outputs=[gen_name(9)],
+            input="Exception")
         if_layer.add_block(pattern_block0)
         pattern_block1 = PaddleGraph(if_layer, graph_type="dygraph")
         if_layer.add_block(pattern_block1)
         self.pattern.add_layer("prim.list", inputs={}, outputs=[gen_name(10)])
         self.pattern.add_layer(
             "prim.slice",
-            inputs={
-                "input": gen_name(1),
-                "start": "pool-input-2",
-                "end": "pool-input-3",
-                "step": "pool-input-4"
-            },
-            outputs=[gen_name(12)])
+            inputs={"input": gen_name(1), },
+            outputs=[gen_name(12)],
+            start=-1,
+            end=100,
+            step=1)
         self.pattern.add_layer(
             "prim.len", inputs={"input": gen_name(12)}, outputs=[gen_name(14)])
         self.pattern.add_layer(
             "prim.list",
-            inputs={"input0": "pool-input-4",
-                    "input1": gen_name(14)},
-            outputs=[gen_name(15)])
+            inputs={"input1": gen_name(14)},
+            outputs=[gen_name(15)],
+            input0=2)
         self.pattern.add_layer(
             "prim.min", inputs={"input": gen_name(15)}, outputs=[gen_name(16)])
         self.pattern.add_layer("prim.loop", {'input': gen_name(16)},
@@ -94,16 +86,15 @@ class AdaptivePool2dFuser(FuseBase):
         pattern_block = PaddleGraph(loop_layer, graph_type="dygraph")
         pattern_block.add_layer(
             "prim.getitem",
-            inputs={"list": gen_name(0),
-                    "index": gen_name(18)},
-            outputs=[gen_name(19)])
+            inputs={"index": gen_name(18)},
+            outputs=[gen_name(19)],
+            list=[6, 6])
         pattern_block.add_layer(
             "prim.append",
             inputs={"list": gen_name(10),
                     "index": gen_name(19)},
             outputs=[gen_name(20)])
-        loop_layer.inputs["input-0"] = gen_name(0)
-        loop_layer.inputs["input-2"] = gen_name(10)
+        loop_layer.inputs["input-0"] = gen_name(10)
         loop_layer.add_block(pattern_block)
         pool_attrs = {'pool_type': string("avg")}
         self.pattern.add_layer(
@@ -112,15 +103,7 @@ class AdaptivePool2dFuser(FuseBase):
                     "pool_size": gen_name(10)},
             outputs=[gen_name(21)],
             **pool_attrs)
-        self.pattern.build(inputs={
-            "input-0": "pool-input-0",
-            "input-1": "pool-input-1",
-            "input-2": "pool-input-2",
-            "input-3": "pool-input-3",
-            "input-4": "pool-input-4",
-            "input-5": "pool-input-5",
-            "input-6": "pool-input-6"
-        })
+        self.pattern.build(inputs={"input-0": "pool-input-0", })
 
     def insert_new_layer(self, graph, parameters, matches):
         parameters = graph.parameters
@@ -131,9 +114,9 @@ class AdaptivePool2dFuser(FuseBase):
 
     def gen_new_layer(self, parameters, matches):
         layers_id = list(matches.keys())
+        layer = matches[layers_id[11]]
+        pool_size = layer.attrs["list"]
         layer = matches[layers_id[0]]
-        pool_size = layer.attrs["value"]
-        layer = matches[layers_id[1]]
         input_name = layer.inputs["input"]
         layer = matches[layers_id[-1]]
         output_name = layer.outputs[0]
diff --git a/x2paddle/optimizer/optimizer.py b/x2paddle/optimizer/optimizer.py
index ed427a213aedff9343cc6767b3923dc0948cf439..c507047bd6f7892eff2f9ae5bcd0a98e05c32e2f 100644
--- a/x2paddle/optimizer/optimizer.py
+++ b/x2paddle/optimizer/optimizer.py
@@ -19,14 +19,10 @@ from x2paddle.optimizer.pass_manager import PassManager
 class GraphOptimizer(object):
     def __init__(self):
         self.passes = [
-            "constant_fuse_pass",
-            "batchnorm2d_fuse_pass",
-            "interpolate_bilinear_fuse_pass",
-            "fc_fuse_pass",
-            # "interpolate_bilinear_fuse_pass",
-            # "fc_fuse_pass",
-            # "adaptive_pool2d_fuse_pass", "batchnorm2d_fuse_pass",
-            # "constant_fuse_pass", "reshape_fuse_pass", "dropout_fuse_pass"
+            "constant_fuse_pass", "batchnorm2d_fuse_pass",
+            "interpolate_bilinear_fuse_pass", "fc_fuse_pass",
+            "adaptive_pool2d_fuse_pass", "reshape_fuse_pass",
+            "dropout_fuse_pass"
         ]
 
     def optimize(self, graph):