diff --git a/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py b/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py index c63603eb13054d26b757bb070b2ad2dabe6f2794..b629c68b06e9aec04f23939fd1b61e6d58352682 100644 --- a/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py +++ b/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py @@ -14,6 +14,7 @@ # limitations under the License. import copy +import numpy as np from x2paddle.core.util import * from x2paddle.core.program import PaddleGraph @@ -926,7 +927,7 @@ def aten_conv2d(mapper, graph, node): %27 (Tensor): bias。 %28 (int): 步长大小。 %29 (int): 填充大小。 - %30 (int): 膨胀系数大小。 + %30 (int): 空洞大小。 %26 (int): 卷积的组数。 """ scope_name = mapper.normalize_scope_name(node) @@ -988,7 +989,7 @@ def aten__convolution(mapper, graph, node): %10 (Tensor): bias。 %19 (list): 步长大小。 %20 (list): 填充大小。 - %21 (list): 膨胀系数大小。 + %21 (list): 空洞大小。 %13 (bool): 是否进行转置卷积。 %22 (list): 输出形状上一侧额外添加的大小。 %12 (int): 卷积的组数。 @@ -1009,9 +1010,12 @@ def aten__convolution(mapper, graph, node): current_inputs = list(layer_inputs.values()) # 处理输入1,即%18 weights = mapper.pytorch_params[inputs_name[1]] - mapper.paddle_params[op_name + ".weight"] = weights - layer_attrs["out_channels"] = weights.shape[0] - layer_attrs["kernel_size"] = weights.shape[2:] + mapper.paddle_params[op_name + ".weight"] = weights #np.swapaxes(weights, 0, 1) + if mapper.attrs[inputs_name[6]]: + layer_attrs["out_channels"] = weights.shape[1] + else: + layer_attrs["out_channels"] = weights.shape[0] + layer_attrs["kernel_size"] = weights.shape[2:] # 处理输入2,即%10 if inputs_name[2] in mapper.pytorch_params: bias = mapper.pytorch_params[inputs_name[2]] @@ -1033,8 +1037,12 @@ def aten__convolution(mapper, graph, node): layer_attrs["output_padding"] = mapper.attrs[inputs_name[7]] # 处理输入8,即%12 layer_attrs["groups"] = mapper.attrs[inputs_name[8]] - layer_attrs['in_channels'] = weights.shape[1] * mapper.attrs[inputs_name[ - 8]] + if mapper.attrs[inputs_name[6]]: + layer_attrs['in_channels'] = weights.shape[0] * 
def aten_conv_transpose2d(mapper, graph, node):
    """Build the PaddleLayer for ``aten::conv_transpose2d``.

    TorchScript example:
        %input.10 : Tensor = aten::conv_transpose2d(%input.8, %18, %10, %19, %20, %21, %13, %22)
    Argument meaning:
        %input.10 (Tensor): output, the result of the transposed convolution.
        %input.8 (Tensor): feature map to be convolved.
        %18 (Tensor): weights.
        %10 (Tensor): bias.
        %19 (list): stride.
        %20 (list): padding.
        %21 (int/tuple): output padding (extra size added to one side of the
            output shape).
        %13 (int): number of groups of the 2-D convolution.
        %22 (int/tuple): dilation.

    Returns:
        tuple: ``(current_inputs, current_outputs)`` - the list of graph input
        names consumed by this node and the list of output names it produces.
    """
    scope_name = mapper.normalize_scope_name(node)
    op_name = name_generator("conv2d", mapper.nn_name2id)
    output_name = mapper._get_outputs_name(node)[0]
    layer_outputs = [op_name, output_name]
    layer_inputs = {}
    layer_attrs = {}
    inputs_name, inputs_node = mapper._get_inputs_name(node)
    # Outputs produced by the current node.
    current_outputs = [output_name]

    # Input 0 (%input.8): the feature map.
    mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs,
                        scope_name)
    layer_inputs["input"] = inputs_name[0]
    # Inputs consumed by the current node.
    current_inputs = list(layer_inputs.values())

    # Input 6 (%13) is read early because the channel counts depend on it.
    groups = mapper.attrs[inputs_name[6]]

    # Input 1 (%18): weights. A transposed-convolution weight is laid out as
    # (in_channels, out_channels // groups, kH, kW), so the group factor
    # belongs on the output-channel axis, not on the input-channel axis.
    weights = mapper.pytorch_params[inputs_name[1]]
    mapper.paddle_params[op_name + ".weight"] = weights
    layer_attrs["out_channels"] = weights.shape[1] * groups
    layer_attrs["kernel_size"] = weights.shape[2:]

    # Input 2 (%10): bias. Absent or None bias disables the bias parameter.
    bias = None
    if inputs_name[2] in mapper.pytorch_params:
        bias = mapper.pytorch_params[inputs_name[2]]
    if bias is not None:
        mapper.paddle_params[op_name + ".bias"] = bias
    else:
        layer_attrs["bias_attr"] = False

    # Input 3 (%19): stride.
    layer_attrs["stride"] = mapper.attrs[inputs_name[3]]
    # Input 4 (%20): padding.
    layer_attrs["padding"] = mapper.attrs[inputs_name[4]]
    # Input 5 (%21): output padding.
    layer_attrs["output_padding"] = mapper.attrs[inputs_name[5]]
    # Input 6 (%13): groups.
    layer_attrs["groups"] = groups
    # Input 7 (%22): dilation.
    layer_attrs["dilation"] = mapper.attrs[inputs_name[7]]
    layer_attrs["in_channels"] = weights.shape[0]

    graph.add_layer(
        "paddle.nn.Conv2DTranspose",
        inputs=layer_inputs,
        outputs=layer_outputs,
        scope_name=scope_name,
        **layer_attrs)
    return current_inputs, current_outputs
class DygraphInterpolateBilinearFuser(FuseBase):
    """Fuser whose pattern describes a bilinear-interpolation subgraph.

    Only ``__init__`` and ``build_pattern`` are reproduced here; any further
    methods of the class lie outside the visible part of the file.
    """

    def __init__(self):
        super(DygraphInterpolateBilinearFuser, self).__init__(graph_type="dygraph")
        # build_pattern() appends every pattern graph it builds to this list,
        # so the attribute name must match exactly.
        self.patterns = []

    def build_pattern(self):
        """Describe the bilinear-interpolation graph structure to be replaced.

        Builds one dygraph ``PaddleGraph`` pattern and appends it to
        ``self.patterns``. The layer kinds, input/output names and their order
        define the pattern, so they must not be reordered.
        """

        def gen_name(idx):
            # Pattern-local variable name, e.g. 9 -> "x9", 12.1 -> "x12.1".
            return "x" + str(idx)

        pattern = PaddleGraph(graph_type="dygraph")
        # x9 = len(shape(input-0)); x10 = x9 - 2; x11 = []
        pattern.add_layer(
            "prim.shape",
            inputs={"input": "interpolate-input-0"},
            outputs=[gen_name(9)])
        pattern.add_layer(
            "prim.len",
            inputs={"input": gen_name(9)},
            outputs=[gen_name(9)])
        pattern.add_layer(
            "prim.sub",
            inputs={"x": gen_name(9)},
            outputs=[gen_name(10)],
            y=2)
        pattern.add_layer(
            "prim.list", inputs={}, outputs=[gen_name(11)])
        # Loop driven by x10 whose body appends to the list x11.
        pattern.add_layer(
            "prim.loop",
            inputs={"input": gen_name(10)},
            outputs=[gen_name(12.1), gen_name(12.2)])
        # The layer just added is the last entry of pattern.layers.
        loop_layer = pattern.layers[list(pattern.layers.keys())[-1]]
        pattern_block = PaddleGraph(loop_layer, graph_type="dygraph")
        pattern_block.add_layer(
            "prim.append",
            inputs={"list": gen_name(11)},
            outputs=[],
            element=None)
        loop_layer.inputs["input-0"] = gen_name(11)
        loop_layer.add_block(pattern_block)
        pattern.add_layer(
            "prim.tuple",
            inputs={
                "input0": "interpolate-input-0",
                "input1": "interpolate-input-4",
            },
            outputs=[gen_name(12)],
            input2=None,
            input3=None)

        # x10.1 = (input-2 == 3), used as the condition of the outer if.
        pattern.add_layer(
            "prim.eq",
            inputs={"x": "interpolate-input-2"},
            outputs=[gen_name(10.1)],
            y=3)

        pattern.add_layer(
            "prim.if",
            inputs={"input": gen_name(10.1)},
            outputs=[gen_name(14)])
        if_layer1 = pattern.layers[list(pattern.layers.keys())[-1]]
        # Outer if, first block: exception layer, then an empty assignment.
        pattern_block = PaddleGraph(parent_layer=if_layer1, graph_type="dygraph")
        pattern_block.add_layer(
            "prim.exception",
            inputs={},
            outputs=[gen_name(15)],
            input="Exception")
        pattern_block.add_layer(
            "prim.equal", inputs={}, outputs=[gen_name(14)], input=None)
        if_layer1.add_block(pattern_block)
        # Outer if, second block: x19 = (len(shape(input-0)) == 4).
        pattern_block = PaddleGraph(parent_layer=if_layer1, graph_type="dygraph")
        pattern_block.add_layer(
            "prim.shape",
            inputs={"input": "interpolate-input-0"},
            outputs=[gen_name(18)])
        pattern_block.add_layer(
            "prim.len",
            inputs={"input": gen_name(18)},
            outputs=[gen_name(18)])
        pattern_block.add_layer(
            "prim.eq",
            inputs={"x": gen_name(18)},
            outputs=[gen_name(19)],
            y=4)

        pattern_block.add_layer(
            "prim.if",
            inputs={"input": gen_name(19)},
            outputs=[gen_name(20)])
        if_layer2 = pattern_block.layers[list(pattern_block.layers.keys())[-1]]
        # Middle if, first block: read x11[0] and x11[1], convert input-3 to a
        # list when it is a paddle.fluid.Variable, then interpolate.
        pattern_block_block = PaddleGraph(parent_layer=if_layer2, graph_type="dygraph")
        pattern_block_block.add_layer(
            "prim.getitem",
            inputs={"list": gen_name(11)},
            outputs=[gen_name(21)],
            element=0)
        pattern_block_block.add_layer(
            "prim.getitem",
            inputs={"list": gen_name(11)},
            outputs=[gen_name(22)],
            element=1)
        pattern_block_block.add_layer(
            "prim.isinstance",
            inputs={"input": "interpolate-input-3"},
            outputs=["interpolate-input-0_isinstance"],
            cls="paddle.fluid.Variable")
        pattern_block_block.add_layer(
            "prim.if", {"input": "interpolate-input-0_isinstance"},
            outputs=["interpolate-input-0_if1"])
        if_layer_isinstance = pattern_block_block.layers[list(
            pattern_block_block.layers.keys())[-1]]
        pattern_block_block_block = PaddleGraph(
            if_layer_isinstance, graph_type="dygraph")
        pattern_block_block_block.add_layer(
            "prim.var2list",
            inputs={"input": "interpolate-input-3"},
            outputs=["interpolate-input-3"])
        if_layer_isinstance.add_block(pattern_block_block_block)
        # Second block of the isinstance-if is intentionally empty.
        pattern_block_block_block = PaddleGraph(
            if_layer_isinstance, graph_type="dygraph")
        if_layer_isinstance.add_block(pattern_block_block_block)
        if_layer_isinstance.inputs["input-0"] = "interpolate-input-3"
        pattern_block_block.add_layer(
            "paddle.nn.functional.interpolate",
            inputs={
                "input": "interpolate-input-0",
                "size": "interpolate-input-3",
                "scale_factor": gen_name(21)
            },
            outputs=[gen_name(23)])
        pattern_block_block.add_layer(
            "prim.equal",
            inputs={"input": gen_name(23)},
            outputs=[gen_name(20)])
        if_layer2.add_block(pattern_block_block)
        # Middle if, second block: x25 = (len(shape(input-0)) == 5), followed
        # by an inner if whose two blocks are both exception layers.
        pattern_block_block = PaddleGraph(if_layer2, graph_type="dygraph")
        pattern_block_block.add_layer(
            "prim.shape",
            inputs={"input": "interpolate-input-0"},
            outputs=[gen_name(24)])
        pattern_block_block.add_layer(
            "prim.len",
            inputs={"input": gen_name(24)},
            outputs=[gen_name(24)])
        pattern_block_block.add_layer(
            "prim.eq",
            inputs={"x": gen_name(24)},
            outputs=[gen_name(25)],
            y=5)
        pattern_block_block.add_layer(
            "prim.if",
            inputs={"input": gen_name(25)},
            outputs=[gen_name(26)])
        if_layer3 = pattern_block_block.layers[list(
            pattern_block_block.layers.keys())[-1]]
        pattern_block_block_block = PaddleGraph(
            parent_layer=if_layer3, graph_type="dygraph")
        pattern_block_block_block.add_layer(
            "prim.exception",
            inputs={},
            outputs=[gen_name(27)],
            input="Exception")
        if_layer3.add_block(pattern_block_block_block)
        pattern_block_block_block = PaddleGraph(
            parent_layer=if_layer3, graph_type="dygraph")
        pattern_block_block_block.add_layer(
            "prim.exception",
            inputs={},
            outputs=[gen_name(28)],
            input="Exception")
        if_layer3.add_block(pattern_block_block_block)
        pattern_block_block.add_layer(
            "prim.equal", inputs={}, outputs=[gen_name(20)], input=None)
        if_layer2.add_block(pattern_block_block)
        if_layer2.inputs.update({
            "input-0": "interpolate-input-0",
            "input-1": "interpolate-input-3",
            "input-2": "interpolate-input-3",
            "input-3": gen_name(11),
            "input-5": gen_name(11),
        })
        pattern_block.add_layer(
            "prim.equal",
            inputs={"input": gen_name(20)},
            outputs=[gen_name(14)])
        if_layer1.add_block(pattern_block)
        if_layer1.inputs.update({
            'input-2': 'interpolate-input-0',
            'input-4': gen_name(11),
            'input-6': gen_name(11),
            'input-8': 'interpolate-input-0',
            'input-9': 'interpolate-input-3',
            'input-10': 'interpolate-input-0'
        })
        pattern.build(inputs={
            "input-0": "interpolate-input-0",
            "input-1": "interpolate-input-1",
            "input-2": "interpolate-input-2",
            "input-3": "interpolate-input-3",
            "input-4": "interpolate-input-4"
        })
        self.patterns.append(pattern)