diff --git a/x2paddle/op_mapper/pytorch2paddle/aten.py b/x2paddle/op_mapper/pytorch2paddle/aten.py index e03910f7c4997cafc6fbdb5827b3433d3ff18df4..e00c858cb9bf183f2774b143ae2e15d7c6eba7e6 100644 --- a/x2paddle/op_mapper/pytorch2paddle/aten.py +++ b/x2paddle/op_mapper/pytorch2paddle/aten.py @@ -13,6 +13,7 @@ # limitations under the License. from x2paddle.core.util import * +from x2paddle.core.program import PaddleGraph dtype_dict = { 0: string("uint8"), @@ -833,7 +834,8 @@ def aten_dim(mapper, graph, node): # 获取当前节点输入的list current_inputs = list(layer_inputs.values()) - graph.add_layer("prim.shape", inputs=layer_inputs, outputs=layer_outputs) + graph.add_layer( + "fluid.layers.shape", inputs=layer_inputs, outputs=layer_outputs) graph.add_layer( "prim.len", inputs={"input": output_name}, outputs=layer_outputs) return current_inputs, current_outputs @@ -1027,12 +1029,15 @@ def aten_eq(mapper, graph, node): # 处理输入0,即%124 mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs) layer_inputs["x"] = inputs_name[0] + x_value = list(node.inputs())[0] + x_type = x_value.type() # 处理输入1,即%123 mapper._check_input(graph, inputs_node[1], inputs_name[1], current_outputs) layer_inputs["y"] = inputs_name[1] + y_value = list(node.inputs())[1] + y_type = y_value.type() # 获取当前节点输入的list current_inputs = list(layer_inputs.values()) - graph.add_layer("prim.eq", inputs=layer_inputs, outputs=layer_outputs) return current_inputs, current_outputs @@ -1064,13 +1069,14 @@ def aten_exp(mapper, graph, node): def aten_expand(mapper, graph, node): - """ 构造复制维度的PaddleLayer。 + """ 构造对某维度进行广播的PaddleLayer。 TorchScript示例: %1889 : Tensor = aten::expand(%1875, %1888, %1567) 参数含义: - %1889 (Tensor): 复制后的结果。 - %1875 (Tensor): 需要复制的Tensor。 + %1889 (Tensor): 广播后的结果。 + %1875 (Tensor): 需要广播的Tensor。 + %1888 (int): 广播的维度。 %1567 (bool): 未使用。 """ output_name = mapper._get_outputs_name(node)[0] @@ -1084,13 +1090,54 @@ def aten_expand(mapper, graph, node): layer_inputs["x"] = inputs_name[0] # 处理输入1,即%1888 mapper._check_input(graph, inputs_node[1], inputs_name[1], current_outputs) + + graph.add_layer( + "prim.type", + inputs={"input": inputs_name[0]}, + outputs=[inputs_name[0] + "_type"]) + graph.add_layer( + "prim.str", + inputs={"input": inputs_name[0] + "_type"}, + outputs=[inputs_name[0] + "_type"]) + graph.add_layer( + "prim.eq", + inputs={"x": inputs_name[0] + "_type"}, + outputs=[inputs_name[0] + "_cond"], + y=string("VarType.BOOL")) graph.add_layer( + "prim.if", {'input': inputs_name[0] + "_cond"}, + outputs=[inputs_name[0] + "_if1"]) + if_layer = graph.layers[list(graph.layers.keys())[-1]] + block = PaddleGraph(if_layer, graph_type="dygraph") + block.add_layer( + "fluid.layers.cast", + inputs={"x": inputs_name[0]}, + outputs=[inputs_name[0]], + dtype=string("int64")) + block.add_layer( "fluid.layers.create_global_var", inputs={"shape": inputs_name[1]}, outputs=[inputs_name[1] + "_var"], value=1.0, dtype=string("int64"), persistable=True) + if_layer.add_block(block) + block = PaddleGraph(if_layer, graph_type="dygraph") + block.add_layer( + "prim.type", + inputs={"input": inputs_name[0]}, + outputs=[inputs_name[0] + "_type"]) + block.add_layer( + "fluid.layers.create_global_var", + inputs={"shape": inputs_name[1]}, + outputs=[inputs_name[1] + "_var"], + value=1.0, + dtype=inputs_name[0] + "_type", + persistable=True) + if_layer.add_block(block) + if_layer.inputs["input-0"] = inputs_name[0] + if_layer.inputs["input-1"] = inputs_name[1] + layer_inputs["target_tensor"] = inputs_name[1] + "_var" 
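+    # the "_var" tensor materializes the requested shape, giving the
+    # broadcast a concrete target; it is registered as a node output below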
current_outputs.append(inputs_name[1] + "_var") # 获取当前节点输入的list @@ -1102,6 +1149,82 @@ def aten_expand(mapper, graph, node): return current_inputs, current_outputs +def aten_expand_as(mapper, graph, node): + """ 构造广播的PaddleLayer。 + + TorchScript示例: + %1889 : Tensor = aten::expand_as(%1875, %1888) + 参数含义: + %1889 (Tensor): 广播后的结果。 + %1875 (Tensor): 需要广播的Tensor。 + %1888 (Tensor): 广播的示例。 + """ + output_name = mapper._get_outputs_name(node)[0] + layer_outputs = [output_name] + layer_inputs = {} + inputs_name, inputs_node = mapper._get_inputs_name(node) + # 获取当前节点输出的list + current_outputs = [output_name] + # 处理输入0,即%1875 + mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs) + layer_inputs["x"] = inputs_name[0] + # 处理输入1,即%1888 + mapper._check_input(graph, inputs_node[1], inputs_name[1], current_outputs) + layer_inputs["target_tensor"] = inputs_name[1] + # 获取当前节点输入的list + current_inputs = list(layer_inputs.values()) + + graph.add_layer( + "prim.type", + inputs={"input": inputs_name[0]}, + outputs=[inputs_name[0] + "_type"]) + graph.add_layer( + "prim.str", + inputs={"input": inputs_name[0] + "_type"}, + outputs=[inputs_name[0] + "_type"]) + graph.add_layer( + "prim.eq", + inputs={"x": inputs_name[0] + "_type"}, + outputs=[inputs_name[0] + "_cond"], + y=string("VarType.BOOL")) + graph.add_layer( + "prim.if", {'input': inputs_name[0] + "_cond"}, + outputs=[inputs_name[0] + "_if1"]) + if_layer = graph.layers[list(graph.layers.keys())[-1]] + block = PaddleGraph(if_layer, graph_type="dygraph") + block.add_layer( + "prim.type", + inputs={"input": inputs_name[1]}, + outputs=[inputs_name[1] + "_type"]) + block.add_layer( + "fluid.layers.cast", + inputs={"x": inputs_name[0]}, + outputs=[inputs_name[0]], + dtype=inputs_name[1] + "_type") + if_layer.add_block(block) + block = PaddleGraph(if_layer, graph_type="dygraph") + if_layer.add_block(block) + if_layer.inputs["input-0"] = inputs_name[0] + if_layer.inputs["input-1"] = inputs_name[1] + graph.add_layer( + "fluid.layers.expand_as", inputs=layer_inputs, outputs=layer_outputs) + graph.add_layer( + "prim.if", {'input': inputs_name[0] + "_cond"}, + outputs=[inputs_name[0] + "_if2"]) + if_layer = graph.layers[list(graph.layers.keys())[-1]] + block = PaddleGraph(if_layer, graph_type="dygraph") + block.add_layer( + "fluid.layers.cast", + inputs={"x": layer_outputs[0]}, + outputs=layer_outputs, + dtype=string("bool")) + if_layer.add_block(block) + block = PaddleGraph(if_layer, graph_type="dygraph") + if_layer.add_block(block) + if_layer.inputs["input-0"] = layer_outputs[0] + return current_inputs, current_outputs + + def aten_eye(mapper, graph, node): """ 构造批次二维矩阵的PaddleLayer。 @@ -1734,6 +1857,101 @@ def aten_lt(mapper, graph, node): return current_inputs, current_outputs +def aten_masked_fill_(mapper, graph, node): + """ 构造填充mask的PaddleLayer。 + + TorchScript示例: + %input.4 : Tensor = aten::masked_fill_(%scores.2, %mask.2, %46) + 参数含义: + %input.4 (Tensor): 输出,填充后的结果。 + %scores.2 (Tensor): 需要填充的Tensor。 + %mask.2 (Tensor): bool型的Tensor,哪些位置需要填充。 + %46 (-): 填充的值。 + """ + output_name = mapper._get_outputs_name(node)[0] + layer_outputs = [output_name] + layer_inputs = {} + inputs_name, inputs_node = mapper._get_inputs_name(node) + # 获取当前节点输入的list + current_inputs = [] + # 获取当前节点输出的list + current_outputs = [output_name] + # 处理输入0,即%input.4 + mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs) + current_inputs.append(inputs_name[0]) + graph.add_layer( + "prim.type", + inputs={"input": inputs_name[0]}, + 
outputs=[inputs_name[0] + "_type"]) + # 处理输入1,即%scores.2 + mapper._check_input(graph, inputs_node[1], inputs_name[1], current_outputs) + current_inputs.append(inputs_name[1]) + graph.add_layer( + "paddle.logical_not", + inputs={"x": inputs_name[1]}, + outputs=[inputs_name[1] + "_not"]) + graph.add_layer( + "fluid.layers.cast", + inputs={"x": inputs_name[1]}, + outputs=[inputs_name[1] + "_mask"], + dtype=inputs_name[0] + "_type") + graph.add_layer( + "fluid.layers.cast", + inputs={"x": inputs_name[1] + "_not"}, + outputs=[inputs_name[1] + "_not_mask"], + dtype=inputs_name[0] + "_type") + graph.add_layer( + "paddle.multiply", + inputs={"x": inputs_name[0], + "y": inputs_name[1] + "_not_mask"}, + outputs=[inputs_name[0] + "_not_mask"]) + # 处理输入2,即%46 + mapper._check_input(graph, inputs_node[2], inputs_name[2], current_outputs) + graph.add_layer( + "prim.eq", + inputs={"x": inputs_name[2]}, + outputs=[inputs_name[2] + "_cond1"], + y="-float('inf')") + graph.add_layer( + "prim.eq", + inputs={"x": inputs_name[2]}, + outputs=[inputs_name[2] + "_cond2"], + y="float('inf')") + graph.add_layer( + "prim.or", + inputs={ + "x": inputs_name[2] + "_cond1", + "y": inputs_name[2] + "_cond2" + }, + outputs=[inputs_name[2] + "_cond"]) + graph.add_layer( + "prim.if", {'input': inputs_name[2] + "_cond"}, + outputs=[inputs_name[2] + "_if"]) + if_layer = graph.layers[list(graph.layers.keys())[-1]] + block = PaddleGraph(if_layer, graph_type="dygraph") + block.add_layer( + "prim.equal", + inputs={"input": inputs_name[1] + "_mask"}, + outputs=[inputs_name[2] + "_1"]) + if_layer.add_block(block) + block = PaddleGraph(if_layer, graph_type="dygraph") + block.add_layer( + "prim.mul", + inputs={"x": inputs_name[1] + "_mask", + "y": inputs_name[2]}, + outputs=[inputs_name[2] + "_1"]) + if_layer.add_block(block) + if_layer.inputs["input-0"] = inputs_name[1] + "_mask" + if_layer.inputs["input-1"] = inputs_name[2] + if_layer.outputs.append(inputs_name[2] + "_1") + graph.add_layer( + "fluid.layers.elementwise_add", + inputs={"x": inputs_name[2] + "_1", + "y": inputs_name[0] + "_not_mask"}, + outputs=layer_outputs) + return current_inputs, current_outputs + + def aten_max_pool2d(mapper, graph, node): """ 构造最大池化的PaddleLayer。 @@ -2184,11 +2402,52 @@ def aten_reshape(mapper, graph, node): layer_inputs["shape"] = inputs_name[1] current_inputs.append(inputs_name[1]) + graph.add_layer( + "prim.type", + inputs={"input": inputs_name[0]}, + outputs=[inputs_name[0] + "_type"]) + graph.add_layer( + "prim.str", + inputs={"input": inputs_name[0] + "_type"}, + outputs=[inputs_name[0] + "_type"]) + graph.add_layer( + "prim.eq", + inputs={"x": inputs_name[0] + "_type"}, + outputs=[inputs_name[0] + "_cond"], + y=string("VarType.BOOL")) + graph.add_layer( + "prim.if", {'input': inputs_name[0] + "_cond"}, + outputs=[inputs_name[0] + "_if1"]) + if_layer = graph.layers[list(graph.layers.keys())[-1]] + block = PaddleGraph(if_layer, graph_type="dygraph") + block.add_layer( + "fluid.layers.cast", + inputs={"x": inputs_name[0]}, + outputs=[inputs_name[0]], + dtype=string("int32")) + if_layer.add_block(block) + block = PaddleGraph(if_layer, graph_type="dygraph") + if_layer.add_block(block) + if_layer.inputs["input-0"] = inputs_name[0] graph.add_layer( "fluid.layers.reshape", inputs=layer_inputs, outputs=layer_outputs, **layer_attrs) + graph.add_layer( + "prim.if", {'input': inputs_name[0] + "_cond"}, + outputs=[inputs_name[0] + "_if2"]) + if_layer = graph.layers[list(graph.layers.keys())[-1]] + block = PaddleGraph(if_layer, graph_type="dygraph") + 
block.add_layer( + "fluid.layers.cast", + inputs={"x": layer_outputs[0]}, + outputs=layer_outputs, + dtype=string("bool")) + if_layer.add_block(block) + block = PaddleGraph(if_layer, graph_type="dygraph") + if_layer.add_block(block) + if_layer.inputs["input-0"] = layer_outputs[0] return current_inputs, current_outputs @@ -2416,7 +2675,8 @@ def aten_size(mapper, graph, node): **layer_attrs) return current_inputs, current_outputs - graph.add_layer("prim.shape", inputs=layer_inputs, outputs=layer_outputs) + graph.add_layer( + "fluid.layers.shape", inputs=layer_inputs, outputs=layer_outputs) return current_inputs, current_outputs @@ -2465,7 +2725,7 @@ def aten_slice(mapper, graph, node): layer_inputs["axes"] = inputs_name[1] + "_list" current_inputs.append(inputs_name[1] + "_list") current_outputs.append(inputs_name[1] + "_list") - # 处理输入3,即%82 + # 处理输入2,即%82 if inputs_name[2] in mapper.attrs: graph.add_layer( "prim.list", @@ -2485,14 +2745,11 @@ def aten_slice(mapper, graph, node): current_outputs.append(inputs_name[2] + "_list") # 处理输入3,即%85 if inputs_name[3] in mapper.attrs: - if 9223372036854775807 == mapper.attrs[inputs_name[3]]: - import math - input0 = int(math.pow(2, 31) - 1) graph.add_layer( "prim.list", inputs={}, outputs=[inputs_name[3] + "_list"], - input0=input0) + input0=mapper.attrs[inputs_name[3]]) else: mapper._check_input(graph, inputs_node[3], inputs_name[3], current_outputs) @@ -2749,27 +3006,7 @@ def aten_split(mapper, graph, node): if "[]" in str(input_type): layer_inputs["num_or_sections"] = inputs_name[1] else: - graph.add_layer( - "prim.shape", - inputs={"input": inputs_name[0]}, - outputs=[inputs_name[1] + "_shape"]) - graph.add_layer( - "prim.getitem", - inputs={ - "list": inputs_name[1] + "_shape", - "index": inputs_name[2] - }, - outputs=[inputs_name[1] + "_item"]) - graph.add_layer( - "prim.div", - inputs={"x": inputs_name[1] + "_item", - "y": inputs_name[1]}, - outputs=[inputs_name[1] + "_div"]) - graph.add_layer( - "prim.int", - inputs={"input": inputs_name[1] + "_div"}, - outputs=[inputs_name[1] + "_int"]) - layer_inputs["num_or_sections"] = inputs_name[1] + "_int" + layer_attrs["num_or_sections"] = 1 # 获取当前节点输入的list current_inputs = list(layer_inputs.values()) @@ -2849,7 +3086,7 @@ def aten_transpose(mapper, graph, node): # 获取当前节点输入的list current_inputs = list(layer_inputs.values()) graph.add_layer( - "prim.shape", + "fluid.layers.shape", inputs={"input": inputs_name[0]}, outputs=[output_name + "_shape"]) current_outputs.append(output_name + "_shape") @@ -3071,12 +3308,52 @@ def aten_view(mapper, graph, node): current_outputs) layer_inputs["shape"] = inputs_name[1] current_inputs.append(inputs_name[1]) - + graph.add_layer( + "prim.type", + inputs={"input": inputs_name[0]}, + outputs=[inputs_name[0] + "_type"]) + graph.add_layer( + "prim.str", + inputs={"input": inputs_name[0] + "_type"}, + outputs=[inputs_name[0] + "_type"]) + graph.add_layer( + "prim.eq", + inputs={"x": inputs_name[0] + "_type"}, + outputs=[inputs_name[0] + "_cond"], + y=string("VarType.BOOL")) + graph.add_layer( + "prim.if", {'input': inputs_name[0] + "_cond"}, + outputs=[inputs_name[0] + "_if1"]) + if_layer = graph.layers[list(graph.layers.keys())[-1]] + block = PaddleGraph(if_layer, graph_type="dygraph") + block.add_layer( + "fluid.layers.cast", + inputs={"x": inputs_name[0]}, + outputs=[inputs_name[0]], + dtype=string("int32")) + if_layer.add_block(block) + block = PaddleGraph(if_layer, graph_type="dygraph") + if_layer.add_block(block) + if_layer.inputs["input-0"] = inputs_name[0] 
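+    # the reshape below runs on the (possibly int32-cast) tensor; a second
+    # prim.if afterwards casts the result back to bool when the input was bool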
 graph.add_layer(
     "fluid.layers.reshape",
     inputs=layer_inputs,
     outputs=layer_outputs,
     **layer_attrs)
+    graph.add_layer(
+        "prim.if", {'input': inputs_name[0] + "_cond"},
+        outputs=[inputs_name[0] + "_if2"])
+    if_layer = graph.layers[list(graph.layers.keys())[-1]]
+    block = PaddleGraph(if_layer, graph_type="dygraph")
+    block.add_layer(
+        "fluid.layers.cast",
+        inputs={"x": layer_outputs[0]},
+        outputs=layer_outputs,
+        dtype=string("bool"))
+    if_layer.add_block(block)
+    block = PaddleGraph(if_layer, graph_type="dygraph")
+    if_layer.add_block(block)
+    if_layer.inputs["input-0"] = layer_outputs[0]
     return current_inputs, current_outputs
diff --git a/x2paddle/op_mapper/pytorch2paddle/prim.py b/x2paddle/op_mapper/pytorch2paddle/prim.py
index 45849cdde722449fa61493799606151840c143da..8cbab9bf9dec3f320da258bc53631d53c9a96672 100644
--- a/x2paddle/op_mapper/pytorch2paddle/prim.py
+++ b/x2paddle/op_mapper/pytorch2paddle/prim.py
@@ -33,7 +33,17 @@ def prim_Constant(mapper, graph, node):
     if isinstance(value, str):
         value = string(value)
     if str(output_type) == "Tensor":
-        value = "paddle.to_tensor({})".format(value)
+        value = "{}".format(value)
+
+    if "inf" in str(value):
+        t = str(type(value)).split("'")[1]
+        if str(value).startswith("-"):
+            value = "-{}({})".format(t, string(str(value)[1:]))
+        else:
+            value = "{}({})".format(t, string(str(value)))
+    if "9223372036854775807" in str(value):
+        import math
+        value = int(math.pow(2, 31) - 1)
     graph.add_layer(
         "prim.constant", inputs={}, outputs=[output_name], value=value)
     return [], [output_name]
@@ -193,6 +203,7 @@ def prim_ListUnpack(mapper, graph, node):
 
     graph.add_layer(
         "prim.list_unpack", inputs=layer_inputs, outputs=layer_outputs)
+    mapper.split_len[list(layer_inputs.values())[0]] = len(layer_outputs)
     return current_inputs, current_outputs
 
 
@@ -302,19 +313,25 @@ def prim_NumToTensor(mapper, graph, node):
     current_outputs = [output_name]
     # 处理输入0,即%86
     mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs)
-    layer_inputs["value"] = inputs_name[0]
-    # 获取当前节点输入的list
-    current_inputs = list(layer_inputs.values())
-    input_type = list(node.inputs())[0].type()
-    layer_attrs["dtype"] = input_type
-    layer_attrs["persistable"] = True
-    layer_attrs["shape"] = [1]
-
-    graph.add_layer(
-        "fluid.layers.create_global_var",
-        inputs=layer_inputs,
-        outputs=layer_outputs,
-        **layer_attrs)
+    if inputs_node[0].kind() == "aten::size":
+        layer_inputs["input"] = inputs_name[0]
+        # 获取当前节点输入的list
+        current_inputs = list(layer_inputs.values())
+        graph.add_layer(
+            "prim.equal", inputs=layer_inputs, outputs=layer_outputs)
+    else:
+        layer_inputs["value"] = inputs_name[0]
+        # 获取当前节点输入的list
+        current_inputs = list(layer_inputs.values())
+        input_type = list(node.inputs())[0].type()
+        layer_attrs["dtype"] = input_type
+        layer_attrs["persistable"] = True
+        layer_attrs["shape"] = [1]
+        graph.add_layer(
+            "fluid.layers.create_global_var",
+            inputs=layer_inputs,
+            outputs=layer_outputs,
+            **layer_attrs)
     return current_inputs, current_outputs
 
 
diff --git a/x2paddle/op_mapper/pytorch2paddle/prim2code.py b/x2paddle/op_mapper/pytorch2paddle/prim2code.py
index 3af7b2f0085c07ff3cc559f5b672281dab9ded39..d9179f84f42f0afb9bd91e87042c8b90852ed144 100644
--- a/x2paddle/op_mapper/pytorch2paddle/prim2code.py
+++ b/x2paddle/op_mapper/pytorch2paddle/prim2code.py
@@ -257,8 +257,6 @@ def prim_mul(layer, indent=1, init_func=[], forward_func=[]):
     line = "{} = {} * {}".format(layer.outputs[0], get_value(layer, "x"),
                                  get_value(layer, "y"))
forward_func.extend(gen_codes([line], indent=indent)) - if "x2589" in layer.outputs: - print(layer.inputs["y"]) def prim_ne(layer, indent=1, init_func=[], forward_func=[]): @@ -277,6 +275,12 @@ def prim_not(layer, indent=1, init_func=[], forward_func=[]): forward_func.extend(gen_codes([line], indent=indent)) +def prim_or(layer, indent=1, init_func=[], forward_func=[]): + line = "{} = {} or {}".format(layer.outputs[0], + get_value(layer, "x"), get_value(layer, "y")) + forward_func.extend(gen_codes([line], indent=indent)) + + def prim_replaceitem(layer, indent=1, init_func=[], forward_func=[]): line = "{}[{}] = {}".format( get_value(layer, "list"), @@ -318,15 +322,10 @@ def prim_set_item(layer, indent=1, init_func=[], forward_func=[]): forward_func.extend(gen_codes([line], indent=indent)) -def prim_shape(layer, indent=1, init_func=[], forward_func=[]): - line = "{} = {}.shape".format(layer.outputs[0], get_value(layer, "input")) - forward_func.extend(gen_codes([line], indent=indent)) - - def prim_shape_dim(layer, indent=1, init_func=[], forward_func=[]): - line = "{} = {}.shape[{}]".format(layer.outputs[0], - get_value(layer, "input"), - get_value(layer, "dim")) + line = "{} = fluid.layers.shape({})[{}]".format(layer.outputs[0], + get_value(layer, "input"), + get_value(layer, "dim")) forward_func.extend(gen_codes([line], indent=indent)) @@ -339,6 +338,11 @@ def prim_slice(layer, indent=1, init_func=[], forward_func=[]): forward_func.extend(gen_codes([line], indent=indent)) +def prim_str(layer, indent=1, init_func=[], forward_func=[]): + line = "{} = str({})".format(layer.outputs[0], get_value(layer, "input")) + forward_func.extend(gen_codes([line], indent=indent)) + + def prim_sub(layer, indent=1, init_func=[], forward_func=[]): line = "{} = {} - {}".format(layer.outputs[0], get_value(layer, "x"), get_value(layer, "y")) @@ -361,6 +365,11 @@ def prim_tuple_unpack(layer, indent=1, init_func=[], forward_func=[]): forward_func.extend(gen_codes([line], indent=indent)) +def prim_type(layer, indent=1, init_func=[], forward_func=[]): + line = "{} = {}.dtype".format(layer.outputs[0], get_value(layer, "input")) + forward_func.extend(gen_codes([line], indent=indent)) + + def prim_warnings(layer, indent=1, init_func=[], forward_func=[]): lines = ["import warnings"] line = "warnings.warn({}, stacklevel={})".format( diff --git a/x2paddle/op_mapper/pytorch2paddle/pytorch_op_mapper.py b/x2paddle/op_mapper/pytorch2paddle/pytorch_op_mapper.py index 2b95218142790a1f181e46ba0c547403492f8813..937d7bc4d982283be3954bcc25a154b6c1400081 100644 --- a/x2paddle/op_mapper/pytorch2paddle/pytorch_op_mapper.py +++ b/x2paddle/op_mapper/pytorch2paddle/pytorch_op_mapper.py @@ -31,6 +31,7 @@ class PyTorchOpMapper(OpMapper): self.attrs = {} # key为节点名,value为属性值 self.output_index = 0 self.dygraph_name_id = {} # 动态图__init__输出名字中的id,key为kernel类型,value为id + self.split_len = {} # split的长度 # 转换 self.check_op(decoder.graph) self.graph, _ = self.traverse(decoder.graph) @@ -116,6 +117,11 @@ class PyTorchOpMapper(OpMapper): inputs_name, inputs_node = self._get_inputs_name( script_graph.return_node()) graph.outputs = inputs_name + # 更新split参数 + for layer in graph.layers.values(): + if layer.kernel == "fluid.layers.split" and "num_or_sections" in layer.attrs: + layer.attrs["num_or_sections"] = self.split_len[layer.outputs[ + 0]] return graph, graph_inputs def _get_outputs_name(self, node, attr_name=None): diff --git a/x2paddle/optimizer/fusion/__init__.py b/x2paddle/optimizer/fusion/__init__.py index 
96303eab3d075bef5eb87fda9fc4199a5c81be2a..63fc4ff773b2844e465030bc126dae123ce014f5 100644 --- a/x2paddle/optimizer/fusion/__init__.py +++ b/x2paddle/optimizer/fusion/__init__.py @@ -22,3 +22,5 @@ from .fc_fuser import FcFuser from .fc_fuse_pass import FcFusePass from .interpolate_bilinear_fuser import InterpolateBilinearFuser from .interpolate_bilinear_fuse_pass import InterpolateBilinearFusePass +from .reshape_fuser import ReshapeFuser +from .reshape_fuse_pass import ReshapeFusePass diff --git a/x2paddle/optimizer/fusion/adaptive_pool2d_fuser.py b/x2paddle/optimizer/fusion/adaptive_pool2d_fuser.py index 0fcfb46733bcac2343c713a743b2b79129abf2ae..39ace52655611362b76bc6adb8da998a657fe852 100644 --- a/x2paddle/optimizer/fusion/adaptive_pool2d_fuser.py +++ b/x2paddle/optimizer/fusion/adaptive_pool2d_fuser.py @@ -26,19 +26,13 @@ class AdaptivePool2dFuser(FuseBase): """ 描述需要替换的adaptive pool2d图结构。 adaptive pool2d层模式python实现代码示例: x72 = [6, 6] - x73 = x71.shape - x75 = 'Exception' - x76 = 9223372036854775807 - x77 = 1 + x73 = fluid.layers.shape(x71) x78 = len(x73) - x79 = 2 x80 = x78 <= x79 if x80 : raise RaiseException(x75) x83 = [] - x84 = -2 x85 = x73[x84: x76: x77] - x86 = 2 x87 = len(x85) x88 = [x86, x87] x89 = min(x88) @@ -54,62 +48,42 @@ class AdaptivePool2dFuser(FuseBase): self.pattern.add_layer( "prim.constant", inputs={}, outputs=[gen_name(0)], value=[6, 6]) self.pattern.add_layer( - "prim.shape", + "fluid.layers.shape", inputs={'input': "pool-input-0"}, outputs=[gen_name(1)]) - self.pattern.add_layer( - "prim.constant", inputs={}, outputs=[gen_name(2)], value=True) - self.pattern.add_layer( - "prim.constant", - inputs={}, - outputs=[gen_name(3)], - value="Exception") - self.pattern.add_layer( - "prim.constant", - inputs={}, - outputs=[gen_name(4)], - value=9223372036854775807) - self.pattern.add_layer( - "prim.constant", inputs={}, outputs=[gen_name(5)], value=1) self.pattern.add_layer( "prim.len", inputs={"input": gen_name(1)}, outputs=[gen_name(6)]) - self.pattern.add_layer( - "prim.constant", inputs={}, outputs=[gen_name(7)], value=2) self.pattern.add_layer( "prim.le", inputs={"x": gen_name(6), - "y": gen_name(7)}, + "y": "pool-input-1"}, outputs=[gen_name(8)]) self.pattern.add_layer("prim.if", {'input': gen_name(8)}, [gen_name(9)]) if_layer = self.pattern.layers[list(self.pattern.layers.keys())[-1]] pattern_block0 = PaddleGraph(if_layer, graph_type="dygraph") pattern_block0.add_layer( "prim.exception", - inputs={"input": gen_name(3)}, + inputs={"input": "pool-input-6"}, outputs=[gen_name(9)]) - if_layer.inputs["input-0"] = gen_name(3) + if_layer.inputs["input-0"] = "pool-input-6" if_layer.add_block(pattern_block0) pattern_block1 = PaddleGraph(if_layer, graph_type="dygraph") if_layer.add_block(pattern_block1) self.pattern.add_layer("prim.list", inputs={}, outputs=[gen_name(10)]) - self.pattern.add_layer( - "prim.constant", inputs={}, outputs=[gen_name(11)], value=-2) self.pattern.add_layer( "prim.slice", inputs={ "input": gen_name(1), - "start": gen_name(11), - "end": gen_name(4), - "step": gen_name(5) + "start": "pool-input-2", + "end": "pool-input-3", + "step": "pool-input-4" }, outputs=[gen_name(12)]) - self.pattern.add_layer( - "prim.constant", inputs={}, outputs=[gen_name(13)], value=2) self.pattern.add_layer( "prim.len", inputs={"input": gen_name(12)}, outputs=[gen_name(14)]) self.pattern.add_layer( "prim.list", - inputs={"input0": gen_name(13), + inputs={"input0": "pool-input-4", "input1": gen_name(14)}, outputs=[gen_name(15)]) self.pattern.add_layer( @@ -138,7 +112,15 @@ class 
AdaptivePool2dFuser(FuseBase): "pool_size": gen_name(10)}, outputs=[gen_name(21)], **pool_attrs) - self.pattern.build(inputs={"input-0": "pool-input-0"}) + self.pattern.build(inputs={ + "input-0": "pool-input-0", + "input-1": "pool-input-1", + "input-2": "pool-input-2", + "input-3": "pool-input-3", + "input-4": "pool-input-4", + "input-5": "pool-input-5", + "input-6": "pool-input-6" + }) def insert_new_layer(self, graph, parameters, matches): parameters = graph.parameters diff --git a/x2paddle/optimizer/fusion/fc_fuser.py b/x2paddle/optimizer/fusion/fc_fuser.py index f5ce8aceabf92f918e08d4d9aa390eeee00a13af..97c9f769aae903519cbf53d9b426533a38adf929 100644 --- a/x2paddle/optimizer/fusion/fc_fuser.py +++ b/x2paddle/optimizer/fusion/fc_fuser.py @@ -26,7 +26,6 @@ class FcFuser(FuseBase): def build_pattern(self): """ 描述需要替换的fc图结构。 fc层模式python实现代码示例: - x131 = 2 x133 = x128.shape x133 = len(x133) x134 = x133 == x131 @@ -49,18 +48,15 @@ class FcFuser(FuseBase): return "x" + str(id) self.pattern.add_layer( - "prim.constant", inputs={}, outputs=[gen_name(0)], value=2) - self.pattern.add_layer( - "prim.constant", inputs={}, outputs=[gen_name(1)], value=1) - self.pattern.add_layer( - "prim.shape", inputs={'input': "fc-input-0"}, + "fluid.layers.shape", + inputs={'input': "fc-input-0"}, outputs=[gen_name(2)]) self.pattern.add_layer( "prim.len", inputs={'input': gen_name(2)}, outputs=[gen_name(2)]) self.pattern.add_layer( "prim.eq", inputs={"eq0": gen_name(2), - "eq1": gen_name(0)}, + "eq1": "fc-input-1"}, outputs=[gen_name(3)]) self.pattern.add_layer("prim.if", {'input': gen_name(3)}, [gen_name(4)]) self.pattern.outputs.append(gen_name(4)) @@ -106,7 +102,7 @@ class FcFuser(FuseBase): outputs=[gen_name(6)], perm=[1, 0]) pattern_block1.add_layer( - "fluid.layers.matmul", + "paddle.matmul", inputs={"x": "fc-input-0", "y": gen_name(6)}, outputs=[gen_name(9)]) @@ -126,7 +122,9 @@ class FcFuser(FuseBase): "prim.equal", inputs={'input': gen_name(13)}, outputs=[gen_name(4)]) if_layer1.add_block(pattern_block1) - self.pattern.build(inputs={"input-0": "fc-input-0"}) + self.pattern.build( + inputs={"input-0": "fc-input-0", + "input-1": "fc-input-1"}) def insert_new_layer(self, graph, parameters, matches): new_layer = self.gen_new_layer(parameters, matches) @@ -136,13 +134,13 @@ class FcFuser(FuseBase): def gen_new_layer(self, parameters, matches): layers_id = list(matches.keys()) - layer = matches[layers_id[2]] + layer = matches[layers_id[0]] input_name = layer.inputs["input"] - layer = matches[layers_id[5]] + layer = matches[layers_id[3]] output_name = layer.outputs[0] - layer = matches[layers_id[6]] + layer = matches[layers_id[4]] weight_name = layer.attrs["value"][8:-2] - layer = matches[layers_id[8]] + layer = matches[layers_id[6]] bias_name = layer.attrs["value"][8:-2] attrs = dict() attrs["input_dim"] = parameters[weight_name].shape[1] diff --git a/x2paddle/optimizer/fusion/reshape_fuse_pass.py b/x2paddle/optimizer/fusion/reshape_fuse_pass.py new file mode 100644 index 0000000000000000000000000000000000000000..b21a23324180b1c1894117f9c3ec88d0ae956f1a --- /dev/null +++ b/x2paddle/optimizer/fusion/reshape_fuse_pass.py @@ -0,0 +1,33 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from x2paddle.optimizer.pass_ import Pass +from x2paddle.optimizer.fusion import ReshapeFuser +from x2paddle.optimizer.pass_manager import pass_register + + +@pass_register +class ReshapeFusePass(Pass): + name = "reshape_fuse_pass" + + def __init__(self): + Pass.__init__(self) + + def apply(self, graph): + fuser = ReshapeFuser() + fuser.operate(graph, match_kind="edge") + + +# 用于注册 +reshape_fuse_pass = ReshapeFusePass() diff --git a/x2paddle/optimizer/fusion/reshape_fuser.py b/x2paddle/optimizer/fusion/reshape_fuser.py new file mode 100644 index 0000000000000000000000000000000000000000..daf95cebc95f2bde06511162fbec35e32b8b2891 --- /dev/null +++ b/x2paddle/optimizer/fusion/reshape_fuser.py @@ -0,0 +1,73 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from x2paddle.optimizer.pattern_matcher import FuseBase +from x2paddle.core.program import PaddleGraph, PaddleLayer +from x2paddle.core.util import * + + +class ReshapeFuser(FuseBase): + def __init__(self): + super(ReshapeFuser, self).__init__(graph_type="dygraph") + + def build_pattern(self): + """ 描述需要替换的reshape图结构。 + reshape层模式python实现代码示例: + x165 = int(x164) + x166 = [x158, x159, x165] + x167 = fluid.layers.reshape(x=x157, shape=x166) + """ + + def gen_name(id): + return "x" + str(id) + + self.pattern.add_layer( + "prim.int", + inputs={"input": "reshape-input-0"}, + outputs=[gen_name(0)]) + self.pattern.add_layer( + "prim.list", + inputs={ + "input0": "reshape-input-1", + "input1": "reshape-input-2", + "input2": gen_name(0) + }, + outputs=[gen_name(1)]) + self.pattern.add_layer( + "fluid.layers.reshape", + inputs={"x": "reshape-input-3", + "shape": gen_name(1)}, + outputs=[gen_name(2)]) + self.pattern.build(inputs={ + "input-0": "reshape-input-0", + "input-1": "reshape-input-1", + "input-2": "reshape-input-2", + "input-3": "reshape-input-3", + }) + + def insert_new_layer(self, graph, parameters, matches): + self.update_layer(matches) + matches.pop(list(matches.keys())[1]) + matches.pop(list(matches.keys())[1]) + + def update_layer(self, matches): + layers_id = list(matches.keys()) + layer = matches[layers_id[0]] + int_input_name = layer.inputs["input"] + output_name = layer.outputs[0] + layer = matches[layers_id[1]] + for key, input_name in layer.inputs.items(): + if input_name == output_name: + layer.inputs[key] = int_input_name diff --git a/x2paddle/optimizer/optimizer.py b/x2paddle/optimizer/optimizer.py index 6a69ec66445e401aa40222ff2f644f42e2aa54bd..5ee18038a5d4b6a785348a4f091582e12dbdf88c 100644 --- a/x2paddle/optimizer/optimizer.py +++ 
b/x2paddle/optimizer/optimizer.py
@@ -21,7 +21,7 @@ class GraphOptimizer(object):
         self.passes = [
             "interpolate_bilinear_fuse_pass", "fc_fuse_pass",
             "adaptive_pool2d_fuse_pass", "batchnorm2d_fuse_pass",
-            "constant_fuse_pass"
+            "constant_fuse_pass", "reshape_fuse_pass"
         ]
 
     def optimize(self, graph):
diff --git a/x2paddle/optimizer/pattern_matcher.py b/x2paddle/optimizer/pattern_matcher.py
index cb398b043946a1bd59ee7f72f56f1da5d14e315c..8a1d57d01427aecc799a122856927ce615107a09 100644
--- a/x2paddle/optimizer/pattern_matcher.py
+++ b/x2paddle/optimizer/pattern_matcher.py
@@ -63,6 +63,8 @@
                     pattern_layer_id_in = pattern_layer_in[i]
                     if pattern_layer_id_in != -1:
                         subgraph_ids = list(subgraph_id2layers.keys())
+                        if layer_id_in not in subgraph_ids:
+                            return False
                         if pattern_ids.index(pattern_layer_id_in) == \
                                 subgraph_ids.index(layer_id_in):
                             # 判断pattern输入在pattern_ids的索引
@@ -96,13 +98,16 @@
                         if match_info is not False:
                             subgraph_id2layers.update(match_info)
                         else:
                             return False
                 pattern_index += 1
                 if pattern_index == len(pattern.layers):
                     return subgraph_id2layers
                 else:
-                    return False
-            return subgraph_id2layers
+                    if pattern_index == 0:
+                        return False
+                    if pattern_index == len(pattern.layers):
+                        return subgraph_id2layers
+                    return False
 
         for i, (layer_id, layer) in enumerate(graph.layers.items()):
             match_info = get_subgraph(self.pattern, graph, i)
@@ -112,10 +117,77 @@
                 if len(block.layers) > 0:
                     self.detect_patterns_by_topo(layer.blocks[j])
 
-    def detect_patterns_by_edge(self, graph):
+    def detect_patterns_by_edge(self, graph, ignore_list_inputs=True):
         """当遇见顺序没有强制规定的pattern时使用该方式
         """
-        pass
+
+        def get_subgraph(pattern, graph, start_index):
+            pattern_id2layers = pattern.get_global_layers()
+            pattern_ids = list(pattern_id2layers.keys())
+            pattern_layer_id = pattern_ids[0]
+            subgraph_id2layers = dict()
+            graph_layers = dict(list(graph.layers.items())[start_index:])
+            layer_id = list(graph_layers.keys())[0]
+
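+            # update() binds one graph layer to one pattern layer (their
+            # kernels must match), then walks the out-edges recursively, so
+            # an unordered pattern such as
+            #     x165 = int(x164)
+            #     x166 = [x158, x159, x165]
+            #     x167 = fluid.layers.reshape(x=x157, shape=x166)
+            # is matched by connectivity rather than by layer order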
+            def update(layer_id, pattern_layer_id):
+                layer = graph_layers[layer_id]
+                pattern_layer = pattern_id2layers[pattern_layer_id]
+                if layer.kernel != pattern_layer.kernel:
+                    return False
+                subgraph_id2layers[layer_id] = layer
+                for i, pattern_layer_id_in in enumerate(pattern.edges_in[
+                        pattern_layer_id]):
+                    if pattern_layer_id_in == -1 or ignore_list_inputs:
+                        continue
+                    layer_id_in = graph.edges_in[layer_id][i]
+                    subgraph_ids = list(subgraph_id2layers.keys())
+                    if layer_id_in not in subgraph_ids:
+                        return False
+                if pattern.edges_out.get(pattern_layer_id, 0) != 0:
+                    if len(pattern.edges_out[pattern_layer_id]) != \
+                            len(graph.edges_out[layer_id]):
+                        return False
+                    for i, pattern_layer_id_out in enumerate(pattern.edges_out[
+                            pattern_layer_id]):
+                        if pattern_layer_id_out in pattern_ids:
+                            new_layer_id_out = graph.edges_out[layer_id][i]
+                            for j, new_new_layer_id_in in enumerate(
+                                    graph.edges_in[new_layer_id_out]):
+                                if new_new_layer_id_in not in subgraph_id2layers:
+                                    if ignore_list_inputs:
+                                        continue
+                                    new_new_pattern_layer_id_in = pattern.edges_in[
+                                        pattern_layer_id_out][j]
+                                    if new_new_pattern_layer_id_in == -1:
+                                        continue
+                                    update(new_new_layer_id_in,
+                                           new_new_pattern_layer_id_in)
+                            update(new_layer_id_out, pattern_layer_id_out)
+
+            while len(subgraph_id2layers) != len(pattern_id2layers):
+                out = update(layer_id, pattern_layer_id)
+                if out == False:
+                    return False
+                else:
+                    if len(subgraph_id2layers) == len(pattern_id2layers):
+                        return subgraph_id2layers
+                    else:
+                        return False
+
+        for i, (layer_id, layer) in enumerate(graph.layers.items()):
+            match_info = get_subgraph(self.pattern, graph, i)
+            if match_info:
+                self.matches.append(match_info)
+            for j, block in enumerate(layer.blocks):
+                if len(block.layers) > 0:
+                    self.detect_patterns_by_edge(layer.blocks[j])
 
     def remove_overlapped_match(self):
         """ 如果2个子图有重叠,只取前一个子图。