diff --git a/x2paddle/core/paddle_default_parameter.yml b/x2paddle/core/paddle_default_parameter.yml deleted file mode 100644 index 1191b098c457f14aea809e5530a912683e65f1c0..0000000000000000000000000000000000000000 --- a/x2paddle/core/paddle_default_parameter.yml +++ /dev/null @@ -1,105 +0,0 @@ -paddle.nn.AdaptiveAvgPool2D: - data_format: NCHW - -paddle.addmm: - alpha: 1.0 - beta: 1.0 - -paddle.arange: - start: 0 - end: ~ - step: 1 - -paddle.nn.BatchNorm: - momentum: 0.9 - epsilon: 0.00001 - data_format: NCHW - -paddle.concat: - axis: 0 - -paddle.nn.Conv2D: - stride: - - 1 - - 1 - padding: - - 0 - - 0 - dilation: - - 1 - - 1 - groups: 1 - padding_mode: zeros - data_format: NCHW - -paddle.nn.Conv2DTranspose: - stride: - - 1 - - 1 - padding: - - 0 - - 0 - dilation: - - 1 - - 1 - output_padding: - - 0 - - 0 - groups: 1 - padding_mode: zeros - data_format: NCHW - -paddle.flatten: - start_axis: 0 - stop_axis: -1 - -paddle.nn.Hardtanh: - min: -1.0 - max: 1.0 - -paddle.nn.LayerNorm: - epsilon: 0.00001 - -paddle.nn.LeakyReLU: - negative_slope: 0.01 - -paddle.nn.LSTM: - num_layers: 1 - direction: forward - time_major: False - dropout: 0 - -paddle.nn.MaxPool2D: - padding: - - 0 - - 0 - ceil_mode: False - return_mask: False - -paddle.mean: - keepdim: False - -paddle.split: - axis: 0 - -paddle.nn.Softmax: - axis: -1 - -paddle.nn.Softplus: - beta: 1 - threshold: 20 - -paddle.stack: - axis: 0 - -paddle.split: - axis: 0 - -paddle.nn.functioanl.interpolate: - mode: nearest - align_corners: False - align_mode: 0 - data_format: NCHW - - - diff --git a/x2paddle/core/program.py b/x2paddle/core/program.py index 27f23caaa1aa7ca14fb5f36477a7911f72a1b3a6..e65c0825b520909bbd6f0b2134ea94554a8d6ec1 100644 --- a/x2paddle/core/program.py +++ b/x2paddle/core/program.py @@ -26,6 +26,7 @@ import six import pickle import numpy as np from os import path as osp +from x2paddle.core.util import * class PaddleLayer(object): @@ -326,12 +327,10 @@ class PaddleGraph(object): write_code( f, [ - "from paddle.fluid.initializer import Constant", - "from paddle.fluid.param_attr import ParamAttr", - "import paddle.fluid as fluid", custom_import, - "import paddle", "import math", "", - + "import paddle", + "import math", + "", ], indent=0) if self.custom_code is not None: @@ -348,6 +347,7 @@ class PaddleGraph(object): ], indent=1) for layer_id, layer in self.layers.items(): + remove_default_attrs(layer) edges_in = self.edges_in.get(layer_id, []) edges_out = self.edges_out.get(layer_id, []) if len(edges_in) == 0 and len(edges_out) == 0: @@ -474,10 +474,7 @@ class PaddleGraph(object): custom_import = "" self.head = gen_codes( [ - "from paddle.fluid.initializer import Constant", - "from paddle.fluid.param_attr import ParamAttr", "import paddle", - "import paddle.fluid as fluid", "import math", custom_import, "", @@ -549,6 +546,7 @@ class PaddleGraph(object): gen_head() for layer_id, layer in self.layers.items(): + remove_default_attrs(layer) if ("paddle.nn" in layer.kernel and "functional" not in layer.kernel ) or layer.kernel == "paddle.to_tensor" or \ layer.kernel.startswith("custom_layer") or \ diff --git a/x2paddle/core/util.py b/x2paddle/core/util.py index 2ecf3a070d692eb476ddb89f2798f9f013a2d3dd..38cf6ab2030437da21a1f61184792019f47a3e88 100644 --- a/x2paddle/core/util.py +++ b/x2paddle/core/util.py @@ -14,6 +14,7 @@ import numpy import math import os +import inspect def string(param): @@ -25,4 +26,44 @@ def name_generator(nn_name, nn_name2id): else: nn_name2id[nn_name] = 0 real_nn_name = nn_name + str(nn_name2id[nn_name]) - return real_nn_name \ No newline at end of file + return real_nn_name + +def remove_default_attrs(layer, diff_attrs=None): + def get_default_args(func): + signature = inspect.signature(func) + return { + k: v.default + for k, v in signature.parameters.items() + if v.default is not inspect.Parameter.empty + } + kernel = layer.kernel + attrs = layer.attrs + if ":" in kernel or "prim" in kernel or "module" in kernel: + return + is_func = True + if "paddle.nn" in kernel and "functional"not in kernel: + is_func = False + import paddle + obj = paddle + for i, part in enumerate(kernel.split(".")): + if i == 0: + continue + obj = getattr(obj, part) + if is_func: + func = obj + else: + func = obj.__init__ + default_attrs = get_default_args(func) + for default_k, default_v in default_attrs.items(): + if default_k in attrs: + if (isinstance(attrs[default_k], list) or isinstance(attrs[default_k], tuple)) \ + and not is_func: + if len(set(attrs[default_k])) == 1: + attrs[default_k] = attrs[default_k][0] + if default_v == attrs[default_k]: + if diff_attrs is None: + attrs.pop(default_k) + else: + key_name = "{}_{}".format(layer.outputs[0], default_k) + if key_name not in diff_attrs: + attrs.pop(default_k) diff --git a/x2paddle/decoder/onnx_decoder.py b/x2paddle/decoder/onnx_decoder.py index 899c201b5bfaa60a0bca53b00754999f5641afe4..4e01fe3bf8181ebfd0a73c70865a67d0b5b903ae 100644 --- a/x2paddle/decoder/onnx_decoder.py +++ b/x2paddle/decoder/onnx_decoder.py @@ -205,7 +205,7 @@ class ONNXGraph(Graph): shape = raw_input( "Shape of Input(e.g. -1,3,224,224), enter 'N' to skip: " ) - except NameError: + except: shape = input( "Shape of Input(e.g. -1,3,224,224), enter 'N' to skip: " ) @@ -302,18 +302,7 @@ class ONNXGraph(Graph): if opt == in_node: self.connect(nd.name, layer_name) flag = 1 - if nd.name in node.which_child: - for n_i, n_ipt in enumerate(node.inputs): - if first_i == n_i: - continue - if n_ipt == nd.name: - new_nd_name = "{}/{}".format(nd.name, n_i) - if new_nd_name not in node.which_child: - node.which_child[new_nd_name] = idx - break - else: - first_i = node.inputs.index(nd.name) - node.which_child[nd.name] = idx + node.which_child[nd.name] = idx self.node_map[nd.name].index = 0 break if flag == 1: @@ -329,15 +318,11 @@ class ONNXGraph(Graph): if len(node.which_child) == 0: ipt_node = super(ONNXGraph, self).get_node(node.inputs[idx], copy) return ipt_node + else: ipt_node = super(ONNXGraph, self).get_node(node.inputs[idx], copy) - new_ipt_name = "{}/{}".format(ipt_node.layer_name, idx) - if new_ipt_name in node.which_child: - ipt_node.index = node.which_child[new_ipt_name] - else: - if ipt_node.layer_name in node.which_child: - ipt_node.index = node.which_child[ipt_node.layer_name] - + if ipt_node.layer_name in node.which_child: + ipt_node.index = node.which_child[ipt_node.layer_name] return ipt_node diff --git a/x2paddle/op_mapper/dygraph/caffe2paddle/caffe_op_mapper.py b/x2paddle/op_mapper/dygraph/caffe2paddle/caffe_op_mapper.py index 93212c68812a1e17a725c78e079f7fe0033f0b01..f2ae88e88982d0de952c00c31914c4256b05f21b 100644 --- a/x2paddle/op_mapper/dygraph/caffe2paddle/caffe_op_mapper.py +++ b/x2paddle/op_mapper/dygraph/caffe2paddle/caffe_op_mapper.py @@ -367,57 +367,46 @@ class CaffeOpMapper(OpMapper): output_size=kernel) else: layer_attrs = { - 'pool_size': kernel, - 'pool_stride': stride, - 'pool_padding': pad, + 'kernel_size': kernel, + 'stride': stride, + 'padding': pad, 'ceil_mode': ceil_mode, - 'pool_type': string(pool_type), - 'exclusive': False, - 'global_pooling': global_pool, } - self.paddle_graph.add_layer( - "paddle.fluid.dygraph.Pool2D", - inputs={"input": input.name}, - outputs=layer_outputs, - **layer_attrs) -# layer_attrs = { -# 'kernel_size': kernel, -# 'stride': stride, -# 'padding': pad, -# 'ceil_mode': ceil_mode, -# } -# if params.pool == 0: -# self.paddle_graph.add_layer( -# "paddle.nn.MaxPool2D", -# inputs={"input": input.name}, -# outputs=layer_outputs, -# **layer_attrs) -# else: -# layer_attrs["count_include_pad"] = True -# self.paddle_graph.add_layer( -# "paddle.nn.AvgPool2D", -# inputs={"input": input.name}, -# outputs=layer_outputs, -# **layer_attrs) + if params.pool == 0: + self.paddle_graph.add_layer( + "paddle.nn.MaxPool2D", + inputs={"input": input.name}, + outputs=layer_outputs, + **layer_attrs) + else: + self.paddle_graph.add_layer( + "paddle.nn.AvgPool2D", + inputs={"input": input.name}, + outputs=layer_outputs, + **layer_attrs) def LRN(self, node): + lrn_name = name_generator("lrn", self.nn_name2id) + output_name = node.layer_name + layer_outputs = [lrn_name, output_name] assert len(node.inputs) == 1, "The count of LRN node\'s input is not 1." input = self.graph.get_input_node(node, idx=0, copy=True) params = node.layer.lrn_param assert params.local_size % 2 == 1 alpha = params.alpha / float(params.local_size) layer_attrs = { - "n": params.local_size, - "k": params.k, + "size": params.local_size, + "k": params.k, "alpha": alpha, - "beta": params.beta, + "beta": params.beta } self.paddle_graph.add_layer( - "fluid.layers.lrn", + "paddle.nn.LocalResponseNorm", inputs={"input": input.name}, - outputs=[node.layer_name], + outputs=layer_outputs, **layer_attrs) + def InnerProduct(self, node): linear_name = name_generator("linear", self.nn_name2id) output_name = node.layer_name @@ -1131,7 +1120,7 @@ class CaffeOpMapper(OpMapper): input = self.graph.get_input_node(node, idx=0, copy=True) params = node.layer.shuffle_channel_param self.paddle_graph.add_layer( - "fluid.layers.shuffle_channel", + "paddle.fluid.layers.shuffle_channel", inputs={"x": input.name}, outputs=[node.layer_name], group=params.group) diff --git a/x2paddle/op_mapper/dygraph/onnx2paddle/opset9/opset.py b/x2paddle/op_mapper/dygraph/onnx2paddle/opset9/opset.py index ea524e0f7d222e5193934ec9dfcaff256d8330f7..ca05417b3b12bf6ba3cc8ba6d90ae6206e2915ea 100644 --- a/x2paddle/op_mapper/dygraph/onnx2paddle/opset9/opset.py +++ b/x2paddle/op_mapper/dygraph/onnx2paddle/opset9/opset.py @@ -14,8 +14,6 @@ from x2paddle.decoder.onnx_decoder import ONNXGraph, ONNXGraphNode, ONNXGraphDataNode from x2paddle.core.graph import GraphNode -from x2paddle.core.fluid_code import Layer -from x2paddle.core.fluid_code import FluidCode from x2paddle.core.util import * from functools import reduce import numpy as np @@ -86,7 +84,7 @@ class OpSet9(): elementwise_ops = { 'Add': 'paddle.add', 'Div': 'paddle.divide', - 'Sub': 'fluid.layers.elementwise_sub', + 'Sub': 'paddle.subtract', 'Mul': 'paddle.multiply', 'Pow': 'paddle.pow', } @@ -250,22 +248,15 @@ class OpSet9(): def _interpolate(self, node): val_x = self.graph.get_input_node(node, idx=0, copy=True) inputs = {'x': val_x.name} - attrs = dict() if node.layer_type == 'Resize': if len(node.layer.input) == 2: # opset 10 val_scales = self.graph.get_input_node(node, idx=1, copy=True) - # TODO(syf): paddle.nn.functional.interpolate will support the length - # which is the same as the rank of input. -# inputs['scale_factor'] = val_scales.name - attrs['scale_factor'] = self.weights[val_scales.name].tolist()[2:] + inputs['scale_factor'] = val_scales.name elif len(node.layer.input) == 3: # opset 11 val_scales = self.graph.get_input_node(node, idx=2, copy=True) - # TODO(syf): paddle.nn.functional.interpolate will support the length - # which is the same as the rank of input. -# inputs['scale_factor'] = val_scales.name - attrs['scale_factor'] = self.weights[val_scales.name].tolist()[2:] + inputs['scale_factor'] = val_scales.name elif len(node.layer.input) == 4: # opset 11 val_sizes = self.graph.get_input_node(node, idx=3, copy=True) @@ -281,28 +272,35 @@ class OpSet9(): inputs={"x": var_hw}, outputs=[var_hw], dtype=string('int32')) -# inputs['size'] = var_hw - - # TODO(syf): all use - inputs['out_shape'] = var_hw - ipt = inputs.pop("x") - inputs["input"] = ipt - mode = node.get_attr('mode', 'nearest') - attrs.update({"align_corners": False}) + inputs['size'] = var_hw + attrs = {"align_corners": False, + "mode": string(node.get_attr('mode', 'nearest'))} self.paddle_graph.add_layer( - kernel="fluid.layers.resize_nearest", + kernel="paddle.nn.functioanl.interpolate", inputs=inputs, outputs=[node.name], **attrs) + +# # TODO(syf): all use +# inputs['out_shape'] = var_hw +# ipt = inputs.pop("x") +# inputs["input"] = ipt +# mode = node.get_attr('mode', 'nearest') +# attrs = {"align_corners": False} +# self.paddle_graph.add_layer( +# kernel="fluid.layers.resize_nearest", +# inputs=inputs, +# outputs=[node.name], +# **attrs) return elif node.layer_type == 'Upsample': val_scales = self.graph.get_input_node(node, idx=1, copy=True) - inputs['scale_factor'] = val_scales + inputs['scale'] = val_scales mode = node.get_attr('mode', 'nearest') - attrs.update({"align_corners": False, - "mode": string(mode), - "align_mode": 1}) + attrs = {"align_corners": False, + "mode": string(mode), + "align_mode": 1} self.paddle_graph.add_layer( kernel="paddle.nn.functional.interpolate", inputs=inputs, @@ -356,7 +354,7 @@ class OpSet9(): 'sampling_ratio': sampling_ratio, } self.paddle_graph.add_layer( - 'fluid.layers.roi_align', + 'paddle.fluid.layers.roi_align', inputs={'input': val_x.name, 'rois': val_rois.name}, outputs=[node.name], @@ -376,7 +374,7 @@ class OpSet9(): 'spatial_scale': spatial_scale, } self.paddle_graph.add_layer( - 'fluid.layers.roi_pool', + 'paddle.fluid.layers.roi_pool', inputs={'input': val_x.name, 'rois': val_rois.name}, outputs=[node.name], @@ -933,17 +931,16 @@ class OpSet9(): 'max': max_value, 'min': min_value, } - self.paddle_graph.add_layer( 'paddle.clip', inputs={"x": val_x.name}, outputs=[node.name], **layer_attrs) else: - min_ipt = self.graph.get_input_node(node, idx=1, copy=True) - max_ipt = self.graph.get_input_node(node, idx=2, copy=True) - min_value = _const_weight_or_none(min_ipt) + max_ipt = self.graph.get_input_node(node, idx=1, copy=True) + min_ipt = self.graph.get_input_node(node, idx=2, copy=True) max_value = _const_weight_or_none(max_ipt) + min_value = _const_weight_or_none(min_ipt) if max_value.shape == (1, ): max_value = max_value[0] if min_value.shape == (1, ): @@ -1062,40 +1059,23 @@ class OpSet9(): strides[1]) paddings = pad_h + pad_w - paddle_op = 'fluid.layers.pool{}d'.format(poolnd) - assert 2 <= poolnd <= 3, 'only pool2d and pool3d are supported' + op_name = name_generator("pool", self.nn_name2id) + output_name = node.name + layer_outputs = [op_name, output_name] + paddle_op = 'paddle.nn.AvgPool{}D'.format(poolnd) + assert 1 <= poolnd <= 3, 'only Pool1D, Pool2D and Pool3D are supported' layer_attrs = { - "pool_size": kernel_shape, - "pool_type": string('avg'), - "pool_stride": strides, - "pool_padding": paddings, + "kernel_size": kernel_shape, + "stride": strides, + "padding": paddings, "ceil_mode": ceil_mode, "exclusive": 'True', - "name": string(node.name) } self.paddle_graph.add_layer( paddle_op, - inputs={'input': val_x if isinstance(val_x, str) else val_x.name}, - outputs=[node.name], + inputs={'x': val_x.name}, + outputs=layer_outputs, **layer_attrs) - # TODO(syf): op has diff -# op_name = name_generator("pool", self.nn_name2id) -# output_name = node.name -# layer_outputs = [op_name, output_name] -# paddle_op = 'paddle.nn.Pool{}D'.format(poolnd) -# assert 1 <= poolnd <= 3, 'only Pool1D, Pool2D and Pool3D are supported' -# layer_attrs = { -# "kernel_size": kernel_shape, -# "stride": strides, -# "padding": paddings, -# "ceil_mode": ceil_mode, -# "exclusive": 'True', -# } -# self.paddle_graph.add_layer( -# paddle_op, -# inputs={'x': val_x.name}, -# outputs=layer_outputs, -# **layer_attrs) @print_mapping_info def Concat(self, node): @@ -1645,16 +1625,3 @@ class OpSet9(): inputs=inputs_dict, outputs=[node.name], **layer_attrs) - - @print_mapping_info - def ArgMax(self, node): - val_x = self.graph.get_input_node(node, idx=0, copy=True) - axis = node.get_attr('axis') - keepdims = False if node.get_attr('keepdims') == 0 else True - layer_attrs = {'axis': axis, - 'keepdim': keepdims} - self.paddle_graph.add_layer( - 'paddle.argmax', - inputs={"x": val_x.name}, - outputs=[node.name], - **layer_attrs) diff --git a/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py b/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py index 72c3bf7ed5c0a5e11236af7fdcea457e41a2a2dc..00121bb4128182506daca12e612e36e44375b421 100644 --- a/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py +++ b/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py @@ -426,11 +426,11 @@ def aten_avg_pool2d(mapper, graph, node): # 获取当前节点输入的list current_inputs = list(layer_inputs.values()) # 处理输入1,即%538 - layer_attrs["pool_size"] = mapper.attrs[inputs_name[1]] + layer_attrs["kernel_size"] = mapper.attrs[inputs_name[1]] # 处理输入2,即%539 - layer_attrs["pool_stride"] = mapper.attrs[inputs_name[2]] + layer_attrs["stride"] = mapper.attrs[inputs_name[2]] # 处理输入3,即%540 - layer_attrs["pool_padding"] = mapper.attrs[inputs_name[3]] + layer_attrs["padding"] = mapper.attrs[inputs_name[3]] # 处理输入4,即%273 layer_attrs["ceil_mode"] = mapper.attrs[inputs_name[4]] # 处理输入5,即%272 @@ -445,22 +445,13 @@ def aten_avg_pool2d(mapper, graph, node): key=mapper.attrs[inputs_name[6]], value=None) - # TODO(syf): The op has diff. -# self.paddle_graph.add_layer( -# kernel="paddle.nn.AvgPool2D", -# inputs={"input": input_name}, -# outputs=layer_outputs, -# kernel_size=k_size[2:4], -# stride=strides[2:4], -# padding=string(pad_mode)) - - layer_attrs["pool_type"] = string("avg") graph.add_layer( - "fluid.layers.pool2d", + kernel="paddle.nn.AvgPool2D", inputs=layer_inputs, - outputs=layer_outputs[1:], + outputs=layer_outputs, scope_name=scope_name, **layer_attrs) + return current_inputs, current_outputs def aten_avg_pool3d(mapper, graph, node): @@ -493,11 +484,11 @@ def aten_avg_pool3d(mapper, graph, node): # 获取当前节点输入的list current_inputs = list(layer_inputs.values()) # 处理输入1,即%538 - layer_attrs["pool_size"] = mapper.attrs[inputs_name[1]] + layer_attrs["kernel_size"] = mapper.attrs[inputs_name[1]] # 处理输入2,即%539 - layer_attrs["pool_stride"] = mapper.attrs[inputs_name[2]] + layer_attrs["stride"] = mapper.attrs[inputs_name[2]] # 处理输入3,即%540 - layer_attrs["pool_padding"] = mapper.attrs[inputs_name[3]] + layer_attrs["padding"] = mapper.attrs[inputs_name[3]] # 处理输入4,即%273 layer_attrs["ceil_mode"] = mapper.attrs[inputs_name[4]] # 处理输入5,即%272 @@ -512,20 +503,10 @@ def aten_avg_pool3d(mapper, graph, node): key=mapper.attrs[inputs_name[6]], value=None) - # TODO(syf): The op has diff. -# self.paddle_graph.add_layer( -# kernel="paddle.nn.AvgPool2D", -# inputs={"input": input_name}, -# outputs=layer_outputs, -# kernel_size=k_size[2:4], -# stride=strides[2:4], -# padding=string(pad_mode)) - - layer_attrs["pool_type"] = string("avg") graph.add_layer( - "fluid.layers.pool3d", + kernel="paddle.nn.AvgPool3D", inputs=layer_inputs, - outputs=layer_outputs[1:], + outputs=layer_outputs, scope_name=scope_name, **layer_attrs) return current_inputs, current_outputs @@ -561,11 +542,11 @@ def aten_avg_pool1d(mapper, graph, node): # 获取当前节点输入的list current_inputs = list(layer_inputs.values()) # 处理输入1,即%538 - layer_attrs["pool_size"] = mapper.attrs[inputs_name[1]] + layer_attrs["kernel_size"] = mapper.attrs[inputs_name[1]] # 处理输入2,即%539 - layer_attrs["pool_stride"] = mapper.attrs[inputs_name[2]] + layer_attrs["stride"] = mapper.attrs[inputs_name[2]] # 处理输入3,即%540 - layer_attrs["pool_padding"] = mapper.attrs[inputs_name[3]] + layer_attrs["padding"] = mapper.attrs[inputs_name[3]] # 处理输入4,即%273 layer_attrs["ceil_mode"] = mapper.attrs[inputs_name[4]] # 处理输入5,即%272 @@ -580,20 +561,10 @@ def aten_avg_pool1d(mapper, graph, node): key=mapper.attrs[inputs_name[6]], value=None) - # TODO(syf): The op has diff. -# self.paddle_graph.add_layer( -# kernel="paddle.nn.AvgPool2D", -# inputs={"input": input_name}, -# outputs=layer_outputs, -# kernel_size=k_size[2:4], -# stride=strides[2:4], -# padding=string(pad_mode)) - - layer_attrs["pool_type"] = string("avg") graph.add_layer( - "fluid.layers.pool1d", + kernel="paddle.nn.AvgPool1D", inputs=layer_inputs, - outputs=layer_outputs[1:], + outputs=layer_outputs, scope_name=scope_name, **layer_attrs) return current_inputs, current_outputs @@ -2930,22 +2901,13 @@ def aten_max_pool2d(mapper, graph, node): # 处理输入5,即%19 layer_attrs["ceil_mode"] = mapper.attrs[inputs_name[5]] layer_attrs_tmp["ceil_mode"] = mapper.attrs[inputs_name[5]] - - if mapper.attrs[inputs_name[5]] == True: - layer_attrs["pool_type"] = string("max") - graph.add_layer( - "fluid.layers.pool2d", - inputs=layer_inputs, - outputs=layer_outputs[1:], - scope_name=scope_name, - **layer_attrs_tmp) - else: - graph.add_layer( - "paddle.nn.MaxPool2D", - inputs=layer_inputs, - outputs=layer_outputs, - scope_name=scope_name, - **layer_attrs) + + graph.add_layer( + "paddle.nn.MaxPool2D", + inputs=layer_inputs, + outputs=layer_outputs, + scope_name=scope_name, + **layer_attrs) return current_inputs, current_outputs diff --git a/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_custom_layer/gather.py b/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_custom_layer/gather.py index 71b51f6222ea43262a0718b6730a6f24ecf773ee..10850ee5bbf91fa42e39f4dbd67ec1fa0d6682d7 100644 --- a/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_custom_layer/gather.py +++ b/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_custom_layer/gather.py @@ -13,7 +13,6 @@ # limitations under the License. import paddle -import paddle.fluid as fluid from itertools import product import numpy as np diff --git a/x2paddle/op_mapper/dygraph/tf2paddle/tf_op_mapper.py b/x2paddle/op_mapper/dygraph/tf2paddle/tf_op_mapper.py index e0baa027dd87ed49e4e3feb34f3a345677f6b161..ad03f06dd262acd4eaddee3b68854fb4363ea051 100644 --- a/x2paddle/op_mapper/dygraph/tf2paddle/tf_op_mapper.py +++ b/x2paddle/op_mapper/dygraph/tf2paddle/tf_op_mapper.py @@ -70,7 +70,7 @@ class TFOpMapper(OpMapper): 'AddV2': 'paddle.add', 'RealDiv': 'paddle.divide', 'DivNoNan': 'paddle.divide', - 'Sub': 'fluid.layers.elementwise_sub', + 'Sub': 'paddle.subtract', 'Maximum': 'paddle.maximum', 'Minimum': 'paddle.minimum', 'Mul': 'paddle.multiply', @@ -346,7 +346,7 @@ class TFOpMapper(OpMapper): shape=[0, c, h, w]) self.paddle_graph.add_layer( - kernel="fluid.layers.pixel_shuffle", + kernel="paddle.nn.functional.pixel_shuffle", inputs={"x": reshape_name}, outputs=[node.name], upscale_factor=block_size) @@ -858,22 +858,22 @@ class TFOpMapper(OpMapper): layer_outputs = [op_name, output_name] # TODO(syf): The op has diff. -# self.paddle_graph.add_layer( -# kernel="paddle.nn.AvgPool2D", -# inputs={"input": input_name}, -# outputs=layer_outputs, -# kernel_size=k_size[2:4], -# stride=strides[2:4], -# padding=string(pad_mode)) - self.paddle_graph.add_layer( - kernel="fluid.layers.pool2d", + kernel="paddle.nn.AvgPool2D", inputs={"input": input_name}, - outputs=[node.name], - pool_size=k_size[2:4], - pool_type=string("avg"), - pool_stride=strides[2:4], - pool_padding=string(pad_mode)) + outputs=layer_outputs, + kernel_size=k_size[2:4], + stride=strides[2:4], + padding=string(pad_mode)) + +# self.paddle_graph.add_layer( +# kernel="fluid.layers.pool2d", +# inputs={"input": input_name}, +# outputs=[node.name], +# pool_size=k_size[2:4], +# pool_type=string("avg"), +# pool_stride=strides[2:4], +# pool_padding=string(pad_mode)) if data_format == "NHWC": self.paddle_graph.add_layer( @@ -1118,14 +1118,6 @@ class TFOpMapper(OpMapper): begin = begin.value.tolist() attrs['offsets'] = begin else: - # shape = begin.out_shapes[0] - # reshape_name = gen_name("slice", "reshape") - # self.paddle_graph.add_layer( - # kernel="fluid.layers.reshape", - # inputs={"x": begin.name}, - # outputs=[reshape_name], - # shape=shape) - # inputs['offsets'] = reshape_name begin = self.decoder.infer_tensor(begin, use_diff_inputs=False).tolist() attrs['offsets'] = begin if size.layer_type == "Const": @@ -1433,7 +1425,7 @@ class TFOpMapper(OpMapper): y_shape = y.out_shapes[0] # TODO(syf) layer_id = self.paddle_graph.add_layer( - "fluid.layers.elementwise_sub", inputs=inputs, outputs=[node.name]) + "paddle.subtract", inputs=inputs, outputs=[node.name]) self.paddle_graph.layers[layer_id].input_shapes = {"x": x_shape, "y": y_shape} inputs = {"x": node.name, "y": node.name} diff --git a/x2paddle/op_mapper/static/caffe2paddle/caffe_op_mapper.py b/x2paddle/op_mapper/static/caffe2paddle/caffe_op_mapper.py index 53e7222bb31bd14cd40b2bdc4e384870029bd1e0..d9ba3fd4774669f96f7276fd3eb1414f74c8965a 100644 --- a/x2paddle/op_mapper/static/caffe2paddle/caffe_op_mapper.py +++ b/x2paddle/op_mapper/static/caffe2paddle/caffe_op_mapper.py @@ -401,18 +401,14 @@ class CaffeOpMapper(OpMapper): padding=pad, ceil_mode=ceil_mode) else: - # TODO(syf): The op has diff. self.paddle_graph.add_layer( - kernel="fluid.layers.pool2d", - inputs={"input": input.name}, + kernel="paddle.nn.functional.avg_pool2d", + inputs={"x": input.name}, outputs=[node.name], - pool_size=kernel, - pool_type=string("avg"), - pool_stride=stride, - pool_padding=pad, - ceil_mode=ceil_mode, - exclusive=False, - global_pooling=False) + kernel_size=kernel, + stride=stride, + padding=pad, + ceil_mode=ceil_mode) def LRN(self, node): assert len(node.inputs) == 1, 'The count of LRN node\'s input is not 1.' @@ -433,7 +429,7 @@ class CaffeOpMapper(OpMapper): 'name': string(node.name) } self.paddle_graph.add_layer( - kernel="fluid.layers.lrn", + kernel="paddle.fluid.layers.lrn", inputs={"input": input.name}, outputs=[node.name], **layer_attrs) @@ -1184,7 +1180,7 @@ class CaffeOpMapper(OpMapper): input = self.graph.get_input_node(node, idx=0, copy=True) params = node.layer.shuffle_channel_param self.paddle_graph.add_layer( - "fluid.layers.shuffle_channel", + "paddle.fluid.layers.shuffle_channel", inputs={"x": input.name}, outputs=[node.layer_name], group=params.group) diff --git a/x2paddle/op_mapper/static/onnx2paddle/opset9/opset.py b/x2paddle/op_mapper/static/onnx2paddle/opset9/opset.py index 5d00e6644d07a9508a64f83b8c4f313f2ef65858..1861e0dbff5f8f8832c5a0da2271fbfbcfe9c61d 100644 --- a/x2paddle/op_mapper/static/onnx2paddle/opset9/opset.py +++ b/x2paddle/op_mapper/static/onnx2paddle/opset9/opset.py @@ -14,8 +14,6 @@ from x2paddle.decoder.onnx_decoder import ONNXGraph, ONNXGraphNode, ONNXGraphDataNode from x2paddle.core.graph import GraphNode -from x2paddle.core.fluid_code import Layer -from x2paddle.core.fluid_code import FluidCode from x2paddle.core.util import string from functools import reduce import numpy as np @@ -88,7 +86,7 @@ class OpSet9(): elementwise_ops = { 'Add': 'paddle.add', 'Div': 'paddle.divide', - 'Sub': 'fluid.layers.elementwise_sub', + 'Sub': 'paddle.substract', 'Mul': 'paddle.multiply', 'Pow': 'paddle.pow', } @@ -240,22 +238,15 @@ class OpSet9(): def _interpolate(self, node): val_x = self.graph.get_input_node(node, idx=0, copy=True) inputs = {'x': val_x.name} - attrs = dict() if node.layer_type == 'Resize': if len(node.layer.input) == 2: # opset 10 val_scales = self.graph.get_input_node(node, idx=1, copy=True) - # TODO(syf): paddle.nn.functional.interpolate will support the length - # which is the same as the rank of input. -# inputs['scale_factor'] = val_scales.name - attrs['scale_factor'] = self.params[val_scales.name].tolist()[2:] + inputs['scale_factor'] = val_scales.name elif len(node.layer.input) == 3: # opset 11 val_scales = self.graph.get_input_node(node, idx=2, copy=True) - # TODO(syf): paddle.nn.functional.interpolate will support the length - # which is the same as the rank of input. -# inputs['scale_factor'] = val_scales.name - attrs['scale_factor'] = self.params[val_scales.name].tolist()[2:] + inputs['scale_factor'] = val_scales.name elif len(node.layer.input) == 4: # opset 11 val_sizes = self.graph.get_input_node(node, idx=3, copy=True) @@ -271,28 +262,35 @@ class OpSet9(): inputs={"x": var_hw}, outputs=[var_hw], dtype=string('int32')) -# inputs['size'] = var_hw - - # TODO(syf): all use - inputs['out_shape'] = var_hw - ipt = inputs.pop("x") - inputs["input"] = ipt - mode = node.get_attr('mode', 'nearest') - attrs.update({"align_corners": False}) + inputs['size'] = var_hw + attrs = {"align_corners": False, + "mode": string(node.get_attr('mode', 'nearest'))} self.paddle_graph.add_layer( - kernel="fluid.layers.resize_nearest", + kernel="paddle.nn.functioanl.interpolate", inputs=inputs, outputs=[node.name], **attrs) + +# # TODO(syf): all use +# inputs['out_shape'] = var_hw +# ipt = inputs.pop("x") +# inputs["input"] = ipt +# mode = node.get_attr('mode', 'nearest') +# attrs = {"align_corners": False} +# self.paddle_graph.add_layer( +# kernel="fluid.layers.resize_nearest", +# inputs=inputs, +# outputs=[node.name], +# **attrs) return elif node.layer_type == 'Upsample': val_scales = self.graph.get_input_node(node, idx=1, copy=True) inputs['scale'] = val_scales mode = node.get_attr('mode', 'nearest') - attrs.update({"align_corners": False, + attrs = {"align_corners": False, "mode": string(mode), - "align_mode": 1}) + "align_mode": 1} self.paddle_graph.add_layer( kernel="paddle.nn.functional.interpolate", inputs=inputs, @@ -346,7 +344,7 @@ class OpSet9(): 'sampling_ratio': sampling_ratio, } self.paddle_graph.add_layer( - 'fluid.layers.roi_align', + 'paddle.fluid.layers.roi_align', inputs={'input': val_x.name, 'rois': val_rois.name}, outputs=[node.name], @@ -365,7 +363,7 @@ class OpSet9(): 'spatial_scale': spatial_scale, } self.paddle_graph.add_layer( - 'fluid.layers.roi_pool', + 'paddle.fluid.layers.roi_pool', inputs={'input': val_x.name, 'rois': val_rois.name}, outputs=[node.name], @@ -394,7 +392,7 @@ class OpSet9(): layer_attrs['data_format'] = string('NCHW') layer_attrs['value'] = value else: - paddle_op = 'fluid.layers.pad' + paddle_op = 'paddle.fluid.layers.pad' layer_attrs["pad_value"] = value if len(pads) == 4: paddings = np.array(pads).reshape( @@ -924,10 +922,10 @@ class OpSet9(): outputs=[node.name], **layer_attrs) else: - min_ipt = self.graph.get_input_node(node, idx=1, copy=True) - max_ipt = self.graph.get_input_node(node, idx=2, copy=True) - min_value = _const_weight_or_none(min_ipt) + max_ipt = self.graph.get_input_node(node, idx=1, copy=True) + min_ipt = self.graph.get_input_node(node, idx=2, copy=True) max_value = _const_weight_or_none(max_ipt) + min_value = _const_weight_or_none(min_ipt) if max_value.shape == (1, ): max_value = max_value[0] if min_value.shape == (1, ): @@ -1046,20 +1044,19 @@ class OpSet9(): strides[1]) paddings = pad_h + pad_w - paddle_op = 'fluid.layers.pool{}d'.format(poolnd) - assert 2 <= poolnd <= 3, 'only pool2d and pool3d are supported' + paddle_op = 'paddle.nn.functional.avg_pool{}d'.format(poolnd) + assert 1 <= poolnd <= 3, 'only avg_pool1d, avg_pool2d and avg_pool3d are supported' layer_attrs = { - "pool_size": kernel_shape, - "pool_type": string('avg'), - "pool_stride": strides, - "pool_padding": paddings, + "kernel_size": kernel_shape, + "stride": strides, + "padding": paddings, "ceil_mode": ceil_mode, - "exclusive": 'True', + "exclusive": True, "name": string(node.name) } self.paddle_graph.add_layer( paddle_op, - inputs={'input': val_x if isinstance(val_x, str) else val_x.name}, + inputs={'x': val_x if isinstance(val_x, str) else val_x.name}, outputs=[node.name], **layer_attrs) # TODO(syf): op has diff @@ -1583,17 +1580,4 @@ class OpSet9(): kernel=paddle_op, inputs=layer_inputs, outputs=[node.name], - **layer_attrs) - - @print_mapping_info - def ArgMax(self, node): - val_x = self.graph.get_input_node(node, idx=0, copy=True) - axis = node.get_attr('axis') - keepdims = False if node.get_attr('keepdims') == 0 else True - layer_attrs = {'axis': axis, - 'keepdim': keepdims} - self.paddle_graph.add_layer( - 'paddle.argmax', - inputs={"x": val_x.name}, - outputs=[node.name], **layer_attrs) \ No newline at end of file diff --git a/x2paddle/op_mapper/static/tf2paddle/tf_op_mapper.py b/x2paddle/op_mapper/static/tf2paddle/tf_op_mapper.py index 7667cf9c629c75cbdbb1e9a17c8acb6825a0360f..700ac74a0d9bd512f0016ee64cfd1ff792ad4a5f 100644 --- a/x2paddle/op_mapper/static/tf2paddle/tf_op_mapper.py +++ b/x2paddle/op_mapper/static/tf2paddle/tf_op_mapper.py @@ -72,7 +72,7 @@ class TFOpMapper(OpMapper): 'RealDiv': 'paddle.divide', 'DivNoNan': 'paddle.divide', # TODO (syf): replace - 'Sub': 'fluid.layers.elementwise_sub', + 'Sub': 'paddle.subtract', 'Maximum': 'paddle.maximum', 'Minimum': 'paddle.minimum', 'Mul': 'paddle.multiply', @@ -315,7 +315,7 @@ class TFOpMapper(OpMapper): shape=[0, c, h, w]) self.paddle_graph.add_layer( - kernel="fluid.layers.pixel_shuffle", + kernel="paddle.nn.functional.pixel_shuffle", inputs={"x": reshape_name}, outputs=[node.name], upscale_factor=block_size) @@ -437,8 +437,6 @@ class TFOpMapper(OpMapper): if c == -1: attr = {"shape": [0, k_size[2], 0, 0]} - node.fluid_code.add_layer( - "reshape", inputs=input, output=input, param_attr=attr) self.paddle_graph.add_layer( kernel="paddle.reshape", inputs={"x": input_name}, @@ -842,13 +840,12 @@ class TFOpMapper(OpMapper): # TODO(syf): The op has diff. self.paddle_graph.add_layer( - kernel="fluid.layers.pool2d", - inputs={"input": input_name}, + kernel="paddle.nn.functional.avg_pool2d", + inputs={"x": input_name}, outputs=[node.name], - pool_size=k_size[2:4], - pool_type=string("avg"), - pool_stride=strides[2:4], - pool_padding=string(pad_mode)) + kernel_size=k_size[2:4], + stride=strides[2:4], + padding=string(pad_mode)) if data_format == "NHWC": self.paddle_graph.add_layer( @@ -1406,7 +1403,7 @@ class TFOpMapper(OpMapper): y_shape = y.out_shapes[0] # TODO(syf) layer_id = self.paddle_graph.add_layer( - "fluid.layers.elementwise_sub", inputs=inputs, outputs=[node.name]) + "paddle.subtract", inputs=inputs, outputs=[node.name]) self.paddle_graph.layers[layer_id].input_shapes = {"x": x_shape, "y": y_shape} inputs = {"x": node.name, "y": node.name} diff --git a/x2paddle/optimizer/code_optimizer/hierachical_tree.py b/x2paddle/optimizer/code_optimizer/hierachical_tree.py index e40ca323a1b2328e4789a13dcf178b96481f91e5..6a69e52da477f62c83f843876e0b9c3e7bb65254 100644 --- a/x2paddle/optimizer/code_optimizer/hierachical_tree.py +++ b/x2paddle/optimizer/code_optimizer/hierachical_tree.py @@ -395,9 +395,6 @@ class HierarchicalTree(Tree): self.convert_subgraph_to_layer() self.update_parameters() import_list = ["import paddle", - "import paddle.fluid as fluid", - "from paddle.fluid.initializer import Constant", - "from paddle.fluid.param_attr import ParamAttr", "import math", "from x2paddle.op_mapper.dygraph.pytorch2paddle " + \ "import pytorch_custom_layer as x2paddle_nn" diff --git a/x2paddle/optimizer/code_optimizer/layer_code_generator.py b/x2paddle/optimizer/code_optimizer/layer_code_generator.py index e0ce9f128a78db1169bf3d714af875d13cdf56ac..bf3d6050fb9987bba25a65c2bcf3e47245e2d41c 100644 --- a/x2paddle/optimizer/code_optimizer/layer_code_generator.py +++ b/x2paddle/optimizer/code_optimizer/layer_code_generator.py @@ -14,10 +14,11 @@ # limitations under the License. import copy -import yaml import os.path as osp import x2paddle from x2paddle.optimizer.code_optimizer.parameter_tree import PamareterNode +from x2paddle.core.util import * + NN_KERNEL_NAME = {"paddle.nn.BatchNorm": "bn", "paddle.nn.LayerNorm": "layernorm", @@ -127,25 +128,6 @@ def rename_layers(layers, param_tree=None, is_rename_module=False): return count rename_sub_layers(layers_cp, count) return layers_cp, nn_param_nodes, new_names - -def load_default_parameter(): - path = x2paddle.__file__ - path = path.replace("__init__.py", "core") - yaml_dir = osp.join(path, "paddle_default_parameter.yml") - with open(yaml_dir, "rb") as fr: - default_parameter = yaml.load(fr.read()) - return default_parameter - -def is_abandon(default_parameter, layer_kernel, param_key, param_value): - if layer_kernel not in default_parameter: - return False - params = default_parameter[layer_kernel] - if param_key not in params: - return False - if params[param_key] == param_value: - return True - else: - return False def gen_layer_code(graph, sub_layers, sub_layers_name, different_attrs=dict()): @@ -241,8 +223,8 @@ def gen_layer_code(graph, sub_layers, sub_layers_name, different_attrs=dict()): if is_set_item: outputs.append(layer.outputs[0]) no_output_count = 0 - default_parameter = load_default_parameter() for i, (layer_id, layer) in enumerate(sub_layers.items()): + remove_default_attrs(layer, different_attrs) if ("paddle.nn" in layer.kernel and "functional" not in layer.kernel) or \ layer.kernel.startswith("custom_layer"): line = "self.{}".format(layer.outputs[0]) @@ -255,8 +237,6 @@ def gen_layer_code(graph, sub_layers, sub_layers_name, different_attrs=dict()): if key_name in different_attrs: line += "{}={}, ".format(k, key_name) else: - if is_abandon(default_parameter, layer.kernel, k, v): - continue line += "{}={}, ".format(k, v) line = line.strip(", ") line += ")" @@ -358,8 +338,6 @@ def gen_layer_code(graph, sub_layers, sub_layers_name, different_attrs=dict()): line += "{}=self.{}, ".format(k, key_name) init_func.extend(gen_codes(["self.{} = {}".format(key_name, key_name)], indent=2)) else: - if is_abandon(default_parameter, layer.kernel, k, v): - continue line += "{}={}, ".format(k, v) line = line.strip(", ") line += ")" diff --git a/x2paddle/optimizer/code_optimizer/module_graph.py b/x2paddle/optimizer/code_optimizer/module_graph.py index c7a34bcb66776950df9ea9499ab18b6d34e559f8..c9086bb79485157a9d5ae53dc784ec30d1c47a4c 100644 --- a/x2paddle/optimizer/code_optimizer/module_graph.py +++ b/x2paddle/optimizer/code_optimizer/module_graph.py @@ -196,7 +196,7 @@ class ModuleGraph(object): if len(elements_list) > 1: max_ct = 0 for k, v in zip(elements_list, count_list): - if v > max_ct: + if v > max_ct and str(k) != "nan" : max_ele = k max_ct = v diff_attrs_column[column] = max_ele @@ -365,9 +365,6 @@ class ModuleGraph(object): self.convert_subgraph_to_layer(combination, combination_id) self.update_parameters() import_list = ["import paddle", - "import paddle.fluid as fluid", - "from paddle.fluid.initializer import Constant", - "from paddle.fluid.param_attr import ParamAttr", "import math", "from x2paddle.op_mapper.dygraph.pytorch2paddle " + \ "import pytorch_custom_layer as x2paddle_nn" diff --git a/x2paddle/optimizer/fusion/dygraph/bn_scale_fuser.py b/x2paddle/optimizer/fusion/dygraph/bn_scale_fuser.py index 5b093d1b6b40871637f169dc858dd16d8e51a413..9fbed4c53b0bf5859f16048d6308bf753ca7f6a1 100644 --- a/x2paddle/optimizer/fusion/dygraph/bn_scale_fuser.py +++ b/x2paddle/optimizer/fusion/dygraph/bn_scale_fuser.py @@ -21,47 +21,94 @@ from x2paddle.core.util import * class DygraphBNScaleFuser(FuseBase): def __init__(self): super(DygraphBNScaleFuser, self).__init__(graph_type="dygraph") + patterns = list() def build_pattern(self): """ 描述需要替换的batchnorm2d图结构。 batchnorm2d层模式python实现代码示例: + 模式一: bn_conv1 = self.batchnorm0(conv1) scale_conv1_cparam1 = self.scale_conv1_cparam1 scale_conv1_mul = paddle.multiply(x=bn_conv1, y=scale_conv1_cparam1, axis=1) scale_conv1_cparam2 = self.scale_conv1_cparam2 - scale_conv1 = fluid.layers.elementwise_add(x=scale_conv1_mul, y=scale_conv1_cparam2, axis=1) + scale_conv1 = paddle.add(x=scale_conv1_mul, y=scale_conv1_cparam2, axis=1) + 模式二: + bn_conv1 = self.batchnorm0(conv1) + scale_conv1_cparam1 = self.scale_conv1_cparam1 + scale_conv1_mul = paddle.multiply(x=bn_conv1, y=scale_conv1_cparam1, axis=1) + scale_conv1_cparam2 = self.scale_conv1_cparam2 + scale_conv1_cparam2 = paddle.reshape(x=scale_conv1_cparam2, shape=[32, 1, 1]) + scale_conv1 = paddle.add(x=scale_conv1_mul, y=scale_conv1_cparam2, axis=1) """ def gen_name(id): return "x" + str(id) - self.pattern.add_layer( + pattern = PaddleGraph(graph_type="dygraph") + pattern.add_layer( + "paddle.nn.BatchNorm2D", + inputs={"input": "bn-input-0"}, + outputs=[gen_name(0)]) + pattern.add_layer( + "self.create_parameter", + inputs={}, + outputs=[gen_name(1)]) + inputs_dict = {} + inputs_dict['x'] = gen_name(0) + inputs_dict['y'] = gen_name(1) + pattern.add_layer( + "paddle.multiply", + inputs=inputs_dict, + outputs=[gen_name(2)]) + pattern.add_layer( + "self.create_parameter", + inputs={}, + outputs=[gen_name(3)]) + inputs_dict = {} + inputs_dict['x'] = gen_name(2) + inputs_dict['y'] = gen_name(3) + pattern.add_layer( + "paddle.add", + inputs=inputs_dict, + outputs=[gen_name(4)]) + pattern.build(inputs={"input-0": "bn-input-0"}) + self.patterns.append(pattern) + + pattern = PaddleGraph(graph_type="dygraph") + pattern.add_layer( "paddle.nn.BatchNorm2D", inputs={"input": "bn-input-0"}, outputs=[gen_name(0)]) - self.pattern.add_layer( + pattern.add_layer( "self.create_parameter", inputs={}, outputs=[gen_name(1)]) inputs_dict = {} inputs_dict['x'] = gen_name(0) inputs_dict['y'] = gen_name(1) - self.pattern.add_layer( + pattern.add_layer( "paddle.multiply", inputs=inputs_dict, outputs=[gen_name(2)]) - self.pattern.add_layer( + pattern.add_layer( "self.create_parameter", inputs={}, outputs=[gen_name(3)]) + pattern.add_layer( + "paddle.reshape", + inputs={"x": gen_name(3)}, + outputs=[gen_name(3)]) inputs_dict = {} inputs_dict['x'] = gen_name(2) inputs_dict['y'] = gen_name(3) - self.pattern.add_layer( - "fluid.layers.elementwise_add", + pattern.add_layer( + "paddle.add", inputs=inputs_dict, outputs=[gen_name(4)]) - self.pattern.build(inputs={"input-0": "bn-input-0"}) + pattern.build(inputs={"input-0": "bn-input-0"}) + self.patterns.append(pattern) + + def insert_new_layer(self, graph, parameters, matches): new_layer = self.gen_new_layer(parameters, matches) @@ -78,7 +125,7 @@ class DygraphBNScaleFuser(FuseBase): layer_attrs = layer.attrs layer_attrs.pop("weight_attr") layer_attrs.pop("bias_attr") - layer = matches[layers_id[4]] + layer = matches[layers_id[-1]] layer_outputs = [bn_name] + layer.outputs layer = matches[layers_id[1]] data0_name = layer.outputs[0] diff --git a/x2paddle/optimizer/fusion/dygraph/reshape_fuser.py b/x2paddle/optimizer/fusion/dygraph/reshape_fuser.py index a5a68258da941a5da302051055b22d3eb8a65f90..c49bab18c8762f99e8f6471e0f1ad61ac3031eae 100644 --- a/x2paddle/optimizer/fusion/dygraph/reshape_fuser.py +++ b/x2paddle/optimizer/fusion/dygraph/reshape_fuser.py @@ -27,7 +27,7 @@ class DygraphReshapeFuser(FuseBase): reshape层模式python实现代码示例: x165 = int(x164) x166 = [x158, x159, x165] - x167 = fluid.layers.reshape(x=x157, shape=x166) + x167 = paddle.reshape(x=x157, shape=x166) """ def gen_name(id): @@ -46,7 +46,7 @@ class DygraphReshapeFuser(FuseBase): }, outputs=[gen_name(1)]) self.pattern.add_layer( - "fluid.layers.reshape", + "paddle.reshape", inputs={"x": "reshape-input-3", "shape": gen_name(1)}, outputs=[gen_name(2)]) diff --git a/x2paddle/optimizer/fusion/dygraph/trace_fc_fuser.py b/x2paddle/optimizer/fusion/dygraph/trace_fc_fuser.py index fdce28a7453ab20114ba9897192a0fa994844b47..ca8058fd5579868b45d90ab26de4cbcfa5180ebf 100644 --- a/x2paddle/optimizer/fusion/dygraph/trace_fc_fuser.py +++ b/x2paddle/optimizer/fusion/dygraph/trace_fc_fuser.py @@ -49,7 +49,7 @@ class TraceFcFuser(FuseBase): inputs={}, outputs=[gen_name(0)]) pattern.add_layer( - "fluid.layers.transpose", + "paddle.transpose", inputs={"x": gen_name(0)}, outputs=[gen_name(1)], perm=[1, 0]) diff --git a/x2paddle/optimizer/fusion/static/bn_scale_fuser.py b/x2paddle/optimizer/fusion/static/bn_scale_fuser.py index 946e7fe804d58a0885a5b1d3e1decc7d16e1d687..bd1e5a77dee95dfc52c1a02dc13d705eff3cf7b4 100644 --- a/x2paddle/optimizer/fusion/static/bn_scale_fuser.py +++ b/x2paddle/optimizer/fusion/static/bn_scale_fuser.py @@ -21,12 +21,14 @@ from x2paddle.core.util import * class Static_BNScaleFuser(FuseBase): def __init__(self): super(Static_BNScaleFuser, self).__init__(graph_type="static") - patterns = list() + self.patterns = list() def build_pattern(self): """ 描述需要替换的batchnorm2d图结构。 batchnorm2d层模式python实现代码示例: 模式一: + conv1_bn_mean = paddle.static.create_parameter(shape=(128,), dtype='float32', name='conv1_bn_mean') + conv1_bn_variance = paddle.static.create_parameter(shape=(128,), dtype='float32', name='conv1_bn_variance') conv1_bn = paddle.nn.functional.batch_norm(x=conv1, weight=conv1_bn_weight, bias=conv1_bn_bias, running_mean=conv1_bn_mean, running_var=conv1_bn_variance, epsilon=9.999999747378752e-06, momentum=0.9990000128746033) conv1_scale_cparam1 = paddle.static.create_parameter(shape=(32,), dtype='float32', name='conv1_scale_cparam1') conv1_scale_mul = paddle.multiply(x=conv1_bn, y=conv1_scale_cparam1, axis=1) @@ -34,6 +36,8 @@ class Static_BNScaleFuser(FuseBase): conv1_scale_cparam2 = paddle.reshape(x=conv1_scale_cparam2, shape=[32, 1, 1]) conv1_scale = paddle.add(x=conv1_scale_mul, y=conv1_scale_cparam2) 模式二: + conv1_bn_mean = paddle.static.create_parameter(shape=(128,), dtype='float32', name='conv1_bn_mean') + conv1_bn_variance = paddle.static.create_parameter(shape=(128,), dtype='float32', name='conv1_bn_variance') conv1_bn = paddle.nn.functional.batch_norm(x=conv1, weight=conv1_bn_weight, bias=conv1_bn_bias, running_mean=conv1_bn_mean, running_var=conv1_bn_variance, epsilon=9.999999747378752e-06, momentum=0.9990000128746033) conv1_scale_cparam1 = paddle.static.create_parameter(shape=(32,), dtype='float32', name='conv1_scale_cparam1') conv1_scale_mul = paddle.multiply(x=conv1_bn, y=conv1_scale_cparam1, axis=1) @@ -45,13 +49,21 @@ class Static_BNScaleFuser(FuseBase): return "x" + str(id) pattern = PaddleGraph(graph_type="dygraph") + pattern.add_layer( + "paddle.static.create_parameter", + inputs={}, + outputs=[gen_name(10)]) + pattern.add_layer( + "paddle.static.create_parameter", + inputs={}, + outputs=[gen_name(11)]) pattern.add_layer( "paddle.nn.functional.batch_norm", inputs={"input": "bn-input-0", "weight": "bn-input-1", "bias": "bn-input-2", - "running_mean": "bn-input-3", - "running_var": "bn-input-4",}, + "running_mean": gen_name(10), + "running_var": gen_name(11)}, outputs=[gen_name(0)]) pattern.add_layer( "paddle.static.create_parameter", @@ -81,19 +93,25 @@ class Static_BNScaleFuser(FuseBase): outputs=[gen_name(5)]) pattern.build(inputs={"input-0": "bn-input-0", "input-1": "bn-input-1", - "input-2": "bn-input-2", - "input-3": "bn-input-3", - "input-4": "bn-input-4"}) + "input-2": "bn-input-2"}) self.patterns.append(pattern) pattern = PaddleGraph(graph_type="dygraph") + pattern.add_layer( + "paddle.static.create_parameter", + inputs={}, + outputs=[gen_name(10)]) + pattern.add_layer( + "paddle.static.create_parameter", + inputs={}, + outputs=[gen_name(11)]) pattern.add_layer( "paddle.nn.functional.batch_norm", inputs={"input": "bn-input-0", "weight": "bn-input-1", "bias": "bn-input-2", - "running_mean": "bn-input-3", - "running_var": "bn-input-4",}, + "running_mean": gen_name(10), + "running_var": gen_name(11),}, outputs=[gen_name(0)]) pattern.add_layer( "paddle.static.create_parameter", @@ -119,25 +137,25 @@ class Static_BNScaleFuser(FuseBase): outputs=[gen_name(4)]) pattern.build(inputs={"input-0": "bn-input-0", "input-1": "bn-input-1", - "input-2": "bn-input-2", - "input-3": "bn-input-3", - "input-4": "bn-input-4"}) + "input-2": "bn-input-2"}) self.patterns.append(pattern) def insert_new_layer(self, graph, parameters, matches): new_layer = self.gen_new_layer(parameters, matches) new_layer_id = list(matches.keys())[-1] graph.layers[new_layer_id] = new_layer + matches.pop(list(matches.keys())[0]) + matches.pop(list(matches.keys())[0]) matches.pop(list(matches.keys())[1]) matches.pop(list(matches.keys())[2]) matches.pop(new_layer_id) def gen_new_layer(self, parameters, matches): layers_id = list(matches.keys()) - bn_layer = matches[layers_id[0]] - layer = matches[layers_id[1]] - bn_layer.inputs["weight"] = layer.outputs[0] + bn_layer = matches[layers_id[2]] layer = matches[layers_id[3]] + bn_layer.inputs["weight"] = layer.outputs[0] + layer = matches[layers_id[5]] bn_layer.inputs["bias"] = layer.outputs[0] bn_layer.id = layers_id[-1] layer = matches[layers_id[-1]] diff --git a/x2paddle/optimizer/pattern_matcher.py b/x2paddle/optimizer/pattern_matcher.py index 22e23cae425ebaf36d223a6e812a6899e498a935..8c3d796203fa3a67555f7dcb4e50cf14aff8a519 100644 --- a/x2paddle/optimizer/pattern_matcher.py +++ b/x2paddle/optimizer/pattern_matcher.py @@ -99,7 +99,7 @@ class PatternMatcher(object): return False else: subgraph_id2layers.pop(layer_id) - continue + continue else: if len(graph.edges_out[layer_id]) != len( pattern.edges_out[pattern_layer_id]): @@ -116,7 +116,20 @@ class PatternMatcher(object): else: subgraph_id2layers.pop(layer_id) continue - + else: + layer_out = graph.edges_out[layer_id] + pattern_layer_out = pattern.edges_out[pattern_layer_id] + is_pop = False + for i in range(len(layer_out)): + layer_id_out = layer_out[i] + pattern_layer_id_out = pattern_layer_out[i] + if layer_id_out != -1: + if graph_layers[layer_id_out].kernel != pattern.layers[pattern_layer_id_out].kernel: + is_pop = True + break + if is_pop: + subgraph_id2layers.pop(layer_id) + continue # 当为控制流时的处理 if layer.kernel == "prim.if" or layer.kernel == "prim.loop": if len(pattern_layer.blocks) != len(layer.blocks): @@ -161,7 +174,7 @@ class PatternMatcher(object): for i, (layer_id, layer) in enumerate(graph.layers.items()): match_info = get_subgraph(self.pattern, graph, i) - if match_info: + if match_info and match_info not in self.matches: self.matches.append(match_info) for j, block in enumerate(layer.blocks): if len(block.layers) > 0: @@ -343,4 +356,5 @@ class FuseBase(object): if layer_id in subgraph.layers: # layer_id可能是属于子图的,此时删除父layer,即删除整个子图 subgraph.layers.pop(layer_id) + \ No newline at end of file