diff --git a/README.md b/README.md index ee8aba898fdc0dbadea8067fcfd11f5a4c1805fe..d038201fdb48220178e54737e2c22ab6c3134288 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ X2Paddle在多个主流的CV模型上,测试过TensorFlow/Caffe/ONNX/PyTorch ## 环境依赖 python == 2.7 | python >= 3.5 -paddlepaddle 2.0-rc 或者 develop +paddlepaddle 2.0.0-rc1 或者 develop **按需安装以下依赖** tensorflow : tensorflow == 1.14.0 @@ -93,12 +93,6 @@ X2Paddle提供了工具解决如下问题,详见[tools/README.md](tools/README 6. [X2Paddle添加内置的Caffe自定义层](./docs/user_guides/add_caffe_custom_layer.md) ## 更新历史 -2019.08.05 -1. 统一tensorflow/caffe/onnx模型转换代码和对外接口 -2. 解决上一版caffe2fluid无法转换多分支模型的问题 -3. 解决Windows上保存模型无法加载的问题 -4. 新增optimizer,优化代码结构,合并conv、batch_norm的bias和激活函数 - 2020.12.09 1. 新增PyTorch2Paddle转换方式,转换得到Paddle动态图代码,并动转静获得inference_model。 方式一:trace方式,转换后的代码有模块划分,每个模块的功能与PyTorch相同。 @@ -107,8 +101,6 @@ X2Paddle提供了工具解决如下问题,详见[tools/README.md](tools/README 3. 新增TensorFlow op(14个):Neg、Greater、FloorMod、LogicalAdd、Prd、Equal、Conv3D、Ceil、AddN、DivNoNan、Where、MirrorPad、Size、TopKv2 4. 
新增Optimizer模块,主要包括op融合、op消除功能,转换后的代码可读性更强,进行预测时耗时更短。 -**如果你需要之前版本的tensorflow2fluid/caffe2fluid/onnx2fluid,可以继续访问release-0.9分支,获取之前版本的代码使用。** - ## Acknowledgements diff --git a/docs/introduction/op_list.md b/docs/introduction/op_list.md index c52bf1b6c0d2091af82b584e574791268c45a1fe..eecef91a989b4250c5f42521670a584263a06134 100644 --- a/docs/introduction/op_list.md +++ b/docs/introduction/op_list.md @@ -61,7 +61,7 @@ | 41 | MatMul | 42 | Sum | 43 | Transpose | 44 | BatchNormalization | | 45 | Squeeze | 46 | Equal | 47 | Identity | 48 | GlobalAveragePool | | 49 | MaxPool | 50 | Conv | 51 | Gemm | 52 | NonZero | -| 53 | Abs | 54 | Floor | +| 53 | Abs | 54 | Floor | 55 | ArgMax | ## PyTorch Aten: @@ -93,7 +93,8 @@ Aten: | 93 | aten::sub | 94 | aten::t |95|aten::tanh|96|aten::split| | 97 | aten::transpose | 98 | aten::to |99|aten::type\_as|100|aten::unsqueeze| | 101 | aten::upsample\_bilinear2d | 102 | aten::values |103|aten::view|104|aten::warn| -| 105 | aten::where | 106 | aten::zeros |107|aten::zeros\_like||| +| 105 | aten::where | 106 | aten::zeros |107|aten::zeros\_like|108|aten::bmm| +| 109 | aten::sub\_ | 110 | aten::erf |111|aten::lstm|112|aten::gather| Prim: | 序号 | OP | 序号 | OP | 序号 | OP | 序号 | OP | diff --git a/docs/introduction/x2paddle_model_zoo.md b/docs/introduction/x2paddle_model_zoo.md index 7197db50b56e12c7c14f93ccc53887733181da20..333dca2e0018eb136ed34b5d285bf24c767e9e83 100644 --- a/docs/introduction/x2paddle_model_zoo.md +++ b/docs/introduction/x2paddle_model_zoo.md @@ -5,28 +5,28 @@ ## TensorFlow -| 模型 | 代码 | 备注 | -|------|----------|------| -| SqueezeNet | [code](https://github.com/tensorflow/tpu/blob/master/models/official/squeezenet/squeezenet_model.py)|-| -| MobileNet_V1 | [code](https://github.com/tensorflow/models/tree/master/research/slim/nets) |-| -| MobileNet_V2 | [code](https://github.com/tensorflow/models/tree/master/research/slim/nets) |-| -| ShuffleNet | [code](https://github.com/TropComplique/shufflenet-v2-tensorflow) |-| -| mNASNet | 
[code](https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet) |-| -| EfficientNet | [code](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet) |-| -| Inception_V3 | [code](https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_v3.py) |-| -| Inception_V4 | [code](https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_v4.py) |-| -| Inception_ResNet_V2 | [code](https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_resnet_v2.py) |-| -| VGG16 | [code](https://github.com/tensorflow/models/tree/master/research/slim/nets) |-| -| ResNet_V1_101 | [code](https://github.com/tensorflow/models/tree/master/research/slim/nets) |-| -| ResNet_V2_101 | [code](https://github.com/tensorflow/models/tree/master/research/slim/nets) |-| -| UNet | [code1](https://github.com/jakeret/tf_unet )/[code2](https://github.com/lyatdawn/Unet-Tensorflow) |-| -| MTCNN | [code](https://github.com/AITTSMD/MTCNN-Tensorflow) |-| -| YOLO-V3| [code](https://github.com/YunYang1994/tensorflow-yolov3) | -| -| FALSR | [code](https://github.com/xiaomi-automl/FALSR) | 需使用参数without_data_format_optimization | -| DCSCN | [code](https://modelzoo.co/model/dcscn-super-resolution) | 需使用参数without_data_format_optimization | -| Bert(albert) | [code](https://github.com/google-research/albert#pre-trained-models) | 需使用参数without_data_format_optimization | -| Bert(chinese_L-12_H-768_A-12) | [code](https://github.com/google-research/bert#pre-trained-models) | 需使用参数without_data_format_optimization | -| Bert(multi_cased_L-12_H-768_A-12) | [code](https://github.com/google-research/bert#pre-trained-models) | 需使用参数without_data_format_optimization | +| 模型 | 代码 | +|------|----------| +| SqueezeNet | [code](https://github.com/tensorflow/tpu/blob/master/models/official/squeezenet/squeezenet_model.py)| +| MobileNet_V1 | [code](https://github.com/tensorflow/models/tree/master/research/slim/nets) | +| MobileNet_V2 | 
[code](https://github.com/tensorflow/models/tree/master/research/slim/nets) | +| ShuffleNet | [code](https://github.com/TropComplique/shufflenet-v2-tensorflow) | +| mNASNet | [code](https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet) | +| EfficientNet | [code](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet) | +| Inception_V3 | [code](https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_v3.py) | +| Inception_V4 | [code](https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_v4.py) | +| Inception_ResNet_V2 | [code](https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_resnet_v2.py) | +| VGG16 | [code](https://github.com/tensorflow/models/tree/master/research/slim/nets) | +| ResNet_V1_101 | [code](https://github.com/tensorflow/models/tree/master/research/slim/nets) | +| ResNet_V2_101 | [code](https://github.com/tensorflow/models/tree/master/research/slim/nets) | +| UNet | [code1](https://github.com/jakeret/tf_unet )/[code2](https://github.com/lyatdawn/Unet-Tensorflow) | +| MTCNN | [code](https://github.com/AITTSMD/MTCNN-Tensorflow) | +| YOLO-V3| [code](https://github.com/YunYang1994/tensorflow-yolov3) | +| FALSR | [code](https://github.com/xiaomi-automl/FALSR) | +| DCSCN | [code](https://modelzoo.co/model/dcscn-super-resolution) | +| Bert(albert) | [code](https://github.com/google-research/albert#pre-trained-models) | +| Bert(chinese_L-12_H-768_A-12) | [code](https://github.com/google-research/bert#pre-trained-models) | +| Bert(multi_cased_L-12_H-768_A-12) | [code](https://github.com/google-research/bert#pre-trained-models) | ## Caffe @@ -72,8 +72,8 @@ | EfficientNet | [pytorch(personal practice)](https://github.com/rwightman/gen-efficientnet-pytorch) |9| | SqueezeNet | [onnx official](https://s3.amazonaws.com/download.onnx/models/opset_9/squeezenet.tar.gz) |9| |Ultra-Light-Fast-Generic-Face-Detector-1MB| 
[onnx_model](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/tree/master/models/onnx)|9 | -|BERT| [pytorch(huggingface)](https://github.com/huggingface/transformers/blob/master/notebooks/04-onnx-export.ipynb)|11|转换时需指定input shape,见[文档Q3](FAQ.md)| -|GPT2| [pytorch(huggingface)](https://github.com/huggingface/transformers/blob/master/notebooks/04-onnx-export.ipynb)|11|转换时需指定input shape,见[文档Q3](FAQ.md)| +|BERT| [pytorch(huggingface)](https://github.com/huggingface/transformers/blob/master/notebooks/04-onnx-export.ipynb)|11|转换时需指定input shape,见[文档Q3](../user_guides/FAQ.md)| +|GPT2| [pytorch(huggingface)](https://github.com/huggingface/transformers/blob/master/notebooks/04-onnx-export.ipynb)|11|转换时需指定input shape,见[文档Q3](../user_guides/FAQ.md)| ## PyTorch @@ -96,3 +96,6 @@ | FlaubertModel | [code](https://huggingface.co/transformers/model_doc/flaubert.html) |只支持trace模式| | Roberta| [code](https://huggingface.co/transformers/model_doc/roberta.html) |只支持trace模式| | XLMRobertaForTokenClassification|[code](https://huggingface.co/transformers/model_doc/xlmroberta.html) |只支持trace模式| +| EasyOCR_detector|[code](https://github.com/JaidedAI/EasyOCR/blob/master/easyocr/detection.py) |-| +| EasyOCR_recognizer|[code](https://github.com/JaidedAI/EasyOCR/blob/master/easyocr/recognition.py) |-| + diff --git a/x2paddle/core/program.py b/x2paddle/core/program.py index 27a26eb5ec82a78c261f3f77329ed16412822f68..3d59ae95693ddb3361ee67747bd790fea204efc8 100644 --- a/x2paddle/core/program.py +++ b/x2paddle/core/program.py @@ -26,6 +26,7 @@ import six import pickle import numpy as np from os import path as osp +from x2paddle.core.util import * class PaddleLayer(object): @@ -210,6 +211,8 @@ class PaddleGraph(object): layer_id, 0) == 0 and layer.kernel != "prim.assert" \ and layer.kernel != "prim.exception" \ and layer.kernel != "prim.warnings": + if layer.kernel == "paddle.to_tensor": + self.inputs_info.pop(layer.outputs[0]) invalid_list.append(layer_id) for layer_id in 
invalid_list: self.layers.pop(layer_id) @@ -272,7 +275,7 @@ class PaddleGraph(object): def gen_dygraph_model(self, save_dir, jit_type=None): if jit_type == "trace": - from x2paddle.optimizer.code_optimizer import HierarchicalTree + from x2paddle.optimizer.pytorch_code_optimizer import HierarchicalTree hierarchical_tree = HierarchicalTree(self) for layer_id, layer in self.layers.items(): hierarchical_tree.insert(layer) @@ -280,7 +283,7 @@ class PaddleGraph(object): self.dump_dygraph_parameter(save_dir) else: if self.source_type == "pytorch": - from x2paddle.optimizer.code_optimizer import ModuleGraph + from x2paddle.optimizer.pytorch_code_optimizer import ModuleGraph module_graph = ModuleGraph(self) module_graph.save_source_files(save_dir) self.dump_dygraph_parameter(save_dir) @@ -324,12 +327,10 @@ class PaddleGraph(object): write_code( f, [ - "from paddle.fluid.initializer import Constant", - "from paddle.fluid.param_attr import ParamAttr", - "import paddle.fluid as fluid", custom_import, - "import paddle", "import math", "", - + "import paddle", + "import math", + "", ], indent=0) if self.custom_code is not None: @@ -346,6 +347,8 @@ class PaddleGraph(object): ], indent=1) for layer_id, layer in self.layers.items(): + if layer.kernel.startswith("paddle"): + remove_default_attrs(layer.kernel, layer.attrs) edges_in = self.edges_in.get(layer_id, []) edges_out = self.edges_out.get(layer_id, []) if len(edges_in) == 0 and len(edges_out) == 0: @@ -425,8 +428,7 @@ class PaddleGraph(object): continue if layer.kernel == "paddle.to_tensor": data = layer.attrs["data"] - if not data.startswith("params["): - self.inputs.append(data) + self.inputs.append(data) if len(layer.blocks) > 0: for block in layer.blocks: block.get_dygraph_inputs() @@ -473,10 +475,7 @@ class PaddleGraph(object): custom_import = "" self.head = gen_codes( [ - "from paddle.fluid.initializer import Constant", - "from paddle.fluid.param_attr import ParamAttr", "import paddle", - "import paddle.fluid as fluid", 
"import math", custom_import, "", @@ -548,6 +547,8 @@ class PaddleGraph(object): gen_head() for layer_id, layer in self.layers.items(): + if layer.kernel.startswith("paddle"): + remove_default_attrs(layer.kernel, layer.attrs) if ("paddle.nn" in layer.kernel and "functional" not in layer.kernel ) or layer.kernel == "paddle.to_tensor" or \ layer.kernel.startswith("custom_layer") or \ @@ -578,7 +579,10 @@ class PaddleGraph(object): elif len(layer.outputs) == 2: line = layer.outputs[1] else: - line = ','.join(layer.outputs[1:]) + if layer.kernel == "paddle.nn.LSTM": + line = "{}, ({})".format(layer.outputs[1], ', '.join(layer.outputs[-2:])) + else: + line = ','.join(layer.outputs[1:]) if layer.kernel == "paddle.to_tensor" and layer.attrs[ "data"].startswith("params["): line += " = self.{}".format(layer.outputs[0]) diff --git a/x2paddle/core/util.py b/x2paddle/core/util.py index 2ecf3a070d692eb476ddb89f2798f9f013a2d3dd..45ba881913c2599efb971cb43aa9c02ec00b55ac 100644 --- a/x2paddle/core/util.py +++ b/x2paddle/core/util.py @@ -1,3 +1,4 @@ +# -*- coding:UTF-8 -*- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License" @@ -14,15 +15,61 @@ import numpy import math import os +import inspect def string(param): + """ 生成字符串。 + """ return "\'{}\'".format(param) def name_generator(nn_name, nn_name2id): + """ 生成paddle.nn类op的名字。 + + Args: + nn_name (str): 名字。 + nn_name2id (dict): key为名字,value为名字出现的次数-1。 + """ if nn_name in nn_name2id: nn_name2id[nn_name] += 1 else: nn_name2id[nn_name] = 0 real_nn_name = nn_name + str(nn_name2id[nn_name]) - return real_nn_name \ No newline at end of file + return real_nn_name + +def remove_default_attrs(kernel, attrs): + """ 删除每个OP的默认参数。 + + Args: + kernel (str): OP的类型名字。 + attrs (dict): 目前该OP所包含的参数, key为参数名,value为参数值。 + """ + def get_default_args(func): + signature = inspect.signature(func) + return { + k: v.default + for k, v in signature.parameters.items() + if v.default is not inspect.Parameter.empty + } + is_func = True + if "paddle.nn" in kernel and "functional"not in kernel: + is_func = False + import paddle + obj = paddle + for i, part in enumerate(kernel.split(".")): + if i == 0: + continue + obj = getattr(obj, part) + if is_func: + func = obj + else: + func = obj.__init__ + default_attrs = get_default_args(func) + for default_k, default_v in default_attrs.items(): + if default_k in attrs: + if (isinstance(attrs[default_k], list) or isinstance(attrs[default_k], tuple)) \ + and not is_func: + if len(set(attrs[default_k])) == 1: + attrs[default_k] = attrs[default_k][0] + if default_v == attrs[default_k]: + attrs.pop(default_k) \ No newline at end of file diff --git a/x2paddle/decoder/onnx_decoder.py b/x2paddle/decoder/onnx_decoder.py index 899c201b5bfaa60a0bca53b00754999f5641afe4..694b340e21b0e92cca15760dfc9f81aa89d97cd8 100644 --- a/x2paddle/decoder/onnx_decoder.py +++ b/x2paddle/decoder/onnx_decoder.py @@ -571,4 +571,4 @@ class ONNXDecoder(object): node.input[i] = self.make_variable_name(node.input[i]) for i in range(len(node.output)): node.output[i] = 
self.make_variable_name(node.output[i]) - return model \ No newline at end of file + return model diff --git a/x2paddle/op_mapper/dygraph/caffe2paddle/caffe_op_mapper.py b/x2paddle/op_mapper/dygraph/caffe2paddle/caffe_op_mapper.py index 93212c68812a1e17a725c78e079f7fe0033f0b01..f2ae88e88982d0de952c00c31914c4256b05f21b 100644 --- a/x2paddle/op_mapper/dygraph/caffe2paddle/caffe_op_mapper.py +++ b/x2paddle/op_mapper/dygraph/caffe2paddle/caffe_op_mapper.py @@ -367,57 +367,46 @@ class CaffeOpMapper(OpMapper): output_size=kernel) else: layer_attrs = { - 'pool_size': kernel, - 'pool_stride': stride, - 'pool_padding': pad, + 'kernel_size': kernel, + 'stride': stride, + 'padding': pad, 'ceil_mode': ceil_mode, - 'pool_type': string(pool_type), - 'exclusive': False, - 'global_pooling': global_pool, } - self.paddle_graph.add_layer( - "paddle.fluid.dygraph.Pool2D", - inputs={"input": input.name}, - outputs=layer_outputs, - **layer_attrs) -# layer_attrs = { -# 'kernel_size': kernel, -# 'stride': stride, -# 'padding': pad, -# 'ceil_mode': ceil_mode, -# } -# if params.pool == 0: -# self.paddle_graph.add_layer( -# "paddle.nn.MaxPool2D", -# inputs={"input": input.name}, -# outputs=layer_outputs, -# **layer_attrs) -# else: -# layer_attrs["count_include_pad"] = True -# self.paddle_graph.add_layer( -# "paddle.nn.AvgPool2D", -# inputs={"input": input.name}, -# outputs=layer_outputs, -# **layer_attrs) + if params.pool == 0: + self.paddle_graph.add_layer( + "paddle.nn.MaxPool2D", + inputs={"input": input.name}, + outputs=layer_outputs, + **layer_attrs) + else: + self.paddle_graph.add_layer( + "paddle.nn.AvgPool2D", + inputs={"input": input.name}, + outputs=layer_outputs, + **layer_attrs) def LRN(self, node): + lrn_name = name_generator("lrn", self.nn_name2id) + output_name = node.layer_name + layer_outputs = [lrn_name, output_name] assert len(node.inputs) == 1, "The count of LRN node\'s input is not 1." 
input = self.graph.get_input_node(node, idx=0, copy=True) params = node.layer.lrn_param assert params.local_size % 2 == 1 alpha = params.alpha / float(params.local_size) layer_attrs = { - "n": params.local_size, - "k": params.k, + "size": params.local_size, + "k": params.k, "alpha": alpha, - "beta": params.beta, + "beta": params.beta } self.paddle_graph.add_layer( - "fluid.layers.lrn", + "paddle.nn.LocalResponseNorm", inputs={"input": input.name}, - outputs=[node.layer_name], + outputs=layer_outputs, **layer_attrs) + def InnerProduct(self, node): linear_name = name_generator("linear", self.nn_name2id) output_name = node.layer_name @@ -1131,7 +1120,7 @@ class CaffeOpMapper(OpMapper): input = self.graph.get_input_node(node, idx=0, copy=True) params = node.layer.shuffle_channel_param self.paddle_graph.add_layer( - "fluid.layers.shuffle_channel", + "paddle.fluid.layers.shuffle_channel", inputs={"x": input.name}, outputs=[node.layer_name], group=params.group) diff --git a/x2paddle/op_mapper/dygraph/onnx2paddle/opset9/opset.py b/x2paddle/op_mapper/dygraph/onnx2paddle/opset9/opset.py index ea524e0f7d222e5193934ec9dfcaff256d8330f7..0eeb7993a5bf4f37b30a1ddd14ffffd166d0e222 100644 --- a/x2paddle/op_mapper/dygraph/onnx2paddle/opset9/opset.py +++ b/x2paddle/op_mapper/dygraph/onnx2paddle/opset9/opset.py @@ -14,8 +14,6 @@ from x2paddle.decoder.onnx_decoder import ONNXGraph, ONNXGraphNode, ONNXGraphDataNode from x2paddle.core.graph import GraphNode -from x2paddle.core.fluid_code import Layer -from x2paddle.core.fluid_code import FluidCode from x2paddle.core.util import * from functools import reduce import numpy as np @@ -86,7 +84,7 @@ class OpSet9(): elementwise_ops = { 'Add': 'paddle.add', 'Div': 'paddle.divide', - 'Sub': 'fluid.layers.elementwise_sub', + 'Sub': 'paddle.subtract', 'Mul': 'paddle.multiply', 'Pow': 'paddle.pow', } @@ -281,16 +279,11 @@ class OpSet9(): inputs={"x": var_hw}, outputs=[var_hw], dtype=string('int32')) -# inputs['size'] = var_hw - - # TODO(syf): all 
use - inputs['out_shape'] = var_hw - ipt = inputs.pop("x") - inputs["input"] = ipt - mode = node.get_attr('mode', 'nearest') - attrs.update({"align_corners": False}) + inputs['size'] = var_hw + attrs = {"align_corners": False, + "mode": string(node.get_attr('mode', 'nearest'))} self.paddle_graph.add_layer( - kernel="fluid.layers.resize_nearest", + kernel="paddle.nn.functional.interpolate", inputs=inputs, outputs=[node.name], **attrs) @@ -356,7 +349,7 @@ class OpSet9(): 'sampling_ratio': sampling_ratio, } self.paddle_graph.add_layer( - 'fluid.layers.roi_align', + 'paddle.fluid.layers.roi_align', inputs={'input': val_x.name, 'rois': val_rois.name}, outputs=[node.name], @@ -376,7 +369,7 @@ class OpSet9(): 'spatial_scale': spatial_scale, } self.paddle_graph.add_layer( - 'fluid.layers.roi_pool', + 'paddle.fluid.layers.roi_pool', inputs={'input': val_x.name, 'rois': val_rois.name}, outputs=[node.name], @@ -405,7 +398,7 @@ class OpSet9(): layer_attrs['data_format'] = string('NCHW') layer_attrs['value'] = value else: - paddle_op = 'fluid.layers.pad' + paddle_op = 'paddle.fluid.layers.pad' layer_attrs["pad_value"] = value if len(pads) == 4: paddings = np.array(pads).reshape( @@ -1062,40 +1055,23 @@ class OpSet9(): strides[1]) paddings = pad_h + pad_w - paddle_op = 'fluid.layers.pool{}d'.format(poolnd) - assert 2 <= poolnd <= 3, 'only pool2d and pool3d are supported' + op_name = name_generator("pool", self.nn_name2id) + output_name = node.name + layer_outputs = [op_name, output_name] + paddle_op = 'paddle.nn.AvgPool{}D'.format(poolnd) + assert 1 <= poolnd <= 3, 'only Pool1D, Pool2D and Pool3D are supported' layer_attrs = { - "pool_size": kernel_shape, - "pool_type": string('avg'), - "pool_stride": strides, - "pool_padding": paddings, + "kernel_size": kernel_shape, + "stride": strides, + "padding": paddings, "ceil_mode": ceil_mode, "exclusive": 'True', - "name": string(node.name) } self.paddle_graph.add_layer( paddle_op, - inputs={'input': val_x if isinstance(val_x, str) else 
val_x.name}, - outputs=[node.name], + inputs={'x': val_x.name}, + outputs=layer_outputs, **layer_attrs) - # TODO(syf): op has diff -# op_name = name_generator("pool", self.nn_name2id) -# output_name = node.name -# layer_outputs = [op_name, output_name] -# paddle_op = 'paddle.nn.Pool{}D'.format(poolnd) -# assert 1 <= poolnd <= 3, 'only Pool1D, Pool2D and Pool3D are supported' -# layer_attrs = { -# "kernel_size": kernel_shape, -# "stride": strides, -# "padding": paddings, -# "ceil_mode": ceil_mode, -# "exclusive": 'True', -# } -# self.paddle_graph.add_layer( -# paddle_op, -# inputs={'x': val_x.name}, -# outputs=layer_outputs, -# **layer_attrs) @print_mapping_info def Concat(self, node): @@ -1657,4 +1633,4 @@ class OpSet9(): 'paddle.argmax', inputs={"x": val_x.name}, outputs=[node.name], - **layer_attrs) + **layer_attrs) \ No newline at end of file diff --git a/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py b/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py index 8f443f166f84687b883c96b565534fda2bdd4d92..00121bb4128182506daca12e612e36e44375b421 100644 --- a/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py +++ b/x2paddle/op_mapper/dygraph/pytorch2paddle/aten.py @@ -426,11 +426,11 @@ def aten_avg_pool2d(mapper, graph, node): # 获取当前节点输入的list current_inputs = list(layer_inputs.values()) # 处理输入1,即%538 - layer_attrs["pool_size"] = mapper.attrs[inputs_name[1]] + layer_attrs["kernel_size"] = mapper.attrs[inputs_name[1]] # 处理输入2,即%539 - layer_attrs["pool_stride"] = mapper.attrs[inputs_name[2]] + layer_attrs["stride"] = mapper.attrs[inputs_name[2]] # 处理输入3,即%540 - layer_attrs["pool_padding"] = mapper.attrs[inputs_name[3]] + layer_attrs["padding"] = mapper.attrs[inputs_name[3]] # 处理输入4,即%273 layer_attrs["ceil_mode"] = mapper.attrs[inputs_name[4]] # 处理输入5,即%272 @@ -445,22 +445,13 @@ def aten_avg_pool2d(mapper, graph, node): key=mapper.attrs[inputs_name[6]], value=None) - # TODO(syf): The op has diff. 
-# self.paddle_graph.add_layer( -# kernel="paddle.nn.AvgPool2D", -# inputs={"input": input_name}, -# outputs=layer_outputs, -# kernel_size=k_size[2:4], -# stride=strides[2:4], -# padding=string(pad_mode)) - - layer_attrs["pool_type"] = string("avg") graph.add_layer( - "fluid.layers.pool2d", + kernel="paddle.nn.AvgPool2D", inputs=layer_inputs, - outputs=layer_outputs[1:], + outputs=layer_outputs, scope_name=scope_name, **layer_attrs) + return current_inputs, current_outputs def aten_avg_pool3d(mapper, graph, node): @@ -493,11 +484,11 @@ def aten_avg_pool3d(mapper, graph, node): # 获取当前节点输入的list current_inputs = list(layer_inputs.values()) # 处理输入1,即%538 - layer_attrs["pool_size"] = mapper.attrs[inputs_name[1]] + layer_attrs["kernel_size"] = mapper.attrs[inputs_name[1]] # 处理输入2,即%539 - layer_attrs["pool_stride"] = mapper.attrs[inputs_name[2]] + layer_attrs["stride"] = mapper.attrs[inputs_name[2]] # 处理输入3,即%540 - layer_attrs["pool_padding"] = mapper.attrs[inputs_name[3]] + layer_attrs["padding"] = mapper.attrs[inputs_name[3]] # 处理输入4,即%273 layer_attrs["ceil_mode"] = mapper.attrs[inputs_name[4]] # 处理输入5,即%272 @@ -512,20 +503,10 @@ def aten_avg_pool3d(mapper, graph, node): key=mapper.attrs[inputs_name[6]], value=None) - # TODO(syf): The op has diff. 
-# self.paddle_graph.add_layer( -# kernel="paddle.nn.AvgPool2D", -# inputs={"input": input_name}, -# outputs=layer_outputs, -# kernel_size=k_size[2:4], -# stride=strides[2:4], -# padding=string(pad_mode)) - - layer_attrs["pool_type"] = string("avg") graph.add_layer( - "fluid.layers.pool3d", + kernel="paddle.nn.AvgPool3D", inputs=layer_inputs, - outputs=layer_outputs[1:], + outputs=layer_outputs, scope_name=scope_name, **layer_attrs) return current_inputs, current_outputs @@ -561,11 +542,11 @@ def aten_avg_pool1d(mapper, graph, node): # 获取当前节点输入的list current_inputs = list(layer_inputs.values()) # 处理输入1,即%538 - layer_attrs["pool_size"] = mapper.attrs[inputs_name[1]] + layer_attrs["kernel_size"] = mapper.attrs[inputs_name[1]] # 处理输入2,即%539 - layer_attrs["pool_stride"] = mapper.attrs[inputs_name[2]] + layer_attrs["stride"] = mapper.attrs[inputs_name[2]] # 处理输入3,即%540 - layer_attrs["pool_padding"] = mapper.attrs[inputs_name[3]] + layer_attrs["padding"] = mapper.attrs[inputs_name[3]] # 处理输入4,即%273 layer_attrs["ceil_mode"] = mapper.attrs[inputs_name[4]] # 处理输入5,即%272 @@ -580,20 +561,10 @@ def aten_avg_pool1d(mapper, graph, node): key=mapper.attrs[inputs_name[6]], value=None) - # TODO(syf): The op has diff. 
-# self.paddle_graph.add_layer( -# kernel="paddle.nn.AvgPool2D", -# inputs={"input": input_name}, -# outputs=layer_outputs, -# kernel_size=k_size[2:4], -# stride=strides[2:4], -# padding=string(pad_mode)) - - layer_attrs["pool_type"] = string("avg") graph.add_layer( - "fluid.layers.pool1d", + kernel="paddle.nn.AvgPool1D", inputs=layer_inputs, - outputs=layer_outputs[1:], + outputs=layer_outputs, scope_name=scope_name, **layer_attrs) return current_inputs, current_outputs @@ -929,7 +900,7 @@ def aten_constant_pad_nd(mapper, graph, node): outputs=[inputs_name[0] + "_list"], scope_name=scope_name) block.add_layer( - "paddle.tensor.unsqueeze", + "paddle.unsqueeze", inputs={"x": inputs_name[0], "axis": inputs_name[0] + "_list"}, outputs=[inputs_name[0] + "_var"], @@ -941,7 +912,7 @@ def aten_constant_pad_nd(mapper, graph, node): scope_name=scope_name, **layer_attrs) block.add_layer( - "paddle.tensor.squeeze", + "paddle.squeeze", inputs={"x": output_name, "axis": inputs_name[0] + "_list"}, outputs=[output_name], @@ -1703,7 +1674,7 @@ def aten_expand_as(mapper, graph, node): outputs=[inputs_name[1] + "_type"], scope_name=scope_name) block.add_layer( - "fluid.layers.cast", + "paddle.cast", inputs={"x": inputs_name[0]}, outputs=[inputs_name[0]], scope_name=scope_name, @@ -1722,7 +1693,7 @@ def aten_expand_as(mapper, graph, node): if_layer = graph.layers[list(graph.layers.keys())[-1]] block = PaddleGraph(source_type="pytorch", parent_layer=if_layer, graph_type="dygraph") block.add_layer( - "fluid.layers.cast", + "paddle.cast", inputs={"x": layer_outputs[0]}, outputs=copy.deepcopy(layer_outputs), scope_name=scope_name, @@ -2515,6 +2486,89 @@ def aten_log(mapper, graph, node): return current_inputs, current_outputs +def aten_lstm(mapper, graph, node): + """ 构造长短期记忆网络(LSTM)的PaddleLayer。 + + TorchScript示例: + %input.96, %551, %552 = aten::lstm(%input.95, %734, %549, %526, %525, %524, %526, %526, %526) + 参数含义: + %input.96 (Tensor): 输出,由前向和后向cell的输出拼接得到。 + %551 (Tensor): cell 
state。 + %552 (Tensor): hidden state。 + %input.95 (Tensor): 网络输入。 + %734 (Tensor): 网络的初始状态。 + %549 (list): 所有权重组合成的list。 + %526 (bool): 是否使用bias。 + %525 (int): 网络层数。 + %524 (float): dropout概率。 + %526 (bool): 是否为训练阶段。 + %526 (bool): 是否使用双向LSTM。 + %526 (bool): 第一个维度是否为batch size。 + """ + scope_name = mapper.normalize_scope_name(node) + op_name = name_generator("lstm", mapper.nn_name2id) + output_names = mapper._get_outputs_name(node) + layer_outputs = [op_name] + layer_outputs.extend(output_names) + layer_inputs = {} + layer_attrs = {} + inputs_name, inputs_node = mapper._get_inputs_name(node) + # 获取当前节点输出的list + current_outputs = output_names + # 处理输入0,即%input.95 + mapper._check_input(graph, inputs_node[0], inputs_name[0], current_outputs, scope_name) + layer_inputs["input0"] = inputs_name[0] + # 处理输入1,即%734 + mapper._check_input(graph, inputs_node[1], inputs_name[1], current_outputs, scope_name) + layer_inputs["input1"] = inputs_name[1] + # 获取当前节点输入、输出的list + current_inputs = list(layer_inputs.values()) + # 处理输入2,即%734 + mapper._check_input(graph, inputs_node[2], inputs_name[2], current_outputs, scope_name) + graph.layers.pop(mapper.output2id[inputs_name[2]]) + param_inputs_name, _ = mapper._get_inputs_name(inputs_node[2]) + new_param_inputs_name = list() + for i, param_name in enumerate(param_inputs_name): + if i == 0: + layer_attrs["hidden_size"] = int(mapper.paddle_params[param_name].shape[0] / 4) + layer_attrs["input_size"] = int(mapper.paddle_params[param_name].shape[1]) + if len(mapper.paddle_params[param_name].shape) > 1: + part_name = param_name.split("_weight_")[-1] + mapper.paddle_params["{}.weight_{}".format(op_name, part_name)] = mapper.paddle_params[param_name] + new_param_inputs_name.append("{}.weight_{}".format(op_name, part_name)) + else: + part_name = param_name.split("_bias_")[-1] + mapper.paddle_params["{}.bias_{}".format(op_name, part_name)] = mapper.paddle_params[param_name] + mapper.paddle_params.pop(param_name) + + # 处理输入3,即%526 + is_bias = 
mapper.attrs[inputs_name[3]] + if not is_bias: + for param_name in new_param_inputs_name: + bias_name = param_name.replace("weight", "bias") + bias_shape= mapper.paddle_params[param_name].shape[:1] + mapper.paddle_params[bias_name] = np.zeros(bias_shape).astype("float32") + # 处理输入4,即%525 + layer_attrs["num_layers"] = mapper.attrs[inputs_name[4]] + # 处理输入5,即%524 + layer_attrs["dropout"] = mapper.attrs[inputs_name[5]] + # 处理输入7,即%526 + is_bidirectional = mapper.attrs[inputs_name[7]] + if is_bidirectional: + layer_attrs["direction"] = string("bidirectional") + # 处理输入8,即%526 + batch_first = mapper.attrs[inputs_name[8]] + if not batch_first: + layer_attrs["time_major"] = True + graph.add_layer( + "paddle.nn.LSTM", + inputs=layer_inputs, + outputs=layer_outputs, + scope_name=scope_name, + **layer_attrs) + return current_inputs, current_outputs + + def aten_lt(mapper, graph, node): """ 构造对比大小的PaddleLayer。 @@ -2847,22 +2901,13 @@ def aten_max_pool2d(mapper, graph, node): # 处理输入5,即%19 layer_attrs["ceil_mode"] = mapper.attrs[inputs_name[5]] layer_attrs_tmp["ceil_mode"] = mapper.attrs[inputs_name[5]] - - if mapper.attrs[inputs_name[5]] == True: - layer_attrs["pool_type"] = string("max") - graph.add_layer( - "fluid.layers.pool2d", - inputs=layer_inputs, - outputs=layer_outputs[1:], - scope_name=scope_name, - **layer_attrs_tmp) - else: - graph.add_layer( - "paddle.nn.MaxPool2D", - inputs=layer_inputs, - outputs=layer_outputs, - scope_name=scope_name, - **layer_attrs) + + graph.add_layer( + "paddle.nn.MaxPool2D", + inputs=layer_inputs, + outputs=layer_outputs, + scope_name=scope_name, + **layer_attrs) return current_inputs, current_outputs @@ -3991,7 +4036,7 @@ def aten_squeeze(mapper, graph, node): layer_inputs["axis"] = inputs_name[1] current_inputs.append(inputs_name[1]) graph.add_layer( - "paddle.tensor.squeeze", + "paddle.squeeze", inputs=layer_inputs, outputs=layer_outputs, scope_name=scope_name, diff --git a/x2paddle/op_mapper/dygraph/pytorch2paddle/prim.py 
b/x2paddle/op_mapper/dygraph/pytorch2paddle/prim.py index fb47a31e13fd7169f2ca297ca62ec9a5198f798c..9aaed676a301aaf5f89b5d9c7e0e287ed396d5d5 100644 --- a/x2paddle/op_mapper/dygraph/pytorch2paddle/prim.py +++ b/x2paddle/op_mapper/dygraph/pytorch2paddle/prim.py @@ -33,11 +33,33 @@ def prim_Constant(mapper, graph, node): output_type = output.type() if isinstance(value, str): value = string(value) - if str(output_type) == "Tensor": + if "Tensor" in str(output_type): tensor_value = value value = "{}".format(value) if "tensor" in value: - mapper.pytorch_params[output_name] = tensor_value.cpu().detach().numpy() + if isinstance(tensor_value, list) or isinstance(tensor_value, tuple): + name_dict = dict() + for i, tv in enumerate(tensor_value): + output_name_i = "{}_p{}".format(output_name,i) + key_i = "input{}".format(i) + mapper.paddle_params[output_name_i] = tv.cpu().detach().numpy() + graph.add_layer( + "self.create_parameter", + inputs={}, + outputs=[output_name_i], + scope_name=scope_name, + dtype=string(str(mapper.paddle_params[output_name_i].dtype)), + shape = mapper.paddle_params[output_name_i].shape, + default_initializer="paddle.nn.initializer.Constant(value=0.0)") + name_dict[key_i] = output_name_i + graph.add_layer( + "prim.list", + inputs=name_dict, + outputs=[output_name], + scope_name=scope_name) + return [], [output_name] + else: + mapper.pytorch_params[output_name] = tensor_value.cpu().detach().numpy() if "inf" in str(value): t = str(type(value)).split("'")[1] @@ -218,11 +240,13 @@ def prim_ListConstruct(mapper, graph, node): current_outputs = [output_name] # 处理每个输入 for i, input_name in enumerate(inputs_name): + mapper._check_input(graph, inputs_node[i], input_name, current_outputs, scope_name) layer_inputs["input{}".format(i)] = input_name # 获取当前节点输入的list current_inputs = list(layer_inputs.values()) - graph.add_layer("prim.list", inputs=layer_inputs, outputs=layer_outputs, scope_name=scope_name) + layer_id = graph.add_layer("prim.list", 
inputs=layer_inputs, outputs=layer_outputs, scope_name=scope_name) + mapper.output2id[output_name] = layer_id return current_inputs, current_outputs diff --git a/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_custom_layer/gather.py b/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_custom_layer/gather.py index 71b51f6222ea43262a0718b6730a6f24ecf773ee..10850ee5bbf91fa42e39f4dbd67ec1fa0d6682d7 100644 --- a/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_custom_layer/gather.py +++ b/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_custom_layer/gather.py @@ -13,7 +13,6 @@ # limitations under the License. import paddle -import paddle.fluid as fluid from itertools import product import numpy as np diff --git a/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_op_mapper.py b/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_op_mapper.py index 0df7cb7c05c74831f405cc4de323cd5162ec5d6c..5f43e33804d8e6d9ddd4b1ad4afc13e3c1a3c252 100644 --- a/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_op_mapper.py +++ b/x2paddle/op_mapper/dygraph/pytorch2paddle/pytorch_op_mapper.py @@ -37,6 +37,7 @@ class PyTorchOpMapper(OpMapper): self.scope_name_list = list() self.scope_name2id = dict() self.inputs_info = dict() + self.output2id = dict() # output名字和layer_id的关系,用于lstm去除前面的node # 转换 if not self.op_checker(decoder.graph): raise Exception("Model is not supported yet.") @@ -175,7 +176,7 @@ class PyTorchOpMapper(OpMapper): if add_dim: param = param[np.newaxis, :] self.paddle_params[output_name] = param - graph.add_layer( + layer_id = graph.add_layer( "self.create_parameter", inputs={}, outputs=[output_name], @@ -183,6 +184,7 @@ class PyTorchOpMapper(OpMapper): dtype=string(str(param.dtype)), shape = param.shape, default_initializer="paddle.nn.initializer.Constant(value=0.0)") + self.output2id[output_name] = layer_id else: if isinstance(param, dict) and "Tensor" in param and \ "parent_layer_id" in param: @@ -202,7 +204,7 @@ class PyTorchOpMapper(OpMapper): if add_dim: param = 
param[np.newaxis, :] self.paddle_params[output_name] = param - graph.add_layer( + layer_id = graph.add_layer( "self.create_parameter", inputs={}, outputs=[output_name], @@ -211,6 +213,7 @@ class PyTorchOpMapper(OpMapper): shape = param.shape, default_initializer="paddle.nn.initializer.Constant(value=0.0)") node_outputs.append(output_name) + self.output2id[output_name] = layer_id return # 若if-else外,则可直接引用if-else中的赋值结果 graph.add_layer( @@ -231,14 +234,15 @@ class PyTorchOpMapper(OpMapper): elif node.kind() == "prim::Constant" and output_name in self.pytorch_params: param = self.pytorch_params[output_name] self.paddle_params[output_name] = param - graph.add_layer( + layer_id = graph.add_layer( "self.create_parameter", inputs={}, outputs=[output_name], scope_name=scope_name, dtype=string(str(param.dtype)), shape = param.shape, - default_initializer="paddle.nn.initializer.Constant(value=0.0)") + default_initializer="paddle.nn.initializer.Constant(value=0.0)") + self.output2id[output_name] = layer_id def _get_inputs_name(self, node): diff --git a/x2paddle/op_mapper/dygraph/tf2paddle/tf_op_mapper.py b/x2paddle/op_mapper/dygraph/tf2paddle/tf_op_mapper.py index e0baa027dd87ed49e4e3feb34f3a345677f6b161..ad03f06dd262acd4eaddee3b68854fb4363ea051 100644 --- a/x2paddle/op_mapper/dygraph/tf2paddle/tf_op_mapper.py +++ b/x2paddle/op_mapper/dygraph/tf2paddle/tf_op_mapper.py @@ -70,7 +70,7 @@ class TFOpMapper(OpMapper): 'AddV2': 'paddle.add', 'RealDiv': 'paddle.divide', 'DivNoNan': 'paddle.divide', - 'Sub': 'fluid.layers.elementwise_sub', + 'Sub': 'paddle.subtract', 'Maximum': 'paddle.maximum', 'Minimum': 'paddle.minimum', 'Mul': 'paddle.multiply', @@ -346,7 +346,7 @@ class TFOpMapper(OpMapper): shape=[0, c, h, w]) self.paddle_graph.add_layer( - kernel="fluid.layers.pixel_shuffle", + kernel="paddle.nn.functional.pixel_shuffle", inputs={"x": reshape_name}, outputs=[node.name], upscale_factor=block_size) @@ -858,22 +858,22 @@ class TFOpMapper(OpMapper): layer_outputs = [op_name, 
output_name] # TODO(syf): The op has diff. -# self.paddle_graph.add_layer( -# kernel="paddle.nn.AvgPool2D", -# inputs={"input": input_name}, -# outputs=layer_outputs, -# kernel_size=k_size[2:4], -# stride=strides[2:4], -# padding=string(pad_mode)) - self.paddle_graph.add_layer( - kernel="fluid.layers.pool2d", + kernel="paddle.nn.AvgPool2D", inputs={"input": input_name}, - outputs=[node.name], - pool_size=k_size[2:4], - pool_type=string("avg"), - pool_stride=strides[2:4], - pool_padding=string(pad_mode)) + outputs=layer_outputs, + kernel_size=k_size[2:4], + stride=strides[2:4], + padding=string(pad_mode)) + +# self.paddle_graph.add_layer( +# kernel="fluid.layers.pool2d", +# inputs={"input": input_name}, +# outputs=[node.name], +# pool_size=k_size[2:4], +# pool_type=string("avg"), +# pool_stride=strides[2:4], +# pool_padding=string(pad_mode)) if data_format == "NHWC": self.paddle_graph.add_layer( @@ -1118,14 +1118,6 @@ class TFOpMapper(OpMapper): begin = begin.value.tolist() attrs['offsets'] = begin else: - # shape = begin.out_shapes[0] - # reshape_name = gen_name("slice", "reshape") - # self.paddle_graph.add_layer( - # kernel="fluid.layers.reshape", - # inputs={"x": begin.name}, - # outputs=[reshape_name], - # shape=shape) - # inputs['offsets'] = reshape_name begin = self.decoder.infer_tensor(begin, use_diff_inputs=False).tolist() attrs['offsets'] = begin if size.layer_type == "Const": @@ -1433,7 +1425,7 @@ class TFOpMapper(OpMapper): y_shape = y.out_shapes[0] # TODO(syf) layer_id = self.paddle_graph.add_layer( - "fluid.layers.elementwise_sub", inputs=inputs, outputs=[node.name]) + "paddle.subtract", inputs=inputs, outputs=[node.name]) self.paddle_graph.layers[layer_id].input_shapes = {"x": x_shape, "y": y_shape} inputs = {"x": node.name, "y": node.name} diff --git a/x2paddle/op_mapper/static/caffe2paddle/caffe_op_mapper.py b/x2paddle/op_mapper/static/caffe2paddle/caffe_op_mapper.py index 
53e7222bb31bd14cd40b2bdc4e384870029bd1e0..d9ba3fd4774669f96f7276fd3eb1414f74c8965a 100644 --- a/x2paddle/op_mapper/static/caffe2paddle/caffe_op_mapper.py +++ b/x2paddle/op_mapper/static/caffe2paddle/caffe_op_mapper.py @@ -401,18 +401,14 @@ class CaffeOpMapper(OpMapper): padding=pad, ceil_mode=ceil_mode) else: - # TODO(syf): The op has diff. self.paddle_graph.add_layer( - kernel="fluid.layers.pool2d", - inputs={"input": input.name}, + kernel="paddle.nn.functional.avg_pool2d", + inputs={"x": input.name}, outputs=[node.name], - pool_size=kernel, - pool_type=string("avg"), - pool_stride=stride, - pool_padding=pad, - ceil_mode=ceil_mode, - exclusive=False, - global_pooling=False) + kernel_size=kernel, + stride=stride, + padding=pad, + ceil_mode=ceil_mode) def LRN(self, node): assert len(node.inputs) == 1, 'The count of LRN node\'s input is not 1.' @@ -433,7 +429,7 @@ class CaffeOpMapper(OpMapper): 'name': string(node.name) } self.paddle_graph.add_layer( - kernel="fluid.layers.lrn", + kernel="paddle.fluid.layers.lrn", inputs={"input": input.name}, outputs=[node.name], **layer_attrs) @@ -1184,7 +1180,7 @@ class CaffeOpMapper(OpMapper): input = self.graph.get_input_node(node, idx=0, copy=True) params = node.layer.shuffle_channel_param self.paddle_graph.add_layer( - "fluid.layers.shuffle_channel", + "paddle.fluid.layers.shuffle_channel", inputs={"x": input.name}, outputs=[node.layer_name], group=params.group) diff --git a/x2paddle/op_mapper/static/onnx2paddle/opset9/opset.py b/x2paddle/op_mapper/static/onnx2paddle/opset9/opset.py index 5d00e6644d07a9508a64f83b8c4f313f2ef65858..3b436f07bba8aba053ada18f57ad2ae72efcd31a 100644 --- a/x2paddle/op_mapper/static/onnx2paddle/opset9/opset.py +++ b/x2paddle/op_mapper/static/onnx2paddle/opset9/opset.py @@ -14,8 +14,6 @@ from x2paddle.decoder.onnx_decoder import ONNXGraph, ONNXGraphNode, ONNXGraphDataNode from x2paddle.core.graph import GraphNode -from x2paddle.core.fluid_code import Layer -from x2paddle.core.fluid_code import 
FluidCode from x2paddle.core.util import string from functools import reduce import numpy as np @@ -88,7 +86,7 @@ class OpSet9(): elementwise_ops = { 'Add': 'paddle.add', 'Div': 'paddle.divide', - 'Sub': 'fluid.layers.elementwise_sub', + 'Sub': 'paddle.subtract', 'Mul': 'paddle.multiply', 'Pow': 'paddle.pow', } @@ -271,16 +269,11 @@ class OpSet9(): inputs={"x": var_hw}, outputs=[var_hw], dtype=string('int32')) -# inputs['size'] = var_hw - - # TODO(syf): all use - inputs['out_shape'] = var_hw - ipt = inputs.pop("x") - inputs["input"] = ipt - mode = node.get_attr('mode', 'nearest') - attrs.update({"align_corners": False}) + inputs['size'] = var_hw + attrs = {"align_corners": False, + "mode": string(node.get_attr('mode', 'nearest'))} self.paddle_graph.add_layer( - kernel="fluid.layers.resize_nearest", + kernel="paddle.nn.functional.interpolate", inputs=inputs, outputs=[node.name], **attrs) @@ -346,7 +339,7 @@ class OpSet9(): 'sampling_ratio': sampling_ratio, } self.paddle_graph.add_layer( - 'fluid.layers.roi_align', + 'paddle.fluid.layers.roi_align', inputs={'input': val_x.name, 'rois': val_rois.name}, outputs=[node.name], @@ -365,7 +358,7 @@ class OpSet9(): 'spatial_scale': spatial_scale, } self.paddle_graph.add_layer( - 'fluid.layers.roi_pool', + 'paddle.fluid.layers.roi_pool', inputs={'input': val_x.name, 'rois': val_rois.name}, outputs=[node.name], @@ -394,7 +387,7 @@ class OpSet9(): layer_attrs['data_format'] = string('NCHW') layer_attrs['value'] = value else: - paddle_op = 'fluid.layers.pad' + paddle_op = 'paddle.fluid.layers.pad' layer_attrs["pad_value"] = value if len(pads) == 4: paddings = np.array(pads).reshape( @@ -1046,23 +1039,21 @@ class OpSet9(): strides[1]) paddings = pad_h + pad_w - paddle_op = 'fluid.layers.pool{}d'.format(poolnd) - assert 2 <= poolnd <= 3, 'only pool2d and pool3d are supported' + paddle_op = 'paddle.nn.functional.avg_pool{}d'.format(poolnd) + assert 1 <= poolnd <= 3, 'only avg_pool1d, avg_pool2d and avg_pool3d are supported' 
layer_attrs = { - "pool_size": kernel_shape, - "pool_type": string('avg'), - "pool_stride": strides, - "pool_padding": paddings, + "kernel_size": kernel_shape, + "stride": strides, + "padding": paddings, "ceil_mode": ceil_mode, - "exclusive": 'True', + "exclusive": True, "name": string(node.name) } self.paddle_graph.add_layer( paddle_op, - inputs={'input': val_x if isinstance(val_x, str) else val_x.name}, + inputs={'x': val_x if isinstance(val_x, str) else val_x.name}, outputs=[node.name], **layer_attrs) - # TODO(syf): op has diff @print_mapping_info def Concat(self, node): diff --git a/x2paddle/op_mapper/static/tf2paddle/tf_op_mapper.py b/x2paddle/op_mapper/static/tf2paddle/tf_op_mapper.py index 7667cf9c629c75cbdbb1e9a17c8acb6825a0360f..700ac74a0d9bd512f0016ee64cfd1ff792ad4a5f 100644 --- a/x2paddle/op_mapper/static/tf2paddle/tf_op_mapper.py +++ b/x2paddle/op_mapper/static/tf2paddle/tf_op_mapper.py @@ -72,7 +72,7 @@ class TFOpMapper(OpMapper): 'RealDiv': 'paddle.divide', 'DivNoNan': 'paddle.divide', # TODO (syf): replace - 'Sub': 'fluid.layers.elementwise_sub', + 'Sub': 'paddle.subtract', 'Maximum': 'paddle.maximum', 'Minimum': 'paddle.minimum', 'Mul': 'paddle.multiply', @@ -315,7 +315,7 @@ class TFOpMapper(OpMapper): shape=[0, c, h, w]) self.paddle_graph.add_layer( - kernel="fluid.layers.pixel_shuffle", + kernel="paddle.nn.functional.pixel_shuffle", inputs={"x": reshape_name}, outputs=[node.name], upscale_factor=block_size) @@ -437,8 +437,6 @@ class TFOpMapper(OpMapper): if c == -1: attr = {"shape": [0, k_size[2], 0, 0]} - node.fluid_code.add_layer( - "reshape", inputs=input, output=input, param_attr=attr) self.paddle_graph.add_layer( kernel="paddle.reshape", inputs={"x": input_name}, @@ -842,13 +840,12 @@ class TFOpMapper(OpMapper): # TODO(syf): The op has diff. 
self.paddle_graph.add_layer( - kernel="fluid.layers.pool2d", - inputs={"input": input_name}, + kernel="paddle.nn.functional.avg_pool2d", + inputs={"x": input_name}, outputs=[node.name], - pool_size=k_size[2:4], - pool_type=string("avg"), - pool_stride=strides[2:4], - pool_padding=string(pad_mode)) + kernel_size=k_size[2:4], + stride=strides[2:4], + padding=string(pad_mode)) if data_format == "NHWC": self.paddle_graph.add_layer( @@ -1406,7 +1403,7 @@ class TFOpMapper(OpMapper): y_shape = y.out_shapes[0] # TODO(syf) layer_id = self.paddle_graph.add_layer( - "fluid.layers.elementwise_sub", inputs=inputs, outputs=[node.name]) + "paddle.subtract", inputs=inputs, outputs=[node.name]) self.paddle_graph.layers[layer_id].input_shapes = {"x": x_shape, "y": y_shape} inputs = {"x": node.name, "y": node.name} diff --git a/x2paddle/optimizer/fusion/dygraph/bn_scale_fuser.py b/x2paddle/optimizer/fusion/dygraph/bn_scale_fuser.py index 5b093d1b6b40871637f169dc858dd16d8e51a413..9fbed4c53b0bf5859f16048d6308bf753ca7f6a1 100644 --- a/x2paddle/optimizer/fusion/dygraph/bn_scale_fuser.py +++ b/x2paddle/optimizer/fusion/dygraph/bn_scale_fuser.py @@ -21,47 +21,94 @@ from x2paddle.core.util import * class DygraphBNScaleFuser(FuseBase): def __init__(self): super(DygraphBNScaleFuser, self).__init__(graph_type="dygraph") + patterns = list() def build_pattern(self): """ 描述需要替换的batchnorm2d图结构。 batchnorm2d层模式python实现代码示例: + 模式一: bn_conv1 = self.batchnorm0(conv1) scale_conv1_cparam1 = self.scale_conv1_cparam1 scale_conv1_mul = paddle.multiply(x=bn_conv1, y=scale_conv1_cparam1, axis=1) scale_conv1_cparam2 = self.scale_conv1_cparam2 - scale_conv1 = fluid.layers.elementwise_add(x=scale_conv1_mul, y=scale_conv1_cparam2, axis=1) + scale_conv1 = paddle.add(x=scale_conv1_mul, y=scale_conv1_cparam2, axis=1) + 模式二: + bn_conv1 = self.batchnorm0(conv1) + scale_conv1_cparam1 = self.scale_conv1_cparam1 + scale_conv1_mul = paddle.multiply(x=bn_conv1, y=scale_conv1_cparam1, axis=1) + scale_conv1_cparam2 = 
self.scale_conv1_cparam2 + scale_conv1_cparam2 = paddle.reshape(x=scale_conv1_cparam2, shape=[32, 1, 1]) + scale_conv1 = paddle.add(x=scale_conv1_mul, y=scale_conv1_cparam2, axis=1) """ def gen_name(id): return "x" + str(id) - self.pattern.add_layer( + pattern = PaddleGraph(graph_type="dygraph") + pattern.add_layer( + "paddle.nn.BatchNorm2D", + inputs={"input": "bn-input-0"}, + outputs=[gen_name(0)]) + pattern.add_layer( + "self.create_parameter", + inputs={}, + outputs=[gen_name(1)]) + inputs_dict = {} + inputs_dict['x'] = gen_name(0) + inputs_dict['y'] = gen_name(1) + pattern.add_layer( + "paddle.multiply", + inputs=inputs_dict, + outputs=[gen_name(2)]) + pattern.add_layer( + "self.create_parameter", + inputs={}, + outputs=[gen_name(3)]) + inputs_dict = {} + inputs_dict['x'] = gen_name(2) + inputs_dict['y'] = gen_name(3) + pattern.add_layer( + "paddle.add", + inputs=inputs_dict, + outputs=[gen_name(4)]) + pattern.build(inputs={"input-0": "bn-input-0"}) + self.patterns.append(pattern) + + pattern = PaddleGraph(graph_type="dygraph") + pattern.add_layer( "paddle.nn.BatchNorm2D", inputs={"input": "bn-input-0"}, outputs=[gen_name(0)]) - self.pattern.add_layer( + pattern.add_layer( "self.create_parameter", inputs={}, outputs=[gen_name(1)]) inputs_dict = {} inputs_dict['x'] = gen_name(0) inputs_dict['y'] = gen_name(1) - self.pattern.add_layer( + pattern.add_layer( "paddle.multiply", inputs=inputs_dict, outputs=[gen_name(2)]) - self.pattern.add_layer( + pattern.add_layer( "self.create_parameter", inputs={}, outputs=[gen_name(3)]) + pattern.add_layer( + "paddle.reshape", + inputs={"x": gen_name(3)}, + outputs=[gen_name(3)]) inputs_dict = {} inputs_dict['x'] = gen_name(2) inputs_dict['y'] = gen_name(3) - self.pattern.add_layer( - "fluid.layers.elementwise_add", + pattern.add_layer( + "paddle.add", inputs=inputs_dict, outputs=[gen_name(4)]) - self.pattern.build(inputs={"input-0": "bn-input-0"}) + pattern.build(inputs={"input-0": "bn-input-0"}) + 
self.patterns.append(pattern) + + def insert_new_layer(self, graph, parameters, matches): new_layer = self.gen_new_layer(parameters, matches) @@ -78,7 +125,7 @@ class DygraphBNScaleFuser(FuseBase): layer_attrs = layer.attrs layer_attrs.pop("weight_attr") layer_attrs.pop("bias_attr") - layer = matches[layers_id[4]] + layer = matches[layers_id[-1]] layer_outputs = [bn_name] + layer.outputs layer = matches[layers_id[1]] data0_name = layer.outputs[0] diff --git a/x2paddle/optimizer/fusion/dygraph/reshape_fuser.py b/x2paddle/optimizer/fusion/dygraph/reshape_fuser.py index a5a68258da941a5da302051055b22d3eb8a65f90..c49bab18c8762f99e8f6471e0f1ad61ac3031eae 100644 --- a/x2paddle/optimizer/fusion/dygraph/reshape_fuser.py +++ b/x2paddle/optimizer/fusion/dygraph/reshape_fuser.py @@ -27,7 +27,7 @@ class DygraphReshapeFuser(FuseBase): reshape层模式python实现代码示例: x165 = int(x164) x166 = [x158, x159, x165] - x167 = fluid.layers.reshape(x=x157, shape=x166) + x167 = paddle.reshape(x=x157, shape=x166) """ def gen_name(id): @@ -46,7 +46,7 @@ class DygraphReshapeFuser(FuseBase): }, outputs=[gen_name(1)]) self.pattern.add_layer( - "fluid.layers.reshape", + "paddle.reshape", inputs={"x": "reshape-input-3", "shape": gen_name(1)}, outputs=[gen_name(2)]) diff --git a/x2paddle/optimizer/fusion/dygraph/trace_fc_fuser.py b/x2paddle/optimizer/fusion/dygraph/trace_fc_fuser.py index fdce28a7453ab20114ba9897192a0fa994844b47..ca8058fd5579868b45d90ab26de4cbcfa5180ebf 100644 --- a/x2paddle/optimizer/fusion/dygraph/trace_fc_fuser.py +++ b/x2paddle/optimizer/fusion/dygraph/trace_fc_fuser.py @@ -49,7 +49,7 @@ class TraceFcFuser(FuseBase): inputs={}, outputs=[gen_name(0)]) pattern.add_layer( - "fluid.layers.transpose", + "paddle.transpose", inputs={"x": gen_name(0)}, outputs=[gen_name(1)], perm=[1, 0]) diff --git a/x2paddle/optimizer/fusion/static/bn_scale_fuser.py b/x2paddle/optimizer/fusion/static/bn_scale_fuser.py index 946e7fe804d58a0885a5b1d3e1decc7d16e1d687..bd1e5a77dee95dfc52c1a02dc13d705eff3cf7b4 
100644 --- a/x2paddle/optimizer/fusion/static/bn_scale_fuser.py +++ b/x2paddle/optimizer/fusion/static/bn_scale_fuser.py @@ -21,12 +21,14 @@ from x2paddle.core.util import * class Static_BNScaleFuser(FuseBase): def __init__(self): super(Static_BNScaleFuser, self).__init__(graph_type="static") - patterns = list() + self.patterns = list() def build_pattern(self): """ 描述需要替换的batchnorm2d图结构。 batchnorm2d层模式python实现代码示例: 模式一: + conv1_bn_mean = paddle.static.create_parameter(shape=(128,), dtype='float32', name='conv1_bn_mean') + conv1_bn_variance = paddle.static.create_parameter(shape=(128,), dtype='float32', name='conv1_bn_variance') conv1_bn = paddle.nn.functional.batch_norm(x=conv1, weight=conv1_bn_weight, bias=conv1_bn_bias, running_mean=conv1_bn_mean, running_var=conv1_bn_variance, epsilon=9.999999747378752e-06, momentum=0.9990000128746033) conv1_scale_cparam1 = paddle.static.create_parameter(shape=(32,), dtype='float32', name='conv1_scale_cparam1') conv1_scale_mul = paddle.multiply(x=conv1_bn, y=conv1_scale_cparam1, axis=1) @@ -34,6 +36,8 @@ class Static_BNScaleFuser(FuseBase): conv1_scale_cparam2 = paddle.reshape(x=conv1_scale_cparam2, shape=[32, 1, 1]) conv1_scale = paddle.add(x=conv1_scale_mul, y=conv1_scale_cparam2) 模式二: + conv1_bn_mean = paddle.static.create_parameter(shape=(128,), dtype='float32', name='conv1_bn_mean') + conv1_bn_variance = paddle.static.create_parameter(shape=(128,), dtype='float32', name='conv1_bn_variance') conv1_bn = paddle.nn.functional.batch_norm(x=conv1, weight=conv1_bn_weight, bias=conv1_bn_bias, running_mean=conv1_bn_mean, running_var=conv1_bn_variance, epsilon=9.999999747378752e-06, momentum=0.9990000128746033) conv1_scale_cparam1 = paddle.static.create_parameter(shape=(32,), dtype='float32', name='conv1_scale_cparam1') conv1_scale_mul = paddle.multiply(x=conv1_bn, y=conv1_scale_cparam1, axis=1) @@ -45,13 +49,21 @@ class Static_BNScaleFuser(FuseBase): return "x" + str(id) pattern = PaddleGraph(graph_type="dygraph") + 
pattern.add_layer( + "paddle.static.create_parameter", + inputs={}, + outputs=[gen_name(10)]) + pattern.add_layer( + "paddle.static.create_parameter", + inputs={}, + outputs=[gen_name(11)]) pattern.add_layer( "paddle.nn.functional.batch_norm", inputs={"input": "bn-input-0", "weight": "bn-input-1", "bias": "bn-input-2", - "running_mean": "bn-input-3", - "running_var": "bn-input-4",}, + "running_mean": gen_name(10), + "running_var": gen_name(11)}, outputs=[gen_name(0)]) pattern.add_layer( "paddle.static.create_parameter", @@ -81,19 +93,25 @@ class Static_BNScaleFuser(FuseBase): outputs=[gen_name(5)]) pattern.build(inputs={"input-0": "bn-input-0", "input-1": "bn-input-1", - "input-2": "bn-input-2", - "input-3": "bn-input-3", - "input-4": "bn-input-4"}) + "input-2": "bn-input-2"}) self.patterns.append(pattern) pattern = PaddleGraph(graph_type="dygraph") + pattern.add_layer( + "paddle.static.create_parameter", + inputs={}, + outputs=[gen_name(10)]) + pattern.add_layer( + "paddle.static.create_parameter", + inputs={}, + outputs=[gen_name(11)]) pattern.add_layer( "paddle.nn.functional.batch_norm", inputs={"input": "bn-input-0", "weight": "bn-input-1", "bias": "bn-input-2", - "running_mean": "bn-input-3", - "running_var": "bn-input-4",}, + "running_mean": gen_name(10), + "running_var": gen_name(11),}, outputs=[gen_name(0)]) pattern.add_layer( "paddle.static.create_parameter", @@ -119,25 +137,25 @@ class Static_BNScaleFuser(FuseBase): outputs=[gen_name(4)]) pattern.build(inputs={"input-0": "bn-input-0", "input-1": "bn-input-1", - "input-2": "bn-input-2", - "input-3": "bn-input-3", - "input-4": "bn-input-4"}) + "input-2": "bn-input-2"}) self.patterns.append(pattern) def insert_new_layer(self, graph, parameters, matches): new_layer = self.gen_new_layer(parameters, matches) new_layer_id = list(matches.keys())[-1] graph.layers[new_layer_id] = new_layer + matches.pop(list(matches.keys())[0]) + matches.pop(list(matches.keys())[0]) matches.pop(list(matches.keys())[1]) 
matches.pop(list(matches.keys())[2]) matches.pop(new_layer_id) def gen_new_layer(self, parameters, matches): layers_id = list(matches.keys()) - bn_layer = matches[layers_id[0]] - layer = matches[layers_id[1]] - bn_layer.inputs["weight"] = layer.outputs[0] + bn_layer = matches[layers_id[2]] layer = matches[layers_id[3]] + bn_layer.inputs["weight"] = layer.outputs[0] + layer = matches[layers_id[5]] bn_layer.inputs["bias"] = layer.outputs[0] bn_layer.id = layers_id[-1] layer = matches[layers_id[-1]] diff --git a/x2paddle/optimizer/pattern_matcher.py b/x2paddle/optimizer/pattern_matcher.py index 22e23cae425ebaf36d223a6e812a6899e498a935..8c3d796203fa3a67555f7dcb4e50cf14aff8a519 100644 --- a/x2paddle/optimizer/pattern_matcher.py +++ b/x2paddle/optimizer/pattern_matcher.py @@ -99,7 +99,7 @@ class PatternMatcher(object): return False else: subgraph_id2layers.pop(layer_id) - continue + continue else: if len(graph.edges_out[layer_id]) != len( pattern.edges_out[pattern_layer_id]): @@ -116,7 +116,20 @@ class PatternMatcher(object): else: subgraph_id2layers.pop(layer_id) continue - + else: + layer_out = graph.edges_out[layer_id] + pattern_layer_out = pattern.edges_out[pattern_layer_id] + is_pop = False + for i in range(len(layer_out)): + layer_id_out = layer_out[i] + pattern_layer_id_out = pattern_layer_out[i] + if layer_id_out != -1: + if graph_layers[layer_id_out].kernel != pattern.layers[pattern_layer_id_out].kernel: + is_pop = True + break + if is_pop: + subgraph_id2layers.pop(layer_id) + continue # 当为控制流时的处理 if layer.kernel == "prim.if" or layer.kernel == "prim.loop": if len(pattern_layer.blocks) != len(layer.blocks): @@ -161,7 +174,7 @@ class PatternMatcher(object): for i, (layer_id, layer) in enumerate(graph.layers.items()): match_info = get_subgraph(self.pattern, graph, i) - if match_info: + if match_info and match_info not in self.matches: self.matches.append(match_info) for j, block in enumerate(layer.blocks): if len(block.layers) > 0: @@ -343,4 +356,5 @@ class 
FuseBase(object): if layer_id in subgraph.layers: # layer_id可能是属于子图的,此时删除父layer,即删除整个子图 subgraph.layers.pop(layer_id) + \ No newline at end of file diff --git a/x2paddle/optimizer/code_optimizer/__init__.py b/x2paddle/optimizer/pytorch_code_optimizer/__init__.py similarity index 78% rename from x2paddle/optimizer/code_optimizer/__init__.py rename to x2paddle/optimizer/pytorch_code_optimizer/__init__.py index 6aba8a196de57797d27af44e916c349a38002b4a..bcef73ab268de515374480743351ca0f6d96b0ff 100644 --- a/x2paddle/optimizer/code_optimizer/__init__.py +++ b/x2paddle/optimizer/pytorch_code_optimizer/__init__.py @@ -13,5 +13,5 @@ # limitations under the License. -from x2paddle.optimizer.code_optimizer.hierachical_tree import HierarchicalTree -from x2paddle.optimizer.code_optimizer.module_graph import ModuleGraph \ No newline at end of file +from x2paddle.optimizer.pytorch_code_optimizer.hierachical_tree import HierarchicalTree +from x2paddle.optimizer.pytorch_code_optimizer.module_graph import ModuleGraph \ No newline at end of file diff --git a/x2paddle/optimizer/code_optimizer/hierachical_tree.py b/x2paddle/optimizer/pytorch_code_optimizer/hierachical_tree.py similarity index 90% rename from x2paddle/optimizer/code_optimizer/hierachical_tree.py rename to x2paddle/optimizer/pytorch_code_optimizer/hierachical_tree.py index 774bad699f9f319de28165c82282e7a313799800..ee36d2d3748d3915c09b0e3683f38675ad2119c4 100644 --- a/x2paddle/optimizer/code_optimizer/hierachical_tree.py +++ b/x2paddle/optimizer/pytorch_code_optimizer/hierachical_tree.py @@ -18,10 +18,10 @@ import copy import os.path as osp from treelib import Tree from queue import Queue -from x2paddle.optimizer.code_optimizer.layer_code_generator import gen_layer_code, rename_layers, NN_KERNEL_WITH_PARAMS, NN_KERNEL_NAME -from x2paddle.optimizer.code_optimizer.subgraphs_union import distinguish_sequential, get_inputs_outputs +from x2paddle.optimizer.pytorch_code_optimizer.layer_code_generator import gen_layer_code, 
rename_layers, NN_KERNEL_WITH_PARAMS, NN_KERNEL_NAME +from x2paddle.optimizer.pytorch_code_optimizer.subgraphs_union import distinguish_sequential, get_inputs_outputs from x2paddle.core.program import PaddleLayer -from x2paddle.optimizer.code_optimizer.parameter_tree import PamareterNode, PamareterTree +from x2paddle.optimizer.pytorch_code_optimizer.parameter_tree import PamareterNode, PamareterTree SEPARATOR_IN_SCOPE = "/" @@ -39,6 +39,7 @@ class HierarchicalTree(Tree): self.identifier_idx = dict() self.param_tree = PamareterTree() self.module_name2count = dict() + self.scope_name_list = list() def insert(self, layer): """ 往层次树中插入节点。 @@ -47,6 +48,7 @@ class HierarchicalTree(Tree): layer (PaddleLayer): 需要插入的节点。 """ scope_name = layer.scope_name + self.scope_name_list.append(scope_name) if scope_name == "": if layer.kernel == "prim.tuple" or layer.kernel == "prim.tuple_unpack": layer_id = layer.id @@ -55,12 +57,36 @@ class HierarchicalTree(Tree): layer_id_list.append(int(input_layer_id)) layer_id_list = list(set(layer_id_list)) layer_id_list.sort(reverse=True) - for input_layer_id in layer_id_list: - input_layer_id_str = str(input_layer_id) - if self.pd_graph.layers[input_layer_id_str].scope_name != "": + + if layer.kernel == "prim.tuple": + for i, input_layer_id in enumerate(layer_id_list): + input_layer_id_str = str(input_layer_id) scope_name = self.pd_graph.layers[input_layer_id_str].scope_name - break - layer.scope_name = scope_name + if i == 0: + min_scope_name = scope_name + else: + len1 = len(min_scope_name.split("/")) + len2 = len(scope_name.split("/")) + if scope_name not in self.scope_name_list: + min_scope_name = scope_name + continue + if len1 > len2: + min_scope_name = scope_name + if min_scope_name == "": + self.create_node(tag=layer.id, + identifier="no_scope_" + layer.id, + parent=self.pd_graph.name, + data=layer) + return + layer.scope_name = min_scope_name + scope_name = min_scope_name + else: + for input_layer_id in layer_id_list: + 
input_layer_id_str = str(input_layer_id) + if self.pd_graph.layers[input_layer_id_str].scope_name != "": + scope_name = self.pd_graph.layers[input_layer_id_str].scope_name + break + layer.scope_name = scope_name else: self.create_node(tag=layer.id, identifier="no_scope_" + layer.id, @@ -369,9 +395,6 @@ class HierarchicalTree(Tree): self.convert_subgraph_to_layer() self.update_parameters() import_list = ["import paddle", - "import paddle.fluid as fluid", - "from paddle.fluid.initializer import Constant", - "from paddle.fluid.param_attr import ParamAttr", "import math", "from x2paddle.op_mapper.dygraph.pytorch2paddle " + \ "import pytorch_custom_layer as x2paddle_nn" diff --git a/x2paddle/optimizer/code_optimizer/layer_code_generator.py b/x2paddle/optimizer/pytorch_code_optimizer/layer_code_generator.py similarity index 90% rename from x2paddle/optimizer/code_optimizer/layer_code_generator.py rename to x2paddle/optimizer/pytorch_code_optimizer/layer_code_generator.py index 3c5589a2e90177ca3223149dceba311c0e0d0d72..dc89e7ff26e0538d9ca81e06a43a949fe377f418 100644 --- a/x2paddle/optimizer/code_optimizer/layer_code_generator.py +++ b/x2paddle/optimizer/pytorch_code_optimizer/layer_code_generator.py @@ -14,7 +14,11 @@ # limitations under the License. 
import copy -from x2paddle.optimizer.code_optimizer.parameter_tree import PamareterNode +import os.path as osp +import x2paddle +from x2paddle.optimizer.pytorch_code_optimizer.parameter_tree import PamareterNode +from x2paddle.core.util import * + NN_KERNEL_NAME = {"paddle.nn.BatchNorm": "bn", "paddle.nn.LayerNorm": "layernorm", @@ -22,6 +26,7 @@ NN_KERNEL_NAME = {"paddle.nn.BatchNorm": "bn", "paddle.nn.Embedding": "embedding", "paddle.nn.Linear": "linear", "paddle.nn.Conv2DTranspose": "conv", + "paddle.nn.LSTM": "lstm", "paddle.nn.ReLU": "relu", "paddle.nn.ReLU6": "relu", "paddle.nn.Softmax": "softmax", @@ -36,7 +41,7 @@ NN_KERNEL_NAME = {"paddle.nn.BatchNorm": "bn", "paddle.nn.GELU": "gelu", "paddle.nn.Hardtanh": "tanh", "paddle.nn.LeakyReLU": "leakly_relu"} -NN_KERNEL_WITH_PARAMS = list(NN_KERNEL_NAME.keys())[:6] +NN_KERNEL_WITH_PARAMS = list(NN_KERNEL_NAME.keys())[:7] def rename_layers(layers, param_tree=None, is_rename_module=False): """ 对子模块的输入输出等进行重命名。 @@ -125,14 +130,30 @@ def rename_layers(layers, param_tree=None, is_rename_module=False): return layers_cp, nn_param_nodes, new_names -def gen_layer_code(graph, sub_layers, sub_layers_name, different_attrs=list()): +def _update_attrs(layer, different_attrs): + if "module" in layer.kernel or "prim" in layer.kernel: + return + common_attrs = copy.deepcopy(layer.attrs) + special_attrs = dict() + for k, v in layer.attrs.items(): + if len(layer.outputs) < 1: + break + key_name = "{}_{}".format(layer.outputs[0], k) + if key_name in different_attrs: + common_attrs.pop(k) + special_attrs[k] = v + remove_default_attrs(layer.kernel, common_attrs) + common_attrs.update(special_attrs) + layer.attrs = common_attrs + +def gen_layer_code(graph, sub_layers, sub_layers_name, different_attrs=dict()): """ 根据sub_layers生成对应的Module代码。 Args: graph (x2paddle.core.program.PaddleGraph): 整个Paddle图。 sub_layers (dict): 子图的id和其对应layer组成的字典。 sub_layers_name (str): 子图的名字。 - different_attrs (list): 属性列表,这些属性表明在被调用时赋予不同值。 + different_attrs 
(dict/list): 属性字典/列表,这些属性表明在被调用时赋予不同值。 """ def gen_codes(code_list, indent=0): """ 根据code_list生成代码段。 @@ -157,7 +178,13 @@ def gen_layer_code(graph, sub_layers, sub_layers_name, different_attrs=list()): # 生成Layer的头部代码 head = gen_codes(["class {}(paddle.nn.Layer):".format(sub_layers_name)], indent=0) # 生成init函数的头部代码 - attrs_str = ", ".join(different_attrs) + diff_str_list = list() + if isinstance(different_attrs, dict): + for k, v in different_attrs.items(): + diff_str_list.append("{}={}".format(k, v)) + attrs_str = ", ".join(diff_str_list) + else: + attrs_str = ", ".join(different_attrs) init_func_head = \ gen_codes(["def __init__(self, {}):".format(attrs_str)], indent=1) + \ gen_codes(["super({}, self).__init__()".format(sub_layers_name)], indent=2) @@ -213,6 +240,7 @@ def gen_layer_code(graph, sub_layers, sub_layers_name, different_attrs=list()): outputs.append(layer.outputs[0]) no_output_count = 0 for i, (layer_id, layer) in enumerate(sub_layers.items()): + _update_attrs(layer, different_attrs) if ("paddle.nn" in layer.kernel and "functional" not in layer.kernel) or \ layer.kernel.startswith("custom_layer"): line = "self.{}".format(layer.outputs[0]) @@ -235,7 +263,10 @@ def gen_layer_code(graph, sub_layers, sub_layers_name, different_attrs=list()): elif len(layer.outputs) == 2: line = layer.outputs[1] else: - line = ','.join(layer.outputs[1:]) + if layer.kernel == "paddle.nn.LSTM": + line = "{}, ({})".format(layer.outputs[1], ', '.join(layer.outputs[-2:])) + else: + line = ','.join(layer.outputs[1:]) line += " = self.{}(".format(layer.outputs[0]) for k, v in layer.inputs.items(): @@ -263,7 +294,7 @@ def gen_layer_code(graph, sub_layers, sub_layers_name, different_attrs=list()): init_func=init_func, forward_func=forward_func, layer_id=layer_id, - different_attrs=different_attrs) + different_attrs=list(different_attrs.keys()) if isinstance(different_attrs, dict) else different_attrs) cur_outputs.extend(layer.outputs) else: raise Exception( diff --git 
a/x2paddle/optimizer/code_optimizer/module_graph.py b/x2paddle/optimizer/pytorch_code_optimizer/module_graph.py similarity index 92% rename from x2paddle/optimizer/code_optimizer/module_graph.py rename to x2paddle/optimizer/pytorch_code_optimizer/module_graph.py index 9045ba67a9fff7f8db51f43436fc99e26266dc4a..54e2cbd34dd0111d3cd4b91aeffb7a255201a829 100644 --- a/x2paddle/optimizer/code_optimizer/module_graph.py +++ b/x2paddle/optimizer/pytorch_code_optimizer/module_graph.py @@ -17,9 +17,9 @@ import copy import os import os.path as osp from x2paddle.core.program import PaddleLayer -from x2paddle.optimizer.code_optimizer.subgraphs_union import construct_attrs_table, get_inputs_outputs -from x2paddle.optimizer.code_optimizer.layer_code_generator import gen_layer_code, rename_layers -from x2paddle.optimizer.code_optimizer.parameter_tree import PamareterNode, PamareterTree +from x2paddle.optimizer.pytorch_code_optimizer.subgraphs_union import construct_attrs_table, get_inputs_outputs +from x2paddle.optimizer.pytorch_code_optimizer.layer_code_generator import gen_layer_code, rename_layers +from x2paddle.optimizer.pytorch_code_optimizer.parameter_tree import PamareterNode, PamareterTree NoModuleStart = ["paddle.nn.ReLU"] @@ -179,16 +179,27 @@ class ModuleGraph(object): def analyze_attrs_table(self, attrs_table): """ 分析属性表格,哪些属性取值不一致。 """ - diff_attrs_column = list() + diff_attrs_column = dict() for column in list(attrs_table.columns): elements = list(attrs_table.get(column)) - base = elements[0] - for element in elements[1:]: - if isinstance(base, str) and "'" not in base: - break - if element != base: - diff_attrs_column.append(column) + elements_list = list() + count_list = list() + for element in elements: + if isinstance(element, str) and "'" not in element: break + if element not in elements_list: + count_list.append(1) + elements_list.append(element) + else: + index = elements_list.index(element) + count_list[index] += 1 + if len(elements_list) > 1: + max_ct = 0 + 
for k, v in zip(elements_list, count_list): + if v > max_ct and str(k) != "nan" : + max_ele = k + max_ct = v + diff_attrs_column[column] = max_ele return diff_attrs_column def analyze_graph(self, sub_layers_list): @@ -258,8 +269,10 @@ class ModuleGraph(object): outputs = ["{}_{}".format(mn, index)] + outputs node_name = "{}_{}".format(module_name, index) diff_attrs = dict() - for column in diff_attrs_column: - diff_attrs[column] = attrs_table.get(column).loc[node_name] + for column, element in diff_attrs_column.items(): + current_element = attrs_table.get(column).loc[node_name] + if current_element != element: + diff_attrs[column] = current_element new_layer = PaddleLayer(id=list(sub_layers.keys())[-1], kernel="module", inputs=inputs_dict, @@ -352,9 +365,6 @@ class ModuleGraph(object): self.convert_subgraph_to_layer(combination, combination_id) self.update_parameters() import_list = ["import paddle", - "import paddle.fluid as fluid", - "from paddle.fluid.initializer import Constant", - "from paddle.fluid.param_attr import ParamAttr", "import math", "from x2paddle.op_mapper.dygraph.pytorch2paddle " + \ "import pytorch_custom_layer as x2paddle_nn" diff --git a/x2paddle/optimizer/code_optimizer/parameter_tree.py b/x2paddle/optimizer/pytorch_code_optimizer/parameter_tree.py similarity index 100% rename from x2paddle/optimizer/code_optimizer/parameter_tree.py rename to x2paddle/optimizer/pytorch_code_optimizer/parameter_tree.py diff --git a/x2paddle/optimizer/code_optimizer/subgraphs_union.py b/x2paddle/optimizer/pytorch_code_optimizer/subgraphs_union.py similarity index 98% rename from x2paddle/optimizer/code_optimizer/subgraphs_union.py rename to x2paddle/optimizer/pytorch_code_optimizer/subgraphs_union.py index ee804eb3093caaaf99ef880acd2c65b85585714b..24779c38d7ae66ffe9af7faafd5076fef8341abb 100644 --- a/x2paddle/optimizer/code_optimizer/subgraphs_union.py +++ b/x2paddle/optimizer/pytorch_code_optimizer/subgraphs_union.py @@ -16,7 +16,7 @@ import copy import pandas 
as pd -from x2paddle.optimizer.code_optimizer.layer_code_generator import rename_layers +from x2paddle.optimizer.pytorch_code_optimizer.layer_code_generator import rename_layers def construct_attrs_table(sub_layers_list, node_name2sub_layers=None, module_name=None):