for 0.8.4

cf7f9b88 · sunyanfang01 · 85e44863 · cf7f9b88 · cf7f9b88 · cf7f9b88
66 changed files
--- a/x2paddle/__pycache__/__init__.cpython-37.pyc
+++ b/x2paddle/__pycache__/__init__.cpython-37.pyc
--- a/x2paddle/core/__pycache__/__init__.cpython-37.pyc
+++ b/x2paddle/core/__pycache__/__init__.cpython-37.pyc
--- a/x2paddle/core/__pycache__/program.cpython-37.pyc
+++ b/x2paddle/core/__pycache__/program.cpython-37.pyc
--- a/x2paddle/x2paddle/__init__.py
+++ b/x2paddle/x2paddle/__init__.py
+# Package version string; printed by the "--version" flag of the converter CLI.
+__version__ = "0.8.4"
+from .core.program import PaddleProgram
+# Module-level PaddleProgram instance, created once when the package is imported.
+program = PaddleProgram()
+# Maps every "<op_name>.<var_name>" base name to the last serial number issued.
+name_counter = {}
+def gen_name(op_name, var_name):
+    """Return a unique name of the form "<op_name>.<var_name>.<n>".
+    The serial n starts at 0 for each (op_name, var_name) pair and grows by
+    one on every later call with the same pair. The state is kept in the
+    module-level name_counter dict.
+    """
+    base = "{}.{}".format(op_name, var_name)
+    serial = name_counter.get(base, -1) + 1
+    name_counter[base] = serial
+    return base + "." + str(serial)
--- a/x2paddle/x2paddle/convert.py
+++ b/x2paddle/x2paddle/convert.py
+# Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from six import text_type as _text_type
+import argparse
+import sys
+def arg_parser():
+    """Build the argparse parser for the converter command line.
+    Returns:
+        argparse.ArgumentParser: parser with all options registered in the
+        order of the table below.
+    """
+    # One row per option: (long flag, short flag, add_argument keywords).
+    option_table = [
+        ("--model", "-m",
+         dict(type=_text_type,
+              default=None,
+              help="define model file path for tensorflow or onnx")),
+        ("--prototxt", "-p",
+         dict(type=_text_type,
+              default=None,
+              help="prototxt file of caffe model")),
+        ("--weight", "-w",
+         dict(type=_text_type,
+              default=None,
+              help="weight file of caffe model")),
+        ("--save_dir", "-s",
+         dict(type=_text_type,
+              default=None,
+              help="path to save translated model")),
+        ("--framework", "-f",
+         dict(type=_text_type,
+              default=None,
+              help="define which deeplearning framework(tensorflow/caffe/onnx/paddle2onnx)")),
+        ("--caffe_proto", "-c",
+         dict(type=_text_type,
+              default=None,
+              help="optional: the .py file compiled by caffe proto file of caffe model")),
+        ("--version", "-v",
+         dict(action="store_true",
+              default=False,
+              help="get version of x2paddle")),
+        # Parsed as text ("True"/"False"); main() validates and converts it.
+        ("--without_data_format_optimization", "-wo",
+         dict(type=_text_type,
+              default="True",
+              help="tf model conversion without data format optimization")),
+        ("--define_input_shape", "-d",
+         dict(action="store_true",
+              default=False,
+              help="define input shape for tf model")),
+        ("--onnx_opset", "-oo",
+         dict(type=int,
+              default=10,
+              help="when paddle2onnx set onnx opset version to export")),
+        ("--params_merge", "-pm",
+         dict(action="store_true",
+              default=False,
+              help="define whether merge the params")),
+    ]
+    parser = argparse.ArgumentParser()
+    for long_flag, short_flag, settings in option_table:
+        parser.add_argument(long_flag, short_flag, **settings)
+    return parser
+def tf2paddle(model_path,
+              save_dir,
+              without_data_format_optimization,
+              define_input_shape=False,
+              params_merge=False):
+    """Translate a TensorFlow model into a Paddle model stored under save_dir.
+    Args:
+        model_path: path of the TensorFlow model, handed to TFDecoder.
+        save_dir: directory passed to mapper.save_inference_model.
+        without_data_format_optimization: when true the graph is mapped with
+            TFOpMapperNHWC; when false TFOpMapper is used together with the
+            experimental optimizer passes.
+        define_input_shape: forwarded to TFDecoder.
+        params_merge: forwarded to mapper.save_inference_model.
+    Returns:
+        None. When TensorFlow cannot be imported, or its major version is not
+        1, an error is printed and nothing is converted.
+    """
+    import os
+    # Set before TensorFlow is imported, as the original flow did
+    # (presumably to silence TensorFlow's native logging).
+    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
+    # check tensorflow installation and version
+    try:
+        import tensorflow as tf
+    except ImportError:
+        print(
+            "[ERROR] Tensorflow is not installed, use \"pip install tensorflow\"."
+        )
+        return
+    # Compare the major version as a number: comparing version strings is
+    # lexicographic and would, for instance, order '10.0.0' before '2.0.0'.
+    try:
+        major = int(tf.__version__.split('.')[0])
+    except ValueError:
+        major = None
+    if major != 1:
+        print(
+            "[ERROR] 1.0.0<=tensorflow<2.0.0 is required, and v1.14.0 is recommended"
+        )
+        return
+    from x2paddle.decoder.tf_decoder import TFDecoder
+    from x2paddle.op_mapper.tf_op_mapper import TFOpMapper
+    from x2paddle.op_mapper.tf_op_mapper_nhwc import TFOpMapperNHWC
+    from x2paddle.optimizer.tf_optimizer import TFOptimizer
+    print("Now translating model from tensorflow to paddle.")
+    model = TFDecoder(model_path, define_input_shape=define_input_shape)
+    if not without_data_format_optimization:
+        mapper = TFOpMapper(model)
+        optimizer = TFOptimizer(mapper)
+        # necessary optimization
+        optimizer.delete_redundance_code()
+        # optimizer below is experimental
+        optimizer.optimize_elementwise_op()
+        optimizer.merge_activation()
+        optimizer.merge_bias()
+        optimizer.optimize_sub_graph()
+        # Passes left disabled by the original author:
+#        optimizer.merge_batch_norm()
+#        optimizer.merge_prelu()
+    else:
+        mapper = TFOpMapperNHWC(model)
+        optimizer = TFOptimizer(mapper)
+        optimizer.delete_redundance_code()
+        optimizer.strip_graph()
+        optimizer.merge_activation()
+        optimizer.merge_bias()
+        optimizer.make_nchw_input_output()
+        optimizer.remove_transpose()
+    mapper.save_inference_model(save_dir, params_merge)
+def caffe2paddle(proto, weight, save_dir, caffe_proto, params_merge=False):
+    """Translate a Caffe model into a Paddle model stored under save_dir.
+    Args:
+        proto: prototxt file, handed to CaffeDecoder.
+        weight: weight file, handed to CaffeDecoder.
+        save_dir: directory passed to mapper.save_inference_model.
+        caffe_proto: handed to CaffeDecoder as its third argument.
+        params_merge: forwarded to mapper.save_inference_model.
+    Raises:
+        AssertionError: when google.protobuf is older than 3.6.
+    """
+    from x2paddle.decoder.caffe_decoder import CaffeDecoder
+    from x2paddle.op_mapper.caffe_op_mapper import CaffeOpMapper
+    from x2paddle.optimizer.caffe_optimizer import CaffeOptimizer
+    import google.protobuf as gpb
+    pieces = gpb.__version__.split('.')
+    major = int(pieces[0])
+    # The minor component is only parsed when the major version is exactly 3.
+    version_satisfy = major > 3 or (major == 3 and int(pieces[1]) >= 6)
+    assert version_satisfy, '[ERROR] google.protobuf >= 3.6.0 is required'
+    print("Now translating model from caffe to paddle.")
+    decoded = CaffeDecoder(proto, weight, caffe_proto)
+    mapper = CaffeOpMapper(decoded)
+    passes = CaffeOptimizer(mapper)
+    passes.merge_bn_scale()
+    passes.merge_op_activation()
+    mapper.save_inference_model(save_dir, params_merge)
+def onnx2paddle(model_path, save_dir, params_merge=False):
+    """Translate an ONNX model into a Paddle model stored under save_dir.
+    Args:
+        model_path: path of the ONNX model, handed to ONNXDecoder.
+        save_dir: directory passed to mapper.save_inference_model.
+        params_merge: forwarded to mapper.save_inference_model.
+    Returns:
+        None. When onnx cannot be imported, or is older than 1.6, an error is
+        printed and nothing is converted.
+    """
+    # check onnx installation and version
+    try:
+        import onnx
+        version = onnx.version.version
+    except (ImportError, AttributeError):
+        print("[ERROR] onnx is not installed, use \"pip install onnx==1.6.0\".")
+        return
+    import re
+    # Compare (major, minor) as integers. A plain string comparison ranks
+    # '1.10.0' below '1.6.0' and would reject newer onnx releases.
+    release = tuple(int(num) for num in re.findall(r'\d+', version)[:2])
+    if release < (1, 6):
+        print("[ERROR] onnx>=1.6.0 is required")
+        return
+    print("Now translating model from onnx to paddle.")
+    from x2paddle.op_mapper.onnx2paddle.onnx_op_mapper import ONNXOpMapper
+    from x2paddle.decoder.onnx_decoder import ONNXDecoder
+    from x2paddle.optimizer.onnx_optimizer import ONNXOptimizer
+    model = ONNXDecoder(model_path)
+    mapper = ONNXOpMapper(model)
+    print("Model optimizing ...")
+    # Only the constructor is invoked, as in the original; the instance itself
+    # is not used afterwards.
+    ONNXOptimizer(mapper)
+    print("Model optimized.")
+    print("Paddle model and code generating ...")
+    mapper.save_inference_model(save_dir, params_merge)
+    print("Paddle model and code generated.")
+def paddle2onnx(model_path, save_dir, opset_version=10):
+    """Export a Paddle model to ONNX.
+    Args:
+        model_path: model location handed to PaddleDecoder together with the
+            fixed file names '__model__' and '__params__'.
+        save_dir: output location handed to PaddleOpMapper.convert.
+        opset_version: ONNX opset version forwarded to the converter.
+    """
+    from x2paddle.decoder.paddle_decoder import PaddleDecoder
+    from x2paddle.op_mapper.paddle2onnx.paddle_op_mapper import PaddleOpMapper
+    import paddle.fluid as fluid
+    decoded = PaddleDecoder(model_path, '__model__', '__params__')
+    converter = PaddleOpMapper()
+    converter.convert(
+        decoded.program,
+        save_dir,
+        scope=fluid.global_scope(),
+        opset_version=opset_version)
+def main():
+    """Command-line entry point: parse the arguments and run one converter.
+    With no arguments a short usage hint is printed. "--version" prints the
+    package version. Otherwise "--framework" selects tensorflow, caffe, onnx
+    or paddle2onnx, after checking that paddlepaddle 1.x (>= 1.6) or the
+    develop build ("0.0.0") is installed.
+    Raises:
+        AssertionError: when a required option for the chosen framework is
+            missing or "--without_data_format_optimization" is not
+            "True"/"False".
+        Exception: when "--framework" names an unsupported framework.
+    """
+    if len(sys.argv) < 2:
+        print("Use \"x2paddle -h\" to print the help information")
+        print("For more information, please follow our github repo below:)")
+        print("\nGithub: https://github.com/PaddlePaddle/X2Paddle.git\n")
+        return
+    parser = arg_parser()
+    args = parser.parse_args()
+    if args.version:
+        import x2paddle
+        print("x2paddle-{} with python>=3.5, paddlepaddle>=1.6.0\n".format(
+            x2paddle.__version__))
+        return
+    assert args.framework is not None, "--framework is not defined(support tensorflow/caffe/onnx)"
+    assert args.save_dir is not None, "--save_dir is not defined"
+    try:
+        import paddle
+    except ImportError:
+        print(
+            "[ERROR] paddlepaddle not installed, use \"pip install paddlepaddle\""
+        )
+        # Every conversion path below imports paddle, so stop here instead of
+        # failing later with an unrelated traceback.
+        return
+    print("paddle.__version__ = {}".format(paddle.__version__))
+    version_parts = paddle.__version__.split('.')
+    if version_parts[:3] == ['0', '0', '0']:
+        print("[WARNING] You are use develop version of paddlepaddle")
+    else:
+        # Only major/minor are inspected, so versions with extra components
+        # (for example a fourth ".post" part) are accepted as well.
+        try:
+            major, minor = int(version_parts[0]), int(version_parts[1])
+        except (ValueError, IndexError):
+            major, minor = None, None
+        if major != 1 or minor < 6:
+            print("[ERROR] paddlepaddle>=1.6.0 is required")
+            return
+    if args.framework == "tensorflow":
+        assert args.model is not None, "--model should be defined while translating tensorflow model"
+        assert args.without_data_format_optimization in [
+            "True", "False"
+        ], "--the param without_data_format_optimization should be defined True or False"
+        # The flag arrives as text; turn it into a real boolean.
+        without_data_format_optimization = (
+            args.without_data_format_optimization == "True")
+        tf2paddle(args.model, args.save_dir, without_data_format_optimization,
+                  args.define_input_shape, args.params_merge)
+    elif args.framework == "caffe":
+        assert args.prototxt is not None and args.weight is not None, "--prototxt and --weight should be defined while translating caffe model"
+        caffe2paddle(args.prototxt, args.weight, args.save_dir,
+                     args.caffe_proto, args.params_merge)
+    elif args.framework == "onnx":
+        assert args.model is not None, "--model should be defined while translating onnx model"
+        onnx2paddle(args.model, args.save_dir, args.params_merge)
+    elif args.framework == "paddle2onnx":
+        assert args.model is not None, "--model should be defined while translating paddle model to onnx"
+        paddle2onnx(args.model, args.save_dir, opset_version=args.onnx_opset)
+    else:
+        raise Exception(
+            "--framework only support tensorflow/caffe/onnx/paddle2onnx now")
+if __name__ == "__main__":
+    main()
--- a/x2paddle/x2paddle/core/__init__.py
+++ b/x2paddle/x2paddle/core/__init__.py
--- a/x2paddle/x2paddle/core/fluid_code.py
+++ b/x2paddle/x2paddle/core/fluid_code.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from x2paddle.core.graph import GraphNode
+from x2paddle.core.util import *
+import collections
+import six
+class Layer(object):
+    """One generated statement: an op call or a plain assignment.
+    get_code() renders the statement as source text from the attributes set
+    on the instance.
+    """
+    def __init__(self):
+        self.op = None  # op name, or "=" for a plain assignment
+        self.param_attr = dict()  # keyword arguments rendered after the inputs
+        self.inputs = dict()  # list, dict, GraphNode or string
+        self.output = None  # string, or an object exposing layer_name
+        self.is_custom_layer = False
+        self.use_fluid = False
+    @staticmethod
+    def _node_ref(node):
+        """Return node.layer_name, suffixed with "[index]" if node has an index."""
+        if hasattr(node, "index"):
+            return node.layer_name + "[{}]".format(node.index)
+        return node.layer_name
+    def get_code(self):
+        """Render this layer as one line of Python source.
+        Returns:
+            str: "<output> = <prefix><op>(<inputs>, <params>)", or
+            "<output> = <input>" when op is "=".
+        Raises:
+            Exception: when inputs (or an element of a list of inputs) has an
+                unsupported type.
+        """
+        bare_assignment = self.op == "="
+        text = ""
+        if self.output is not None:
+            if isinstance(self.output, six.string_types):
+                target = self.output
+            else:
+                target = self.output.layer_name
+            text = target + " = "
+        # Custom layers are called by bare name; everything else lives under
+        # "fluid." or "fluid.layers.". A plain assignment has no call prefix.
+        if self.is_custom_layer:
+            text += self.op + "("
+        elif not bare_assignment:
+            namespace = "fluid." if self.use_fluid else "fluid.layers."
+            text += namespace + self.op + "("
+        if isinstance(self.inputs, list):
+            rendered = []
+            for item in self.inputs:
+                if isinstance(item, GraphNode):
+                    rendered.append(self._node_ref(item))
+                elif isinstance(item, six.string_types):
+                    rendered.append(item)
+                else:
+                    raise Exception(
+                        "Element of inputs should GraphNode or String")
+            opened = "[" + "".join(piece + ", " for piece in rendered)
+            text += opened.strip(", ") + "], "
+        elif isinstance(self.inputs, dict):
+            for key, item in collections.OrderedDict(self.inputs).items():
+                shown = self._node_ref(item) if isinstance(
+                    item, GraphNode) else item
+                text += key + "={}, ".format(shown)
+        elif isinstance(self.inputs, GraphNode):
+            text += self._node_ref(self.inputs)
+            if not bare_assignment:
+                text += ", "
+        elif isinstance(self.inputs, six.string_types):
+            text += self.inputs
+            if not bare_assignment:
+                text += ", "
+        else:
+            raise Exception("Unknown type of inputs.")
+        for key, value in collections.OrderedDict(self.param_attr).items():
+            # Multi-line values are flattened into one quoted string.
+            if '\n' in str(value):
+                value = string(str(value).replace('\n', ','))
+            if str(key) == 'attr':
+                value = 'ParamAttr(' + str(value) + ')'
+            text += key + "={}, ".format(value)
+        text = text.strip(", ")
+        return text if bare_assignment else text + ")"
+class FluidCode(object):
+    """Ordered collection of Layer objects and free-text notes.
+    gen_codes() turns the collection into a list of source lines.
+    """
+    def __init__(self):
+        self.layers = []
+    def add_layer(self,
+                  op,
+                  inputs,
+                  output,
+                  param_attr=None,
+                  use_fluid=False,
+                  is_custom_layer=False):
+        """Append a new Layer built from the given pieces.
+        inputs and param_attr keep the Layer defaults (empty dicts) when they
+        are passed as None.
+        """
+        entry = Layer()
+        entry.op = op
+        entry.output = output
+        entry.use_fluid = use_fluid
+        entry.is_custom_layer = is_custom_layer
+        if inputs is not None:
+            entry.inputs = inputs
+        if param_attr is not None:
+            entry.param_attr = param_attr
+        self.layers.append(entry)
+    def add_note(self, note):
+        """Append a free-text line (note should be a string)."""
+        self.layers.append(note)
+    def clear(self):
+        """Drop every stored layer and note."""
+        self.layers = []
+    def gen_codes(self):
+        """Return the source lines for all entries, in insertion order.
+        Layer entries are rendered with get_code(), strings are emitted as
+        they are, and entries of any other type are skipped.
+        """
+        accepted = (Layer, ) + tuple(six.string_types)
+        return [
+            entry.get_code() if isinstance(entry, Layer) else entry
+            for entry in self.layers if isinstance(entry, accepted)
+        ]
--- a/x2paddle/x2paddle/core/graph.py
+++ b/x2paddle/x2paddle/core/graph.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+from __future__ import division
+import collections
+import copy as cp
+class GraphNode(object):
+    """Graph vertex wrapping one framework layer.
+    Two nodes compare equal, and hash alike, when their wrapped layers carry
+    the same name.
+    """
+    def __init__(self, layer, layer_name=None):
+        """Store the wrapped layer and its name.
+        Args:
+            layer: wrapped layer object; its name attribute drives hashing
+                and equality.
+            layer_name: name used for this node in generated code; required.
+        Raises:
+            AssertionError: when layer_name is None.
+        """
+        assert layer_name is not None, "layer_name for GraphNode should not be None"
+        self.inputs = list()
+        self.outputs = list()
+        self.layer = layer
+        self.layer_name = layer_name
+    def __hash__(self):
+        return hash(self.layer.name)
+    def __eq__(self, other):
+        # Objects that do not wrap a named layer (None, strings, ...) are not
+        # comparable. Returning NotImplemented lets Python fall back to its
+        # default handling instead of raising AttributeError.
+        other_layer = getattr(other, "layer", None)
+        if not hasattr(other_layer, "name"):
+            return NotImplemented
+        return self.layer.name == other_layer.name
+class Graph(object):
+    """Directed graph of named nodes with a topological ordering.
+    node_map maps node names to node objects. Each node keeps the names of
+    its neighbours in its inputs and outputs lists.
+    """
+    def __init__(self, model):
+        self.node_map = collections.OrderedDict()
+        self.input_nodes = list()
+        self.output_nodes = list()
+        self.topo_sort = list()
+        self.model = model
+    def build(self):
+        """Collect input nodes and output nodes, then compute topo_sort."""
+        self.get_input_nodes()
+        self.get_output_nodes()
+        self.get_topo_sort()
+    @staticmethod
+    def _sanitize(name):
+        """Replace '/' and '-' in a node name with '_'."""
+        return name.replace('/', '_').replace('-', '_')
+    def get_input_nodes(self):
+        """Append the sanitized name of every node without inputs."""
+        for name, node in self.node_map.items():
+            if len(node.inputs) == 0:
+                self.input_nodes.append(self._sanitize(name))
+    def get_output_nodes(self):
+        """Append the sanitized name of every node without outputs."""
+        for name, node in self.node_map.items():
+            if len(node.outputs) == 0:
+                self.output_nodes.append(self._sanitize(name))
+    def get_topo_sort(self):
+        """Fill topo_sort, starting from input_nodes.
+        A node is appended once all of its inputs have been visited. The
+        names in input_nodes are looked up in node_map as they are.
+        """
+        pending = dict()
+        for name, node in self.node_map.items():
+            pending[name] = len(node.inputs)
+        self.topo_sort = self.input_nodes[:]
+        cursor = 0
+        while cursor < len(self.topo_sort):
+            current = self.node_map[self.topo_sort[cursor]]
+            for successor in current.outputs:
+                pending[successor] -= 1
+                if pending[successor] == 0:
+                    self.topo_sort.append(successor)
+            cursor += 1
+    def get_node(self, name, copy=False):
+        """Look a node up by "name" or by "name:index".
+        Args:
+            name: key in node_map, or "<key>:<index>".
+            copy: return a shallow copy instead of the stored node.
+        Returns:
+            The node, or None when neither name nor its part before ':' is a
+            key of node_map. For the "name:index" form the returned node gets
+            index set to the integer; with copy=False that attribute is set
+            on the node stored in node_map itself.
+        """
+        key = name
+        index = None
+        if name not in self.node_map:
+            if name.split(':')[0] not in self.node_map:
+                return None
+            key, raw_index = name.split(':')
+            index = int(raw_index)
+        node = cp.copy(self.node_map[key]) if copy else self.node_map[key]
+        if index is not None:
+            node.index = index
+        return node
+    def connect(self, src, dst):
+        """Add the edge src -> dst.
+        Both endpoints are validated before anything is modified, so a
+        missing src can no longer leave a dangling entry in dst's inputs.
+        Raises:
+            Exception: when src or dst is not a key of node_map.
+        """
+        if dst not in self.node_map:
+            raise Exception("node[{}] not in graph".format(dst))
+        if src not in self.node_map:
+            raise Exception("node[{}] not in graph".format(src))
+        self.node_map[dst].inputs.append(src)
+        self.node_map[src].outputs.append(dst)
--- a/x2paddle/x2paddle/core/op_mapper.py
+++ b/x2paddle/x2paddle/core/op_mapper.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddle.fluid as fluid
+from paddle.fluid.proto import framework_pb2
+from x2paddle.core.util import *
+import inspect
+import os
+def export_paddle_param(param, param_name, dir):
+    """Write one parameter array to the file <dir>/<param_name>.
+    Args:
+        param: array exposing shape, dtype, size, astype() and tofile()
+            (assumes a numpy.ndarray — TODO confirm).
+        param_name: file name, also used in the error message.
+        dir: directory the file is created in.
+    Raises:
+        AssertionError: when a zero-dimensional param holds more than one
+            element or its dtype is not listed in dtype_map.
+    """
+    dtype_map = {
+        "int16": [framework_pb2.VarType.INT16, 'h'],
+        "int32": [framework_pb2.VarType.INT32, 'i'],
+        "int64": [framework_pb2.VarType.INT64, 'q'],
+        "float16": [framework_pb2.VarType.FP16, 'e'],
+        "float32": [framework_pb2.VarType.FP32, 'f'],
+        "float64": [framework_pb2.VarType.FP64, 'd'],
+        "bool": [framework_pb2.VarType.BOOL, None]
+    }
+    shape = param.shape
+    # uint8 and bool data are stored as int64, so the "bool" entry above is
+    # never selected by the lookup below.
+    if str(param.dtype) in ['uint8', 'uint_8', 'bool']:
+        param = param.astype('int64')
+    if len(shape) == 0:
+        assert param.size == 1, "Unexpected situation happend!"
+        shape = [1]
+    assert str(
+        param.dtype) in dtype_map, "Unknown dtype {} of params: {}.".format(
+            str(param.dtype), param_name)
+    # The context manager closes the file even when serialization fails.
+    with open(os.path.join(dir, param_name), 'wb') as fp:
+        # Three zero-valued header fields (int32, int64, int32); presumably
+        # the version / LoD fields of Paddle's tensor file layout — confirm.
+        numpy.array([0], dtype='int32').tofile(fp)
+        numpy.array([0], dtype='int64').tofile(fp)
+        numpy.array([0], dtype='int32').tofile(fp)
+        tensor_desc = framework_pb2.VarType.TensorDesc()
+        tensor_desc.data_type = dtype_map[str(param.dtype)][0]
+        tensor_desc.dims.extend(shape)
+        # The serialized descriptor is preceded by its byte size.
+        desc_size = tensor_desc.ByteSize()
+        numpy.array([desc_size], dtype='int32').tofile(fp)
+        fp.write(tensor_desc.SerializeToString())
+        param.tofile(fp)
+# The source of this function is copied verbatim into the generated model.py
+# (see OpMapper.save_python_model), so it must stay self-contained: it relies
+# on "fluid" and "x2paddle_net" being defined in that generated file.
+def run_net(param_dir="./"):
+    """Build the generated network and load its parameters from param_dir.
+    Only variables that have a file of the same name inside param_dir are
+    loaded.
+    """
+    import os
+    inputs, outputs = x2paddle_net()
+    ops = fluid.default_main_program().global_block().ops
+    used_vars = list()
+    for op in ops:
+        used_vars += op.input_arg_names
+    # Keep only the inputs that some op actually consumes.
+    tmp = list()
+    for input in inputs:
+        candidates = input if isinstance(input, list) else [input]
+        for ipt in candidates:
+            if ipt.name in used_vars:
+                tmp.append(ipt)
+    inputs = tmp
+    # Flatten nested output lists: plain outputs first, then the members of
+    # every nested list. A new list is built because deleting from "outputs"
+    # while enumerating it skipped the entry after each nested list.
+    flat_outputs = [out for out in outputs if not isinstance(out, list)]
+    for out in outputs:
+        if isinstance(out, list):
+            flat_outputs.extend(out)
+    outputs = flat_outputs
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+    def if_exist(var):
+        b = os.path.exists(os.path.join(param_dir, var.name))
+        return b
+    fluid.io.load_vars(
+        exe, param_dir, fluid.default_main_program(), predicate=if_exist)
+class OpMapper(object):
+    """Base class that turns a mapped graph into Paddle code and models.
+    NOTE(review): the methods read self.graph (and optionally
+    self.used_custom_layers), which are not set in this class; presumably
+    subclasses provide them.
+    """
+    def __init__(self):
+        self.paddle_codes = ""  # accumulated source text of model.py
+        self.tab = "    "  # one indentation level of generated code
+        self.net_code = list()
+        self.weights = dict()  # name -> array, exported by save_python_model
+        self.inputs = list()
+        self.outputs = list()
+    def op_checker(self):
+        """Check that every op type in the graph has a mapper method.
+        Returns:
+            bool: True when every node's layer_type names an attribute of
+            this object; otherwise the unsupported op types are printed and
+            False is returned.
+        """
+        unsupported_ops = set()
+        for node_name in self.graph.topo_sort:
+            op = self.graph.get_node(node_name).layer_type
+            if not hasattr(self, op):
+                unsupported_ops.add(op)
+        if not unsupported_ops:
+            return True
+        print("There are {} ops not supported yet, list as below".format(
+            len(unsupported_ops)))
+        for op in unsupported_ops:
+            print(op)
+        return False
+    def add_codes(self, codes, indent=0):
+        """Append one line, or a list of lines, to paddle_codes.
+        Each line is stripped of surrounding newlines, indented by indent
+        levels and terminated with a single newline.
+        Raises:
+            Exception: when codes is neither a list nor a str.
+        """
+        if isinstance(codes, list):
+            lines = codes
+        elif isinstance(codes, str):
+            lines = [codes]
+        else:
+            raise Exception("Unknown type of codes")
+        for line in lines:
+            self.paddle_codes += (self.tab * indent + line.strip('\n') + '\n')
+    def add_heads(self):
+        """Append the import header of the generated model.py."""
+        self.add_codes("from paddle.fluid.initializer import Constant")
+        self.add_codes("from paddle.fluid.param_attr import ParamAttr")
+        self.add_codes("import paddle.fluid as fluid")
+        self.add_codes("")
+    @staticmethod
+    def _flatten_outputs(outputs):
+        """Return outputs with nested lists expanded.
+        Plain entries come first in their original order, followed by the
+        members of every nested list.
+        """
+        plain = [out for out in outputs if not isinstance(out, list)]
+        nested = [
+            part for out in outputs if isinstance(out, list) for part in out
+        ]
+        return plain + nested
+    def save_inference_model(self, save_dir, params_merge):
+        """Generate model.py, import it and save a Paddle inference model.
+        Args:
+            save_dir: output directory; code and parameters go to
+                <save_dir>/model_with_code, the inference model to
+                <save_dir>/inference_model.
+            params_merge: when true all parameters are saved into the single
+                file "__params__".
+        Raises:
+            Exception: when building or saving the generated network fails.
+        """
+        self.save_python_model(save_dir)
+        import sys
+        import paddle.fluid as fluid
+        py_code_dir = os.path.join(save_dir, "model_with_code")
+        sys.path.append(py_code_dir)
+        import model
+        try:
+            inputs, outputs = model.x2paddle_net()
+            ops = fluid.default_main_program().global_block().ops
+            used_vars = list()
+            for op in ops:
+                used_vars += op.input_arg_names
+            # A new list is built here: the previous in-place deletion while
+            # enumerating skipped the entry following each nested list.
+            outputs = self._flatten_outputs(outputs)
+            # Feed only the inputs that some op actually consumes.
+            input_names = list()
+            for input in inputs:
+                candidates = input if isinstance(input, list) else [input]
+                for ipt in candidates:
+                    if ipt.name in used_vars:
+                        input_names.append(ipt.name)
+            exe = fluid.Executor(fluid.CPUPlace())
+            exe.run(fluid.default_startup_program())
+            def if_exist(var):
+                return os.path.exists(os.path.join(py_code_dir, var.name))
+            fluid.io.load_vars(
+                exe,
+                py_code_dir,
+                fluid.default_main_program(),
+                predicate=if_exist)
+            fluid.io.save_inference_model(
+                dirname=os.path.join(save_dir, "inference_model"),
+                feeded_var_names=input_names,
+                target_vars=outputs,
+                executor=exe,
+                params_filename="__params__" if params_merge else None)
+        except Exception:
+            raise Exception(
+                "Paddle code was saved in {}/model.py, but seems there's wrong exist, please check model.py manually."
+                .format(py_code_dir))
+    def save_python_model(self, save_dir):
+        """Export the weights and write <save_dir>/model_with_code/model.py.
+        The generated file holds the import header, any custom layer code,
+        the x2paddle_net() function built from the graph nodes in topological
+        order, and a copy of run_net.
+        """
+        if not os.path.exists(save_dir):
+            os.makedirs(save_dir)
+        py_code_dir = os.path.join(save_dir, "model_with_code")
+        if not os.path.exists(py_code_dir):
+            os.makedirs(py_code_dir)
+        for name, param in self.weights.items():
+            export_paddle_param(param, name, py_code_dir)
+        self.add_heads()
+        if hasattr(self, "used_custom_layers"):
+            for _, layer_code in self.used_custom_layers.items():
+                self.add_codes(layer_code, 0)
+                self.add_codes("", 0)
+        self.add_codes("\ndef x2paddle_net():", 0)
+        for node_name in self.graph.topo_sort:
+            node = self.graph.get_node(node_name)
+            if node is None:
+                continue
+            if len(node.fluid_code.layers) == 0:
+                continue
+            self.add_codes(node.fluid_code.gen_codes(), 1)
+        self.add_codes("", 0)
+        input_str = "[" + ", ".join(self.graph.input_nodes) + "]"
+        output_str = "[" + ", ".join(self.graph.output_nodes) + "]"
+        self.add_codes("return {}, {}".format(input_str, output_str), 1)
+        self.add_codes("", 0)
+        # run_net is shipped inside the generated file as source text.
+        self.add_codes(inspect.getsourcelines(run_net)[0])
+        with open(os.path.join(py_code_dir, "model.py"), 'w') as fp:
+            fp.write(self.paddle_codes)
--- a/x2paddle/x2paddle/core/program.py
+++ b/x2paddle/x2paddle/core/program.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+from __future__ import division
+import collections
+import os
+class PaddleLayer(object):
+    """Plain record describing one call in a generated Paddle program."""
+    def __init__(self, kernel, inputs, outputs, **kwargs):
+        """Validate and store the pieces of the call.
+        Args:
+            kernel: callable name emitted in the generated code.
+            inputs: dict of keyword name -> input variable name.
+            outputs: list of output variable names.
+            **kwargs: extra keyword attributes, stored as attrs.
+        Raises:
+            AssertionError: when inputs is not a dict or outputs is not a list.
+        """
+        inputs_ok = isinstance(inputs, dict)
+        assert inputs_ok, "parameter 'inputs' for PaddleLayer should be type of dict"
+        outputs_ok = isinstance(outputs, list)
+        assert outputs_ok, "parameter, 'outputs' for PaddleLayer should be type of list"
+        self.kernel, self.inputs = kernel, inputs
+        self.outputs, self.attrs = outputs, kwargs
+class PaddleProgram(object):
+    """Ordered list of PaddleLayer objects plus the edges between them."""
+    def __init__(self):
+        self.layers = list()
+        self.edges_out = dict()  # layer index -> indices of consuming layers
+        self.edges_in = dict()  # layer index -> indices of producing layers
+        self.inputs = list()
+        self.outputs = list()
+        self.parameters = dict()
+    def add_layer(self, kernel, inputs, outputs, **kwargs):
+        """Append a PaddleLayer built from the given arguments."""
+        layer = PaddleLayer(kernel, inputs, outputs, **kwargs)
+        self.layers.append(layer)
+    def build(self):
+        """Derive edges_in and edges_out from the layers' inputs and outputs.
+        Layers must already be in topological order: every input has to be an
+        output of the same or an earlier layer.
+        Raises:
+            AssertionError: when an input is not produced by any layer seen
+                so far.
+        """
+        producers = dict()
+        for i, layer in enumerate(self.layers):
+            for output in layer.outputs:
+                producers[output] = i
+            for k, v in layer.inputs.items():
+                assert v in producers, "Couldn't find {} in previous layers, the layers should be make by topological sort".format(
+                    v)
+                in_layer_index = producers[v]
+                self.edges_out.setdefault(in_layer_index, list()).append(i)
+                self.edges_in.setdefault(i, list()).append(in_layer_index)
+    def get_layer_outputs(self, i):
+        """Return the indices of the layers consuming layer i (KeyError if none)."""
+        return self.edges_out[i]
+    def get_layer_inputs(self, i):
+        """Return the indices of the layers feeding layer i (KeyError if none)."""
+        return self.edges_in[i]
+    def gen_code(self, code_dir):
+        """Write <code_dir>/model.py containing the x2paddle_net() function.
+        Layers that have neither incoming nor outgoing edges are left out.
+        """
+        def write_code(f, code_list, indent=0):
+            indent_blank = "    " * indent
+            for code_line in code_list:
+                if code_line.strip() == "":
+                    f.write('\n')
+                else:
+                    f.write(indent_blank + code_line + '\n')
+        # The context manager closes the file even when writing fails.
+        with open(os.path.join(code_dir, 'model.py'), 'w') as f:
+            write_code(
+                f, [
+                    "from paddle.fluid.initializer import Constant",
+                    "from paddle.fluid.param_attr import ParamAttr",
+                    "import paddle.fluid as fluid",
+                    # Blank separator line; a missing comma used to fuse this
+                    # entry with the import line above.
+                    "",
+                    "def x2paddle_net():"
+                ],
+                indent=0)
+            for i, layer in enumerate(self.layers):
+                if i not in self.edges_in and i not in self.edges_out:
+                    continue
+                targets = ", ".join(layer.outputs)
+                arguments = ["{}={}".format(k, v) for k, v in layer.inputs.items()]
+                arguments += ["{}={}".format(k, v) for k, v in layer.attrs.items()]
+                line = "{} = {}({})".format(targets, layer.kernel,
+                                            ", ".join(arguments))
+                write_code(f, [line], indent=1)
+    def gen_parameters(self, code_dir):
+        """Placeholder; not implemented yet."""
+        pass
+    def gen_inference_model(self, model_dir):
+        """Placeholder; not implemented yet."""
+        pass
--- a/x2paddle/x2paddle/core/util.py
+++ b/x2paddle/x2paddle/core/util.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy
+import math
+import os
+def string(param):
+    return "\'{}\'".format(param)
--- a/x2paddle/x2paddle/decoder/__init__.py
+++ b/x2paddle/x2paddle/decoder/__init__.py
--- a/x2paddle/x2paddle/decoder/caffe.proto
+++ b/x2paddle/x2paddle/decoder/caffe.proto
+syntax = "proto2";
+package caffe;
+// Specifies the shape (dimensions) of a Blob.
+message BlobShape {
+  repeated int64 dim = 1 [packed = true]; // extent of each dimension, in order
+}
+message BlobProto { // N-d array payload: optional shape plus float or double values
+  optional BlobShape shape = 7;
+  repeated float data = 5 [packed = true];
+  repeated float diff = 6 [packed = true];
+  repeated double double_data = 8 [packed = true]; // double-precision counterpart of data
+  repeated double double_diff = 9 [packed = true]; // double-precision counterpart of diff
+  // 4D dimensions -- deprecated.  Use "shape" instead.
+  optional int32 num = 1 [default = 0];
+  optional int32 channels = 2 [default = 0];
+  optional int32 height = 3 [default = 0];
+  optional int32 width = 4 [default = 0];
+}
+// The BlobProtoVector is simply a way to pass multiple blobproto instances
+// around.
+message BlobProtoVector {
+  repeated BlobProto blobs = 1; // the wrapped blobs, in order
+}
+message Datum { // One image sample: dimensions, payload (bytes or floats) and a label
+  optional int32 channels = 1;
+  optional int32 height = 2;
+  optional int32 width = 3;
+  // the actual image data, in bytes
+  optional bytes data = 4;
+  optional int32 label = 5;
+  // Optionally, the datum could also hold float data.
+  repeated float float_data = 6;
+  // If true, data contains an encoded image that needs to be decoded
+  optional bool encoded = 7 [default = false];
+}
+// The label (display) name and label id.
+message LabelMapItem {
+  // Both name and label are expected to be set, although the schema declares
+  // them optional.
+  optional string name = 1;
+  optional int32 label = 2;
+  // display_name is optional.
+  optional string display_name = 3;
+}
+message LabelMap { // A list of LabelMapItem entries
+  repeated LabelMapItem item = 1;
+}
+// Sample a bbox in the normalized space [0, 1] with provided constraints.
+message Sampler { // all four bounds default to 1
+  // Minimum scale of the sampled bbox.
+  optional float min_scale = 1 [default = 1.];
+  // Maximum scale of the sampled bbox.
+  optional float max_scale = 2 [default = 1.];
+  // Minimum aspect ratio of the sampled bbox.
+  optional float min_aspect_ratio = 3 [default = 1.];
+  // Maximum aspect ratio of the sampled bbox.
+  optional float max_aspect_ratio = 4 [default = 1.];
+}
+// Constraints for selecting sampled bbox.
+message SampleConstraint { // no field declares a default; presumably unset bounds are not enforced -- confirm
+  // Minimum Jaccard overlap between sampled bbox and all bboxes in
+  // AnnotationGroup.
+  optional float min_jaccard_overlap = 1;
+  // Maximum Jaccard overlap between sampled bbox and all bboxes in
+  // AnnotationGroup.
+  optional float max_jaccard_overlap = 2;
+  // Minimum coverage of sampled bbox by all bboxes in AnnotationGroup.
+  optional float min_sample_coverage = 3;
+  // Maximum coverage of sampled bbox by all bboxes in AnnotationGroup.
+  optional float max_sample_coverage = 4;
+  // Minimum coverage of all bboxes in AnnotationGroup by sampled bbox.
+  optional float min_object_coverage = 5;
+  // Maximum coverage of all bboxes in AnnotationGroup by sampled bbox.
+  optional float max_object_coverage = 6;
+}
+// Sample a batch of bboxes with provided constraints.
+message BatchSampler {
+  // Use original image as the source for sampling.
+  optional bool use_original_image = 1 [default = true];
+  // Constraints for sampling bbox.
+  optional Sampler sampler = 2;
+  // Constraints for determining if a sampled bbox is positive or negative.
+  optional SampleConstraint sample_constraint = 3;
+  // If provided, stop once this many samples satisfying the
+  // sample_constraint have been found.
+  optional uint32 max_sample = 4;
+  // Maximum number of trials for sampling to avoid infinite loop.
+  optional uint32 max_trials = 5 [default = 100];
+}
+// Condition for emitting annotations.
+message EmitConstraint { // emit_type picks the rule; emit_overlap parameterizes MIN_OVERLAP
+  enum EmitType {
+    CENTER = 0;
+    MIN_OVERLAP = 1;
+  }
+  optional EmitType emit_type = 1 [default = CENTER];
+  // If emit_type is MIN_OVERLAP, provide the emit_overlap.
+  optional float emit_overlap = 2;
+}
+// The normalized bounding box [0, 1] w.r.t. the input image size.
+message NormalizedBBox {
+  optional float xmin = 1; // corners (xmin, ymin)-(xmax, ymax) as fractions of the image size
+  optional float ymin = 2;
+  optional float xmax = 3;
+  optional float ymax = 4;
+  optional int32 label = 5;
+  optional bool difficult = 6;
+  optional float score = 7;
+  optional float size = 8;
+}
+// Annotation for each object instance.
+message Annotation {
+  optional int32 instance_id = 1 [default = 0];
+  optional NormalizedBBox bbox = 2; // bounding box of this instance
+}
+// Group of annotations for a particular label.
+message AnnotationGroup {
+  optional int32 group_label = 1; // the label shared by every annotation below
+  repeated Annotation annotation = 2;
+}
+// An extension of Datum which contains "rich" annotations.
+message AnnotatedDatum {
+  enum AnnotationType { // BBOX is the only value defined
+    BBOX = 0;
+  }
+  optional Datum datum = 1;
+  // If there are "rich" annotations, specify the type of annotation.
+  // Currently it only supports bounding box.
+  // If there are no "rich" annotations, use label in datum instead.
+  optional AnnotationType type = 2;
+  // Each group contains annotation for a particular class.
+  repeated AnnotationGroup annotation_group = 3;
+}
+message FillerParameter { // Selects a filler by name and carries its numeric settings
+  // The filler type.
+  optional string type = 1 [default = 'constant'];
+  optional float value = 2 [default = 0]; // the value in constant filler
+  optional float min = 3 [default = 0]; // the min value in uniform filler
+  optional float max = 4 [default = 1]; // the max value in uniform filler
+  optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
+  optional float std = 6 [default = 1]; // the std value in Gaussian filler
+  // The expected number of non-zero output weights for a given input in
+  // Gaussian filler -- the default -1 means don't perform sparsification.
+  optional int32 sparse = 7 [default = -1];
+  // Normalize the filler variance by fan_in, fan_out, or their average.
+  // Applies to 'xavier' and 'msra' fillers.
+  enum VarianceNorm {
+    FAN_IN = 0;
+    FAN_OUT = 1;
+    AVERAGE = 2;
+  }
+  optional VarianceNorm variance_norm = 8 [default = FAN_IN];
+}
+message NetParameter { // A whole network: name, deprecated input declarations, state and layers
+  optional string name = 1; // consider giving the network a name
+  // DEPRECATED. See InputParameter. The input blobs to the network.
+  repeated string input = 3;
+  // DEPRECATED. See InputParameter. The shape of the input blobs.
+  repeated BlobShape input_shape = 8;
+  // 4D input dimensions -- deprecated.  Use "input_shape" instead.
+  // If specified, for each input blob there should be four
+  // values specifying the num, channels, height and width of the input blob.
+  // Thus, there should be a total of (4 * #input) numbers.
+  repeated int32 input_dim = 4;
+  // Whether the network will force every layer to carry out backward operation.
+  // If set False, then whether to carry out backward is determined
+  // automatically according to the net structure and learning rates.
+  optional bool force_backward = 5 [default = false];
+  // The current "state" of the network, including the phase, level, and stage.
+  // Some layers may be included/excluded depending on this state and the states
+  // specified in the layers' include and exclude fields.
+  optional NetState state = 6;
+  // Print debugging information about results while running Net::Forward,
+  // Net::Backward, and Net::Update.
+  optional bool debug_info = 7 [default = false];
+  // The layers that make up the net.  Each of their configurations, including
+  // connectivity and behavior, is specified as a LayerParameter.
+  repeated LayerParameter layer = 100;  // ID 100 so layers are printed last.
+  // DEPRECATED: use 'layer' instead.
+  repeated V1LayerParameter layers = 2;
+}
+// NOTE
+// Update the next available ID when you add a new SolverParameter field.
+//
+// SolverParameter next available ID: 45 (last added: show_per_class_result)
+message SolverParameter { // field IDs 1-44 are all in use
+  //////////////////////////////////////////////////////////////////////////////
+  // Specifying the train and test networks
+  //
+  // Exactly one train net must be specified using one of the following fields:
+  //     train_net_param, train_net, net_param, net
+  // One or more test nets may be specified using any of the following fields:
+  //     test_net_param, test_net, net_param, net
+  // If more than one test net field is specified (e.g., both net and
+  // test_net are specified), they will be evaluated in the field order given
+  // above: (1) test_net_param, (2) test_net, (3) net_param/net.
+  // A test_iter must be specified for each test_net.
+  // A test_level and/or a test_stage may also be specified for each test_net.
+  //////////////////////////////////////////////////////////////////////////////
+  // Proto filename for the train net, possibly combined with one or more
+  // test nets.
+  optional string net = 24;
+  // Inline train net param, possibly combined with one or more test nets.
+  optional NetParameter net_param = 25;
+  optional string train_net = 1; // Proto filename for the train net.
+  repeated string test_net = 2; // Proto filenames for the test nets.
+  optional NetParameter train_net_param = 21; // Inline train net params.
+  repeated NetParameter test_net_param = 22; // Inline test net params.
+  // The states for the train/test nets. Must be unspecified or
+  // specified once per net.
+  //
+  // By default, all states will have solver = true;
+  // train_state will have phase = TRAIN,
+  // and all test_state's will have phase = TEST.
+  // Other defaults are set according to the NetState defaults.
+  optional NetState train_state = 26;
+  repeated NetState test_state = 27;
+  // Evaluation type.
+  optional string eval_type = 41 [default = "classification"];
+  // ap_version: different ways of computing Average Precision.
+  //    Check https://sanchom.wordpress.com/tag/average-precision/ for details.
+  //    11point: the 11-point interpolated average precision. Used in VOC2007.
+  //    MaxIntegral: maximally interpolated AP. Used in VOC2012/ILSVRC.
+  //    Integral: the natural integral of the precision-recall curve.
+  optional string ap_version = 42 [default = "Integral"];
+  // If true, display per class result.
+  optional bool show_per_class_result = 44 [default = false];
+  // The number of iterations for each test net.
+  repeated int32 test_iter = 3;
+  // The number of iterations between two testing phases.
+  optional int32 test_interval = 4 [default = 0];
+  optional bool test_compute_loss = 19 [default = false];
+  // If true, run an initial test pass before the first iteration,
+  // ensuring memory availability and printing the starting value of the loss.
+  optional bool test_initialization = 32 [default = true];
+  optional float base_lr = 5; // The base learning rate
+  // the number of iterations between displaying info. If display = 0, no info
+  // will be displayed.
+  optional int32 display = 6;
+  // Display the loss averaged over the last average_loss iterations
+  optional int32 average_loss = 33 [default = 1];
+  optional int32 max_iter = 7; // the maximum number of iterations
+  // accumulate gradients over `iter_size` x `batch_size` instances
+  optional int32 iter_size = 36 [default = 1];
+  // The learning rate decay policy. The currently implemented learning rate
+  // policies are as follows:
+  //    - fixed: always return base_lr.
+  //    - step: return base_lr * gamma ^ (floor(iter / step))
+  //    - exp: return base_lr * gamma ^ iter
+  //    - inv: return base_lr * (1 + gamma * iter) ^ (- power)
+  //    - multistep: similar to step but it allows non uniform steps defined by
+  //      stepvalue
+  //    - poly: the effective learning rate follows a polynomial decay, to be
+  //      zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power)
+  //    - sigmoid: the effective learning rate follows a sigmoid decay
+  //      return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize))))
+  //    - plateau: decreases lr
+  //              if the minimum loss isn't updated for 'plateau_winsize' iters
+  //
+  // where base_lr, max_iter, gamma, step, stepvalue and power are defined
+  // in the solver parameter protocol buffer, and iter is the current iteration.
+  optional string lr_policy = 8;
+  optional float gamma = 9; // The parameter to compute the learning rate.
+  optional float power = 10; // The parameter to compute the learning rate.
+  optional float momentum = 11; // The momentum value.
+  optional float weight_decay = 12; // The weight decay.
+  // regularization types supported: L1 and L2
+  // controlled by weight_decay
+  optional string regularization_type = 29 [default = "L2"];
+  // the stepsize for learning rate policy "step"
+  optional int32 stepsize = 13;
+  // the stepsize for learning rate policy "multistep"
+  repeated int32 stepvalue = 34;
+  // the stepsize for learning rate policy "plateau"
+  repeated int32 plateau_winsize = 43;
+  // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,
+  // whenever their actual L2 norm is larger.
+  optional float clip_gradients = 35 [default = -1];
+  optional int32 snapshot = 14 [default = 0]; // The snapshot interval
+  optional string snapshot_prefix = 15; // The prefix for the snapshot.
+  // whether to snapshot diff in the results or not. Snapshotting diff will help
+  // debugging but the final protocol buffer size will be much larger.
+  optional bool snapshot_diff = 16 [default = false];
+  enum SnapshotFormat {
+    HDF5 = 0;
+    BINARYPROTO = 1;
+  }
+  optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];
+  // the mode solver will use: 0 for CPU and 1 for GPU. Uses GPU by default.
+  enum SolverMode {
+    CPU = 0;
+    GPU = 1;
+  }
+  optional SolverMode solver_mode = 17 [default = GPU];
+  // the device_id that will be used in GPU mode. Uses device_id = 0 by default.
+  optional int32 device_id = 18 [default = 0];
+  // If non-negative, the seed with which the Solver will initialize the Caffe
+  // random number generator -- useful for reproducible results. Otherwise,
+  // (and by default) initialize using a seed derived from the system clock.
+  optional int64 random_seed = 20 [default = -1];
+  // type of the solver
+  optional string type = 40 [default = "SGD"];
+  // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam
+  optional float delta = 31 [default = 1e-8];
+  // parameters for the Adam solver
+  optional float momentum2 = 39 [default = 0.999];
+  // RMSProp decay value
+  // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
+  optional float rms_decay = 38 [default = 0.99];
+  // If true, print information about the state of the net that may help with
+  // debugging learning problems.
+  optional bool debug_info = 23 [default = false];
+  // If false, don't save a snapshot after training finishes.
+  optional bool snapshot_after_train = 28 [default = true];
+  // DEPRECATED: old solver enum types, use string instead
+  enum SolverType {
+    SGD = 0;
+    NESTEROV = 1;
+    ADAGRAD = 2;
+    RMSPROP = 3;
+    ADADELTA = 4;
+    ADAM = 5;
+  }
+  // DEPRECATED: use type instead of solver_type
+  optional SolverType solver_type = 30 [default = SGD];
+}
+// A message that stores the solver snapshots
+message SolverState {
+  optional int32 iter = 1; // The current iteration
+  optional string learned_net = 2; // The file that stores the learned net.
+  repeated BlobProto history = 3; // The history for sgd solvers
+  optional int32 current_step = 4 [default = 0]; // The current step for learning rate
+  optional float minimum_loss = 5 [default = 1E38]; // Historical minimum loss
+  optional int32 iter_last_event = 6 [default = 0]; // The iteration at which the last lr update or min_loss update happened
+}
+enum Phase { // used by NetState, NetStateRule and LayerParameter
+   TRAIN = 0;
+   TEST = 1;
+}
+message NetState { // phase defaults to TEST and level to 0
+  optional Phase phase = 1 [default = TEST];
+  optional int32 level = 2 [default = 0];
+  repeated string stage = 3;
+}
+message NetStateRule { // A predicate over a NetState; used by LayerParameter include/exclude
+  // Set phase to require the NetState have a particular phase (TRAIN or TEST)
+  // to meet this rule.
+  optional Phase phase = 1;
+  // Set the minimum and/or maximum levels in which the layer should be used.
+  // Leave undefined to meet the rule regardless of level.
+  optional int32 min_level = 2;
+  optional int32 max_level = 3;
+  // Customizable sets of stages to include or exclude.
+  // The net must have ALL of the specified stages and NONE of the specified
+  // "not_stage"s to meet the rule.
+  // (Use multiple NetStateRules to specify conjunctions of stages.)
+  repeated string stage = 4;
+  repeated string not_stage = 5;
+}
+// Specifies training parameters (multipliers on global learning constants,
+// and the name and other settings used for weight sharing).
+message ParamSpec {
+  // The names of the parameter blobs -- useful for sharing parameters among
+  // layers, but never required otherwise.  To share a parameter between two
+  // layers, give it a (non-empty) name.
+  optional string name = 1;
+  // Whether to require shared weights to have the same shape, or just the same
+  // count -- defaults to STRICT if unspecified (no explicit default is
+  // declared; proto2 falls back to the first enum value).
+  optional DimCheckMode share_mode = 2;
+  enum DimCheckMode {
+    // STRICT (default) requires that num, channels, height, width each match.
+    STRICT = 0;
+    // PERMISSIVE requires only the count (num*channels*height*width) to match.
+    PERMISSIVE = 1;
+  }
+  // The multiplier on the global learning rate for this parameter.
+  optional float lr_mult = 3 [default = 1.0];
+  // The multiplier on the global weight decay for this parameter.
+  optional float decay_mult = 4 [default = 1.0];
+}
+// NOTE
+// Update the next available ID when you add a new LayerParameter field.
+//
+// LayerParameter next available layer-specific ID: 147 (last added: recurrent_param)
+message LayerParameter { // besides the 100-146 range, IDs 200-207 and 210-213 are also in use
+  optional string name = 1; // the layer name
+  optional string type = 2; // the layer type
+  repeated string bottom = 3; // the name of each bottom blob
+  repeated string top = 4; // the name of each top blob
+  // The train / test phase for computation.
+  optional Phase phase = 10;
+  // The amount of weight to assign each top blob in the objective.
+  // Each layer assigns a default value, usually of either 0 or 1,
+  // to each top blob.
+  repeated float loss_weight = 5;
+  // Specifies training parameters (multipliers on global learning constants,
+  // and the name and other settings used for weight sharing).
+  repeated ParamSpec param = 6;
+  // The blobs containing the numeric parameters of the layer.
+  repeated BlobProto blobs = 7;
+  // Specifies whether to backpropagate to each bottom. If unspecified,
+  // Caffe will automatically infer whether each input needs backpropagation
+  // to compute parameter gradients. If set to true for some inputs,
+  // backpropagation to those inputs is forced; if set false for some inputs,
+  // backpropagation to those inputs is skipped.
+  //
+  // The size must be either 0 or equal to the number of bottoms.
+  repeated bool propagate_down = 11;
+  // Rules controlling whether and when a layer is included in the network,
+  // based on the current NetState.  You may specify a non-zero number of rules
+  // to include OR exclude, but not both.  If no include or exclude rules are
+  // specified, the layer is always included.  If the current NetState meets
+  // ANY (i.e., one or more) of the specified rules, the layer is
+  // included/excluded.
+  repeated NetStateRule include = 8;
+  repeated NetStateRule exclude = 9;
+  // Parameters for data pre-processing.
+  optional TransformationParameter transform_param = 100;
+  // Parameters shared by loss layers.
+  optional LossParameter loss_param = 101;
+  // Layer type-specific parameters.
+  //
+  // Note: certain layers may have more than one computational engine
+  // for their implementation. These layers include an Engine type and
+  // engine parameter for selecting the implementation.
+  // The default for the engine is set by the ENGINE switch at compile-time.
+  optional AccuracyParameter accuracy_param = 102;
+  optional AnnotatedDataParameter annotated_data_param = 200;
+  optional ArgMaxParameter argmax_param = 103;
+  optional BatchNormParameter batch_norm_param = 139;
+  optional BiasParameter bias_param = 141;
+  optional ConcatParameter concat_param = 104;
+  optional ContrastiveLossParameter contrastive_loss_param = 105;
+  optional ConvolutionParameter convolution_param = 106;
+  optional CropParameter crop_param = 144;
+  optional DataParameter data_param = 107;
+  optional DetectionEvaluateParameter detection_evaluate_param = 205;
+  optional DetectionOutputParameter detection_output_param = 204;
+  optional DropoutParameter dropout_param = 108;
+  optional DummyDataParameter dummy_data_param = 109;
+  optional EltwiseParameter eltwise_param = 110;
+  optional ELUParameter elu_param = 140;
+  optional EmbedParameter embed_param = 137;
+  optional ExpParameter exp_param = 111;
+  optional FlattenParameter flatten_param = 135;
+  optional HDF5DataParameter hdf5_data_param = 112;
+  optional HDF5OutputParameter hdf5_output_param = 113;
+  optional HingeLossParameter hinge_loss_param = 114;
+  optional ImageDataParameter image_data_param = 115;
+  optional InfogainLossParameter infogain_loss_param = 116;
+  optional InnerProductParameter inner_product_param = 117;
+  optional InputParameter input_param = 143;
+  optional LogParameter log_param = 134;
+  optional LRNParameter lrn_param = 118;
+  optional MemoryDataParameter memory_data_param = 119;
+  optional MultiBoxLossParameter multibox_loss_param = 201;
+  optional MVNParameter mvn_param = 120;
+  optional NormalizeParameter norm_param = 206;
+  optional ParameterParameter parameter_param = 145;
+  optional PermuteParameter permute_param = 202;
+  optional PoolingParameter pooling_param = 121;
+  optional PowerParameter power_param = 122;
+  optional PReLUParameter prelu_param = 131;
+  optional PriorBoxParameter prior_box_param = 203;
+  optional PythonParameter python_param = 130;
+  optional RecurrentParameter recurrent_param = 146;
+  optional ReductionParameter reduction_param = 136;
+  optional ReLUParameter relu_param = 123;
+  optional ReshapeParameter reshape_param = 133;
+  optional ScaleParameter scale_param = 142;
+  optional SigmoidParameter sigmoid_param = 124;
+  optional SoftmaxParameter softmax_param = 125;
+  optional SPPParameter spp_param = 132;
+  optional SliceParameter slice_param = 126;
+  optional TanHParameter tanh_param = 127;
+  optional ThresholdParameter threshold_param = 128;
+  optional TileParameter tile_param = 138;
+  optional VideoDataParameter video_data_param = 207;
+  optional WindowDataParameter window_data_param = 129;
+  optional AxpyParameter axpy_param = 210;
+  optional UpsampleParameter upsample_param = 211;
+  optional ROIPoolingParameter roi_pooling_param = 212;
+  optional ShuffleChannelParameter shuffle_channel_param = 213;
+}
+// Message that stores parameters used to apply transformation
+// to the data layer's data
+message TransformationParameter {
+  // For data pre-processing, we can do simple scaling and subtracting the
+  // data mean, if provided. Note that the mean subtraction is always carried
+  // out before scaling.
+  optional float scale = 1 [default = 1];
+  // Specify if we want to randomly mirror data.
+  optional bool mirror = 2 [default = false];
+  // Specify if we would like to randomly crop an image.
+  optional uint32 crop_size = 3 [default = 0];
+  optional uint32 crop_h = 11 [default = 0];
+  optional uint32 crop_w = 12 [default = 0];
+  // mean_file and mean_value cannot be specified at the same time
+  optional string mean_file = 4;
+  // if specified can be repeated once (would subtract it from all the channels)
+  // or can be repeated the same number of times as channels
+  // (would subtract them from the corresponding channel)
+  repeated float mean_value = 5;
+  // Force the decoded image to have 3 color channels.
+  optional bool force_color = 6 [default = false];
+  // Force the decoded image to have 1 color channel.
+  optional bool force_gray = 7 [default = false];
+  // Resize policy
+  optional ResizeParameter resize_param = 8;
+  // Noise policy
+  optional NoiseParameter noise_param = 9;
+  // Distortion policy
+  optional DistortionParameter distort_param = 13;
+  // Expand policy
+  optional ExpansionParameter expand_param = 14;
+  // Constraint for emitting the annotation after transformation.
+  optional EmitConstraint emit_constraint = 10;
+}
+// Message that stores parameters used by data transformer for resize policy
+message ResizeParameter {
+  // Probability of using this resize policy
+  optional float prob = 1 [default = 1];
+  enum Resize_mode { // values start at 1; there is no 0 entry
+    WARP = 1;
+    FIT_SMALL_SIZE = 2;
+    FIT_LARGE_SIZE_AND_PAD = 3;
+  }
+  optional Resize_mode resize_mode = 2 [default = WARP];
+  optional uint32 height = 3 [default = 0];
+  optional uint32 width = 4 [default = 0];
+  // A parameter used to update bbox in FIT_SMALL_SIZE mode.
+  optional uint32 height_scale = 8 [default = 0];
+  optional uint32 width_scale = 9 [default = 0];
+  enum Pad_mode {
+    CONSTANT = 1;
+    MIRRORED = 2;
+    REPEAT_NEAREST = 3;
+  }
+  // Padding mode for FIT_LARGE_SIZE_AND_PAD mode and object centering
+  optional Pad_mode pad_mode = 5 [default = CONSTANT];
+  // if specified can be repeated once (would fill all the channels)
+  // or can be repeated the same number of times as channels
+  // (would apply each value to the corresponding channel)
+  repeated float pad_value = 6;
+  enum Interp_mode { //Same as in OpenCV
+    LINEAR = 1;
+    AREA = 2;
+    NEAREST = 3;
+    CUBIC = 4;
+    LANCZOS4 = 5;
+  }
+  // interpolation mode(s) for resizing
+  repeated Interp_mode interp_mode = 7;
+}
+message SaltPepperParameter { // Settings carried by NoiseParameter.saltpepper_param
+  // Fraction of pixels
+  optional float fraction = 1 [default = 0];
+  repeated float value = 2;
+}
+// Message that stores parameters used by data transformer for transformation
+// policy
+message NoiseParameter {
+  // Probability of using this noise policy
+  optional float prob = 1 [default = 0];
+  // Histogram equalized
+  optional bool hist_eq = 2 [default = false];
+  // Color inversion
+  optional bool inverse = 3 [default = false];
+  // Grayscale
+  optional bool decolorize = 4 [default = false];
+  // Gaussian blur
+  optional bool gauss_blur = 5 [default = false];
+  // JPEG compression quality (-1 = no compression)
+  optional float jpeg = 6 [default = -1];
+  // Posterization
+  optional bool posterize = 7 [default = false];
+  // Erosion
+  optional bool erode = 8 [default = false];
+  // Salt-and-pepper noise
+  optional bool saltpepper = 9 [default = false];
+  optional SaltPepperParameter saltpepper_param = 10;
+  // Local histogram equalization
+  optional bool clahe = 11 [default = false];
+  // Color space conversion
+  optional bool convert_to_hsv = 12 [default = false];
+  // Color space conversion
+  optional bool convert_to_lab = 13 [default = false];
+}
+// Message that stores parameters used by data transformer for distortion policy
+message DistortionParameter { // every field defaults to 0.0; the recommended values below are not defaults
+  // The probability of adjusting brightness.
+  optional float brightness_prob = 1 [default = 0.0];
+  // Amount to add to the pixel values within [-delta, delta].
+  // The possible value is within [0, 255]. Recommend 32.
+  optional float brightness_delta = 2 [default = 0.0];
+  // The probability of adjusting contrast.
+  optional float contrast_prob = 3 [default = 0.0];
+  // Lower bound for random contrast factor. Recommend 0.5.
+  optional float contrast_lower = 4 [default = 0.0];
+  // Upper bound for random contrast factor. Recommend 1.5.
+  optional float contrast_upper = 5 [default = 0.0];
+  // The probability of adjusting hue.
+  optional float hue_prob = 6 [default = 0.0];
+  // Amount to add to the hue channel within [-delta, delta].
+  // The possible value is within [0, 180]. Recommend 36.
+  optional float hue_delta = 7 [default = 0.0];
+  // The probability of adjusting saturation.
+  optional float saturation_prob = 8 [default = 0.0];
+  // Lower bound for the random saturation factor. Recommend 0.5.
+  optional float saturation_lower = 9 [default = 0.0];
+  // Upper bound for the random saturation factor. Recommend 1.5.
+  optional float saturation_upper = 10 [default = 0.0];
+  // The probability of randomly reordering the image channels.
+  optional float random_order_prob = 11 [default = 0.0];
+}
+// Message that stores parameters used by data transformer for expansion policy
+message ExpansionParameter {
+  // Probability of using this expansion policy
+  optional float prob = 1 [default = 1];
+  // The ratio to expand the image.
+  optional float max_expand_ratio = 2 [default = 1.]; // 1 presumably means no expansion -- confirm
+}
+// Message that stores parameters shared by loss layers
+message LossParameter {
+  // If specified, ignore instances with the given label.
+  optional int32 ignore_label = 1; // no default declared
+  // How to normalize the loss for loss layers that aggregate across batches,
+  // spatial dimensions, or other dimensions.  Currently only implemented in
+  // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers.
+  enum NormalizationMode {
+    // Divide by the number of examples in the batch times spatial dimensions.
+    // Outputs that receive the ignore label will NOT be ignored in computing
+    // the normalization factor.
+    FULL = 0;
+    // Divide by the total number of output locations that do not take the
+    // ignore_label.  If ignore_label is not set, this behaves like FULL.
+    VALID = 1;
+    // Divide by the batch size.
+    BATCH_SIZE = 2;
+    // Do not normalize the loss.
+    NONE = 3;
+  }
+  // For historical reasons, the default normalization for
+  // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID.
+  optional NormalizationMode normalization = 3 [default = VALID];
+  // Deprecated.  Ignored if normalization is specified.  If normalization
+  // is not specified, then setting this to false will be equivalent to
+  // normalization = BATCH_SIZE to be consistent with previous behavior.
+  optional bool normalize = 2; // no default declared
+}
+// Messages that store parameters used by individual layer types follow, in
+// alphabetical order.
+message AccuracyParameter { // carried by LayerParameter.accuracy_param
+  // When computing accuracy, count as correct by comparing the true label to
+  // the top k scoring classes.  By default, only compare to the top scoring
+  // class (i.e. argmax).
+  optional uint32 top_k = 1 [default = 1];
+  // The "label" axis of the prediction blob, whose argmax corresponds to the
+  // predicted label -- may be negative to index from the end (e.g., -1 for the
+  // last axis).  For example, if axis == 1 and the predictions are
+  // (N x C x H x W), the label blob is expected to contain N*H*W ground truth
+  // labels with integer values in {0, 1, ..., C-1}.
+  optional int32 axis = 2 [default = 1];
+  // If specified, ignore instances with the given label.
+  optional int32 ignore_label = 3; // no default declared
+}
+// Message that stores parameters for annotated data input: the batch
+// samplers, a label map file, and an optional annotation-type override.
+message AnnotatedDataParameter {
+  // Define the sampler. Repeated, so several samplers may be listed.
+  repeated BatchSampler batch_sampler = 1;
+  // Store label name and label id in LabelMap format.
+  optional string label_map_file = 2;
+  // If provided, it will replace the AnnotationType stored in each
+  // AnnotatedDatum.
+  optional AnnotatedDatum.AnnotationType anno_type = 3;
+}
+// Message that stores parameters used by ArgMaxLayer (the layer named in the
+// axis comment below).
+message ArgMaxParameter {
+  // If true produce pairs (argmax, maxval)
+  optional bool out_max_val = 1 [default = false];
+  // Defaults to 1.
+  // NOTE(review): presumably the number of top-scoring entries to report --
+  // confirm against the layer implementation.
+  optional uint32 top_k = 2 [default = 1];
+  // The axis along which to maximise -- may be negative to index from the
+  // end (e.g., -1 for the last axis).
+  // By default ArgMaxLayer maximizes over the flattened trailing dimensions
+  // for each index of the first / num dimension.
+  optional int32 axis = 3;
+}
+// Message that stores parameters used by ConcatLayer (the layer named in the
+// axis comment below).
+message ConcatParameter {
+  // The axis along which to concatenate -- may be negative to index from the
+  // end (e.g., -1 for the last axis).  Other axes must have the
+  // same dimension for all the bottom blobs.
+  // By default, ConcatLayer concatenates blobs along the "channels" axis (1).
+  optional int32 axis = 2 [default = 1];
+  // DEPRECATED: alias for "axis" -- does not support negative indexing.
+  // Keeps field number 1; "axis" uses field number 2.
+  optional uint32 concat_dim = 1 [default = 1];
+}
+// Message that stores batch-normalization parameters: whether to use
+// accumulated global statistics, the moving-average fraction, and epsilon.
+message BatchNormParameter {
+  // If false, accumulate global mean/variance values via a moving average. If
+  // true, use those accumulated values instead of computing mean/variance
+  // across the batch.
+  // No default is declared for this field.
+  optional bool use_global_stats = 1;
+  // How much does the moving average decay each iteration?
+  // Defaults to 0.999.
+  optional float moving_average_fraction = 2 [default = .999];
+  // Small value to add to the variance estimate so that we don't divide by
+  // zero. Defaults to 1e-5.
+  optional float eps = 3 [default = 1e-5];
+}
+// Message that stores parameters used by BiasLayer (the layer named in the
+// filler comment below).
+message BiasParameter {
+  // The first axis of bottom[0] (the first input Blob) along which to apply
+  // bottom[1] (the second input Blob).  May be negative to index from the end
+  // (e.g., -1 for the last axis).
+  //
+  // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
+  // top[0] will have the same shape, and bottom[1] may have any of the
+  // following shapes (for the given value of axis):
+  //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
+  //    (axis == 1 == -3)          3;     3x40;     3x40x60
+  //    (axis == 2 == -2)                   40;       40x60
+  //    (axis == 3 == -1)                                60
+  // Furthermore, bottom[1] may have the empty shape (regardless of the value of
+  // "axis") -- a scalar bias.
+  optional int32 axis = 1 [default = 1];
+  // (num_axes is ignored unless just one bottom is given and the bias is
+  // a learned parameter of the layer.  Otherwise, num_axes is determined by the
+  // number of axes of the second bottom.)
+  // The number of axes of the input (bottom[0]) covered by the bias
+  // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
+  // Set num_axes := 0, to add a zero-axis Blob: a scalar.
+  optional int32 num_axes = 2 [default = 1];
+  // (filler is ignored unless just one bottom is given and the bias is
+  // a learned parameter of the layer.)
+  // The initialization for the learned bias parameter.
+  // Default is the zero (0) initialization, resulting in the BiasLayer
+  // initially performing the identity operation.
+  optional FillerParameter filler = 3;
+}
+// Message that stores contrastive-loss parameters: the margin and a switch
+// between the legacy and the current formulation of the cost.
+message ContrastiveLossParameter {
+  // Margin for dissimilar pairs. Defaults to 1.0.
+  optional float margin = 1 [default = 1.0];
+  // The first implementation of this cost did not exactly match the cost of
+  // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2.
+  // legacy_version = false (the default) uses (margin - d)^2 as proposed in the
+  // Hadsell paper. New models should probably use this version.
+  // legacy_version = true uses (margin - d^2). This is kept to support /
+  // reproduce existing models and results.
+  optional bool legacy_version = 2 [default = false];
+}
+// Message that stores convolution parameters: output count, bias, padding,
+// kernel size, stride, dilation, grouping, fillers, engine and channel axis.
+message ConvolutionParameter {
+  optional uint32 num_output = 1; // The number of outputs for the layer
+  optional bool bias_term = 2 [default = true]; // whether to have bias terms
+  // Pad, kernel size, and stride are all given as a single value for equal
+  // dimensions in all spatial dimensions, or once per spatial dimension.
+  repeated uint32 pad = 3; // The padding size; defaults to 0
+  repeated uint32 kernel_size = 4; // The kernel size
+  repeated uint32 stride = 6; // The stride; defaults to 1
+  // Factor used to dilate the kernel, (implicitly) zero-filling the resulting
+  // holes. (Kernel dilation is sometimes referred to by its use in the
+  // algorithme à trous from Holschneider et al. 1987.)
+  repeated uint32 dilation = 18; // The dilation; defaults to 1
+  // For 2D convolution only, the *_h and *_w versions may also be used to
+  // specify both spatial dimensions.
+  optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only)
+  optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only)
+  optional uint32 kernel_h = 11; // The kernel height (2D only)
+  optional uint32 kernel_w = 12; // The kernel width (2D only)
+  optional uint32 stride_h = 13; // The stride height (2D only)
+  optional uint32 stride_w = 14; // The stride width (2D only)
+  optional uint32 group = 5 [default = 1]; // The group size for group conv
+  optional FillerParameter weight_filler = 7; // The filler for the weight
+  optional FillerParameter bias_filler = 8; // The filler for the bias
+  // Selects the implementation used for the layer.
+  // NOTE(review): the names suggest a choice between the native Caffe
+  // implementation and cuDNN, with DEFAULT leaving the choice to the
+  // framework -- confirm against the layer factory.
+  enum Engine {
+    DEFAULT = 0;
+    CAFFE = 1;
+    CUDNN = 2;
+  }
+  optional Engine engine = 15 [default = DEFAULT];
+  // The axis to interpret as "channels" when performing convolution.
+  // Preceding dimensions are treated as independent inputs;
+  // succeeding dimensions are treated as "spatial".
+  // With (N, C, H, W) inputs, and axis == 1 (the default), we perform
+  // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for
+  // groups g>1) filters across the spatial axes (H, W) of the input.
+  // With (N, C, D, H, W) inputs, and axis == 1, we perform
+  // N independent 3D convolutions, sliding (C/g)-channels
+  // filters across the spatial axes (D, H, W) of the input.
+  optional int32 axis = 16 [default = 1];
+  // Whether to force use of the general ND convolution, even if a specific
+  // implementation for blobs of the appropriate number of spatial dimensions
+  // is available. (Currently, there is only a 2D-specific convolution
+  // implementation; for input blobs with num_axes != 2, this option is
+  // ignored and the ND implementation will be used.)
+  optional bool force_nd_im2col = 17 [default = false];
+}
+// Message that stores crop parameters: the first axis to crop and the
+// offset(s) applied to the cropped dimensions.
+message CropParameter {
+  // To crop, elements of the first bottom are selected to fit the dimensions
+  // of the second, reference bottom. The crop is configured by
+  // - the crop `axis` to pick the dimensions for cropping
+  // - the crop `offset` to set the shift for all/each dimension
+  // to align the cropped bottom with the reference bottom.
+  // All dimensions up to but excluding `axis` are preserved, while
+  // the dimensions including and trailing `axis` are cropped.
+  // If only one `offset` is set, then all dimensions are offset by this amount.
+  // Otherwise, the number of offsets must equal the number of cropped axes to
+  // shift the crop in each dimension accordingly.
+  // Note: standard dimensions are N,C,H,W so the default is a spatial crop,
+  // and `axis` may be negative to index from the end (e.g., -1 for the last
+  // axis).
+  optional int32 axis = 1 [default = 2];
+  // Either a single offset for all cropped dimensions or one per cropped
+  // axis, as described above.
+  repeated uint32 offset = 2;
+}
+// Message that stores parameters for a database-backed data layer: source,
+// batch size, backend, prefetching, and several deprecated pre-processing
+// options that have moved to TransformationParameter.
+message DataParameter {
+  // Database backends that can be selected through `backend`.
+  enum DB {
+    LEVELDB = 0;
+    LMDB = 1;
+  }
+  // Specify the data source.
+  optional string source = 1;
+  // Specify the batch size.
+  optional uint32 batch_size = 4;
+  // The rand_skip variable is for the data layer to skip a few data points
+  // to avoid all asynchronous sgd clients to start at the same point. The skip
+  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
+  // be larger than the number of keys in the database.
+  // DEPRECATED. Each solver accesses a different subset of the database.
+  optional uint32 rand_skip = 7 [default = 0];
+  // The database backend to read from. Defaults to LEVELDB.
+  optional DB backend = 8 [default = LEVELDB];
+  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
+  // simple scaling and subtracting the data mean, if provided. Note that the
+  // mean subtraction is always carried out before scaling.
+  optional float scale = 2 [default = 1];
+  optional string mean_file = 3;
+  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
+  // crop an image.
+  optional uint32 crop_size = 5 [default = 0];
+  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
+  // data.
+  optional bool mirror = 6 [default = false];
+  // Force the encoded image to have 3 color channels
+  optional bool force_encoded_color = 9 [default = false];
+  // Prefetch queue (Number of batches to prefetch to host memory, increase if
+  // data access bandwidth varies). Defaults to 4.
+  optional uint32 prefetch = 10 [default = 4];
+}
+// Message that stores parameters used by DetectionEvaluateLayer
+message DetectionEvaluateParameter {
+  // Number of classes that are actually predicted. Required!
+  // (Declared `optional` in the schema, so protobuf itself does not enforce
+  // its presence.)
+  optional uint32 num_classes = 1;
+  // Label id for background class. Needed for sanity check so that
+  // background class is neither in the ground truth nor the detections.
+  optional uint32 background_label_id = 2 [default = 0];
+  // Threshold for deciding true/false positive.
+  optional float overlap_threshold = 3 [default = 0.5];
+  // If true, also consider difficult ground truth for evaluation.
+  optional bool evaluate_difficult_gt = 4 [default = true];
+  // A file which contains a list of names and sizes with same order
+  // of the input DB. The file is in the following format:
+  //    name height width
+  //    ...
+  // If provided, we will scale the prediction and ground truth NormalizedBBox
+  // for evaluation.
+  optional string name_size_file = 5;
+  // The resize parameter used in converting NormalizedBBox to original image.
+  optional ResizeParameter resize_param = 6;
+}
+// Message that stores parameters for non-maximum suppression (NMS).
+message NonMaximumSuppressionParameter {
+  // Threshold to be used in nms. Defaults to 0.3.
+  optional float nms_threshold = 1 [default = 0.3];
+  // Maximum number of results to be kept. No default is declared.
+  optional int32 top_k = 2;
+  // Parameter for adaptive nms. Defaults to 1.0.
+  optional float eta = 3 [default = 1.0];
+}
+// Message that stores parameters for saving results to disk: output
+// location and format, plus the auxiliary files needed to write them.
+message SaveOutputParameter {
+  // Output directory. If not empty, we will save the results.
+  optional string output_directory = 1;
+  // Output name prefix.
+  optional string output_name_prefix = 2;
+  // Output format.
+  //    VOC - PASCAL VOC output format.
+  //    COCO - MS COCO output format.
+  optional string output_format = 3;
+  // If you want to output results, must also provide the following two files.
+  // Otherwise, we will ignore saving results.
+  // label map file.
+  optional string label_map_file = 4;
+  // A file which contains a list of names and sizes with same order
+  // of the input DB. The file is in the following format:
+  //    name height width
+  //    ...
+  optional string name_size_file = 5;
+  // Number of test images. It can be less than the lines specified in
+  // name_size_file. For example, when we only want to evaluate on part
+  // of the test images.
+  optional uint32 num_test_image = 6;
+  // The resize parameter used in saving the data.
+  optional ResizeParameter resize_param = 7;
+}
+// Message that stores parameters used by DetectionOutputLayer
+message DetectionOutputParameter {
+  // Number of classes to be predicted. Required!
+  // (Declared `optional` in the schema, so protobuf itself does not enforce
+  // its presence.)
+  optional uint32 num_classes = 1;
+  // If true, bounding boxes are shared among different classes.
+  optional bool share_location = 2 [default = true];
+  // Background label id. If there is no background class,
+  // set it as -1.
+  optional int32 background_label_id = 3 [default = 0];
+  // Parameters used for non maximum suppression.
+  optional NonMaximumSuppressionParameter nms_param = 4;
+  // Parameters used for saving detection results.
+  optional SaveOutputParameter save_output_param = 5;
+  // Type of coding method for bbox. Defaults to CORNER.
+  optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER];
+  // If true, variance is encoded in target; otherwise we need to adjust the
+  // predicted offset accordingly.
+  optional bool variance_encoded_in_target = 8 [default = false];
+  // Number of total bboxes to be kept per image after nms step.
+  // -1 means keeping all bboxes after nms step.
+  optional int32 keep_top_k = 7 [default = -1];
+  // Only consider detections whose confidences are larger than a threshold.
+  // If not provided, consider all boxes.
+  optional float confidence_threshold = 9;
+  // If true, visualize the detection results.
+  optional bool visualize = 10 [default = false];
+  // The threshold used to visualize the detection results.
+  // No default is declared.
+  optional float visualize_threshold = 11;
+  // If provided, save outputs to video file.
+  optional string save_file = 12;
+}
+// Message that stores the dropout ratio (defaults to 0.5).
+message DropoutParameter {
+  optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
+}
+// DummyDataLayer fills any number of arbitrarily shaped blobs with random
+// (or constant) data generated by "Fillers" (see "message FillerParameter").
+message DummyDataParameter {
+  // This layer produces N >= 1 top blobs.  DummyDataParameter must specify 1 or N
+  // shape fields, and 0, 1 or N data_fillers.
+  //
+  // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used.
+  // If 1 data_filler is specified, it is applied to all top blobs.  If N are
+  // specified, the ith is applied to the ith top blob.
+  repeated FillerParameter data_filler = 1;
+  // Shapes of the top blobs: 1 or N entries, per the note above.
+  repeated BlobShape shape = 6;
+  // 4D dimensions -- deprecated.  Use "shape" instead.
+  repeated uint32 num = 2;
+  repeated uint32 channels = 3;
+  repeated uint32 height = 4;
+  repeated uint32 width = 5;
+}
+// Message that stores parameters for element-wise operations: which
+// operation to apply, per-blob coefficients, and a gradient-stability flag.
+message EltwiseParameter {
+  // The available element-wise operations; `operation` defaults to SUM.
+  enum EltwiseOp {
+    PROD = 0;
+    SUM = 1;
+    MAX = 2;
+  }
+  optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
+  repeated float coeff = 2; // blob-wise coefficient for SUM operation
+  // Whether to use an asymptotically slower (for >2 inputs) but stabler method
+  // of computing the gradient for the PROD operation. (No effect for SUM op.)
+  optional bool stable_prod_grad = 3 [default = true];
+}
+// Message that stores parameters used by ELULayer
+message ELUParameter {
+  // Described in:
+  // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate
+  // Deep Network Learning by Exponential Linear Units (ELUs). arXiv
+  // Defaults to 1.
+  // NOTE(review): presumably the alpha coefficient of the ELU defined in the
+  // cited paper -- confirm against the layer implementation.
+  optional float alpha = 1 [default = 1];
+}
+// Message that stores parameters used by EmbedLayer
+message EmbedParameter {
+  optional uint32 num_output = 1; // The number of outputs for the layer
+  // The input is given as integers to be interpreted as one-hot
+  // vector indices with dimension input_dim.  Hence input_dim should be
+  // 1 greater than the maximum possible input value.
+  optional uint32 input_dim = 2;
+  optional bool bias_term = 3 [default = true]; // Whether to use a bias term
+  optional FillerParameter weight_filler = 4; // The filler for the weight
+  optional FillerParameter bias_filler = 5; // The filler for the bias
+}
+// Message that stores parameters used by ExpLayer
+message ExpParameter {
+  // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0.
+  // Or if base is set to the default (-1), base is set to e,
+  // so y = exp(shift + scale * x).
+  // With all defaults (base = -1, scale = 1, shift = 0) this is y = exp(x).
+  optional float base = 1 [default = -1.0];
+  optional float scale = 2 [default = 1.0];
+  optional float shift = 3 [default = 0.0];
+}
+// Message that stores parameters used by FlattenLayer
+message FlattenParameter {
+  // The first axis to flatten: all preceding axes are retained in the output.
+  // May be negative to index from the end (e.g., -1 for the last axis).
+  optional int32 axis = 1 [default = 1];
+  // The last axis to flatten: all following axes are retained in the output.
+  // May be negative to index from the end (e.g., the default -1 for the last
+  // axis).
+  optional int32 end_axis = 2 [default = -1];
+}
+// Message that stores parameters used by HDF5DataLayer
+message HDF5DataParameter {
+  // Specify the data source.
+  optional string source = 1;
+  // Specify the batch size.
+  optional uint32 batch_size = 2;
+  // Specify whether to shuffle the data. Defaults to false.
+  // If shuffle == true, the ordering of the HDF5 files is shuffled,
+  // and the ordering of data within any given HDF5 file is shuffled,
+  // but data between different files are not interleaved; all of a file's
+  // data are output (in a random order) before moving onto another file.
+  optional bool shuffle = 3 [default = false];
+}
+// Message that stores the output file name.
+// NOTE(review): the message name suggests it configures an HDF5 output
+// layer -- confirm against LayerParameter.
+message HDF5OutputParameter {
+  // Name of the file to write.
+  optional string file_name = 1;
+}
+// Message that stores the norm (L1 or L2) used for the hinge loss.
+message HingeLossParameter {
+  // Note that the enum values start at 1, not 0.
+  enum Norm {
+    L1 = 1;
+    L2 = 2;
+  }
+  // Specify the Norm to use: L1 or L2. Defaults to L1.
+  optional Norm norm = 1 [default = L1];
+}
+// Message that stores parameters for an image-file data layer (called
+// "ImageLayer" in the shuffle comment below): source, batching, shuffling,
+// resizing, and several deprecated pre-processing options.
+message ImageDataParameter {
+  // Specify the data source.
+  optional string source = 1;
+  // Specify the batch size. Defaults to 1.
+  optional uint32 batch_size = 4 [default = 1];
+  // The rand_skip variable is for the data layer to skip a few data points
+  // to avoid all asynchronous sgd clients to start at the same point. The skip
+  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
+  // be larger than the number of keys in the database.
+  optional uint32 rand_skip = 7 [default = 0];
+  // Whether or not ImageLayer should shuffle the list of files at every epoch.
+  optional bool shuffle = 8 [default = false];
+  // It will also resize images if new_height or new_width are not zero.
+  optional uint32 new_height = 9 [default = 0];
+  optional uint32 new_width = 10 [default = 0];
+  // Specify if the images are color or gray
+  optional bool is_color = 11 [default = true];
+  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
+  // simple scaling and subtracting the data mean, if provided. Note that the
+  // mean subtraction is always carried out before scaling.
+  optional float scale = 2 [default = 1];
+  optional string mean_file = 3;
+  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
+  // crop an image.
+  optional uint32 crop_size = 5 [default = 0];
+  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
+  // data.
+  optional bool mirror = 6 [default = false];
+  // Defaults to the empty string.
+  // NOTE(review): presumably a directory prefix prepended to the file names
+  // listed in `source` -- confirm against the layer implementation.
+  optional string root_folder = 12 [default = ""];
+}
+// Message that stores the source of the infogain matrix.
+message InfogainLossParameter {
+  // Specify the infogain matrix source.
+  optional string source = 1;
+}
+// Message that stores inner-product parameters: output count, bias, fillers,
+// the first axis to lump into the product, and a weight-transpose flag.
+message InnerProductParameter {
+  optional uint32 num_output = 1; // The number of outputs for the layer
+  optional bool bias_term = 2 [default = true]; // whether to have bias terms
+  optional FillerParameter weight_filler = 3; // The filler for the weight
+  optional FillerParameter bias_filler = 4; // The filler for the bias
+  // The first axis to be lumped into a single inner product computation;
+  // all preceding axes are retained in the output.
+  // May be negative to index from the end (e.g., -1 for the last axis).
+  optional int32 axis = 5 [default = 1];
+  // Specify whether to transpose the weight matrix or not.
+  // If transpose == true, any operations will be performed on the transpose
+  // of the weight matrix. The weight matrix itself is not going to be transposed
+  // but rather the transfer flag of operations will be toggled accordingly.
+  // NOTE(review): "transfer flag" above probably means "transpose flag" --
+  // confirm against the layer implementation.
+  optional bool transpose = 6 [default = false];
+}
+// Message that stores the shapes of the top blobs produced by an input layer.
+message InputParameter {
+  // This layer produces N >= 1 top blob(s) to be assigned manually.
+  // Define N shapes to set a shape for each top.
+  // Define 1 shape to set the same shape for every top.
+  // Define no shape to defer to reshaping manually.
+  repeated BlobShape shape = 1;
+}
+// Message that stores parameters used by LogLayer
+message LogParameter {
+  // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0.
+  // Or if base is set to the default (-1), base is set to e,
+  // so y = ln(shift + scale * x) = log_e(shift + scale * x)
+  // With all defaults (base = -1, scale = 1, shift = 0) this is y = ln(x).
+  optional float base = 1 [default = -1.0];
+  optional float scale = 2 [default = 1.0];
+  optional float shift = 3 [default = 0.0];
+}
+// Message that stores parameters used by LRNLayer
+message LRNParameter {
+  // Defaults: local_size = 5, alpha = 1, beta = 0.75, k = 1.
+  // NOTE(review): the roles of local_size, alpha, beta and k are not
+  // described in this file -- see the LRN layer implementation for the
+  // exact formula.
+  optional uint32 local_size = 1 [default = 5];
+  optional float alpha = 2 [default = 1.];
+  optional float beta = 3 [default = 0.75];
+  // Region over which normalization is applied; `norm_region` defaults to
+  // ACROSS_CHANNELS.
+  enum NormRegion {
+    ACROSS_CHANNELS = 0;
+    WITHIN_CHANNEL = 1;
+  }
+  optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
+  optional float k = 5 [default = 1.];
+  // Selects the implementation used for the layer.
+  // NOTE(review): the names suggest native Caffe vs. cuDNN -- confirm.
+  enum Engine {
+    DEFAULT = 0;
+    CAFFE = 1;
+    CUDNN = 2;
+  }
+  optional Engine engine = 6 [default = DEFAULT];
+}
+// Message that stores a batch size and the channels/height/width dimensions.
+// NOTE(review): the message name suggests it configures an in-memory data
+// layer -- confirm against LayerParameter.
+message MemoryDataParameter {
+  optional uint32 batch_size = 1;
+  optional uint32 channels = 2;
+  optional uint32 height = 3;
+  optional uint32 width = 4;
+}
+// Message that stores parameters used by MultiBoxLossLayer
+message MultiBoxLossParameter {
+  // Localization loss type.
+  enum LocLossType {
+    L2 = 0;
+    SMOOTH_L1 = 1;
+  }
+  optional LocLossType loc_loss_type = 1 [default = SMOOTH_L1];
+  // Confidence loss type.
+  enum ConfLossType {
+    SOFTMAX = 0;
+    LOGISTIC = 1;
+  }
+  optional ConfLossType conf_loss_type = 2 [default = SOFTMAX];
+  // Weight for localization loss.
+  optional float loc_weight = 3 [default = 1.0];
+  // Number of classes to be predicted. Required!
+  // (Declared `optional` in the schema, so protobuf itself does not enforce
+  // its presence.)
+  optional uint32 num_classes = 4;
+  // If true, bounding boxes are shared among different classes.
+  optional bool share_location = 5 [default = true];
+  // Matching method during training.
+  enum MatchType {
+    BIPARTITE = 0;
+    PER_PREDICTION = 1;
+  }
+  optional MatchType match_type = 6 [default = PER_PREDICTION];
+  // If match_type is PER_PREDICTION, use overlap_threshold to
+  // determine the extra matching bboxes.
+  optional float overlap_threshold = 7 [default = 0.5];
+  // Use prior for matching.
+  optional bool use_prior_for_matching = 8 [default = true];
+  // Background label id.
+  optional uint32 background_label_id = 9 [default = 0];
+  // If true, also consider difficult ground truth.
+  optional bool use_difficult_gt = 10 [default = true];
+  // If true, perform negative mining.
+  // DEPRECATED: use mining_type instead.
+  optional bool do_neg_mining = 11;
+  // The negative/positive ratio.
+  optional float neg_pos_ratio = 12 [default = 3.0];
+  // The negative overlap upper bound for the unmatched predictions.
+  optional float neg_overlap = 13 [default = 0.5];
+  // Type of coding method for bbox. Defaults to CORNER.
+  optional PriorBoxParameter.CodeType code_type = 14 [default = CORNER];
+  // If true, encode the variance of prior box in the loc loss target instead of
+  // in bbox.
+  optional bool encode_variance_in_target = 16 [default = false];
+  // If true, map all object classes to agnostic class. It is useful for learning
+  // objectness detector.
+  optional bool map_object_to_agnostic = 17 [default = false];
+  // If true, ignore cross boundary bbox during matching.
+  // A cross boundary bbox is a bbox that is outside of the image region.
+  optional bool ignore_cross_boundary_bbox = 18 [default = false];
+  // If true, only backpropagate on corners which are inside of the image
+  // region when code_type is CORNER or CORNER_SIZE.
+  optional bool bp_inside = 19 [default = false];
+  // Mining type during training.
+  //   NONE : use all negatives.
+  //   MAX_NEGATIVE : select negatives based on the score.
+  //   HARD_EXAMPLE : select hard examples based on "Training Region-based
+  //     Object Detectors with Online Hard Example Mining", Shrivastava et al.
+  enum MiningType {
+    NONE = 0;
+    MAX_NEGATIVE = 1;
+    HARD_EXAMPLE = 2;
+  }
+  optional MiningType mining_type = 20 [default = MAX_NEGATIVE];
+  // Parameters used for non maximum suppression during hard example mining.
+  optional NonMaximumSuppressionParameter nms_param = 21;
+  // Defaults to 64.
+  // NOTE(review): undocumented here; presumably the number of examples
+  // selected during hard example mining -- confirm against the layer code.
+  optional int32 sample_size = 22 [default = 64];
+  // Defaults to false.
+  // NOTE(review): undocumented here; presumably whether NMS during mining
+  // runs on the prior boxes rather than the predictions -- confirm.
+  optional bool use_prior_for_nms = 23 [default = false];
+}
+// Message that stores mean-variance normalization (MVN) parameters.
+message MVNParameter {
+  // This parameter can be set to false to normalize mean only
+  optional bool normalize_variance = 1 [default = true];
+  // This parameter can be set to true to perform DNN-like MVN
+  optional bool across_channels = 2 [default = false];
+  // Epsilon for not dividing by zero while normalizing variance.
+  // Defaults to 1e-9.
+  optional float eps = 3 [default = 1e-9];
+}
+// Message that stores parameters used by NormalizeLayer
+message NormalizeParameter {
+  // Defaults to true.
+  // NOTE(review): undocumented here; presumably whether normalization spans
+  // the spatial dimensions as well as channels -- confirm against the layer.
+  optional bool across_spatial = 1 [default = true];
+  // Initial value of scale. Default is 1.0 for all
+  optional FillerParameter scale_filler = 2;
+  // Whether or not scale parameters are shared across channels.
+  optional bool channel_shared = 3 [default = true];
+  // Epsilon for not dividing by zero while normalizing variance.
+  // Defaults to 1e-10.
+  optional float eps = 4 [default = 1e-10];
+}
+// Message that stores a single blob shape.
+// NOTE(review): the message name suggests it configures a "Parameter"
+// layer -- confirm against LayerParameter.
+message ParameterParameter {
+  optional BlobShape shape = 1;
+}
+// Message that stores the new axis order used to permute a blob's axes.
+message PermuteParameter {
+  // The new order of the axes of the data. Note that each entry should be
+  // within the range of the input data's axes, and that indexing starts
+  // from 0. Do not repeat an axis in the order.
+  repeated uint32 order = 1;
+}
+// Message that stores pooling parameters: the pooling method, padding,
+// kernel size, stride, engine, and a global-pooling switch.
+message PoolingParameter {
+  // The available pooling methods; `pool` defaults to MAX.
+  enum PoolMethod {
+    MAX = 0;
+    AVE = 1;
+    STOCHASTIC = 2;
+  }
+  optional PoolMethod pool = 1 [default = MAX]; // The pooling method
+  // Pad, kernel size, and stride are all given as a single value for equal
+  // dimensions in height and width or as Y, X pairs.
+  optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X)
+  optional uint32 pad_h = 9 [default = 0]; // The padding height
+  optional uint32 pad_w = 10 [default = 0]; // The padding width
+  optional uint32 kernel_size = 2; // The kernel size (square)
+  optional uint32 kernel_h = 5; // The kernel height
+  optional uint32 kernel_w = 6; // The kernel width
+  optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X)
+  optional uint32 stride_h = 7; // The stride height
+  optional uint32 stride_w = 8; // The stride width
+  // Selects the implementation used for the layer.
+  // NOTE(review): the names suggest native Caffe vs. cuDNN -- confirm.
+  enum Engine {
+    DEFAULT = 0;
+    CAFFE = 1;
+    CUDNN = 2;
+  }
+  optional Engine engine = 11 [default = DEFAULT];
+  // If global_pooling then it will pool over the size of the bottom by doing
+  // kernel_h = bottom->height and kernel_w = bottom->width
+  optional bool global_pooling = 12 [default = false];
+}
+// Message that stores parameters used by PowerLayer
+message PowerParameter {
+  // PowerLayer computes outputs y = (shift + scale * x) ^ power.
+  // With all defaults (power = 1, scale = 1, shift = 0) this is y = x.
+  optional float power = 1 [default = 1.0];
+  optional float scale = 2 [default = 1.0];
+  optional float shift = 3 [default = 0.0];
+}
+// Message that stores parameters used by PriorBoxLayer
+message PriorBoxParameter {
+  // Encode/decode type. Note that the enum values start at 1, not 0.
+  enum CodeType {
+    CORNER = 1;
+    CENTER_SIZE = 2;
+    CORNER_SIZE = 3;
+  }
+  // Minimum box size (in pixels). Required!
+  repeated float min_size = 1;
+  // Maximum box size (in pixels). Required!
+  repeated float max_size = 2;
+  // Various aspect ratios. Duplicate ratios will be ignored.
+  // If none is provided, we use default ratio 1.
+  repeated float aspect_ratio = 3;
+  // If true, will flip each aspect ratio.
+  // For example, if there is aspect ratio "r",
+  // we will generate aspect ratio "1.0/r" as well.
+  optional bool flip = 4 [default = true];
+  // If true, will clip the prior so that it is within [0, 1]
+  optional bool clip = 5 [default = false];
+  // Variance for adjusting the prior bboxes.
+  repeated float variance = 6;
+  // By default, we calculate img_height, img_width, step_x, step_y based on
+  // bottom[0] (feat) and bottom[1] (img), unless these values are explicitly
+  // provided.
+  // Explicitly provide the img_size.
+  optional uint32 img_size = 7;
+  // Either img_size or img_h/img_w should be specified; not both.
+  optional uint32 img_h = 8;
+  optional uint32 img_w = 9;
+  // Explicitly provide the step size.
+  optional float step = 10;
+  // Either step or step_h/step_w should be specified; not both.
+  optional float step_h = 11;
+  optional float step_w = 12;
+  // Offset to the top left corner of each cell. Defaults to 0.5.
+  optional float offset = 13 [default = 0.5];
+}
+// Message that stores parameters used by PythonLayer (the layer named in the
+// param_str comment below).
+message PythonParameter {
+  // NOTE(review): presumably the Python module to import and the name of the
+  // layer class inside it -- confirm against the layer factory.
+  optional string module = 1;
+  optional string layer = 2;
+  // This value is set to the attribute `param_str` of the `PythonLayer` object
+  // in Python before calling the `setup()` method. This could be a number,
+  // string, dictionary in Python dict format, JSON, etc. You may parse this
+  // string in `setup` method and use it in `forward` and `backward`.
+  optional string param_str = 3 [default = ''];
+  // Whether this PythonLayer is shared among worker solvers during data parallelism.
+  // If true, each worker solver sequentially runs forward from this layer.
+  // This value should be set true if you are using it as a data layer.
+  optional bool share_in_parallel = 4 [default = false];
+}
+// Message that stores parameters used by RecurrentLayer
+message RecurrentParameter {
+  // The dimension of the output (and usually hidden state) representation --
+  // must be explicitly set to non-zero.
+  // The declared default of 0 is therefore not a usable value.
+  optional uint32 num_output = 1 [default = 0];
+  optional FillerParameter weight_filler = 2; // The filler for the weight
+  optional FillerParameter bias_filler = 3; // The filler for the bias
+  // Whether to enable displaying debug_info in the unrolled recurrent net.
+  optional bool debug_info = 4 [default = false];
+  // Whether to add as additional inputs (bottoms) the initial hidden state
+  // blobs, and add as additional outputs (tops) the final timestep hidden state
+  // blobs.  The number of additional bottom/top blobs required depends on the
+  // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs.
+  optional bool expose_hidden = 5 [default = false];
+}
+// Message that stores parameters used by ReductionLayer
+message ReductionParameter {
+  // The available reduction operations. Note that the enum values start at
+  // 1, not 0.
+  // NOTE(review): presumably ASUM is a sum of absolute values and SUMSQ a
+  // sum of squares -- confirm against the layer implementation.
+  enum ReductionOp {
+    SUM = 1;
+    ASUM = 2;
+    SUMSQ = 3;
+    MEAN = 4;
+  }
+  optional ReductionOp operation = 1 [default = SUM]; // reduction operation
+  // The first axis to reduce to a scalar -- may be negative to index from the
+  // end (e.g., -1 for the last axis).
+  // (Currently, only reduction along ALL "tail" axes is supported; reduction
+  // of axis M through N, where N < num_axes - 1, is unsupported.)
+  // Suppose we have an n-axis bottom Blob with shape:
+  //     (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)).
+  // If axis == m, the output Blob will have shape
+  //     (d0, d1, d2, ..., d(m-1)),
+  // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1))
+  // times, each including (dm * d(m+1) * ... * d(n-1)) individual data.
+  // If axis == 0 (the default), the output Blob always has the empty shape
+  // (count 1), performing reduction across the entire input --
+  // often useful for creating new loss functions.
+  optional int32 axis = 2 [default = 0];
+  optional float coeff = 3 [default = 1.0]; // coefficient for output
+}
+// Message that stores parameters used by ReLULayer
+message ReLUParameter {
+  // Allow non-zero slope for negative inputs to speed up optimization
+  // Described in:
+  // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities
+  // improve neural network acoustic models. In ICML Workshop on Deep Learning
+  // for Audio, Speech, and Language Processing.
+  // Defaults to 0.
+  optional float negative_slope = 1 [default = 0];
+  // Selects the implementation used for the layer.
+  // NOTE(review): the names suggest native Caffe vs. cuDNN -- confirm.
+  enum Engine {
+    DEFAULT = 0;
+    CAFFE = 1;
+    CUDNN = 2;
+  }
+  optional Engine engine = 2 [default = DEFAULT];
+}
+// Message that stores parameters used by ReshapeLayer
+message ReshapeParameter {
+  // Specify the output dimensions. If some of the dimensions are set to 0,
+  // the corresponding dimension from the bottom layer is used (unchanged).
+  // Exactly one dimension may be set to -1, in which case its value is
+  // inferred from the count of the bottom blob and the remaining dimensions.
+  // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8:
+  //
+  //   layer {
+  //     type: "Reshape" bottom: "input" top: "output"
+  //     reshape_param { ... }
+  //   }
+  //
+  // If "input" is 2D with shape 2 x 8, then the following reshape_param
+  // specifications are all equivalent, producing a 3D blob "output" with shape
+  // 2 x 2 x 4:
+  //
+  //   reshape_param { shape { dim:  2  dim: 2  dim:  4 } }
+  //   reshape_param { shape { dim:  0  dim: 2  dim:  4 } }
+  //   reshape_param { shape { dim:  0  dim: 2  dim: -1 } }
+  //   reshape_param { shape { dim:  0  dim:-1  dim:  4 } }
+  //
+  optional BlobShape shape = 1;
+  // axis and num_axes control the portion of the bottom blob's shape that is
+  // replaced by (included in) the reshape. By default (axis == 0 and
+  // num_axes == -1), the entire bottom blob shape is included in the reshape,
+  // and hence the shape field must specify the entire output shape.
+  //
+  // axis may be non-zero to retain some portion of the beginning of the input
+  // shape (and may be negative to index from the end; e.g., -1 to begin the
+  // reshape after the last axis, including nothing in the reshape,
+  // -2 to include only the last axis, etc.).
+  //
+  // For example, suppose "input" is a 2D blob with shape 2 x 8.
+  // Then the following ReshapeLayer specifications are all equivalent,
+  // producing a blob "output" with shape 2 x 2 x 4:
+  //
+  //   reshape_param { shape { dim: 2  dim: 2  dim: 4 } }
+  //   reshape_param { shape { dim: 2  dim: 4 } axis:  1 }
+  //   reshape_param { shape { dim: 2  dim: 4 } axis: -3 }
+  //
+  // num_axes specifies the extent of the reshape.
+  // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on
+  // input axes in the range [axis, axis+num_axes].
+  // num_axes may also be -1, the default, to include all remaining axes
+  // (starting from axis).
+  //
+  // For example, suppose "input" is a 2D blob with shape 2 x 8.
+  // Then the following ReshapeLayer specifications are equivalent,
+  // producing a blob "output" with shape 1 x 2 x 8.
+  //
+  //   reshape_param { shape { dim:  1  dim: 2  dim:  8 } }
+  //   reshape_param { shape { dim:  1  dim: 2  }  num_axes: 1 }
+  //   reshape_param { shape { dim:  1  }  num_axes: 0 }
+  //
+  // On the other hand, these would produce output blob shape 2 x 1 x 8:
+  //
+  //   reshape_param { shape { dim: 2  dim: 1  dim: 8  }  }
+  //   reshape_param { shape { dim: 1 }  axis: 1  num_axes: 0 }
+  //
+  optional int32 axis = 2 [default = 0];
+  optional int32 num_axes = 3 [default = -1];
+}
+// Message that stores parameters used by ScaleLayer
+message ScaleParameter {
+  // The first axis of bottom[0] (the first input Blob) along which to apply
+  // bottom[1] (the second input Blob).  May be negative to index from the end
+  // (e.g., -1 for the last axis).
+  //
+  // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
+  // top[0] will have the same shape, and bottom[1] may have any of the
+  // following shapes (for the given value of axis):
+  //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
+  //    (axis == 1 == -3)          3;     3x40;     3x40x60
+  //    (axis == 2 == -2)                   40;       40x60
+  //    (axis == 3 == -1)                                60
+  // Furthermore, bottom[1] may have the empty shape (regardless of the value of
+  // "axis") -- a scalar multiplier.
+  optional int32 axis = 1 [default = 1];
+  // (num_axes is ignored unless just one bottom is given and the scale is
+  // a learned parameter of the layer.  Otherwise, num_axes is determined by the
+  // number of axes of the second bottom.)
+  // The number of axes of the input (bottom[0]) covered by the scale
+  // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
+  // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar.
+  optional int32 num_axes = 2 [default = 1];
+  // (filler is ignored unless just one bottom is given and the scale is
+  // a learned parameter of the layer.)
+  // The initialization for the learned scale parameter.
+  // Default is the unit (1) initialization, resulting in the ScaleLayer
+  // initially performing the identity operation.
+  optional FillerParameter filler = 3;
+  // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but
+  // may be more efficient).  Initialized with bias_filler (defaults to 0).
+  optional bool bias_term = 4 [default = false];
+  optional FillerParameter bias_filler = 5;
+}
+// Parameters for the sigmoid layer; the only setting is the engine choice.
+message SigmoidParameter {
+  // Values accepted by the `engine` field below.
+  enum Engine {
+    DEFAULT = 0;
+    CAFFE = 1;
+    CUDNN = 2;
+  }
+  optional Engine engine = 1 [default = DEFAULT];
+}
+// Message that stores parameters used by SliceLayer
+message SliceParameter {
+  // The axis along which to slice -- may be negative to index from the end
+  // (e.g., -1 for the last axis).
+  // By default, SliceLayer slices blobs along the "channels" axis (1).
+  // Note: the field tags below are intentionally not in declaration order
+  // (axis = 3, slice_point = 2, slice_dim = 1).
+  optional int32 axis = 3 [default = 1];
+  repeated uint32 slice_point = 2;
+  // DEPRECATED: alias for "axis" -- does not support negative indexing.
+  optional uint32 slice_dim = 1 [default = 1];
+}
+// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer
+message SoftmaxParameter {
+  // Values accepted by the `engine` field below.
+  enum Engine {
+    DEFAULT = 0;
+    CAFFE = 1;
+    CUDNN = 2;
+  }
+  optional Engine engine = 1 [default = DEFAULT];
+  // The axis along which to perform the softmax -- may be negative to index
+  // from the end (e.g., -1 for the last axis).
+  // Any other axes will be evaluated as independent softmaxes.
+  optional int32 axis = 2 [default = 1];
+}
+// Parameters for the TanH layer; the only setting is the engine choice.
+message TanHParameter {
+  // Values accepted by the `engine` field below.
+  enum Engine {
+    DEFAULT = 0;
+    CAFFE = 1;
+    CUDNN = 2;
+  }
+  optional Engine engine = 1 [default = DEFAULT];
+}
+// Message that stores parameters used by TileLayer
+message TileParameter {
+  // The index of the axis to tile.
+  optional int32 axis = 1 [default = 1];
+  // The number of copies (tiles) of the blob to output.
+  // No default is declared for this field.
+  optional int32 tiles = 2;
+}
+// Message that stores parameters used by ThresholdLayer
+message ThresholdParameter {
+  // NOTE(review): the trailing comment says "Strictly positive values" while
+  // the declared default is 0 -- confirm the intended meaning against the
+  // layer implementation.
+  optional float threshold = 1 [default = 0]; // Strictly positive values
+}
+// Parameters describing a video data source (a webcam device or a video file).
+message VideoDataParameter{
+  // Kind of video source selected by `video_type`.
+  enum VideoType {
+    WEBCAM = 0;
+    VIDEO = 1;
+  }
+  optional VideoType video_type = 1 [default = WEBCAM];
+  // presumably the capture device index used when video_type == WEBCAM --
+  // TODO confirm against the layer implementation.
+  optional int32 device_id = 2 [default = 0];
+  // presumably the file path used when video_type == VIDEO -- TODO confirm.
+  optional string video_file = 3;
+  // Number of frames to be skipped before processing a frame.
+  optional uint32 skip_frames = 4 [default = 0];
+}
+// Parameters for the window data layer (data source, pre-processing and
+// foreground/background window sampling).
+message WindowDataParameter {
+  // Specify the data source.
+  optional string source = 1;
+  // For data pre-processing, we can do simple scaling and subtracting the
+  // data mean, if provided. Note that the mean subtraction is always carried
+  // out before scaling.
+  optional float scale = 2 [default = 1];
+  optional string mean_file = 3;
+  // Specify the batch size.
+  optional uint32 batch_size = 4;
+  // Specify if we would like to randomly crop an image.
+  optional uint32 crop_size = 5 [default = 0];
+  // Specify if we want to randomly mirror data.
+  optional bool mirror = 6 [default = false];
+  // Foreground (object) overlap threshold
+  optional float fg_threshold = 7 [default = 0.5];
+  // Background (non-object) overlap threshold
+  optional float bg_threshold = 8 [default = 0.5];
+  // Fraction of batch that should be foreground objects
+  optional float fg_fraction = 9 [default = 0.25];
+  // Amount of contextual padding to add around a window
+  // (used only by the window_data_layer)
+  optional uint32 context_pad = 10 [default = 0];
+  // Mode for cropping out a detection window
+  // warp: cropped window is warped to a fixed size and aspect ratio
+  // square: the tightest square around the window is cropped
+  optional string crop_mode = 11 [default = "warp"];
+  // cache_images: will load all images in memory for faster access
+  optional bool cache_images = 12 [default = false];
+  // append root_folder to locate images
+  optional string root_folder = 13 [default = ""];
+}
+// Parameters for the SPP layer: pyramid height, pooling method and engine.
+message SPPParameter {
+  // Pooling methods selectable through the `pool` field below.
+  enum PoolMethod {
+    MAX = 0;
+    AVE = 1;
+    STOCHASTIC = 2;
+  }
+  // No default is declared for pyramid_height.
+  optional uint32 pyramid_height = 1;
+  optional PoolMethod pool = 2 [default = MAX]; // The pooling method
+  // Values accepted by the `engine` field below.
+  enum Engine {
+    DEFAULT = 0;
+    CAFFE = 1;
+    CUDNN = 2;
+  }
+  // Note: `engine` uses tag 6; tags 3-5 are not declared in this message.
+  optional Engine engine = 6 [default = DEFAULT];
+}
+// DEPRECATED: use LayerParameter.
+// Legacy layer description in which the layer type is an enum (LayerType)
+// rather than a string.
+message V1LayerParameter {
+  // Names of the bottom and top blobs of the layer, and the layer name.
+  repeated string bottom = 2;
+  repeated string top = 3;
+  optional string name = 4;
+  // Rules on NetState that include / exclude this layer.
+  repeated NetStateRule include = 32;
+  repeated NetStateRule exclude = 33;
+  // Legacy layer types. The tag numbers do not follow the listing order, so
+  // entries must be matched by number, never by position.
+  enum LayerType {
+    NONE = 0;
+    ABSVAL = 35;
+    ACCURACY = 1;
+    ARGMAX = 30;
+    BNLL = 2;
+    CONCAT = 3;
+    CONTRASTIVE_LOSS = 37;
+    CONVOLUTION = 4;
+    DATA = 5;
+    DECONVOLUTION = 39;
+    DROPOUT = 6;
+    DUMMY_DATA = 32;
+    EUCLIDEAN_LOSS = 7;
+    ELTWISE = 25;
+    EXP = 38;
+    FLATTEN = 8;
+    HDF5_DATA = 9;
+    HDF5_OUTPUT = 10;
+    HINGE_LOSS = 28;
+    IM2COL = 11;
+    IMAGE_DATA = 12;
+    INFOGAIN_LOSS = 13;
+    INNER_PRODUCT = 14;
+    LRN = 15;
+    MEMORY_DATA = 29;
+    MULTINOMIAL_LOGISTIC_LOSS = 16;
+    MVN = 34;
+    POOLING = 17;
+    POWER = 26;
+    RELU = 18;
+    SIGMOID = 19;
+    SIGMOID_CROSS_ENTROPY_LOSS = 27;
+    SILENCE = 36;
+    SOFTMAX = 20;
+    SOFTMAX_LOSS = 21;
+    SPLIT = 22;
+    SLICE = 33;
+    TANH = 23;
+    WINDOW_DATA = 24;
+    THRESHOLD = 31;
+  }
+  optional LayerType type = 5;
+  // Numeric parameter blobs attached to the layer.
+  repeated BlobProto blobs = 6;
+  repeated string param = 1001;
+  // DimCheckMode is declared just below its first use here.
+  repeated DimCheckMode blob_share_mode = 1002;
+  enum DimCheckMode {
+    STRICT = 0;
+    PERMISSIVE = 1;
+  }
+  repeated float blobs_lr = 7;
+  repeated float weight_decay = 8;
+  repeated float loss_weight = 35;
+  // Layer-type-specific parameter messages (one optional field per type).
+  optional AccuracyParameter accuracy_param = 27;
+  optional ArgMaxParameter argmax_param = 23;
+  optional ConcatParameter concat_param = 9;
+  optional ContrastiveLossParameter contrastive_loss_param = 40;
+  optional ConvolutionParameter convolution_param = 10;
+  optional DataParameter data_param = 11;
+  optional DropoutParameter dropout_param = 12;
+  optional DummyDataParameter dummy_data_param = 26;
+  optional EltwiseParameter eltwise_param = 24;
+  optional ExpParameter exp_param = 41;
+  optional HDF5DataParameter hdf5_data_param = 13;
+  optional HDF5OutputParameter hdf5_output_param = 14;
+  optional HingeLossParameter hinge_loss_param = 29;
+  optional ImageDataParameter image_data_param = 15;
+  optional InfogainLossParameter infogain_loss_param = 16;
+  optional InnerProductParameter inner_product_param = 17;
+  optional LRNParameter lrn_param = 18;
+  optional MemoryDataParameter memory_data_param = 22;
+  optional MVNParameter mvn_param = 34;
+  optional PoolingParameter pooling_param = 19;
+  optional PowerParameter power_param = 21;
+  optional ReLUParameter relu_param = 30;
+  optional SigmoidParameter sigmoid_param = 38;
+  optional SoftmaxParameter softmax_param = 39;
+  optional SliceParameter slice_param = 31;
+  optional TanHParameter tanh_param = 37;
+  optional ThresholdParameter threshold_param = 25;
+  optional WindowDataParameter window_data_param = 20;
+  optional TransformationParameter transform_param = 36;
+  optional LossParameter loss_param = 42;
+  // Embedded V0 (oldest format) layer description.
+  optional V0LayerParameter layer = 1;
+}
+// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters
+// in Caffe.  We keep this message type around for legacy support.
+message V0LayerParameter {
+  optional string name = 1; // the layer name
+  optional string type = 2; // the string to specify the layer type
+  // Parameters to specify layers with inner products.
+  optional uint32 num_output = 3; // The number of outputs for the layer
+  optional bool biasterm = 4 [default = true]; // whether to have bias terms
+  optional FillerParameter weight_filler = 5; // The filler for the weight
+  optional FillerParameter bias_filler = 6; // The filler for the bias
+  optional uint32 pad = 7 [default = 0]; // The padding size
+  optional uint32 kernelsize = 8; // The kernel size
+  optional uint32 group = 9 [default = 1]; // The group size for group conv
+  optional uint32 stride = 10 [default = 1]; // The stride
+  // Pooling methods selectable through the `pool` field below.
+  enum PoolMethod {
+    MAX = 0;
+    AVE = 1;
+    STOCHASTIC = 2;
+  }
+  optional PoolMethod pool = 11 [default = MAX]; // The pooling method
+  optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio
+  optional uint32 local_size = 13 [default = 5]; // for local response norm
+  optional float alpha = 14 [default = 1.]; // for local response norm
+  optional float beta = 15 [default = 0.75]; // for local response norm
+  // Note: `k` uses tag 22, out of sequence with the neighbouring fields.
+  optional float k = 22 [default = 1.];
+  // For data layers, specify the data source
+  optional string source = 16;
+  // For data pre-processing, we can do simple scaling and subtracting the
+  // data mean, if provided. Note that the mean subtraction is always carried
+  // out before scaling.
+  optional float scale = 17 [default = 1];
+  optional string meanfile = 18;
+  // For data layers, specify the batch size.
+  optional uint32 batchsize = 19;
+  // For data layers, specify if we would like to randomly crop an image.
+  optional uint32 cropsize = 20 [default = 0];
+  // For data layers, specify if we want to randomly mirror data.
+  optional bool mirror = 21 [default = false];
+  // The blobs containing the numeric parameters of the layer
+  repeated BlobProto blobs = 50;
+  // The ratio that is multiplied on the global learning rate. If you want to
+  // set the learning ratio for one blob, you need to set it for all blobs.
+  repeated float blobs_lr = 51;
+  // The weight decay that is multiplied on the global weight decay.
+  repeated float weight_decay = 52;
+  // The rand_skip variable is for the data layer to skip a few data points
+  // to avoid all asynchronous sgd clients to start at the same point. The skip
+  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
+  // be larger than the number of keys in the database.
+  optional uint32 rand_skip = 53 [default = 0];
+  // Fields related to detection (det_*)
+  // foreground (object) overlap threshold
+  optional float det_fg_threshold = 54 [default = 0.5];
+  // background (non-object) overlap threshold
+  optional float det_bg_threshold = 55 [default = 0.5];
+  // Fraction of batch that should be foreground objects
+  optional float det_fg_fraction = 56 [default = 0.25];
+  // Tag 57 belonged to the obsolete field below and is left unused.
+  // optional bool OBSOLETE_can_clobber = 57 [default = true];
+  // Amount of contextual padding to add around a window
+  // (used only by the window_data_layer)
+  optional uint32 det_context_pad = 58 [default = 0];
+  // Mode for cropping out a detection window
+  // warp: cropped window is warped to a fixed size and aspect ratio
+  // square: the tightest square around the window is cropped
+  optional string det_crop_mode = 59 [default = "warp"];
+  // For ReshapeLayer, one needs to specify the new dimensions.
+  optional int32 new_num = 60 [default = 0];
+  optional int32 new_channels = 61 [default = 0];
+  optional int32 new_height = 62 [default = 0];
+  optional int32 new_width = 63 [default = 0];
+  // Whether or not ImageLayer should shuffle the list of files at every epoch.
+  // It will also resize images if new_height or new_width are not zero.
+  optional bool shuffle_images = 64 [default = false];
+  // For ConcatLayer, one needs to specify the dimension for concatenation, and
+  // the other dimensions must be the same for all the bottom blobs.
+  // By default it will concatenate blobs along the channels dimension.
+  optional uint32 concat_dim = 65 [default = 1];
+  optional HDF5OutputParameter hdf5_output_param = 1001;
+}
+// Parameters for the PReLU (parametric ReLU) layer.
+message PReLUParameter {
+  // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers:
+  // Surpassing Human-Level Performance on ImageNet Classification, 2015.
+  // Initial value of a_i. Default is a_i=0.25 for all i.
+  optional FillerParameter filler = 1;
+  // Whether or not slope parameters are shared across channels.
+  optional bool channel_shared = 2 [default = false];
+}
+// Parameter message for the Axpy layer; it declares no fields.
+message AxpyParameter{
+}
+// Parameter message for the Upsample layer.
+message UpsampleParameter{
+  // Integer scale setting; defaults to 1.
+  // presumably the spatial upsampling factor -- TODO confirm with the op mapper.
+  optional int32 scale = 1 [default = 1];
+}
+// Parameter message for the ROI pooling layer: pooled output size and the
+// spatial scale applied to ROI coordinates.
+message ROIPoolingParameter {
+// Pad, kernel size, and stride are all given as a single value for equal
+// dimensions in height and width or as Y, X pairs.
+// NOTE(review): this message declares no pad, kernel-size or stride fields;
+// the sentence above looks carried over from another message -- confirm.
+  optional uint32 pooled_h = 1 [default = 0]; // The pooled output height
+  optional uint32 pooled_w = 2 [default = 0]; // The pooled output width
+  // Multiplicative spatial scale factor to translate ROI coords from their
+  // input scale to the scale used when pooling
+  optional float spatial_scale = 3 [default = 1];
+}
+// Parameter message for the ShuffleChannel layer.
+message ShuffleChannelParameter {
+  optional uint32 group = 1[default = 1]; // The number of groups
+}
--- a/x2paddle/x2paddle/decoder/caffe_decoder.py
+++ b/x2paddle/x2paddle/decoder/caffe_decoder.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import sys
+from google.protobuf import text_format
+import numpy as np
+from x2paddle.core.graph import GraphNode, Graph
+from x2paddle.core.fluid_code import FluidCode
+from x2paddle.op_mapper import caffe_shape
+class CaffeResolver(object):
+    '''Locate the compiled caffe protobuf module and expose its NetParameter.
+
+    When `caffe_proto` is None the bundled `x2paddle.decoder.caffe_pb2` module
+    is used; otherwise `caffe_proto` is the path of a user-supplied compiled
+    module that gets imported by its file name.
+    '''
+    def __init__(self, caffe_proto):
+        self.caffe_proto = caffe_proto
+        self.import_caffe()
+    def import_caffepb(self):
+        '''Return the protobuf module selected by `self.caffe_proto`.
+
+        Raises:
+            Exception: if `self.caffe_proto` is set but is not an existing file.
+        '''
+        proto_py = self.caffe_proto
+        if proto_py is None:
+            from x2paddle.decoder import caffe_pb2
+            return caffe_pb2
+        if not os.path.isfile(proto_py):
+            raise Exception(
+                "The .py file compiled by caffe.proto is not exist.")
+        module_dir, module_file = os.path.split(os.path.abspath(proto_py))
+        module_name = os.path.splitext(module_file)[0]
+        # The module's directory is appended to sys.path so that the plain
+        # module name can be imported.
+        sys.path.append(module_dir)
+        return __import__(module_name)
+    def import_caffe(self):
+        '''Store the protobuf module and its NetParameter class on self.'''
+        pb_module = self.import_caffepb()
+        self.caffepb = pb_module
+        self.NetParameter = pb_module.NetParameter
+class CaffeGraphNode(GraphNode):
+    '''Graph node wrapping one Caffe layer message.'''
+    def __init__(self, layer, type_str, layer_name=None):
+        # Use the explicit name when given, otherwise the layer's own name;
+        # '/' and '-' are replaced by '_' in either case.
+        raw_name = layer.name if layer_name is None else layer_name
+        node_name = raw_name.replace('/', '_').replace('-', '_')
+        super(CaffeGraphNode, self).__init__(layer, node_name)
+        self.layer_type = type_str
+        self.fluid_code = FluidCode()
+        # Filled in later through set_params().
+        self.data = None
+    def set_params(self, params):
+        '''Attach the layer's parameter data to this node.'''
+        self.data = params
+class CaffeGraph(Graph):
+    '''Graph of CaffeGraphNode objects built from a parsed Caffe network.
+
+    Args:
+        model: parsed NetParameter message describing the network.
+        params: iterable of (layer_name, data) pairs holding layer weights.
+        caffe_pb: the compiled caffe protobuf module.
+    '''
+    def __init__(self, model, params, caffe_pb):
+        self.params = params
+        self.caffe_pb = caffe_pb
+        super(CaffeGraph, self).__init__(model)
+    def filter_layers(self, layers):
+        '''Filter out layers based on the current phase.
+
+        Only layers active in the 'test' phase are kept. Dropout layers are
+        removed as well; consumers of a removed Dropout layer's top blob are
+        rewired to read the Dropout layer's bottom blob instead.
+        '''
+        phase_map = {0: 'train', 1: 'test'}
+        filtered_layer_names = set()
+        filtered_layers = []
+        for layer in layers:
+            if hasattr(layer, 'input'):
+                continue
+            type_str = self.get_layer_type(layer)
+            phase = 'test'
+            if len(layer.include):
+                phase = phase_map[layer.include[0].phase]
+            if len(layer.exclude):
+                # The layer is active in the phase opposite to the excluded
+                # one. Read the rule from `exclude` (not `include`, which may
+                # be empty here).
+                phase = phase_map[1 - layer.exclude[0].phase]
+            exclude = (phase != 'test')
+            # Dropout layers appear in a fair number of Caffe
+            # test-time networks. These are just ignored. We'll
+            # filter them out here.
+            # `type_str` is used (rather than the raw `layer.type`) so that
+            # V1 layers, whose type is an enum number, are rewired as well.
+            if (not exclude) and type_str == 'Dropout':
+                exclude = True
+                drop_layer_top = layer.top[0]
+                drop_layer_bottom = layer.bottom[0]
+                if drop_layer_top != drop_layer_bottom:
+                    for next_layer in layers:
+                        for bottom_idx, bottom_name in enumerate(
+                                next_layer.bottom):
+                            if bottom_name == drop_layer_top:
+                                next_layer.bottom[
+                                    bottom_idx] = drop_layer_bottom
+            if not exclude:
+                filtered_layers.append(layer)
+                # Guard against dupes.
+                assert layer.name not in filtered_layer_names
+                filtered_layer_names.add(layer.name)
+            else:
+                print('The filter layer:' + layer.name)
+        return filtered_layers
+    def generate_input_layer(self, dims, index):
+        '''Build an "Input" layer for the index-th network input.
+
+        A prototxt snippet is generated from `dims` and the input's name, then
+        parsed into a fresh NetParameter; its layer list is returned.
+        '''
+        dim_str = ''.join('dim: {}\n'.format(str(dim)) for dim in dims)
+        input_name = str(self.model.input[index])
+        input_str = 'layer {\n'
+        input_str += 'name: \"{}\"\n '.format(input_name)
+        input_str += 'type: "Input"\n'
+        input_str += 'top: \"{}\"\n'.format(input_name)
+        input_str += 'input_param {\n'
+        input_str += 'shape {\n'
+        input_str += dim_str
+        input_str += '}}}'
+        input_str = str.encode(input_str)
+        net = self.caffe_pb.NetParameter()
+        text_format.Merge(input_str, net)
+        return net.layers or net.layer
+    def input2layers(self, input_layers=None):
+        '''Convert net-level `input` declarations into "Input" layers.
+
+        Args:
+            input_layers: list the generated layers are appended to; a new
+                list is created when omitted.
+
+        Returns:
+            The list that received the generated layers.
+
+        Raises:
+            Exception: if `input_dim` is present but does not hold exactly
+                four values per declared input.
+        '''
+        if input_layers is None:
+            input_layers = []
+        inputs_num = len(self.model.input)
+        if inputs_num == 0:
+            return input_layers
+        input_dims_num = len(self.model.input_dim)
+        if input_dims_num != 0:
+            if input_dims_num != inputs_num * 4:
+                raise Exception('invalid input_dim[%d] param in prototxt' %
+                                (input_dims_num))
+            for i in range(inputs_num):
+                dims = self.model.input_dim[i * 4:(i + 1) * 4]
+                l = self.generate_input_layer(dims, i)
+                input_layers.append(l[0])
+        else:
+            for i in range(inputs_num):
+                dims = self.model.input_shape[i].dim[0:4]
+                l = self.generate_input_layer(dims, i)
+                input_layers.append(l[0])
+        return input_layers
+    def transform_input_layers(self, layers, input_layers=None):
+        '''Generate "Input" layers for layers that carry an `input` attribute.
+
+        NOTE(review): filter_layers() drops every layer that has an `input`
+        attribute, so this loop is not expected to match anything when called
+        from build(). The previous body referenced undefined names
+        (`layers.input_dim`, `i`, `Error`); it now checks the layer's own
+        `input_dim` and uses input index 0, which matches the
+        `self.model.input_dim[0:4]` slice -- confirm the intended index.
+
+        Returns:
+            The list that received the generated layers.
+        '''
+        if input_layers is None:
+            input_layers = []
+        for layer in layers:
+            if hasattr(layer, 'input'):
+                input_dims_num = len(layer.input_dim)
+                if input_dims_num > 0 and input_dims_num != 4:
+                    raise Exception('invalid input_dim[%d] param in prototxt' %
+                                    (input_dims_num))
+                dims = self.model.input_dim[0:4]
+                l = self.generate_input_layer(dims, 0)
+                input_layers.append(l[0])
+        return input_layers
+    def get_layer_type(self, layer):
+        '''Return the layer type as a string.
+
+        Integer (V1 enum) types are translated from their enum name, e.g.
+        INNER_PRODUCT -> InnerProduct, RELU -> ReLU, LRN -> LRN. String types
+        are returned unchanged.
+        '''
+        if isinstance(layer.type, int):
+            enum_values = self.caffe_pb._V1LAYERPARAMETER_LAYERTYPE.values
+            vals = [val for val in enum_values if val.number == layer.type]
+            type_str = ''.join(
+                s.capitalize() for s in vals[0].name.split('_'))
+            if 'relu' in type_str.lower():
+                type_str = type_str.replace('elu', 'eLU')
+            elif type_str.lower() == 'lrn':
+                type_str = 'LRN'
+            return type_str
+        else:
+            return layer.type
+    def build(self):
+        '''Create the nodes, connect them by blob names and attach weights.
+
+        Raises:
+            Exception: if a layer consumes a blob that no earlier layer
+                produced.
+        '''
+        layers = self.model.layers or self.model.layer
+        layers = self.filter_layers(layers)
+        input_layers = []
+        self.input2layers(input_layers)
+        self.transform_input_layers(layers, input_layers)
+        layers = input_layers + layers
+        # Replace '/' and '-' by '_' in layer and blob names.
+        for layer in layers:
+            if hasattr(layer, 'name'):
+                name = getattr(layer, 'name')
+                setattr(layer, 'name', name.replace('/', '_').replace('-', '_'))
+            for i, name in enumerate(layer.bottom):
+                layer.bottom[i] = name.replace('/', '_').replace('-', '_')
+            for i, name in enumerate(layer.top):
+                layer.top[i] = name.replace('/', '_').replace('-', '_')
+        # Maps a blob name to the names of the layers that produced it, in
+        # order; consumers connect to the most recent producer.
+        top_layer = {}
+        for layer in layers:
+            if hasattr(layer, 'input'):
+                continue
+            type_str = self.get_layer_type(layer)
+            self.node_map[layer.name] = CaffeGraphNode(layer, type_str)
+            for in_name in layer.bottom:
+                if in_name in top_layer:
+                    self.connect(top_layer[in_name][-1], layer.name)
+                else:
+                    raise Exception(
+                        'input[{}] of node[{}] does not exist in node_map'.
+                        format(in_name, layer.name))
+            for out_name in layer.top:
+                if out_name not in top_layer:
+                    top_layer[out_name] = [layer.name]
+                else:
+                    top_layer[out_name].append(layer.name)
+        for layer_name, data in self.params:
+            if layer_name in self.node_map:
+                node = self.node_map[layer_name]
+                node.set_params(data)
+            else:
+                print('Ignoring parameters for non-existent layer: %s' % \
+                       layer_name)
+        super(CaffeGraph, self).build()
+    def get_bottom_node(self, node, idx=0, copy=False):
+        '''Return the node feeding the idx-th input of `node`.
+
+        When the producing layer has several tops, the lookup name is
+        suffixed with ':<index of the consumed top>'.
+        '''
+        input_node_name = node.inputs[idx]
+        # The message uses input_node_name; `name` is not assigned yet here.
+        assert input_node_name in self.node_map, 'The {} isn\'t a valid node'.format(
+            input_node_name)
+        input_node = self.node_map[input_node_name]
+        if len(input_node.layer.top) > 1:
+            need_idx = list(input_node.layer.top).index(node.layer.bottom[idx])
+            name = input_node_name + ':' + str(need_idx)
+        else:
+            name = input_node_name
+        return self.get_node(name, copy=copy)
+class CaffeDecoder(object):
+    '''Load a Caffe prototxt and caffemodel and build a CaffeGraph from them.
+
+    Args:
+        proto_path: path of the text-format network definition (prototxt).
+        model_path: path of the binary weights file (caffemodel).
+        caffe_proto: path of a user-compiled caffe protobuf module, or None
+            (forwarded to CaffeResolver).
+    '''
+    def __init__(self, proto_path, model_path, caffe_proto):
+        self.proto_path = proto_path
+        self.model_path = model_path
+        self.resolver = CaffeResolver(caffe_proto=caffe_proto)
+        self.net = self.resolver.NetParameter()
+        with open(proto_path, 'rb') as proto_file:
+            proto_str = proto_file.read()
+            text_format.Merge(proto_str, self.net)
+        self.load_using_pb()
+        self.caffe_graph = CaffeGraph(self.net, self.params,
+                                      self.resolver.caffepb)
+        self.caffe_graph.build()
+    def load_using_pb(self):
+        '''Read the weights file and store (layer_name, blobs) in self.params.
+
+        Layers without blobs are skipped; '/' and '-' in layer names are
+        replaced by '_'.
+        '''
+        data = self.resolver.NetParameter()
+        # Use a context manager so the model file is closed after reading.
+        with open(self.model_path, 'rb') as model_file:
+            data.MergeFromString(model_file.read())
+        layers = data.layers or data.layer
+        for layer in layers:
+            setattr(layer, 'name',
+                    layer.name.replace('/', '_').replace('-', '_'))
+        self.params = [(layer.name, self.normalize_pb_data(layer))
+                       for layer in layers if layer.blobs]
+    def normalize_pb_data(self, layer):
+        '''Convert the blobs of `layer` into float32 numpy arrays.
+
+        Returns:
+            A list with one array per blob. Arrays are reshaped to 4-D
+            (c_o, c_i, h, w), except 4-D 'Normalize' blobs, which are kept
+            flat.
+        '''
+        transformed = []
+        for blob in layer.blobs:
+            if len(blob.shape.dim):
+                dims = blob.shape.dim
+                if layer.type == 'PReLU':
+                    # Prepend one axis and pad with trailing 1s.
+                    c_o, c_i, h, w = map(int, [1] + \
+                        list(dims) + [1]* (3 - len(dims)))
+                elif layer.type == 'Normalize' and len(dims) == 4:
+                    data = np.asarray(list(blob.data), dtype=np.float32)
+                    transformed.append(data)
+                    continue
+                else:
+                    # Left-pad the shape with 1s up to four axes.
+                    c_o, c_i, h, w = map(int,
+                                         [1] * (4 - len(dims)) + list(dims))
+            else:
+                # No `shape` set: fall back to the num/channels/height/width
+                # fields of the blob.
+                c_o = blob.num
+                c_i = blob.channels
+                h = blob.height
+                w = blob.width
+            data = np.asarray(
+                list(blob.data), dtype=np.float32).reshape(c_o, c_i, h, w)
+            transformed.append(data)
+        return transformed
--- a/x2paddle/x2paddle/decoder/caffe_pb2.py
+++ b/x2paddle/x2paddle/decoder/caffe_pb2.py
--- a/x2paddle/x2paddle/decoder/onnx_decoder.py
+++ b/x2paddle/x2paddle/decoder/onnx_decoder.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from x2paddle.core.graph import GraphNode, Graph
+from x2paddle.core.fluid_code import FluidCode
+from x2paddle.decoder.onnx_shape_inference import SymbolicShapeInference
+from onnx.checker import ValidationError
+from onnx.checker import check_model
+from onnx.utils import polish_model
+from onnx import helper
+from onnx.helper import get_attribute_value, make_attribute
+from onnx.shape_inference import infer_shapes
+from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE
+from onnx.numpy_helper import to_array
+from onnx import AttributeProto, TensorProto, GraphProto
+from collections import OrderedDict as Dict
+import onnx
+from onnx.helper import ValueInfoProto
+import numpy as np
+from copy import deepcopy
+import logging as _logging
+import os
+# Name of the default ONNX operator domain.
+# NOTE(review): its users are outside this view -- confirm where it is read.
+default_op_domain = 'ai.onnx'
+# Module-level logger, named after this module.
+_logger = _logging.getLogger(__name__)
+class ONNXGraphNode(GraphNode):
+    """
+    graph node wrapping one ONNX operator node
+    """
+    def __init__(self, layer, layer_name=None):
+        # Use the explicit name when given, otherwise the node's own name.
+        node_name = layer.name if layer_name is None else layer_name
+        super(ONNXGraphNode, self).__init__(layer, node_name)
+        self.layer_type = layer.op_type
+        self.fluid_code = FluidCode()
+        self.attr_map = self.get_attr_map()
+        self.out_shapes = list()
+        self.dtype = None
+        self.which_child = {}
+    def get_attr_map(self):
+        """
+        convert ONNX node attributes to a dict keyed by attribute name
+        """
+        attr_map = {}
+        for attr in self.layer.attribute:
+            attr_map[attr.name] = self.get_attribute_value(attr)
+        return attr_map
+    @property
+    def value(self):
+        """
+        the 'value' attribute of a Constant-like node, or None if absent
+        """
+        assert 'Constant' in self.layer_type, "Only Constant | ConstantOfShape node has value."
+        return self.attr_map.get('value')
+    def get_attribute_value(self, attr):
+        """
+        get_attribute_value enhanced: tensors are read from raw_data as a
+        flat array and byte strings are decoded
+        """
+        attr_type = attr.type
+        if attr_type == onnx.AttributeProto.TENSOR:
+            np_dtype = np.dtype(TENSOR_TYPE_TO_NP_TYPE[attr.t.data_type])
+            raw = attr.t.raw_data
+            return np.frombuffer(
+                raw, dtype=np_dtype, count=(len(raw) // np_dtype.itemsize))
+        if attr_type == onnx.AttributeProto.STRING:
+            text = attr.s
+            if isinstance(text, bytes):
+                text = text.decode()
+            return text
+        # Every other attribute type is handled by the onnx helper.
+        return get_attribute_value(attr)
+    def get_attr(self, name, default=None):
+        """
+        look up an attribute in attr_map, returning default when missing
+        """
+        return self.attr_map.get(name, default)
+class ONNXGraphDataNode(GraphNode):
+    def __init__(self, layer, layer_name=None, is_global_input=False):
+        if layer_name is None:
+            super(ONNXGraphDataNode, self).__init__(layer, layer.name)
+        else:
+            super(ONNXGraphDataNode, self).__init__(layer, layer_name)
+        if is_global_input:
+            self.layer_type = 'place_holder'
+        else:
+            self.layer_type = 'create_parameter'
+        self.layer_name = layer_name
+        self.fluid_code = FluidCode()
+        self.weight = None
+        self.embeded_as = None
+        self.which_child = {}
+    @property
+    def out_shapes(self):
+        if isinstance(self.layer, ValueInfoProto):
+            values = self.layer.type.tensor_type.shape.dim
+            out_shapes = list()
+            out_shapes.append([dim.dim_value for dim in values])
+            return out_shapes
+        else:
+            values = self.layer.dims
+            out_shapes = list()
+            out_shapes.append(values)
+            return out_shapes
+    @property
+    def dtype(self):
+        if isinstance(self.layer, ValueInfoProto):
+            dtype = self.layer.type.tensor_type.elem_type
+            return TENSOR_TYPE_TO_NP_TYPE[dtype]
+        else:
+            dtype = self.layer.data_type
+            return TENSOR_TYPE_TO_NP_TYPE[dtype]
+class ONNXGraph(Graph):
+    def __init__(self, onnx_model):
+        super(ONNXGraph, self).__init__(onnx_model)
+        self.fixed_input_shape = {}
+        self.initializer = {}
+        self.place_holder_nodes = list()
+        self.value_infos = {}
+        self.graph = onnx_model.graph
+        self.get_place_holder_nodes()
+        print("shape inferencing ...")
+        self.graph = SymbolicShapeInference.infer_shapes(
+            onnx_model, fixed_input_shape=self.fixed_input_shape)
+        print("shape inferenced.")
+        self.build()
+        self.collect_value_infos()
+        self.allocate_shapes()
+    def get_inner_nodes(self):
+        """
+        generate inner node of ONNX model
+        """
+        inner_nodes = []
+        if not isinstance(self.graph, onnx.GraphProto):
+            logger.error('graph is not a GraphProto instance')
+            return
+        for initializer in self.graph.initializer:
+            name = initializer.name
+            inner_nodes.append(name)
+        return inner_nodes
+    def get_symbolic_shape(self, dims):
+        shape = []
+        for dim in dims:
+            if dim.HasField('dim_param'):
+                shape.append(dim.dim_param)
+            else:
+                shape.append(dim.dim_value)
+        return shape
+    def check_input_shape(self, vi):
+        if vi.type.HasField('tensor_type'):
+            for dim in vi.type.tensor_type.shape.dim:
+                if dim.HasField(
+                        'dim_param') and vi.name not in self.fixed_input_shape:
+                    shape = self.get_symbolic_shape(
+                        vi.type.tensor_type.shape.dim)
+                    print(
+                        "Unknown shape for input tensor[tensor name: '{}'] -> shape: {}, Please define shape of input here,\nNote:you can use visualization tools like Netron to check input shape."
+                        .format(vi.name, shape))
+                    right_shape_been_input = False
+                    while not right_shape_been_input:
+                        try:
+                            shape = raw_input(
+                                "Shape of Input(e.g. -1,3,224,224), enter 'N' to skip: "
+                            )
+                        except:
+                            shape = input(
+                                "Shape of Input(e.g. -1,3,224,224), enter 'N' to skip: "
+                            )
+                        if shape.count("-1") > 1:
+                            print("Only 1 dimension can be -1, type again:)")
+                        else:
+                            right_shape_been_input = True
+                    if shape == 'N':
+                        break
+                    shape = [int(dim) for dim in shape.strip().split(',')]
+                    assert shape.count(-1) <= 1, "Only one dimension can be -1"
+                    self.fixed_input_shape[vi.name] = shape
+                    break
+    def get_place_holder_nodes(self):
+        """
+        generate place_holder node of ONNX model
+        """
+        inner_nodes = self.get_inner_nodes()
+        for ipt_vi in self.graph.input:
+            if ipt_vi.name not in inner_nodes:
+                self.check_input_shape(ipt_vi)
+                self.place_holder_nodes.append(ipt_vi.name)
+    def get_output_nodes(self):
+        """
+        generate output_nodes node of ONNX model
+        """
+        inner_nodes = self.get_inner_nodes()
+        output_nodes = [value.name for value in self.graph.output]
+        for opt_data in output_nodes:
+            if opt_data not in inner_nodes:
+                self.output_nodes.append(opt_data)
+    def is_place_holder_nodes(self, layer):
+        """
+        return layer is or not place_holder node
+        """
+        if layer in self.place_holder_nodes:
+            return True
+        return False
+    def build(self):
+        """
+        build topo_sort of ONNX model
+        """
+        for layer in self.graph.node:
+            node = ONNXGraphNode(layer)
+            self.node_map[layer.name] = node
+        for layer in self.graph.input:
+            if layer.name not in self.node_map:
+                is_place_holder = self.is_place_holder_nodes(layer.name)
+                self.node_map[layer.name] = ONNXGraphDataNode(
+                    layer,
+                    layer_name=layer.name,
+                    is_global_input=is_place_holder)
+        #set data node's weight
+        for initializer in self.graph.initializer:
+            name = initializer.name
+            weight = to_array(initializer)
+            if name in self.node_map:
+                if isinstance(self.node_map[name], ONNXGraphDataNode):
+                    self.node_map[name].weight = weight
+                    self.node_map[name].embeded_as = []
+            else:
+                self.node_map[name] = ONNXGraphDataNode(
+                    initializer, layer_name=name, is_global_input=False)
+                self.node_map[name].weight = weight
+                self.node_map[name].embeded_as = []
+        #generate connection between nodes for topo
+        for layer_name, node in self.node_map.items():
+            if isinstance(node, ONNXGraphNode):
+                self.build_connection(layer_name, node)
+        #generate topo
+        super(ONNXGraph, self).build()
+        self.input_nodes = self.place_holder_nodes
+    def build_connection(self, layer_name, node):
+        """
+        find connection for nodes
+        """
+        for idx, in_node in enumerate(node.layer.input):
+            if in_node == '':
+                continue
+            if in_node not in self.node_map:
+                flag = 0
+                for nd in self.graph.node:
+                    for idx, opt in enumerate(nd.output):
+                        if opt == in_node:
+                            self.connect(nd.name, layer_name)
+                            flag = 1
+                            node.which_child[nd.name] = idx
+                            self.node_map[nd.name].index = 0
+                            break
+                    if flag == 1:
+                        break
+                if flag == 0:
+                    raise Exception(
+                        'input[{}] of node[{}] does not exist in node_map'.
+                        format(in_node, layer_name))
+            else:
+                self.connect(in_node, layer_name)
+    def get_input_node(self, node, idx=0, copy=False):
+        if len(node.which_child) == 0:
+            ipt_node = super(ONNXGraph, self).get_node(node.inputs[idx], copy)
+            return ipt_node
+        else:
+            ipt_node = super(ONNXGraph, self).get_node(node.inputs[idx], copy)
+            if ipt_node.layer_name in node.which_child:
+                ipt_node.index = node.which_child[ipt_node.layer_name]
+            return ipt_node
+    def graph_weights(self):
+        """
+        generator for weights
+        """
+        if not isinstance(self.graph, onnx.GraphProto):
+            logger.error('graph is not a GraphProto instance')
+            return
+        for initializer in self.graph.initializer:
+            name = initializer.name
+            weight = to_array(initializer)
+            yield name, weight
+    def collect_value_infos(self):
+        """
+        collect value/type info for an ONNX model
+        """
+        assert isinstance(self.graph,
+                          onnx.GraphProto), 'model is not a ModelProto instance'
+        for item in self.graph.value_info:
+            self.value_infos[item.name] = {
+                'dtype':
+                TENSOR_TYPE_TO_NP_TYPE[item.type.tensor_type.elem_type],
+                'shape':
+                [dim.dim_value for dim in item.type.tensor_type.shape.dim],
+                'external': False
+            }
+    def allocate_shapes(self):
+        """
+        run shape inference
+        """
+        for layer in self.graph.node:
+            node = self.node_map[layer.name]
+            for opt in layer.output:
+                if opt in self.value_infos:
+                    value_info = self.value_infos[opt]
+                    #if len(value_info['shape']) == 0 or value_info[
+                    #        'dtype'] is None or 0 in value_info['shape']:
+                    #    #TODO add node shape inference
+                    node.dtype = value_info['dtype']
+                    node.out_shapes.append(value_info['shape'])
+                else:
+                    node.out_shapes.append([])
+class ONNXDecoder(object):
+    def __init__(self, onnx_model):
+        onnx_model = onnx.load(onnx_model)
+        print('model ir_version: {}, op version: {}'.format(
+            onnx_model.ir_version, onnx_model.opset_import[0].version))
+        self.op_set = onnx_model.opset_import[0].version
+        check_model(onnx_model)
+        onnx_model = self.optimize_model_skip_op(onnx_model)
+        onnx_model = self.optimize_model_strip_initializer(onnx_model)
+        onnx_model = self.optimize_node_name(onnx_model)
+        self.graph = ONNXGraph(onnx_model)
+        #self.onnx_model = onnx_model
+    def build_value_refs(self, nodes):
+        """
+        build op reference of inputs and outputs
+        """
+        input_refs = Dict()
+        output_refs = Dict()
+        for idx, node in enumerate(nodes):
+            for val_name in node.input:
+                input_refs.setdefault(val_name, set()).add(idx)
+            for val_name in node.output:
+                output_refs.setdefault(val_name, set()).add(idx)
+        return input_refs, output_refs
+    def skip_node_forward(self, nodes, src_output_name, dst_input_name,
+                          input_refs):
+        """
+        skip nodes between src_output_name -> dst_input_name and connect this pair
+        """
+        processed = 0
+        for next_idx in input_refs[src_output_name]:
+            next_node = nodes[next_idx]
+            for val_idx, next_input_name in enumerate(next_node.input):
+                if next_input_name == src_output_name:
+                    next_node.input[val_idx] = dst_input_name
+                    processed += 1
+        return processed
+    def skip_node_backward(self, nodes, src_input_name, dst_output_name,
+                           output_refs):
+        """
+        skip nodes between dst_output_name -> src_input_name and connect this pair
+        """
+        processed = 0
+        for prev_idx in output_refs[src_input_name]:
+            prev_node = nodes[prev_idx]
+            for val_idx, prev_output_name in enumerate(prev_node.output):
+                if prev_output_name == src_input_name:
+                    prev_node.output[val_idx] = dst_output_name
+                    processed += 1
+        return processed
+    def optimize_model_skip_op(self, model, op_list=None):
+        """
+        skip ops can be bypassed for inference
+        """
+        nodes = model.graph.node
+        if op_list is None:
+            op_list = ['Dropout']
+        input_refs, output_refs = self.build_value_refs(nodes)
+        ret = type(model)()
+        ret.CopyFrom(model)
+        ret_nodes = ret.graph.node
+        nodes_to_remove = []
+        for node_idx, node in enumerate(nodes):
+            if not (node.domain == default_op_domain or node.domain == ''):
+                continue
+            op_type = node.op_type
+            if not (op_type in op_list):
+                continue
+            if op_type in ['Dropout']:
+                input_name = node.input[0]
+                output_name = node.output[0]
+            elif not (len(node.input) == 1 and len(node.output) == 1):
+                print(
+                    'currently only 1-input-1-output op supported, skip required %d: %s',
+                    node_idx, node.op_type)
+                continue
+            else:
+                input_name = node.input[0]
+                output_name = node.output[0]
+            if output_name in input_refs:
+                processed = self.skip_node_forward(ret_nodes, output_name,
+                                                   input_name, input_refs)
+            elif input_name in output_refs:
+                processed = self.skip_node_backward(ret_nodes, input_name,
+                                                    output_name, output_refs)
+            else:
+                processed = -1
+            if processed > 0:
+                nodes_to_remove.append(node_idx)
+                for value_info in ret.graph.value_info:
+                    for output in node.output:
+                        if value_info.name == output:
+                            ret.graph.value_info.remove(value_info)
+                print('skip op {}: {} -> {} -> {}'.format(
+                    node_idx, input_name, node.op_type, output_name))
+            elif processed == 0:
+                print('weird, no node processed')
+            else:
+                print('standalone op {}: {} -> {} -> {} not skipped'.format(
+                    node_idx, input_name, node.op_type, output_name))
+        nodes_to_remove.sort(reverse=True)
+        for node_idx in nodes_to_remove:
+            ret_nodes.pop(node_idx)
+        return ret
+    def optimize_model_strip_initializer(self, model, keep_input_only=True):
+        """
+        strip weights for inference
+        """
+        nodes = model.graph.node
+        input_refs, output_refs = self.build_value_refs(nodes)
+        out_names = [val.name for val in model.graph.output]
+        ret = type(model)()
+        ret.CopyFrom(model)
+        # strip initializers
+        ret.graph.ClearField('initializer')
+        ret_initializers = ret.graph.initializer
+        for initializer in model.graph.initializer:
+            name = initializer.name
+            if name in input_refs:
+                ret_initializers.add().CopyFrom(initializer)
+            elif not keep_input_only and name in output_refs:
+                ret_initializers.add().CopyFrom(initializer)
+            else:
+                dtype = TENSOR_TYPE_TO_NP_TYPE[initializer.data_type]
+        # strip inputs
+        ret.graph.ClearField('input')
+        ret_inputs = ret.graph.input
+        for item in model.graph.input:
+            name = item.name
+            if name in input_refs or name in out_names:
+                ret_inputs.add().CopyFrom(item)
+        return ret
+    def make_variable_name(self, name):
+        """
+        make a valid code name for ParamAttr
+        """
+        if name == '':
+            raise ValueError('name should not be empty')
+        for s in ' .*?\\/-:':
+            name = name.replace(s, '_')
+        return 'x2paddle_' + name
+    def optimize_node_name(self, model):
+        """
+        standardize variable name for paddle's code
+        """
+        graph = model.graph
+        for initializer in graph.initializer:
+            initializer.name = self.make_variable_name(initializer.name)
+        for ipt in graph.input:
+            ipt.name = self.make_variable_name(ipt.name)
+        for output in graph.output:
+            output.name = self.make_variable_name(output.name)
+        for item in graph.value_info:
+            item.name = self.make_variable_name(item.name)
+        for node in graph.node:
+            node.name = node.output[0]
+            node.name = self.make_variable_name(node.name)
+            for i in range(len(node.input)):
+                if node.input[i] == '':
+                    continue
+                else:
+                    node.input[i] = self.make_variable_name(node.input[i])
+            for i in range(len(node.output)):
+                node.output[i] = self.make_variable_name(node.output[i])
+        return model
--- a/x2paddle/x2paddle/decoder/onnx_shape_inference.py
+++ b/x2paddle/x2paddle/decoder/onnx_shape_inference.py
+# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Reference Code from https://github.com/microsoft/onnxruntime,  Licensed under the MIT License.
+# -*- coding: UTF-8 -*-
+import argparse
+import numpy as np
+import onnx
+import sys
+from onnx import helper, numpy_helper, shape_inference
+import sympy
+from packaging import version
+# Fail at import time when the installed onnx is older than 1.5.0.
+assert version.parse(onnx.__version__) >= version.parse("1.5.0")
+def get_attribute(node, attr_name, default_value=None):
+    found = [attr for attr in node.attribute if attr.name == attr_name]
+    if found:
+        return helper.get_attribute_value(found[0])
+    return default_value
+def get_dim_from_type_proto(dim):
+    return getattr(dim, dim.WhichOneof('value')) if type(
+        dim.WhichOneof('value')) == str else None
+def get_shape_from_type_proto(type_proto):
+    return [
+        get_dim_from_type_proto(d) for d in type_proto.tensor_type.shape.dim
+    ]
+def get_shape_from_sympy_shape(sympy_shape):
+    sympy_shape = [
+        None if i is None else (int(i) if is_literal(i) else str(i))
+        for i in sympy_shape
+    ]
+    return sympy_shape
+def is_literal(dim):
+    return type(dim) in [int, np.int64, np.int32, sympy.Integer] or (
+        hasattr(dim, 'is_number') and
+        dim.is_number)  # or (hasattr(dim, 'is_integer') and dim.is_integer)
+def handle_negative_axis(axis, rank):
+    assert axis < rank and axis >= -rank
+    return axis if axis >= 0 else rank + axis
+def get_opset(mp, domain=['', 'onnx', 'ai.onnx']):
+    if type(domain) != list:
+        domain = [domain]
+    for opset in mp.opset_import:
+        if opset.domain in domain:
+            return opset.version
+    return None
+def as_scalar(x):
+    if type(x) == list:
+        assert len(x) == 1
+        return x[0]
+    elif type(x) == np.ndarray:
+        return np.asscalar(x)
+    else:
+        return x
+def as_list(x, keep_none):
+    if type(x) == list:
+        return x
+    elif type(x) == np.ndarray:
+        return list(x)
+    elif keep_none and x is None:
+        return None
+    else:
+        return [x]
+def sympy_reduce_product(x):
+    if type(x) == list:
+        value = sympy.Integer(1)
+        for v in x:
+            value = value * v
+    else:
+        value = x
+    return value
+class SymbolicShapeInference:
+    def __init__(self, int_max, auto_merge, guess_output_rank, verbose):
+        """
+        set up the per-op handler table and the empty inference state
+        the four arguments are stored unchanged as int_max_, auto_merge_,
+        guess_output_rank_ and verbose_
+        """
+        # op type name -> bound method handling that op type
+        self.dispatcher_ = {
+            'Add': self._infer_symbolic_compute_ops,
+            'ArrayFeatureExtractor': self._infer_ArrayFeatureExtractor,
+            'AveragePool': self._infer_Pool,
+            'Cast': self._infer_Cast,
+            'CategoryMapper': self._infer_CategoryMapper,
+            'Compress': self._infer_Compress,
+            'Concat': self._infer_Concat,
+            'ConstantOfShape': self._infer_ConstantOfShape,
+            'Conv': self._infer_Conv,
+            'CumSum': self._pass_on_shape_and_type,
+            'Div': self._infer_symbolic_compute_ops,
+            'Expand': self._infer_Expand,
+            'Equal': self._infer_symbolic_compute_ops,
+            'Gather': self._infer_Gather,
+            'GatherElements': self._infer_GatherElements,
+            'GatherND': self._infer_GatherND,
+            'If': self._infer_If,
+            'Loop': self._infer_Loop,
+            'MatMul': self._infer_MatMul,
+            'MatMulInteger16': self._infer_MatMulInteger,
+            'MaxPool': self._infer_Pool,
+            'Max': self._infer_symbolic_compute_ops,
+            'Min': self._infer_symbolic_compute_ops,
+            'Mul': self._infer_symbolic_compute_ops,
+            'NonMaxSuppression': self._infer_NonMaxSuppression,
+            'NonZero': self._infer_NonZero,
+            'OneHot': self._infer_OneHot,
+            'Pad': self._infer_Pad,
+            'Range': self._infer_Range,
+            'ReduceProd': self._infer_ReduceProd,
+            'Reshape': self._infer_Reshape,
+            'Resize': self._infer_Resize,
+            'Round': self._pass_on_shape_and_type,
+            'Scan': self._infer_Scan,
+            'ScatterElements': self._infer_ScatterElements,
+            'Shape': self._infer_Shape,
+            'Size': self._infer_Size,
+            'Slice': self._infer_Slice,
+            'Split': self._infer_Split,
+            'Squeeze': self._infer_Squeeze,
+            'Sub': self._infer_symbolic_compute_ops,
+            'Tile': self._infer_Tile,
+            'TopK': self._infer_TopK,
+            'Unsqueeze': self._infer_Unsqueeze,
+            'Where': self._infer_symbolic_compute_ops,
+            'Transpose': self._infer_Transpose,
+            'ZipMap': self._infer_ZipMap
+        }
+        self.run_ = True
+        # dim (as str) -> dim name or int it is to be replaced with
+        self.suggested_merge_ = {}
+        # dim name / str(expression) -> the corresponding symbol or expression object
+        self.symbolic_dims_ = {}
+        # presumably the symbolic dims that appear on graph inputs; it is only
+        # read in the part of the class visible here -- TODO confirm where it is filled
+        self.input_symbols_ = {}
+        self.auto_merge_ = auto_merge
+        self.guess_output_rank_ = guess_output_rank
+        self.verbose_ = verbose
+        self.int_max_ = int_max
+    def _add_suggested_merge(self, symbols, apply=False):
+        """
+        record in suggested_merge_ that all dims in `symbols` are the same dim
+        symbols: dim names known to symbolic_dims_ and/or literals
+        apply: when True and auto_merge_ is set, the merge is applied right
+            away through _apply_suggested_merge
+        """
+        assert all([(type(s) == str and s in self.symbolic_dims_) or
+                    is_literal(s) for s in symbols])
+        symbols = set(symbols)
+        # a symbol that already has a merge target is replaced by that target
+        for k, v in self.suggested_merge_.items():
+            if k in symbols:
+                symbols.remove(k)
+                symbols.add(v)
+        map_to = None
+        # if there is literal, map to it first
+        for s in symbols:
+            if is_literal(s):
+                map_to = s
+                break
+        # when no literals, map to input symbolic dims, then existing symbolic dims
+        if map_to is None:
+            for s in symbols:
+                if s in self.input_symbols_:
+                    map_to = s
+                    break
+        if map_to is None:
+            for s in symbols:
+                if type(self.symbolic_dims_[s]) == sympy.Symbol:
+                    map_to = s
+                    break
+        # when nothing to map to, use the shorter one
+        if map_to is None:
+            if self.verbose_ > 0:
+                print(
+                    'Potential unsafe merge between symbolic expressions: ({})'.
+                    format(','.join(symbols)))
+            symbols_list = list(symbols)
+            lens = [len(s) for s in symbols_list]
+            map_to = symbols_list[lens.index(min(lens))]
+            symbols.remove(map_to)
+        # point every other symbol at map_to
+        for s in symbols:
+            if s == map_to:
+                continue
+            if is_literal(map_to) and is_literal(s):
+                # two literals can only be merged when they are equal
+                assert int(map_to) == int(s)
+            self.suggested_merge_[s] = int(map_to) if is_literal(
+                map_to) else map_to
+            # entries that used to map to s now map to map_to as well
+            for k, v in self.suggested_merge_.items():
+                if v == s:
+                    self.suggested_merge_[k] = map_to
+        if apply and self.auto_merge_:
+            self._apply_suggested_merge()
+    def _apply_suggested_merge(self, graph_input_only=False):
+        if not self.suggested_merge_:
+            return
+        for i in list(self.out_mp_.graph.input) + (
+            [] if graph_input_only else list(self.out_mp_.graph.value_info)):
+            for d in i.type.tensor_type.shape.dim:
+                if d.dim_param in self.suggested_merge_:
+                    v = self.suggested_merge_[d.dim_param]
+                    if is_literal(v):
+                        d.dim_value = int(v)
+                    else:
+                        d.dim_param = v
+    def _preprocess(self, in_mp, input_shapes=None):
+        """
+        build self.out_mp_ from in_mp and index its inputs and initializers
+        in_mp: source model; it is copied, then its nodes are re-added in
+            dependency order with Constant nodes turned into initializers
+        input_shapes: optional dict, graph input name -> shape that replaces
+            the declared shape of that input
+        sets out_mp_, initializers_ and known_vi_
+        """
+        out_mp = onnx.ModelProto()
+        out_mp.CopyFrom(in_mp)
+        out_mp.graph.ClearField('node')
+        self.out_mp_ = out_mp
+        # names of all values available so far: graph inputs and initializers
+        defined = set([
+            i.name
+            for i in list(in_mp.graph.input) + list(in_mp.graph.initializer)
+        ])
+        pending_nodes = []
+        # returns True if no more ready nodes
+        def _insert_ready_nodes():
+            # a node is ready once every non-empty input name is defined
+            ready_nodes = [
+                pn for pn in pending_nodes
+                if all([i in defined for i in pn.input if i])
+            ]
+            for rn in ready_nodes:
+                self.out_mp_.graph.node.add().CopyFrom(rn)
+                for o in rn.output:
+                    defined.add(o)
+                pending_nodes.remove(rn)
+            return not ready_nodes
+        # constant op -> initializer, topological sort
+        for in_n in in_mp.graph.node:
+            if in_n.op_type == 'Constant':
+                t = get_attribute(in_n, 'value')
+                # the initializer takes the name of the Constant node's output
+                t.name = in_n.output[0]
+                self.out_mp_.graph.initializer.add().CopyFrom(t)
+                defined.add(t.name)
+            else:
+                pending_nodes.append(in_n)
+            _insert_ready_nodes()
+        # keep inserting until a pass adds nothing
+        while pending_nodes:
+            if _insert_ready_nodes():
+                break
+        # whatever is still pending never became ready and is not part of out_mp_
+        if pending_nodes and self.verbose_ > 0:
+            print('SymbolicShapeInference: orphaned nodes discarded: ')
+            for n in pending_nodes:
+                print(n.op_type + ': ' + n.output[0])
+        if input_shapes is not None:
+            for input_name, shape in input_shapes.items():
+                for idx in range(len(self.out_mp_.graph.input)):
+                    if self.out_mp_.graph.input[idx].name == input_name:
+                        # replace the input by a new value info with the same
+                        # name and element type but the given shape; the
+                        # replacement is appended at the end of the input list
+                        value_info = self.out_mp_.graph.input[idx]
+                        del self.out_mp_.graph.input[idx]
+                        self.out_mp_.graph.input.append(
+                            helper.make_tensor_value_info(
+                                value_info.name,
+                                value_info.type.tensor_type.elem_type, shape))
+        # initializer name -> initializer tensor
+        self.initializers_ = dict(
+            [(i.name, i) for i in self.out_mp_.graph.initializer])
+        # value name -> value info, for graph inputs and (synthesized) initializers
+        self.known_vi_ = dict(
+            [(i.name, i) for i in list(self.out_mp_.graph.input)])
+        self.known_vi_.update(
+            dict([(i.name, helper.make_tensor_value_info(i.name, i.data_type,
+                                                         list(i.dims)))
+                  for i in self.out_mp_.graph.initializer]))
+    def _merge_symbols(self, dims):
+        """
+        pick the single dim that stands for all of `dims`, None when they
+        cannot be merged
+        dims: list of dims, each a str (symbolic) or a literal
+        """
+        if not all([type(d) == str for d in dims]):
+            # at least one dim is not a str; merging is only attempted with auto_merge_
+            if self.auto_merge_:
+                assert len(
+                    dims
+                ) == 2  # only allow symbol->int merge in binary ops for now
+                is_int = [is_literal(d) for d in dims]
+                if sum(is_int) == 1:
+                    # exactly one literal: index(1) finds it because True == 1
+                    int_dim = is_int.index(1)
+                    if self.verbose_ > 0:
+                        print('dim {} has been merged with value {}'.format(
+                            dims[1 - int_dim], dims[int_dim]))
+                    self._check_merged_dims(dims, allow_broadcast=False)
+                    return dims[int_dim]
+                else:
+                    # otherwise the first dim is kept
+                    if self.verbose_ > 0:
+                        print('dim {} has been mergd with dim {}'.format(dims[
+                            0], dims[1]))
+                    return dims[0]
+            else:
+                return None
+        # all dims are str from here on
+        if all([d == dims[0] for d in dims]):
+            return dims[0]
+        # compare again after replacing each dim by its suggested merge target
+        merged = [
+            self.suggested_merge_[d] if d in self.suggested_merge_ else d
+            for d in dims
+        ]
+        if all([d == merged[0] for d in merged]):
+            assert merged[0] in self.symbolic_dims_
+            return merged[0]
+        else:
+            return None
+    # broadcast from right to left, and merge symbolic dims if needed
+    def _broadcast_shapes(self, shape1, shape2):
+        new_shape = []
+        rank1 = len(shape1)
+        rank2 = len(shape2)
+        new_rank = max(rank1, rank2)
+        for i in range(new_rank):
+            dim1 = shape1[rank1 - 1 - i] if i < rank1 else 1
+            dim2 = shape2[rank2 - 1 - i] if i < rank2 else 1
+            if dim1 == 1 or dim1 == dim2:
+                new_dim = dim2
+            elif dim2 == 1:
+                new_dim = dim1
+            else:
+                new_dim = self._merge_symbols([dim1, dim2])
+                if not new_dim:
+                    # warning about unsupported broadcast when not auto merge
+                    # note that auto merge has the risk of incorrectly merge symbols while one of them being 1
+                    # for example, 'a' = 1, 'b' = 5 at runtime is valid broadcasting, but with auto merge 'a' == 'b'
+                    if self.auto_merge_:
+                        self._add_suggested_merge([dim1, dim2], apply=True)
+                    else:
+                        print('unsupported broadcast between ' + str(dim1) + ' '
+                              + str(dim2))
+            new_shape = [new_dim] + new_shape
+        return new_shape
+    def _get_shape(self, node, idx):
+        name = node.input[idx]
+        shape = []
+        if name in self.known_vi_:
+            shape = get_shape_from_type_proto(self.known_vi_[name].type)
+        elif name in self.initializers_:
+            assert name in self.initializers_
+            shape = list(self.initializers_[name].dims)
+        return shape
+    def _get_initializer_value(self, node, idx):
+        """Return input ``idx`` as a numpy array when it is an initializer, else False."""
+        input_name = node.input[idx]
+        if input_name not in self.initializers_:
+            return False
+        return numpy_helper.to_array(self.initializers_[input_name])
+    def _get_shape_rank(self, node, idx):
+        """Return the number of dims in the shape of input ``idx`` of ``node``."""
+        input_shape = self._get_shape(node, idx)
+        return len(input_shape)
+    def _get_sympy_shape(self, node, idx):
+        """Return the shape of input ``idx`` with string dims turned into sympy symbols.
+        A string dim already present in ``symbolic_dims_`` maps to the stored
+        entry; an unknown string dim gets a fresh integer ``sympy.Symbol``.
+        Non-string dims are passed through and must not be None.
+        """
+        resolved = []
+        for dim in self._get_shape(node, idx):
+            if type(dim) is not str:
+                assert dim is not None
+                resolved.append(dim)
+            elif dim in self.symbolic_dims_:
+                resolved.append(self.symbolic_dims_[dim])
+            else:
+                resolved.append(sympy.Symbol(dim, integer=True))
+        return resolved
+    def _get_value(self, node, idx):
+        """Return the known value of input ``idx``.
+        ``sympy_data_`` takes precedence; otherwise the initializer is
+        converted to a numpy array. The input must be present in one of them.
+        """
+        input_name = node.input[idx]
+        assert input_name in self.sympy_data_ or input_name in self.initializers_
+        if input_name in self.sympy_data_:
+            return self.sympy_data_[input_name]
+        return numpy_helper.to_array(self.initializers_[input_name])
+    def _try_get_value(self, node, idx):
+        """Return the value of input ``idx`` if it is known, otherwise None.
+        None is also returned when ``idx`` is past the end of ``node.input``.
+        """
+        if idx < len(node.input):
+            input_name = node.input[idx]
+            if input_name in self.sympy_data_ or input_name in self.initializers_:
+                return self._get_value(node, idx)
+        return None
+    def _update_computed_dims(self, new_sympy_shape):
+        """Reconcile computed dims of ``new_sympy_shape`` with the symbol tables.
+        Literal and string dims are left alone. Any other dim is looked up by
+        its string form: if a merge is suggested for it, the entry in
+        ``new_sympy_shape`` is replaced in place by the merged symbolic dim;
+        otherwise the dim is registered in ``symbolic_dims_`` if missing.
+        """
+        for pos, dim in enumerate(new_sympy_shape):
+            if is_literal(dim) or type(dim) == str:
+                continue
+            dim_key = str(dim)
+            if dim_key in self.suggested_merge_:
+                merged_name = self.suggested_merge_[dim_key]
+                new_sympy_shape[pos] = self.symbolic_dims_[merged_name]
+            elif dim_key not in self.symbolic_dims_:
+                # add the dim if it's a computational expression
+                self.symbolic_dims_[dim_key] = dim
+    def _onnx_infer_single_node(self, node):
+        """Run ONNX shape inference on a single node and register its outputs.
+        A fresh value info is added to ``out_mp_`` for every node output and
+        stored in ``known_vi_``. For Scan and Loop nodes ONNX inference is
+        skipped, so those value infos stay empty.
+        """
+        use_onnx_inference = node.op_type not in ['Scan', 'Loop']
+        if use_onnx_inference:
+            # run single node inference with self.known_vi_ shapes
+            # note that inference relying on initializer values is not handled
+            # as initializer weights are not copied to the tmp graph, for inference speed
+            graph_inputs = [self.known_vi_[name] for name in node.input if name]
+            graph_outputs = [
+                helper.make_tensor_value_info(name, onnx.TensorProto.UNDEFINED,
+                                              None) for name in node.output
+            ]
+            single_node_graph = helper.make_graph([node], 'tmp', graph_inputs,
+                                                  graph_outputs)
+            self.tmp_mp_.graph.CopyFrom(single_node_graph)
+            self.tmp_mp_ = shape_inference.infer_shapes(self.tmp_mp_)
+        for out_idx, out_name in enumerate(node.output):
+            new_vi = self.out_mp_.graph.value_info.add()
+            if use_onnx_inference:
+                new_vi.CopyFrom(self.tmp_mp_.graph.output[out_idx])
+            self.known_vi_[out_name] = new_vi
+    def _onnx_infer_subgraph(self, node, subgraph, use_node_input=True):
+        """Run a nested symbolic shape inference over ``subgraph`` of ``node``.
+        A temporary graph is built from the subgraph nodes, the subgraph inputs
+        and every known value info of this graph not shadowed by a subgraph
+        input or initializer. After inference, the subgraph's outputs,
+        value infos and nodes (and its inputs when ``use_node_input`` is True)
+        are replaced by the inferred ones. Symbolic dims that appear in the
+        subgraph outputs and are new to this instance are added to
+        ``symbolic_dims_``.
+        Returns the nested ``SymbolicShapeInference`` instance.
+        """
+        if self.verbose_ > 2:
+            print('Inferencing subgraph of node {} with output({}...): {}'.
+                  format(node.name, node.output[0], node.op_type))
+        # node inputs are not passed directly to the subgraph
+        # it's up to the node dispatcher to prepare subgraph input
+        # for example, with Scan/Loop, subgraph input shape would be trimmed from node input shape
+        # besides, inputs in subgraph could shadow implicit inputs
+        subgraph_inputs = set([
+            i.name for i in list(subgraph.initializer) + list(subgraph.input)
+        ])
+        subgraph_implicit_input = set([
+            name for name in self.known_vi_.keys()
+            if not name in subgraph_inputs
+        ])
+        tmp_graph = helper.make_graph(
+            list(subgraph.node), 'tmp',
+            list(subgraph.input) +
+            [self.known_vi_[i] for i in subgraph_implicit_input], [
+                helper.make_tensor_value_info(i.name,
+                                              onnx.TensorProto.UNDEFINED, None)
+                for i in subgraph.output
+            ])
+        # initializers of the outer graph that are implicit inputs come first,
+        # followed by the subgraph's own initializers
+        tmp_graph.initializer.extend([
+            i for i in self.out_mp_.graph.initializer
+            if i.name in subgraph_implicit_input
+        ])
+        tmp_graph.initializer.extend(subgraph.initializer)
+        self.tmp_mp_.graph.CopyFrom(tmp_graph)
+        # the nested inference shares this instance's settings
+        symbolic_shape_inference = SymbolicShapeInference(
+            self.int_max_, self.auto_merge_, self.guess_output_rank_,
+            self.verbose_)
+        all_shapes_inferred = False
+        symbolic_shape_inference._preprocess(self.tmp_mp_)
+        symbolic_shape_inference.suggested_merge_ = self.suggested_merge_.copy()
+        # repeat inference for as long as the nested instance keeps run_ set
+        while symbolic_shape_inference.run_:
+            all_shapes_inferred = symbolic_shape_inference._infer_impl(
+                self.tmp_mp_, self.sympy_data_.copy())
+        symbolic_shape_inference._update_output_from_vi()
+        if use_node_input:
+            # if subgraph uses node input, it needs to update to merged dims
+            subgraph.ClearField('input')
+            subgraph.input.extend(
+                symbolic_shape_inference.out_mp_.graph.input[:len(node.input)])
+        subgraph.ClearField('output')
+        subgraph.output.extend(symbolic_shape_inference.out_mp_.graph.output)
+        subgraph.ClearField('value_info')
+        subgraph.value_info.extend(
+            symbolic_shape_inference.out_mp_.graph.value_info)
+        subgraph.ClearField('node')
+        subgraph.node.extend(symbolic_shape_inference.out_mp_.graph.node)
+        # for new symbolic dims from subgraph output, add to main graph symbolic dims
+        subgraph_shapes = [
+            get_shape_from_type_proto(o.type)
+            for o in symbolic_shape_inference.out_mp_.graph.output
+        ]
+        subgraph_new_symbolic_dims = set([
+            d for s in subgraph_shapes
+            if s for d in s if type(d) == str and not d in self.symbolic_dims_
+        ])
+        new_dims = {}
+        for d in subgraph_new_symbolic_dims:
+            assert d in symbolic_shape_inference.symbolic_dims_
+            new_dims[d] = symbolic_shape_inference.symbolic_dims_[d]
+        self.symbolic_dims_.update(new_dims)
+        return symbolic_shape_inference
+    def _get_int_values(self, node, broadcast=False):
+        """Collect the known values of all inputs of ``node`` as Python ints.
+        Every input is looked up with ``_try_get_value``. When all inputs are
+        known, numpy arrays are converted: a rank-0 array becomes an ``int``,
+        a rank-1 array becomes a list of ``int`` and an array of higher rank
+        is replaced by ``None``. Non-array values are left untouched.
+        Args:
+            node: node whose inputs are read.
+            broadcast: when True and at least one value is a list, scalars are
+                wrapped into a list of the maximum length and shorter lists
+                are repeated ``max_len`` times.
+        Returns:
+            A list with one entry per node input; ``None`` marks an unknown
+            or ignored value.
+        """
+        values = [self._try_get_value(node, i) for i in range(len(node.input))]
+        if all(v is not None for v in values):
+            # some shape compute is in floating point, cast to int for sympy
+            for i, v in enumerate(values):
+                if type(v) != np.ndarray:
+                    continue
+                if len(v.shape) > 1:
+                    new_v = None  # ignore value for rank > 1
+                elif len(v.shape) == 0:
+                    # ndarray.item() replaces np.asscalar, which was
+                    # deprecated in NumPy 1.16 and removed in NumPy 1.23
+                    new_v = int(v.item())
+                else:
+                    assert len(v.shape) == 1
+                    new_v = [int(vv) for vv in v]
+                values[i] = new_v
+        values_len = [len(v) if type(v) == list else 0 for v in values]
+        # default=0 keeps a node without inputs from raising ValueError
+        max_len = max(values_len, default=0)
+        if max_len >= 1 and broadcast:
+            # broadcast
+            for i, v in enumerate(values):
+                if v is None:
+                    continue  # don't broadcast if value is unknown
+                if type(v) == list:
+                    if len(v) < max_len:
+                        values[i] = v * max_len
+                    else:
+                        assert len(v) == max_len
+                else:
+                    values[i] = [v] * max_len
+        return values
+    def _compute_on_sympy_data(self, node, op_func):
+        """Apply ``op_func`` to the known input values and store the result.
+        Input values are fetched with ``_get_int_values(node, broadcast=True)``.
+        Nothing is stored if any input value is unknown. If at least one value
+        is a list, ``op_func`` is applied element-wise over the zipped values;
+        otherwise it is applied once to the list of scalar values. The result
+        is written to ``sympy_data_`` under the name of the single output.
+        Args:
+            node: node with exactly one output.
+            op_func: callable taking a sequence of operand values.
+        """
+        assert len(node.output) == 1
+        values = self._get_int_values(node, broadcast=True)
+        if any(v is None for v in values):
+            return
+        # the result shape and output value info were computed here before but
+        # never used, so that dead work has been removed
+        if any(type(v) == list for v in values):
+            data = [op_func(vs) for vs in zip(*values)]
+        else:
+            data = op_func(values)
+        self.sympy_data_[node.output[0]] = data
+    def _pass_on_sympy_data(self, node):
+        """Propagate the value of the first input to the output via ``_compute_on_sympy_data``.
+        Only valid for single-input nodes, with Reshape as the one exception.
+        """
+        assert node.op_type == 'Reshape' or len(node.input) == 1
+        def _first_operand(operands):
+            return operands[0]
+        self._compute_on_sympy_data(node, _first_operand)
+    def _pass_on_shape_and_type(self, node):
+        """Give the first output the element type and shape of the first input."""
+        out_name = node.output[0]
+        out_vi = self.known_vi_[out_name]
+        src_elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
+        src_shape = self._get_shape(node, 0)
+        out_vi.CopyFrom(
+            helper.make_tensor_value_info(out_name, src_elem_type, src_shape))
+    def _new_symbolic_dim(self, prefix, dim):
+        """Create, or resolve through suggested merges, the dim named ``<prefix>_d<dim>``.
+        Without a suggested merge, a new integer symbol is registered in
+        ``symbolic_dims_`` and the name is returned. With a suggested merge,
+        the merge target is returned instead, wrapped in ``sympy.Integer``
+        when it is a literal.
+        """
+        dim_name = '{}_d{}'.format(prefix, dim)
+        if dim_name not in self.suggested_merge_:
+            self.symbolic_dims_[dim_name] = sympy.Symbol(dim_name, integer=True)
+            return dim_name
+        merge_target = self.suggested_merge_[dim_name]
+        if is_literal(merge_target):
+            return sympy.Integer(int(merge_target))
+        return merge_target
+    def _new_symbolic_dim_from_output(self, node, out_idx=0, dim=0):
+        """Create a symbolic dim named after the node's op type, graph position and output index."""
+        node_position = list(self.out_mp_.graph.node).index(node)
+        prefix = '{}{}_o{}_'.format(node.op_type, node_position, out_idx)
+        return self._new_symbolic_dim(prefix, dim)
+    def _new_symbolic_shape(self, rank, node, out_idx=0):
+        """Return a list of ``rank`` new symbolic dims for output ``out_idx`` of ``node``."""
+        shape = []
+        for axis in range(rank):
+            shape.append(
+                self._new_symbolic_dim_from_output(node, out_idx, axis))
+        return shape
+    def _compute_conv_pool_shape(self, node):
+        """Compute the output sympy shape of a convolution or pooling node.
+        With more than one input, the kernel shape is taken from the trailing
+        dims of the second input's shape and output dim 1 is set to that
+        shape's first dim; otherwise the ``kernel_shape`` attribute is used.
+        If no spatial input dim is symbolic and the output value info already
+        has a shape, its spatial dims are reused. Otherwise each spatial dim
+        is computed from dilations, strides, pads / auto_pad and ceil_mode.
+        Returns the shape as a list (the input shape list, updated in place).
+        """
+        sympy_shape = self._get_sympy_shape(node, 0)
+        if len(node.input) > 1:
+            W_shape = self._get_sympy_shape(node, 1)
+            rank = len(W_shape) - 2  # number of spatial axes
+            kernel_shape = W_shape[-rank:]
+            sympy_shape[1] = W_shape[0]
+        else:
+            W_shape = None
+            kernel_shape = get_attribute(node, 'kernel_shape')
+            rank = len(kernel_shape)
+        assert len(sympy_shape) == rank + 2
+        # only need to symbolic shape inference if input has symbolic dims in spatial axes
+        is_symbolic_dims = [not is_literal(i) for i in sympy_shape[-rank:]]
+        if not any(is_symbolic_dims):
+            shape = get_shape_from_type_proto(self.known_vi_[node.output[0]]
+                                              .type)
+            if len(shape) > 0:
+                assert len(sympy_shape) == len(shape)
+                sympy_shape[-rank:] = [sympy.Integer(d) for d in shape[-rank:]]
+                return sympy_shape
+        dilations = get_attribute(node, 'dilations', [1] * rank)
+        strides = get_attribute(node, 'strides', [1] * rank)
+        # kernel extent once dilation is taken into account
+        effective_kernel_shape = [(k - 1) * d + 1
+                                  for k, d in zip(kernel_shape, dilations)]
+        pads = get_attribute(node, 'pads')
+        if pads is None:
+            pads = [0] * (2 * rank)
+            auto_pad = get_attribute(node, 'auto_pad',
+                                     b'NOTSET').decode('utf-8')
+            if auto_pad != 'VALID' and auto_pad != 'NOTSET':
+                try:
+                    residual = [
+                        sympy.Mod(d, s)
+                        for d, s in zip(sympy_shape[-rank:], strides)
+                    ]
+                    total_pads = [
+                        max(0, (k - s) if r == 0 else (k - r))
+                        for k, s, r in zip(effective_kernel_shape, strides,
+                                           residual)
+                    ]
+                except TypeError:  # sympy may throw TypeError: cannot determine truth value of Relational
+                    total_pads = [
+                        max(0, (k - s))
+                        for k, s in zip(effective_kernel_shape, strides)
+                    ]  # assuming no residual if sympy throws error
+            elif auto_pad == 'VALID':
+                # an empty list means no padding is added in the loop below
+                total_pads = []
+            else:
+                total_pads = [0] * rank
+        else:
+            assert len(pads) == 2 * rank
+            # pads holds the begin values for every axis, then the end values
+            total_pads = [p1 + p2 for p1, p2 in zip(pads[:rank], pads[rank:])]
+        ceil_mode = get_attribute(node, 'ceil_mode', 0)
+        for i in range(rank):
+            effective_input_size = sympy_shape[-rank + i]
+            if len(total_pads) > 0:
+                effective_input_size = effective_input_size + total_pads[i]
+            if ceil_mode:
+                strided_kernel_positions = sympy.ceiling(
+                    (effective_input_size - effective_kernel_shape[i]) /
+                    strides[i])
+            else:
+                strided_kernel_positions = (
+                    effective_input_size - effective_kernel_shape[i]
+                ) // strides[i]
+            sympy_shape[-rank + i] = strided_kernel_positions + 1
+        return sympy_shape
+    def _check_merged_dims(self, dims, allow_broadcast=True):
+        """Suggest (and apply) a merge when the given dims are not all equal.
+        With ``allow_broadcast``, literal dims whose value is <= 1 are ignored
+        before the comparison.
+        """
+        if allow_broadcast:
+            dims = [d for d in dims if not is_literal(d) or int(d) > 1]
+        all_equal = all([d == dims[0] for d in dims])
+        if all_equal:
+            return
+        self._add_suggested_merge(dims, apply=True)
+    def _compute_matmul_shape(self, node, output_dtype=None):
+        """Infer the output shape of a matmul-like node and store it in its value info.
+        The leading (batch) dims are broadcast when both operands have rank
+        greater than 1; the contracted dims of both operands are checked with
+        ``_check_merged_dims``. When ``output_dtype`` is None, the element
+        type of the first input is used.
+        """
+        lhs_shape = self._get_shape(node, 0)
+        rhs_shape = self._get_shape(node, 1)
+        lhs_rank, rhs_rank = len(lhs_shape), len(rhs_shape)
+        # index of the contracted dim within each operand's shape
+        lhs_reduce_dim, rhs_reduce_dim = 0, 0
+        assert lhs_rank > 0 and rhs_rank > 0
+        if lhs_rank == 1 and rhs_rank == 1:
+            new_shape = []
+        elif lhs_rank == 1:
+            rhs_reduce_dim = -2
+            new_shape = rhs_shape[:rhs_reduce_dim] + [rhs_shape[-1]]
+        elif rhs_rank == 1:
+            lhs_reduce_dim = -1
+            new_shape = lhs_shape[:lhs_reduce_dim]
+        else:
+            lhs_reduce_dim, rhs_reduce_dim = -1, -2
+            batch_dims = self._broadcast_shapes(lhs_shape[:-2], rhs_shape[:-2])
+            new_shape = batch_dims + [lhs_shape[-2]] + [rhs_shape[-1]]
+        # merge reduce dim
+        reduce_dims = [lhs_shape[lhs_reduce_dim], rhs_shape[rhs_reduce_dim]]
+        self._check_merged_dims(reduce_dims, allow_broadcast=False)
+        if output_dtype is None:
+            # infer output_dtype from input type when not specified
+            first_input_vi = self.known_vi_[node.input[0]]
+            output_dtype = first_input_vi.type.tensor_type.elem_type
+        out_name = node.output[0]
+        self.known_vi_[out_name].CopyFrom(
+            helper.make_tensor_value_info(out_name, output_dtype, new_shape))
+    def _infer_ArrayFeatureExtractor(self, node):
+        """Output shape is the data shape without its last dim, followed by the indices shape."""
+        data_shape = self._get_shape(node, 0)
+        indices_shape = self._get_shape(node, 1)
+        out_name = node.output[0]
+        out_vi = self.known_vi_[out_name]
+        elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
+        out_shape = data_shape[:-1] + indices_shape
+        out_vi.CopyFrom(
+            helper.make_tensor_value_info(out_name, elem_type, out_shape))
+    def _infer_symbolic_compute_ops(self, node):
+        """Evaluate an arithmetic/comparison node on known sympy data.
+        Handles Add, Div, Equal, Max, Min, Mul, Sub and Where by passing the
+        matching operand function to ``_compute_on_sympy_data``.
+        """
+        def _sym_max(l):
+            # a literal operand below -int_max_ is dropped in favour of the other one
+            if is_literal(l[0]) and int(l[0]) < -self.int_max_:
+                return l[1]
+            if is_literal(l[1]) and int(l[1]) < -self.int_max_:
+                return l[0]
+            return sympy.Max(l[0], l[1])
+        def _sym_min(l):
+            # a literal operand above int_max_ is dropped in favour of the other one
+            if is_literal(l[0]) and int(l[0]) > self.int_max_:
+                return l[1]
+            if is_literal(l[1]) and int(l[1]) > self.int_max_:
+                return l[0]
+            return sympy.Min(l[0], l[1])
+        dispatch = {
+            'Add': lambda l: l[0] + l[1],
+            'Div': lambda l: l[0] // l[1],  # integer div in sympy
+            'Equal': lambda l: l[0] == l[1],
+            'Max': _sym_max,
+            'Min': _sym_min,
+            'Mul': lambda l: l[0] * l[1],
+            'Sub': lambda l: l[0] - l[1],
+            'Where': lambda l: l[1] if l[0] else l[2]
+        }
+        assert node.op_type in dispatch
+        self._compute_on_sympy_data(node, dispatch[node.op_type])
+    def _infer_Cast(self, node):
+        """Cast: forward the input's sympy data to the output via ``_pass_on_sympy_data``."""
+        self._pass_on_sympy_data(node)
+    def _infer_CategoryMapper(self, node):
+        """Output keeps the input shape; its type is INT64 for STRING input, STRING otherwise."""
+        input_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
+        output_type = (onnx.TensorProto.INT64
+                       if input_type == onnx.TensorProto.STRING else
+                       onnx.TensorProto.STRING)
+        out_name = node.output[0]
+        self.known_vi_[out_name].CopyFrom(
+            helper.make_tensor_value_info(out_name, output_type,
+                                          self._get_shape(node, 0)))
+    def _infer_Transpose(self, node):
+        """Infer the output shape of a Transpose node by permuting the input shape.
+        The permutation comes from the ``perm`` attribute; when the attribute
+        is absent the dims are reversed, which is the ONNX default. The output
+        keeps the element type of the input.
+        """
+        input_shape = self._get_shape(node, 0)
+        perm = get_attribute(node, 'perm')
+        if perm is None:
+            # indexing with perm=None would add an axis instead of permuting
+            perm = list(reversed(range(len(input_shape))))
+        # index the Python list directly: np.array(input_shape) would coerce a
+        # mixed symbolic/literal shape such as ['batch', 3] to all strings,
+        # turning literal dims into bogus dim_param names
+        output_shape = [input_shape[int(p)] for p in perm]
+        vi = self.known_vi_[node.output[0]]
+        vi.CopyFrom(
+            helper.make_tensor_value_info(node.output[0], self.known_vi_[
+                node.input[0]].type.tensor_type.elem_type, output_shape))
+    def _infer_Compress(self, node):
+        """Compress: the compressed axis gets a new symbolic dim.
+        Without an ``axis`` attribute the output is 1-D with that single new
+        dim; otherwise the input shape is reused with the dim at ``axis``
+        replaced.
+        """
+        input_shape = self._get_shape(node, 0)
+        # create a new symbolic dimension for Compress output
+        compress_len = self._new_symbolic_dim_from_output(node)
+        axis = get_attribute(node, 'axis')
+        if axis is not None:
+            output_shape = input_shape
+            target_axis = handle_negative_axis(axis, len(input_shape))
+            output_shape[target_axis] = compress_len
+        else:
+            # when axis is not specified, input is flattened before compress so output is 1D
+            output_shape = [compress_len]
+        out_name = node.output[0]
+        out_vi = self.known_vi_[out_name]
+        elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
+        out_vi.CopyFrom(
+            helper.make_tensor_value_info(out_name, elem_type, output_shape))
+    def _infer_Concat(self, node):
+        """Infer shape (and sympy data, when available) for a Concat node.
+        If any input has sympy data and all input values are known, the values
+        are concatenated into the output's sympy data (axis must be 0).
+        The output shape starts from the first input's shape; the concat axis
+        is the sum over all inputs with a non-empty shape, and differing dims
+        on the other axes are merged with ``_merge_symbols``.
+        """
+        if any([i in self.sympy_data_ for i in node.input]):
+            values = self._get_int_values(node)
+            if all([v is not None for v in values]):
+                assert 0 == get_attribute(node, 'axis')
+                self.sympy_data_[node.output[0]] = []
+                for i in range(len(node.input)):
+                    value = values[i]
+                    # list values are spliced in, scalar values are appended
+                    if type(value) == list:
+                        self.sympy_data_[node.output[0]].extend(value)
+                    else:
+                        self.sympy_data_[node.output[0]].append(value)
+        sympy_shape = self._get_sympy_shape(node, 0)
+        axis = handle_negative_axis(
+            get_attribute(node, 'axis'), len(sympy_shape))
+        # accumulate the concat axis over the remaining inputs
+        for i_idx in range(1, len(node.input)):
+            input_shape = self._get_sympy_shape(node, i_idx)
+            if input_shape:
+                sympy_shape[axis] = sympy_shape[axis] + input_shape[axis]
+        self._update_computed_dims(sympy_shape)
+        # merge symbolic dims for non-concat axes
+        for d in range(len(sympy_shape)):
+            if d == axis:
+                continue
+            dims = [
+                self._get_shape(node, i_idx)[d]
+                for i_idx in range(len(node.input))
+                if self._get_shape(node, i_idx)
+            ]
+            if all([d == dims[0] for d in dims]):
+                continue
+            merged = self._merge_symbols(dims)
+            # a string result names a symbolic dim; anything else is stored as is
+            if type(merged) == str:
+                sympy_shape[d] = self.symbolic_dims_[merged] if merged else None
+            else:
+                sympy_shape[d] = merged
+        vi = self.known_vi_[node.output[0]]
+        vi.CopyFrom(
+            helper.make_tensor_value_info(
+                node.output[0], self.known_vi_[node.input[0]].type.tensor_type.
+                elem_type, get_shape_from_sympy_shape(sympy_shape)))
+    def _infer_Conv(self, node):
+        """Conv: write the shape from ``_compute_conv_pool_shape`` to the output, keeping its element type."""
+        out_sympy_shape = self._compute_conv_pool_shape(node)
+        self._update_computed_dims(out_sympy_shape)
+        out_name = node.output[0]
+        out_vi = self.known_vi_[out_name]
+        elem_type = out_vi.type.tensor_type.elem_type
+        out_vi.CopyFrom(
+            helper.make_tensor_value_info(
+                out_name, elem_type,
+                get_shape_from_sympy_shape(out_sympy_shape)))
+    def _infer_ConstantOfShape(self, node):
+        """Infer the output shape of ConstantOfShape from the value of its shape input.
+        When the shape value is known, it becomes the output shape; if the
+        output type is INT64 and every dim is a literal, the constant tensor
+        itself is stored in ``sympy_data_``. When the shape value is unknown,
+        new symbolic dims are created for the output.
+        """
+        sympy_shape = self._get_int_values(node)[0]
+        vi = self.known_vi_[node.output[0]]
+        if sympy_shape is not None:
+            # a scalar shape value is treated as a one-element shape
+            if type(sympy_shape) != list:
+                sympy_shape = [sympy_shape]
+            self._update_computed_dims(sympy_shape)
+            # update sympy data if output type is int, and shape is known
+            if vi.type.tensor_type.elem_type == onnx.TensorProto.INT64 and all(
+                [is_literal(x) for x in sympy_shape]):
+                self.sympy_data_[node.output[0]] = np.ones(
+                    [int(x) for x in sympy_shape],
+                    dtype=np.int64) * numpy_helper.to_array(
+                        get_attribute(node, 'value', 0))
+        else:
+            # create new dynamic shape
+            sympy_shape = self._new_symbolic_shape(
+                self._get_shape_rank(node, 0), node)
+        vi.CopyFrom(
+            helper.make_tensor_value_info(
+                node.output[0], vi.type.tensor_type.elem_type,
+                get_shape_from_sympy_shape(sympy_shape)))
+    def _infer_Expand(self, node):
+        """Expand: broadcast the input shape against the requested shape value.
+        Nothing is written when the value of the shape input is unknown.
+        """
+        expand_to_shape = self._try_get_value(node, 1)
+        if expand_to_shape is None:
+            return
+        # new_shape's dim can come from shape value
+        self._update_computed_dims(expand_to_shape)
+        input_shape = self._get_shape(node, 0)
+        target_shape = get_shape_from_sympy_shape(expand_to_shape)
+        new_shape = self._broadcast_shapes(input_shape, target_shape)
+        out_name = node.output[0]
+        out_vi = self.known_vi_[out_name]
+        elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
+        out_vi.CopyFrom(
+            helper.make_tensor_value_info(out_name, elem_type, new_shape))
+    def _infer_Gather(self, node):
+        """Gather: splice the indices shape into the data shape at ``axis``.
+        When the data input has sympy data, the gathered values are also
+        stored as sympy data for the output (axis 0 only).
+        """
+        data_shape = self._get_shape(node, 0)
+        axis = handle_negative_axis(
+            get_attribute(node, 'axis', 0), len(data_shape))
+        indices_shape = self._get_shape(node, 1)
+        new_shape = data_shape[:axis] + indices_shape + data_shape[axis + 1:]
+        out_name = node.output[0]
+        out_vi = self.known_vi_[out_name]
+        out_vi.CopyFrom(
+            helper.make_tensor_value_info(
+                out_name, out_vi.type.tensor_type.elem_type, new_shape))
+        if node.input[0] not in self.sympy_data_:
+            return
+        # only handle 1D sympy compute
+        assert 0 == get_attribute(node, 'axis', 0)
+        idx = self._get_value(node, 1)
+        data = self.sympy_data_[node.input[0]]
+        if type(data) != list:
+            assert idx == 0
+            gathered = data
+        elif type(idx) == np.ndarray and len(idx.shape) == 1:
+            gathered = [data[int(i)] for i in idx]
+        else:
+            gathered = data[int(idx)]
+        self.sympy_data_[out_name] = gathered
+    def _infer_GatherElements(self, node):
+        """GatherElements: the output has the indices shape and the data element type."""
+        indices_shape = self._get_shape(node, 1)
+        out_name = node.output[0]
+        out_vi = self.known_vi_[out_name]
+        elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
+        out_vi.CopyFrom(
+            helper.make_tensor_value_info(out_name, elem_type, indices_shape))
+    def _infer_GatherND(self, node):
+        """GatherND: indices shape without its last dim, followed by the remaining data dims.
+        The last dim of the indices shape must be a literal no larger than the
+        data rank.
+        """
+        data_shape = self._get_shape(node, 0)
+        indices_shape = self._get_shape(node, 1)
+        index_depth = indices_shape[-1]
+        assert is_literal(index_depth) and index_depth <= len(data_shape)
+        new_shape = indices_shape[:-1] + data_shape[index_depth:]
+        out_name = node.output[0]
+        out_vi = self.known_vi_[out_name]
+        elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
+        out_vi.CopyFrom(
+            helper.make_tensor_value_info(out_name, elem_type, new_shape))
+    def _infer_If(self, node):
+        """Infer output shapes of an If node from its two branch subgraphs.
+        When the condition value is known, the branch that will not run is
+        overwritten with a copy of the one that will. Both branches are then
+        inferred; the outputs take their value infos from the then-branch and
+        the else-branch dims are asserted to match. With a known condition,
+        sympy data of the taken branch's outputs is passed on as well.
+        """
+        # special case for constant condition, in case there are mismatching shape from the non-executed branch
+        subgraphs = [
+            get_attribute(node, 'then_branch'),
+            get_attribute(node, 'else_branch')
+        ]
+        cond = self._try_get_value(node, 0)
+        if cond is not None:
+            if cond > 0:
+                subgraphs[1].CopyFrom(subgraphs[0])
+            else:
+                subgraphs[0].CopyFrom(subgraphs[1])
+        for i_sub, subgraph in enumerate(subgraphs):
+            subgraph_infer = self._onnx_infer_subgraph(
+                node, subgraph, use_node_input=False)
+            for i_out in range(len(node.output)):
+                vi = self.known_vi_[node.output[i_out]]
+                if i_sub == 0:
+                    # the then-branch defines the output value info
+                    vi.CopyFrom(subgraph.output[i_out])
+                    vi.name = node.output[i_out]
+                else:
+                    # the else-branch must agree dim by dim
+                    assert all([
+                        d1 == d2
+                        for d1, d2 in zip(vi.type.tensor_type.shape.dim,
+                                          subgraph.output[
+                                              i_out].type.tensor_type.shape.dim)
+                    ])
+                # pass on sympy data from subgraph, if cond is constant
+                if cond is not None and i_sub == (0 if cond > 0 else 1):
+                    if subgraph.output[
+                            i_out].name in subgraph_infer.sympy_data_:
+                        self.sympy_data_[vi.name] = subgraph_infer.sympy_data_[
+                            subgraph.output[i_out].name]
+    def _infer_Loop(self, node):
+        """Infer output shapes of a Loop node from its body subgraph.
+        Each body input receives the value info of the matching node input
+        (keeping its own name) before the body is inferred. Node outputs are
+        copied from the body outputs, skipping the first one (the condition).
+        Outputs at index >= ``len(node.input) - 2`` get a new symbolic dim
+        prepended to their shape.
+        """
+        subgraph = get_attribute(node, 'body')
+        assert len(subgraph.input) == len(node.input)
+        for i, si in enumerate(subgraph.input):
+            # CopyFrom overwrites the name, so it is saved and restored
+            subgraph_name = si.name
+            si.CopyFrom(self.known_vi_[node.input[i]])
+            si.name = subgraph_name
+        self._onnx_infer_subgraph(node, subgraph)
+        # create a new symbolic dimension for iteration dependent dimension
+        loop_iter_dim = self._new_symbolic_dim_from_output(node)
+        num_loop_carried = len(node.input) - 2
+        for i in range(len(node.output)):
+            vi = self.known_vi_[node.output[i]]
+            vi.CopyFrom(
+                subgraph.output[i + 1]
+            )  # first subgraph output is condition, not in node output
+            if i >= num_loop_carried:
+                subgraph_vi_dim = subgraph.output[i +
+                                                  1].type.tensor_type.shape.dim
+                # rebuild the dims with the iteration dim in front
+                vi.type.tensor_type.shape.ClearField('dim')
+                vi_dim = vi.type.tensor_type.shape.dim
+                vi_dim.add().dim_param = loop_iter_dim
+                vi_dim.extend(list(subgraph_vi_dim))
+            vi.name = node.output[i]
+    def _infer_MatMul(self, node):
+        """MatMul: delegate to ``_compute_matmul_shape``; output type follows the first input."""
+        self._compute_matmul_shape(node)
+    def _infer_MatMulInteger(self, node):
+        """MatMulInteger: delegate to ``_compute_matmul_shape`` with an INT32 output type."""
+        self._compute_matmul_shape(node, onnx.TensorProto.INT32)
+    def _infer_NonMaxSuppression(self, node):
+        """NonMaxSuppression: INT64 output of shape [new symbolic dim, 3]."""
+        selected = self._new_symbolic_dim_from_output(node)
+        out_name = node.output[0]
+        out_vi = self.known_vi_[out_name]
+        out_vi.CopyFrom(
+            helper.make_tensor_value_info(out_name, onnx.TensorProto.INT64,
+                                          [selected, 3]))
+    def _infer_NonZero(self, node):
+        """NonZero: output shape is [input rank, new symbolic dim], element type unchanged."""
+        input_rank = self._get_shape_rank(node, 0)
+        # create a new symbolic dimension for NonZero output
+        nz_len = self._new_symbolic_dim_from_output(node, 0, 1)
+        out_name = node.output[0]
+        out_vi = self.known_vi_[out_name]
+        elem_type = out_vi.type.tensor_type.elem_type
+        out_vi.CopyFrom(
+            helper.make_tensor_value_info(out_name, elem_type,
+                                          [input_rank, nz_len]))
+    def _infer_OneHot(self, node):
+        """OneHot: insert a new symbolic dim into the input shape at ``axis`` (default -1).
+        The output element type is taken from the third input.
+        """
+        shape = self._get_shape(node, 0)
+        axis = handle_negative_axis(
+            get_attribute(node, 'axis', -1), len(shape) + 1)
+        depth_dim = self._new_symbolic_dim_from_output(node)
+        new_shape = shape[:axis] + [depth_dim] + shape[axis:]
+        out_name = node.output[0]
+        out_vi = self.known_vi_[out_name]
+        elem_type = self.known_vi_[node.input[2]].type.tensor_type.elem_type
+        out_vi.CopyFrom(
+            helper.make_tensor_value_info(out_name, elem_type, new_shape))
+    def _infer_Pad(self, node):
+        """Pad: compute the padded shape when the output shape is missing or incomplete.
+        Pads come from the ``pads`` attribute for opset <= 10 and from the
+        value of the second input otherwise. With unknown pads, new symbolic
+        dims are created for every axis.
+        """
+        pads = (get_attribute(node, 'pads')
+                if get_opset(self.out_mp_) <= 10 else
+                self._try_get_value(node, 1))
+        vi = self.known_vi_[node.output[0]]
+        output_shape = get_shape_from_type_proto(vi.type)
+        if len(output_shape) != 0 and None not in output_shape:
+            # the output shape is already fully known
+            return
+        sympy_shape = self._get_sympy_shape(node, 0)
+        rank = len(sympy_shape)
+        if pads is None:
+            # dynamic pads, create new symbolic dimensions
+            new_sympy_shape = self._new_symbolic_shape(rank, node)
+        else:
+            assert len(pads) == 2 * rank
+            pads_begin, pads_end = pads[:rank], pads[rank:]
+            new_sympy_shape = [
+                d + pad_up + pad_down
+                for d, pad_up, pad_down in zip(sympy_shape, pads_begin,
+                                               pads_end)
+            ]
+            self._update_computed_dims(new_sympy_shape)
+        output_tp = self.known_vi_[node.input[0]].type.tensor_type.elem_type
+        vi.CopyFrom(
+            helper.make_tensor_value_info(
+                node.output[0], output_tp,
+                get_shape_from_sympy_shape(new_sympy_shape)))
+    def _infer_Pool(self, node):
+        """Pooling: give every named output the shape from ``_compute_conv_pool_shape``."""
+        pooled_shape = self._compute_conv_pool_shape(node)
+        self._update_computed_dims(pooled_shape)
+        for out_name in node.output:
+            if out_name:
+                out_vi = self.known_vi_[out_name]
+                elem_type = out_vi.type.tensor_type.elem_type
+                out_vi.CopyFrom(
+                    helper.make_tensor_value_info(
+                        out_name, elem_type,
+                        get_shape_from_sympy_shape(pooled_shape)))
+    def _infer_Range(self, node):
+        """Range: 1-D output of length max(ceil((limit - start) / delta), 0).
+        When any of the inputs is unknown, a new symbolic dim is used for the
+        length instead.
+        """
+        vi = self.known_vi_[node.output[0]]
+        input_data = self._get_int_values(node)
+        inputs_known = all([i is not None for i in input_data])
+        if not inputs_known:
+            new_dim = self._new_symbolic_dim_from_output(node)
+            new_sympy_shape = [self.symbolic_dims_[new_dim]]
+        else:
+            start = as_scalar(input_data[0])
+            limit = as_scalar(input_data[1])
+            delta = as_scalar(input_data[2])
+            length = sympy.Max(sympy.ceiling((limit - start) / delta), 0)
+            new_sympy_shape = [length]
+        self._update_computed_dims(new_sympy_shape)
+        elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
+        vi.CopyFrom(
+            helper.make_tensor_value_info(
+                node.output[0], elem_type,
+                get_shape_from_sympy_shape(new_sympy_shape)))
+    def _infer_ReduceProd(self, node):
+        """ReduceProd: store the product of known input data as sympy data.
+        Only handled for ``keepdims == 0`` with ``axes == [0]``.
+        """
+        axes = get_attribute(node, 'axes')
+        keep_dims = get_attribute(node, 'keepdims')
+        if not (keep_dims == 0 and axes == [0]):
+            return
+        data = self._get_int_values(node)[0]
+        if data is None:
+            return
+        self.sympy_data_[node.output[0]] = sympy_reduce_product(data)
+    def _infer_Reshape(self, node):
+        """Infer the output shape of a Reshape node.
+        With an unknown shape value, the output gets new symbolic dims, one
+        per element of the (literal) length of the shape input. With a known
+        shape value, 0 copies the input dim at the same position, and a single
+        -1 is resolved as the total input size divided by the product of the
+        other dims. Sympy data of the input is passed on in both cases.
+        """
+        shape_value = self._try_get_value(node, 1)
+        vi = self.known_vi_[node.output[0]]
+        if shape_value is None:
+            shape_shape = self._get_shape(node, 1)
+            assert len(shape_shape) == 1
+            shape_rank = shape_shape[0]
+            assert is_literal(shape_rank)
+            vi.CopyFrom(
+                helper.make_tensor_value_info(
+                    node.output[0], vi.type.tensor_type.elem_type,
+                    get_shape_from_sympy_shape(
+                        self._new_symbolic_shape(shape_rank, node))))
+        else:
+            input_shape = self._get_shape(node, 0)
+            input_sympy_shape = self._get_sympy_shape(node, 0)
+            # total number of elements of the input
+            total = int(1)
+            for d in input_sympy_shape:
+                total = total * d
+            new_sympy_shape = []
+            deferred_dim_idx = -1
+            # product of all output dims except the deferred (-1) one
+            non_deferred_size = int(1)
+            for i, d in enumerate(shape_value):
+                if type(d) == sympy.Symbol:
+                    new_sympy_shape.append(d)
+                elif d == 0:
+                    new_sympy_shape.append(input_sympy_shape[i])
+                    non_deferred_size = non_deferred_size * input_sympy_shape[i]
+                else:
+                    new_sympy_shape.append(d)
+                if d == -1:
+                    deferred_dim_idx = i
+                elif d != 0:
+                    non_deferred_size = non_deferred_size * d
+            assert new_sympy_shape.count(-1) < 2
+            if -1 in new_sympy_shape:
+                new_dim = total // non_deferred_size
+                new_sympy_shape[deferred_dim_idx] = new_dim
+                self._update_computed_dims(new_sympy_shape)
+            vi.CopyFrom(
+                helper.make_tensor_value_info(
+                    node.output[0], vi.type.tensor_type.elem_type,
+                    get_shape_from_sympy_shape(new_sympy_shape)))
+        self._pass_on_sympy_data(node)
+    def _infer_Resize(self, node):
+        vi = self.known_vi_[node.output[0]]
+        input_sympy_shape = self._get_sympy_shape(node, 0)
+        if get_opset(self.out_mp_) <= 10:
+            scales = self._try_get_value(node, 1)
+            if scales is not None:
+                new_sympy_shape = [
+                    sympy.simplify(sympy.floor(d * s))
+                    for d, s in zip(input_sympy_shape, scales)
+                ]
+                self._update_computed_dims(new_sympy_shape)
+                vi.CopyFrom(
+                    helper.make_tensor_value_info(
+                        node.output[0], self.known_vi_[node.input[
+                            0]].type.tensor_type.elem_type,
+                        get_shape_from_sympy_shape(new_sympy_shape)))
+        else:
+            roi = self._try_get_value(node, 1)
+            scales = self._try_get_value(node, 2)
+            sizes = self._try_get_value(node, 3)
+            if sizes is not None:
+                new_sympy_shape = [
+                    sympy.simplify(sympy.floor(s)) for s in sizes
+                ]
+                self._update_computed_dims(new_sympy_shape)
+            elif roi is not None and scales is not None:
+                rank = len(scales)
+                assert len(roi) == 2 * rank
+                roi_start = list(roi)[:rank]
+                roi_end = list(roi)[rank:]
+                scales = list(scales)
+                new_sympy_shape = [
+                    sympy.simplify(sympy.floor(d * (end - start) * scale))
+                    for d, start, end, scale in zip(input_sympy_shape,
+                                                    roi_start, roi_end, scales)
+                ]
+                self._update_computed_dims(new_sympy_shape)
+            else:
+                new_sympy_shape = self._new_symbolic_shape(
+                    self._get_shape_rank(node, 0), node)
+            vi.CopyFrom(
+                helper.make_tensor_value_info(node.output[0], self.known_vi_[
+                    node.input[0]].type.tensor_type.elem_type,
+                                              get_shape_from_sympy_shape(
+                                                  new_sympy_shape)))
+    def _infer_Scan(self, node):
+        """Infer output shapes of a Scan node by inferring its body subgraph.
+
+        The subgraph inputs are overwritten with the value infos of the node
+        inputs (scan inputs lose their scan axis), the subgraph is inferred,
+        and the scan axis is re-inserted into each scan output.
+        """
+        subgraph = get_attribute(node, 'body')
+        num_scan_inputs = get_attribute(node, 'num_scan_inputs')
+        scan_input_axes = get_attribute(node, 'scan_input_axes',
+                                        [0] * num_scan_inputs)
+        # Node inputs are the state variables followed by the scan inputs.
+        num_scan_states = len(node.input) - num_scan_inputs
+        scan_input_axes = [
+            handle_negative_axis(
+                ax, self._get_shape_rank(node, i + num_scan_states))
+            for i, ax in enumerate(scan_input_axes)
+        ]
+        # We may have cases where the subgraph has optional inputs that appear in both subgraph's input and initializer,
+        # but not in the node's input. In such cases, the input model might be invalid, but let's skip those optional inputs.
+        assert len(subgraph.input) >= len(node.input)
+        subgraph_inputs = subgraph.input[:len(node.input)]
+        for i, si in enumerate(subgraph_inputs):
+            # CopyFrom overwrites the name, so remember it and restore it below.
+            subgraph_name = si.name
+            si.CopyFrom(self.known_vi_[node.input[i]])
+            if i >= num_scan_states:
+                # Scan inputs: drop the dimension at the scan axis.
+                scan_input_dim = si.type.tensor_type.shape.dim
+                scan_input_dim.remove(scan_input_dim[scan_input_axes[
+                    i - num_scan_states]])
+            si.name = subgraph_name
+        self._onnx_infer_subgraph(node, subgraph)
+        num_scan_outputs = len(node.output) - num_scan_states
+        scan_output_axes = get_attribute(node, 'scan_output_axes',
+                                         [0] * num_scan_outputs)
+        # Size of the scan axis, read from the last node input.
+        scan_input_dim = get_shape_from_type_proto(self.known_vi_[node.input[
+            -1]].type)[scan_input_axes[-1]]
+        for i, o in enumerate(node.output):
+            vi = self.known_vi_[o]
+            if i >= num_scan_states:
+                # Scan outputs: insert the scan dimension at the output axis.
+                shape = get_shape_from_type_proto(subgraph.output[i].type)
+                new_dim = handle_negative_axis(
+                    scan_output_axes[i - num_scan_states], len(shape) + 1)
+                shape = shape[:new_dim] + [scan_input_dim] + shape[new_dim:]
+                vi.CopyFrom(
+                    helper.make_tensor_value_info(o, subgraph.output[
+                        i].type.tensor_type.elem_type, shape))
+            else:
+                # State outputs take the subgraph output info unchanged.
+                vi.CopyFrom(subgraph.output[i])
+            vi.name = o
+    def _infer_ScatterElements(self, node):
+        data_shape = self._get_shape(node, 0)
+        vi = self.known_vi_[node.output[0]]
+        vi.CopyFrom(
+            helper.make_tensor_value_info(node.output[0], self.known_vi_[
+                node.input[0]].type.tensor_type.elem_type, data_shape))
+    def _infer_Shape(self, node):
+        self.sympy_data_[node.output[0]] = self._get_sympy_shape(node, 0)
+    def _infer_Size(self, node):
+        sympy_shape = self._get_sympy_shape(node, 0)
+        self.sympy_data_[node.output[0]] = sympy_reduce_product(sympy_shape)
+        self.known_vi_[node.output[0]].CopyFrom(
+            helper.make_tensor_value_info(node.output[0],
+                                          onnx.TensorProto.INT64, []))
+    def _infer_Slice(self, node):
+        """Infer the output shape of a Slice node.
+
+        For opset <= 9 axes/starts/ends come from attributes and steps are
+        all 1; for later opsets they are read from inputs 1-4 and may be
+        unknown. Unknown starts/ends yield new symbolic dims on the sliced
+        axes. If input 0 carries sympy data, the sliced data is recorded too.
+        """
+        if get_opset(self.out_mp_) <= 9:
+            axes = get_attribute(node, 'axes')
+            starts = get_attribute(node, 'starts')
+            ends = get_attribute(node, 'ends')
+            steps = [1] * len(axes)
+        else:
+            starts = as_list(self._try_get_value(node, 1), keep_none=True)
+            ends = as_list(self._try_get_value(node, 2), keep_none=True)
+            axes = self._try_get_value(node, 3)
+            steps = self._try_get_value(node, 4)
+            # Missing axes/steps default to 0..n-1 and all ones, sized from
+            # whichever of starts/ends is known.
+            if axes is None and not (starts is None and ends is None):
+                axes = list(
+                    range(0, len(starts if starts is not None else ends)))
+            if steps is None and not (starts is None and ends is None):
+                steps = [1] * len(starts if starts is not None else ends)
+            axes = as_list(axes, keep_none=True)
+            steps = as_list(steps, keep_none=True)
+        new_sympy_shape = self._get_sympy_shape(node, 0)
+        if starts is None or ends is None:
+            if axes is None:
+                # Nothing is known: every output dim becomes a new symbol.
+                for i in range(len(new_sympy_shape)):
+                    new_sympy_shape[i] = self._new_symbolic_dim_from_output(
+                        node, 0, i)
+            else:
+                # Only the sliced axes get new symbolic dims.
+                new_sympy_shape = get_shape_from_sympy_shape(new_sympy_shape)
+                for i in axes:
+                    new_sympy_shape[i] = self._new_symbolic_dim_from_output(
+                        node, 0, i)
+        else:
+            for i, s, e, t in zip(axes, starts, ends, steps):
+                idx = handle_negative_axis(i, len(new_sympy_shape))
+                if is_literal(e):
+                    # Ends at or beyond +/- int_max_ are treated as
+                    # "slice to the end / to the beginning".
+                    if e >= self.int_max_:
+                        e = new_sympy_shape[i]
+                    elif e <= -self.int_max_:
+                        e = 0 if s > 0 else -1
+                    elif is_literal(new_sympy_shape[i]):
+                        # Literal end and literal dim: wrap negatives, clamp.
+                        if e < 0:
+                            e = e + new_sympy_shape[i]
+                        e = min(e, new_sympy_shape[i])
+                    else:
+                        if e > 0:
+                            e = sympy.Min(
+                                e, new_sympy_shape[i]
+                            ) if e > 1 else e  #special case for slicing first to make computation easier
+                        else:
+                            e = new_sympy_shape[i] + e
+                else:
+                    if is_literal(new_sympy_shape[i]):
+                        e = sympy.Min(e, new_sympy_shape[i])
+                    else:
+                        # Comparing two symbolic expressions may raise when
+                        # the relation cannot be decided.
+                        try:
+                            if e >= new_sympy_shape[i]:
+                                e = new_sympy_shape[i]
+                        except Exception:
+                            print(
+                                'Unable to determine if {} <= {}, treat as equal'
+                                .format(e, new_sympy_shape[i]))
+                            e = new_sympy_shape[i]
+                # Negative literal starts count from the end of the axis.
+                if is_literal(s) and int(s) < 0:
+                    s = new_sympy_shape[i] + s
+                # Number of elements from s to e with step t, rounded up.
+                new_sympy_shape[idx] = (e - s + t + (-1 if t > 0 else 1)) // t
+            self._update_computed_dims(new_sympy_shape)
+        vi = self.known_vi_[node.output[0]]
+        vi.CopyFrom(
+            helper.make_tensor_value_info(
+                node.output[0], vi.type.tensor_type.elem_type,
+                get_shape_from_sympy_shape(new_sympy_shape)))
+        # handle sympy_data if needed, for slice in shape computation
+        if node.input[0] in self.sympy_data_:
+            # Only a single slice along axis 0 is supported here.
+            assert [0] == axes
+            assert len(starts) == 1
+            assert len(ends) == 1
+            self.sympy_data_[node.output[0]] = self.sympy_data_[node.input[0]][
+                starts[0]:ends[0]]
+    def _infer_Split(self, node):
+        input_sympy_shape = self._get_sympy_shape(node, 0)
+        axis = handle_negative_axis(
+            get_attribute(node, 'axis', 0), len(input_sympy_shape))
+        split = get_attribute(node, 'split')
+        if not split:
+            num_outputs = len(node.output)
+            split = [input_sympy_shape[axis] /
+                     sympy.Integer(num_outputs)] * num_outputs
+            self._update_computed_dims(split)
+        else:
+            split = [sympy.Integer(s) for s in split]
+        for i_o in range(len(split)):
+            vi = self.known_vi_[node.output[i_o]]
+            vi.CopyFrom(
+                helper.make_tensor_value_info(
+                    node.output[i_o], self.known_vi_[node.input[
+                        0]].type.tensor_type.elem_type,
+                    get_shape_from_sympy_shape(input_sympy_shape[:axis] + [
+                        split[i_o]
+                    ] + input_sympy_shape[axis + 1:])))
+            self.known_vi_[vi.name] = vi
+    def _infer_Squeeze(self, node):
+        self._pass_on_sympy_data(node)
+    def _infer_Tile(self, node):
+        repeats_value = self._get_value(node, 1)
+        input_sympy_shape = self._get_sympy_shape(node, 0)
+        new_sympy_shape = []
+        for i, d in enumerate(input_sympy_shape):
+            new_dim = d * repeats_value[i]
+            new_sympy_shape.append(new_dim)
+        self._update_computed_dims(new_sympy_shape)
+        vi = self.known_vi_[node.output[0]]
+        vi.CopyFrom(
+            helper.make_tensor_value_info(
+                node.output[0], vi.type.tensor_type.elem_type,
+                get_shape_from_sympy_shape(new_sympy_shape)))
+    def _infer_TopK(self, node):
+        rank = self._get_shape_rank(node, 0)
+        axis = handle_negative_axis(get_attribute(node, 'axis', -1), rank)
+        new_shape = self._get_shape(node, 0)
+        if get_opset(self.out_mp_) <= 9:
+            k = get_attribute(node, 'k')
+        else:
+            k = self._get_int_values(node)[1]
+        if k == None:
+            k = self._new_symbolic_dim_from_output(node)
+        else:
+            k = as_scalar(k)
+        if type(k) in [int, str]:
+            new_shape[axis] = k
+        else:
+            new_sympy_shape = self._get_sympy_shape(node, 0)
+            new_sympy_shape[axis] = k
+            self._update_computed_dims(
+                new_sympy_shape
+            )  # note that TopK dim could be computed in sympy_data, so need to update computed_dims when it enters shape
+            new_shape = get_shape_from_sympy_shape(new_sympy_shape)
+        for i_o in range(len(node.output)):
+            vi = self.known_vi_[node.output[i_o]]
+            vi.CopyFrom(
+                helper.make_tensor_value_info(node.output[
+                    i_o], vi.type.tensor_type.elem_type, new_shape))
+    def _infer_Unsqueeze(self, node):
+        self._pass_on_sympy_data(node)
+    def _infer_ZipMap(self, node):
+        map_key_type = None
+        if get_attribute(node, 'classlabels_int64s') is not None:
+            map_key_type = onnx.TensorProto.INT64
+        elif get_attribute(node, 'classlabels_strings') is not None:
+            map_key_type = onnx.TensorProto.STRING
+        assert map_key_type is not None
+        new_vi = onnx.ValueInfoProto()
+        new_vi.name = node.output[0]
+        new_vi.type.sequence_type.elem_type.map_type.value_type.tensor_type.elem_type = onnx.TensorProto.FLOAT
+        new_vi.type.sequence_type.elem_type.map_type.key_type = map_key_type
+        vi = self.known_vi_[node.output[0]]
+        vi.CopyFrom(new_vi)
+    def _infer_impl(self, in_mp, start_sympy_data={}):
+        self.sympy_data_ = start_sympy_data
+        self.out_mp_.graph.ClearField('value_info')
+        self._apply_suggested_merge(graph_input_only=True)
+        self.input_symbols_ = set()
+        for i in self.out_mp_.graph.input:
+            input_dims = i.type.tensor_type.shape.dim
+            for i_dim in range(len(input_dims)):
+                if get_dim_from_type_proto(input_dims[i_dim]) is None:
+                    # some models use None for symbolic dim in input, replace it with a string
+                    input_dims[i_dim].dim_param = self._new_symbolic_dim(i.name,
+                                                                         i_dim)
+            self.input_symbols_.update([
+                d for d in get_shape_from_type_proto(i.type) if type(d) == str
+            ])
+        for s in self.input_symbols_:
+            if s in self.suggested_merge_:
+                s_merge = self.suggested_merge_[s]
+                assert s_merge in self.symbolic_dims_
+                self.symbolic_dims_[s] = self.symbolic_dims_[s_merge]
+            else:
+                self.symbolic_dims_[s] = sympy.Symbol(s, integer=True)
+        # create a temporary ModelProto for single node inference
+        # note that we remove initializer to have faster inference
+        # for tensor ops like Reshape/Tile/Expand that read initializer, we need to do sympy computation based inference anyways
+        self.tmp_mp_ = onnx.ModelProto()
+        self.tmp_mp_.CopyFrom(self.out_mp_)
+        self.tmp_mp_.graph.ClearField('initializer')
+        for node in self.out_mp_.graph.node:
+            assert all([i in self.known_vi_ for i in node.input if i])
+            self._onnx_infer_single_node(node)
+            if node.op_type in self.dispatcher_:
+                self.dispatcher_[node.op_type](node)
+            if self.verbose_ > 2:
+                print(node.op_type + ': ' + node.name)
+                for i, name in enumerate(node.input):
+                    print('  Input {}: {} {}55555'.format(
+                        i, name, 'initializer'
+                        if name in self.initializers_ else ''))
+            # onnx automatically merge dims with value, i.e. Mul(['aaa', 'bbb'], [1000, 1]) -> [1000, 'bbb']
+            # symbolic shape inference needs to apply merge of 'aaa' -> 1000 in this case
+            if node.op_type in [
+                    'Add', 'Sub', 'Mul', 'Div', 'MatMul', 'MatMulInteger',
+                    'MatMulInteger16', 'Where', 'Sum'
+            ]:
+                vi = self.known_vi_[node.output[0]]
+                out_rank = len(get_shape_from_type_proto(vi.type))
+                in_shapes = [
+                    self._get_shape(node, i) for i in range(len(node.input))
+                ]
+                for d in range(out_rank - (2 if node.op_type in [
+                        'MatMul', 'MatMulInteger', 'MatMulInteger16'
+                ] else 0)):
+                    in_dims = [
+                        s[len(s) - out_rank + d] for s in in_shapes
+                        if len(s) + d >= out_rank
+                    ]
+                    if len(in_dims) > 1:
+                        self._check_merged_dims(in_dims, allow_broadcast=True)
+            for i_o in range(len(node.output)):
+                vi = self.known_vi_[node.output[i_o]]
+                out_type = vi.type
+                out_type_kind = out_type.WhichOneof('value')
+                # only TensorProto and SparseTensorProto have shape
+                if out_type_kind != 'tensor_type' and out_type_kind != 'sparse_tensor_type':
+                    continue
+                out_shape = get_shape_from_type_proto(vi.type)
+                out_type_undefined = out_type.tensor_type.elem_type == onnx.TensorProto.UNDEFINED
+                if self.verbose_ > 2:
+                    print('  {}: {} {}'.format(node.output[
+                        i_o], str(out_shape), vi.type.tensor_type.elem_type))
+                    if node.output[i_o] in self.sympy_data_:
+                        print('  Sympy Data: ' + str(self.sympy_data_[
+                            node.output[i_o]]))
+                if None in out_shape or out_type_undefined:
+                    if self.auto_merge_:
+                        if node.op_type in [
+                                'Add', 'Sub', 'Mul', 'Div', 'MatMul',
+                                'MatMulInteger', 'MatMulInteger16', 'Concat',
+                                'Where', 'Sum'
+                        ]:
+                            shapes = [
+                                self._get_shape(node, i)
+                                for i in range(len(node.input))
+                            ]
+                            if node.op_type in [
+                                    'MatMul', 'MatMulInteger', 'MatMulInteger16'
+                            ]:
+                                # only support auto merge for MatMul for dim < rank-2 when rank > 2
+                                assert len(shapes[0]) > 2 and dim_idx[0] < len(
+                                    shapes[0]) - 2
+                                assert len(shapes[1]) > 2 and dim_idx[1] < len(
+                                    shapes[1]) - 2
+                        elif node.op_type == 'Expand':
+                            # auto merge for cases like Expand([min(batch, 1), min(seq, 512)], [batch, seq])
+                            shapes = [
+                                self._get_shape(node, 0),
+                                self._get_value(node, 1)
+                            ]
+                        else:
+                            shapes = []
+                        if shapes:
+                            for idx in range(len(out_shape)):
+                                if out_shape[idx] is not None:
+                                    continue
+                                dim_idx = [
+                                    len(s) - len(out_shape) + idx
+                                    for s in shapes
+                                ]
+                                assert all([d >= 0 for d in dim_idx])
+                                self._add_suggested_merge([
+                                    s[i] if is_literal(s[i]) else str(s[i])
+                                    for s, i in zip(shapes, dim_idx)
+                                ])
+                            self.run_ = True
+                        else:
+                            self.run_ = False
+                    else:
+                        self.run_ = False
+                    # create new dynamic dims for ops not handled by symbolic shape inference
+                    if self.run_ == False and not node.op_type in self.dispatcher_:
+                        is_unknown_op = (out_type_undefined and
+                                         len(out_shape) == 0)
+                        if is_unknown_op:
+                            # unknown op to ONNX, maybe from higher opset or other domain
+                            # only guess the output rank from input 0 when using guess_output_rank option
+                            out_rank = self._get_shape_rank(
+                                node, 0) if self.guess_output_rank_ else -1
+                        else:
+                            # valid ONNX op, but not handled by symbolic shape inference, just assign dynamic shape
+                            out_rank = len(out_shape)
+                        if out_rank >= 0:
+                            new_shape = self._new_symbolic_shape(out_rank, node,
+                                                                 i_o)
+                            vi.CopyFrom(
+                                helper.make_tensor_value_info(
+                                    vi.name, self.known_vi_[node.input[
+                                        0]].type.tensor_type.elem_type,
+                                    get_shape_from_sympy_shape(new_shape)))
+                            if self.verbose_ > 0:
+                                if is_unknown_op:
+                                    print(
+                                        "Possible unknown op: {} node: {}, guessing {} shape"
+                                        .format(node.op_type, node.name,
+                                                vi.name))
+                                if self.verbose_ > 2:
+                                    print('  {}: {} {}'.format(
+                                        node.output[i_o],
+                                        str(new_shape),
+                                        vi.type.tensor_type.elem_type))
+                            self.run_ = True
+                            continue  # continue the inference after guess, no need to stop as no merge is needed
+                    if self.verbose_ > 0 or not self.auto_merge_ or out_type_undefined:
+                        print('Stopping at incomplete shape inference at ' +
+                              node.op_type + ': ' + node.name)
+                        print('node inputs:')
+                        for i in node.input:
+                            print(self.known_vi_[i])
+                        print('node outputs:')
+                        for o in node.output:
+                            print(self.known_vi_[o])
+                        if self.auto_merge_ and not out_type_undefined:
+                            print('Merging: ' + str(self.suggested_merge_))
+                    return False
+        self.run_ = False
+        return True
+    def _update_output_from_vi(self):
+        for output in self.out_mp_.graph.output:
+            if output.name in self.known_vi_:
+                tmp_output = self.known_vi_[output.name]
+                output.CopyFrom(tmp_output)
+    @staticmethod
+    def infer_shapes(in_mp,
+                     int_max=2**31 - 1,
+                     fixed_input_shape=None,
+                     auto_merge=True,
+                     guess_output_rank=False,
+                     verbose=0):
+        if get_opset(in_mp) < 7:
+            print('Only support shape inferencing models of opset 7 and above.')
+            return
+        symbolic_shape_inference = SymbolicShapeInference(
+            int_max, auto_merge, guess_output_rank, verbose)
+        all_shapes_inferred = False
+        symbolic_shape_inference._preprocess(
+            in_mp, input_shapes=fixed_input_shape)
+        try:
+            while symbolic_shape_inference.run_:
+                all_shapes_inferred = symbolic_shape_inference._infer_impl(
+                    in_mp)
+            symbolic_shape_inference._update_output_from_vi()
+            if not all_shapes_inferred:
+                print('!' * 10)
+                symbolic_shape_inference.out_mp_ = shape_inference.infer_shapes(
+                    symbolic_shape_inference.out_mp_)
+            #onnx.save(symbolic_shape_inference.out_mp_, 'tmp.onnx')
+        except:
+            print('Stopping at incomplete shape inference')
+            symbolic_shape_inference.out_mp_ = shape_inference.infer_shapes(
+                symbolic_shape_inference.out_mp_)
+        return symbolic_shape_inference.out_mp_.graph
--- a/x2paddle/x2paddle/decoder/paddle_decoder.py
+++ b/x2paddle/x2paddle/decoder/paddle_decoder.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddle.fluid as fluid
+class PaddleDecoder(object):
+    def __init__(self,
+                 model_dir,
+                 model_filename='__model__',
+                 params_filename=None):
+        exe = fluid.Executor(fluid.CPUPlace())
+        [self.program, feed, fetchs] = fluid.io.load_inference_model(
+            model_dir,
+            exe,
+            model_filename=model_filename,
+            params_filename=params_filename)
--- a/x2paddle/x2paddle/decoder/tf_decoder.py
+++ b/x2paddle/x2paddle/decoder/tf_decoder.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from x2paddle.core.graph import GraphNode, Graph
+from x2paddle.core.fluid_code import FluidCode
+from tensorflow.python.framework import tensor_util
+from tensorflow.core.framework import attr_value_pb2
+import tensorflow as tf
+import copy as cp
+import numpy
+import sys
+class TFGraphNode(GraphNode):
+    def __init__(self, layer, layer_name=None, data_format="NHWC"):
+        if layer_name is None:
+            super(TFGraphNode, self).__init__(
+                layer,
+                layer.name.replace('/', '_').replace('-', '_').replace('^', ''))
+        else:
+            super(TFGraphNode, self).__init__(
+                layer,
+                layer_name.replace('/', '_').replace('-', '_').replace('^', ''))
+        self.layer_type = layer.op
+        self.tf_data_format = data_format
+        self.pd_data_format = "NCHW"
+        self.fluid_code = FluidCode()
+        self.dtype_map = {
+            1: "float32",
+            3: "int32",
+            4: "uint8",
+            9: "int64",
+            10: "bool"
+        }
+    @property
+    def out_shapes(self):
+        if self.layer_type == "OneShotIterator" or self.layer_type == "IteratorV2":
+            values = self.layer.attr["output_shapes"].list.shape
+        else:
+            values = self.layer.attr["_output_shapes"].list.shape
+        out_shapes = list()
+        for value in values:
+            shape = [dim.size for dim in value.dim]
+            out_shapes.append(shape)
+        return out_shapes
+    @property
+    def dtype(self):
+        keys = ['dtype', 'Tidx', 'T', 'DstT']
+        for k in keys:
+            dtype = self.layer.attr[k].type
+            if dtype > 0:
+                break
+        if dtype == 0:
+            dtype = self.layer.attr['output_types'].list.type[0]
+        if dtype not in self.dtype_map:
+            raise Exception("Dtype[{}] of node({}) not in dtype_map".format(
+                dtype, self.layer.name))
+        return self.dtype_map[dtype]
+    @property
+    def raw_dtype(self):
+        keys = ['dtype', 'Tidx', 'T', 'DstT']
+        for k in keys:
+            dtype = self.layer.attr[k].type
+            if dtype > 0:
+                break
+        return dtype
+    @property
+    def value(self):
+        assert self.layer_type == "Const", "Only Const node has value."
+        attr = self.layer.attr['value']
+        field = getattr(attr, attr.WhichOneof('value'))
+        return tensor_util.MakeNdarray(field)
+    def get_attr(self, name):
+        if name not in self.layer.attr:
+            return None
+        attr = self.layer.attr[name]
+        field = attr.WhichOneof('value')
+        value = getattr(attr, field) if field else None
+        if isinstance(value, attr_value_pb2.AttrValue.ListValue):
+            result = list(value.ListFields()[0][1])
+            for i in range(len(result)):
+                if isinstance(result[i], int):
+                    result[i] = int(result[i])
+                try:
+                    if isinstance(result[i], long):
+                        result[i] = int(result[i])
+                except:
+                    pass
+            return result
+        else:
+            return value
+class TFGraph(Graph):
+    """Graph of TFGraphNode objects built from a TensorFlow model's nodes.
+    Node names are normalised by replacing '/' and '-' with '_'. build()
+    connects the nodes and then prunes the graph; the names of removed
+    identity-like and Cast nodes are kept in identity_map so that get_node()
+    can still resolve them.
+    """
+    def __init__(self, model, data_format="NHWC"):
+        super(TFGraph, self).__init__(model)
+        # Removed node name -> layer name of the node that replaced it.
+        self.identity_map = dict()
+        # Op types for which get_node() sets index 0 on un-suffixed lookups.
+        self.multi_out_ops = ['Split', 'SplitV', 'IteratorV2']
+        self.tf_data_format = data_format
+    def build(self):
+        """Create the nodes, connect their edges and run the graph clean-ups."""
+        # Every op except 'Assert' becomes a node keyed by its normalised name.
+        for layer in self.model.node:
+            if layer.op != 'Assert':
+                key = layer.name.replace('/', '_').replace('-', '_')
+                self.node_map[key] = TFGraphNode(
+                    layer, data_format=self.tf_data_format)
+        for layer_name, node in self.node_map.items():
+            if node.layer_type == 'Const':
+                continue
+            for raw_name in node.layer.input:
+                # '^' is stripped from the input name before the lookup.
+                src_name = raw_name.replace('/', '_').replace(
+                    '-', '_').replace('^', '')
+                if src_name in self.node_map:
+                    self.connect(src_name, layer_name)
+                    continue
+                # Retry with whatever precedes the first ':'.
+                base_name = src_name.strip().split(':')[0]
+                if base_name not in self.node_map:
+                    raise Exception(
+                        'input[{}] of node[{}] does not exist in node_map'.
+                        format(src_name, layer_name))
+                self.connect(base_name, layer_name)
+        super(TFGraph, self).build()
+        # Inputs of 'Assert' ops are dropped from the list of output nodes.
+        for layer in self.model.node:
+            if layer.op != 'Assert':
+                continue
+            for ipt in layer.input:
+                ipt_name = ipt.replace('-', '_').replace('/', '_')
+                if ipt_name in self.output_nodes:
+                    self.output_nodes.remove(ipt_name)
+        # tensorflow graph optimize
+        self._remove_isolated_node()
+        self._optimize_dialiation_conv()
+        self._remove_identity_node()
+        self._remove_cast_node()
+    def get_node(self, node_name, copy=False):
+        """Look up a node by (possibly ':N'-suffixed) name.
+        The name is normalised and redirected through identity_map before
+        the base-class lookup. Returns None when the base class finds nothing.
+        """
+        parts = node_name.strip().split(':')
+        base = parts[0].replace('/', '_').replace('-', '_')
+        parts[0] = self.identity_map.get(base, base)
+        node = super(TFGraph, self).get_node(":".join(parts), copy)
+        if node is None:
+            return None
+        if node.layer_type == "Switch" and hasattr(node, 'index'):
+            del node.index
+        if len(parts) == 1 and node.layer_type in self.multi_out_ops:
+            node.index = 0
+        return node
+    def remove_node(self, node_name):
+        """Remove a node and rewire its consumers to the node's first input.
+        Raises:
+            Exception: if node_name is not in the graph
+        """
+        if node_name not in self.node_map:
+            raise Exception("Node[{}] not in graph".format(node_name))
+        removed = self.node_map[node_name]
+        inputs = removed.inputs
+        outputs = removed.outputs
+        #        assert len(inputs) == 1
+        parent = self.node_map[inputs[0]]
+        parent.outputs.remove(node_name)
+        for consumer_name in outputs:
+            consumer = self.node_map[consumer_name]
+            slot = consumer.inputs.index(node_name)
+            consumer.inputs[slot] = inputs[0]
+            parent.outputs.append(consumer_name)
+        del self.node_map[node_name]
+        self.topo_sort.remove(node_name)
+    def _optimize_dialiation_conv(self):
+        """Mark SpaceToBatchND -> ExpandDims -> Conv2D -> Squeeze ->
+        BatchToSpaceND chains: the outer two nodes get skip = True and the
+        Conv2D node receives the block shape as its dilation.
+        """
+        expected_chain = ['ExpandDims', 'Conv2D', 'Squeeze', 'BatchToSpaceND']
+        for name in list(self.node_map.keys()):
+            node = self.node_map[name]
+            if node.layer_type != "SpaceToBatchND":
+                continue
+            chain = []
+            cursor = node
+            for expected in expected_chain:
+                # Only the first output of each node is followed.
+                cursor = self.node_map[cursor.outputs[0]]
+                if cursor.layer_type != expected:
+                    break
+                chain.append(cursor)
+            else:
+                node.skip = True
+                chain[3].skip = True
+                block_shape = self.node_map[node.inputs[1]]
+                chain[1].dilation = block_shape.value.tolist()
+    def _remove_isolated_node(self):
+        """Delete every node that has neither inputs nor outputs."""
+        isolated = list()
+        for node_name in self.node_map.keys():
+            if len(self.get_node(node_name).inputs) != 0:
+                continue
+            if len(self.get_node(node_name).outputs) != 0:
+                continue
+            isolated.append(node_name)
+        for node_name in isolated:
+            del self.node_map[node_name]
+            if node_name in self.input_nodes:
+                self.input_nodes.remove(node_name)
+            if node_name in self.output_nodes:
+                self.output_nodes.remove(node_name)
+            self.topo_sort.remove(node_name)
+    def _remove_identity_node(self):
+        """Remove pass-through nodes and remember what replaced each one."""
+        identity_ops = [
+            'Identity', 'StopGradient', 'Switch', 'Merge',
+            'PlaceholderWithDefault', 'IteratorGetNext'
+        ]
+        identity_names = [
+            node_name for node_name, node in self.node_map.items()
+            if node.layer_type in identity_ops
+        ]
+        for node_name in identity_names:
+            node = self.get_node(node_name)
+            input_node = self.get_node(node.inputs[0])
+            self.remove_node(node_name)
+            self.identity_map[node_name] = input_node.layer_name
+            if node_name in self.output_nodes:
+                slot = self.output_nodes.index(node_name)
+                self.output_nodes[slot] = input_node.layer_name
+    def _remove_cast_node(self):
+        """Fold a Cast into the Placeholder feeding it.
+        Only Cast nodes whose input is a Placeholder with exactly one output
+        are removed; the Placeholder's dtype attribute takes the Cast's
+        raw_dtype.
+        """
+        cast_names = list()
+        for node_name, node in self.node_map.items():
+            if node.layer_type != "Cast":
+                continue
+            producer = self.get_node(node.inputs[0])
+            if producer.layer_type == "Placeholder" and len(
+                    producer.outputs) == 1:
+                cast_names.append(node_name)
+        for node_name in cast_names:
+            node = self.get_node(node_name)
+            input_node = self.get_node(node.inputs[0])
+            input_node.layer.attr["dtype"].type = node.raw_dtype
+            self.remove_node(node_name)
+            self.identity_map[node_name] = input_node.layer_name
+            if node_name in self.output_nodes:
+                slot = self.output_nodes.index(node_name)
+                self.output_nodes[slot] = input_node.layer_name
+    def data_format_propagation(self, node):
+        """Recursively copy node.tf_data_format to every downstream node."""
+        for out in self.node_map[node.layer_name].outputs:
+            next_node = self.node_map[out]
+            next_node.tf_data_format = node.tf_data_format
+            self.data_format_propagation(next_node)
+class TFDecoder(object):
+    """Load a serialized TensorFlow GraphDef and expose it as a TFGraph.
+    The model is imported into a session so that tensor values and shapes
+    can be probed by running the graph on random inputs.
+    """
+    def __init__(self, pb_model, data_format="NHWC", define_input_shape=False):
+        """Import the model stored at pb_model and build self.tf_graph.
+        Args:
+            pb_model (str): path of the serialized GraphDef file
+            data_format (str): data format handed to TFGraph
+            define_input_shape (bool): when True, ask on the console for the
+                shape of every input
+        """
+        # tf.compat.v1 is tried first; releases without that namespace fall
+        # back to the top-level names.
+        try:
+            self.sess = tf.compat.v1.Session()
+        except AttributeError:
+            self.sess = tf.Session()
+        # Input tensor name (without ':0') -> (shape, dtype).
+        self.input_info = dict()
+        self.define_input_shape = define_input_shape
+        try:
+            graph_def = tf.compat.v1.GraphDef()
+        except AttributeError:
+            graph_def = tf.GraphDef()
+        with open(pb_model, 'rb') as f:
+            graph_def.ParseFromString(f.read())
+        # The context has to be entered for the session's graph to become
+        # the default graph; a bare as_default() call has no effect.
+        with self.sess.graph.as_default():
+            input_map = self._check_input_shape(graph_def)
+            self._fix_output_shape(graph_def)
+            tf.import_graph_def(graph_def, name='', input_map=input_map)
+            try:
+                initializer = tf.compat.v1.global_variables_initializer()
+            except AttributeError:
+                initializer = tf.global_variables_initializer()
+        self.sess.run(initializer)
+        self.tf_graph = TFGraph(
+            self.sess.graph._as_graph_def(add_shapes=True)[0], data_format)
+        self.tf_graph.build()
+    def _fix_output_shape(self, graph):
+        """Set '_disable_call_shape_inference' to False on 'swish_f32' nodes."""
+        for node in graph.node:
+            if node.op == "swish_f32":
+                node.attr['_disable_call_shape_inference'].b = False
+    def _check_input_shape(self, graph_def):
+        """Collect input shapes, asking the user for those that are unusable.
+        Fills self.input_info and returns a map from original input tensor
+        names to the replacement placeholders created for user-typed shapes.
+        Args:
+            graph_def: the parsed GraphDef; a deep copy is inspected
+        Returns:
+            dict: '<input name>:0' -> replacement placeholder tensor
+        """
+        numpy.random.seed(13)
+        graph_def = cp.deepcopy(graph_def)
+        input_map = dict()
+        input_ops = ("Placeholder", "OneShotIterator", "IteratorV2")
+        prompt = "Shape of Input(e.g. None,224,224,3): "
+        for layer in graph_def.node:
+            if layer.op not in input_ops:
+                continue
+            graph_node = TFGraphNode(layer)
+            dtype = graph_node.layer.attr['dtype'].type
+            # 0: shape usable, 1: shape unknown, 2: more than one -1
+            # dimension, 3: the user asked to define every input shape.
+            need_define_shape = 0
+            if self.define_input_shape:
+                need_define_shape = 3
+            elif graph_node.layer.attr[
+                    'shape'].shape.unknown_rank or not graph_node.get_attr(
+                        "shape"):
+                need_define_shape = 1
+            else:
+                value = graph_node.layer.attr["shape"].shape
+                shape = [dim.size for dim in value.dim]
+                if shape.count(-1) > 1:
+                    need_define_shape = 2
+            if need_define_shape == 1:
+                # Best effort: accept the node's own output shape when it
+                # is non-empty and has fewer than two -1 dimensions.
+                try:
+                    shape = graph_node.out_shapes[0]
+                    if len(shape) > 0 and shape.count(-1) < 2:
+                        need_define_shape = 0
+                except Exception:
+                    pass
+            if need_define_shape > 0:
+                shape = None
+                if graph_node.get_attr("shape"):
+                    value = graph_node.layer.attr["shape"].shape
+                    shape = [dim.size for dim in value.dim]
+                if need_define_shape == 1:
+                    print("Unknown shape for input tensor[tensor name: \"{}\"]".
+                          format(layer.name))
+                elif need_define_shape == 2:
+                    print(
+                        "\nShape[now is {}] for input tensor[tensor name: \"{}\"] not support yet"
+                        .format(shape, layer.name))
+                else:
+                    print(
+                        "Define shape[now is {}] for input tensor[tensor name: \"{}\"]"
+                        .format(shape, layer.name))
+                print(
+                    "Use your keyboard type the shape of input tensor below :)")
+                while True:
+                    # raw_input only exists on Python 2.
+                    try:
+                        shape = raw_input(prompt)
+                    except NameError:
+                        shape = input(prompt)
+                    if shape.count("None") > 1:
+                        print("Only 1 dimension can be None, type again:)")
+                    else:
+                        break
+                shape = [
+                    None if dim == "None" else int(dim)
+                    for dim in shape.strip().split(',')
+                ]
+                assert shape.count(None) <= 1, "Only one dimension can be None"
+                placeholder_name = "x2paddle_{}".format(layer.name)
+                try:
+                    x2paddle_input = tf.compat.v1.placeholder(
+                        dtype=dtype, shape=shape, name=placeholder_name)
+                except AttributeError:
+                    x2paddle_input = tf.placeholder(
+                        dtype=dtype, shape=shape, name=placeholder_name)
+                input_map["{}:0".format(layer.name)] = x2paddle_input
+                if shape.count(None) > 0:
+                    shape[shape.index(None)] = -1
+                self.input_info[placeholder_name] = (shape, dtype)
+            else:
+                # NOTE(review): the shape is re-read from the 'shape'
+                # attribute here even when out_shapes was what cleared the
+                # flag above - confirm this is intended.
+                value = graph_node.layer.attr["shape"].shape
+                shape = [dim.size for dim in value.dim]
+                self.input_info[layer.name] = (shape, dtype)
+        return input_map
+    def _tensor_name(self, graph_node):
+        """Return '<layer name>:<index>', with index 0 when the node has none."""
+        index = graph_node.index if hasattr(graph_node, "index") else 0
+        return graph_node.layer.name + ":{}".format(index)
+    def _random_feed(self, batch):
+        """Build a feed of random samples for every recorded input.
+        The first -1 dimension of each input shape is replaced by batch.
+        """
+        feed = dict()
+        for input_name, info in self.input_info.items():
+            # Copy so the recorded shape is never modified.
+            shape = list(info[0])
+            input_tensor = self.sess.graph.get_tensor_by_name(input_name + ":0")
+            if shape.count(-1) > 0:
+                shape[shape.index(-1)] = batch
+            feed[input_tensor] = numpy.random.random_sample(shape)
+        return feed
+    # trick method
+    # should be removed after PaddlePaddle V1.6 been released
+    def infer_tensor(self, graph_node):
+        """Run the graph on random inputs (batch size 2) and return the
+        value of graph_node's output tensor.
+        """
+        tensor_name = self._tensor_name(graph_node)
+        feed = self._random_feed(2)
+        output_tensor = self.sess.graph.get_tensor_by_name(tensor_name)
+        return self.sess.run([output_tensor], feed)[0]
+    def infer_shape_tensor(self, graph_node, out_shape=None):
+        """Evaluate a shape-valued tensor for batch sizes 2, 3 and 5.
+        Returns the flattened value as a list; the single entry that changes
+        with the batch size is reported as -1.
+        Args:
+            graph_node: node whose output tensor is evaluated
+            out_shape (list): optional shape used to fill one of two
+                negative entries of the result
+        Raises:
+            Exception: when no stable value can be derived
+        """
+        tensor_name = self._tensor_name(graph_node)
+        results = list()
+        for b in [2, 3, 5]:
+            feed = self._random_feed(b)
+            output_tensor = self.sess.graph.get_tensor_by_name(tensor_name)
+            results.append(self.sess.run([output_tensor], feed)[0].flatten())
+        compare01 = (results[0] == results[1])
+        compare12 = (results[1] == results[2])
+        if compare01.all() and compare12.all():
+            return results[0].tolist()
+        if (compare01 == compare12).all():
+            index = numpy.argwhere(compare01 == False).flatten()
+            if index.shape[0] != 1:
+                raise Exception("There's not only one unstable dimension")
+            results[0][index[0]] = -1
+            index = numpy.argwhere(results[0] < 0).flatten()
+            if index.shape[0] > 2:
+                print("Warning: More than two dimension less than zero")
+            if index.shape[0] == 2 and out_shape is not None:
+                if out_shape[index[1]] > 0:
+                    results[0][index[1]] = out_shape[index[1]]
+                else:
+                    results[0][index[0]] = out_shape[index[0]]
+            return results[0].tolist()
+        else:
+            raise Exception("Couldn't infer a stable shape shape tensor value")
+    def infer_tensor_shape(self, graph_node):
+        """Infer the shape of graph_node's output by running batch sizes
+        2, 3 and 5.
+        Returns the shape as a list, with -1 in the first position when only
+        that dimension follows the batch size; returns None when the way the
+        shape varies differs between the runs.
+        Raises:
+            Exception: when more than one dimension varies, or the varying
+                dimension is not the first one
+        """
+        tensor_name = self._tensor_name(graph_node)
+        shapes = list()
+        for b in [2, 3, 5]:
+            feed = self._random_feed(b)
+            output_tensor = self.sess.graph.get_tensor_by_name(tensor_name)
+            shape = self.sess.run([output_tensor], feed)[0].shape
+            shapes.append(numpy.array(shape))
+        compare01 = (shapes[0] == shapes[1])
+        compare12 = (shapes[1] == shapes[2])
+        if compare01.all() and compare12.all():
+            # shapes[0] is the numpy array; 'shape' is a plain tuple whose
+            # items have no tolist().
+            return shapes[0].tolist()
+        if (compare01 == compare12).all():
+            index = numpy.argwhere(compare01 == False).flatten()
+            if index.shape[0] != 1:
+                raise Exception("There's not only one unstable dimension")
+            if index[0] != 0:
+                raise Exception("Batch size not in the first dimension")
+            shapes[0][0] = -1
+            return shapes[0].tolist()
+        return None
--- a/x2paddle/x2paddle/op_mapper/__init__.py
+++ b/x2paddle/x2paddle/op_mapper/__init__.py
--- a/x2paddle/x2paddle/op_mapper/caffe_custom_layer/__init__.py
+++ b/x2paddle/x2paddle/op_mapper/caffe_custom_layer/__init__.py
+from .register import get_registered_layers
+#custom layer import begins
+from . import roipooling
+from . import priorbox
+from . import permute
+from . import detectionoutput
+from . import normalize
+from . import select
+from . import shufflechannel
+from . import convolutiondepthwise
+from . import axpy
+from . import upsample
+from . import relu6
+#custom layer import ends
+# Registry filled by the register() calls of the modules imported above:
+# layer type name -> {'shape': ..., 'layer': ..., 'weights': ...}.
+custom_layers = get_registered_layers()
+def set_args(f, params):
+    """ collect the arguments of function 'f' that 'params' can supply
+    Args:
+        f (function): a python function object
+        params (object): object whose attributes are matched against the
+            argument names of 'f'; may be None
+    Returns:
+        arg_list (tuple): names of the positional arguments of 'f'
+        kwargs (dict): argument name -> attribute value, for every argument
+            name found as an attribute on 'params'
+    """
+    code = f.__code__
+    arg_list = code.co_varnames[:code.co_argcount]
+    kwargs = {
+        arg_name: getattr(params, arg_name)
+        for arg_name in arg_list
+        if hasattr(params, arg_name) and params is not None
+    }
+    return arg_list, kwargs
+def has_layer(layer_type):
+    """ tell whether 'layer_type' is a key of the custom layer registry
+    """
+    registered = layer_type in custom_layers
+    return registered
+def get_params(layer, layer_type):
+    """ fetch the '<something>_param' attribute of 'layer' for 'layer_type'
+    The attribute name is 'convolution_param' for Deconvolution and
+    ConvolutionDepthwise, 'norm_param' for Normalize, a snake_case form of
+    the type name when it holds two or more capital letters, and the
+    lower-cased type name otherwise. Returns None when 'layer' has no such
+    attribute.
+    """
+    import re
+    lowered = layer_type.lower()
+    if lowered in ("deconvolution", "convolutiondepthwise"):
+        param_name = 'convolution_param'
+    elif lowered == "normalize":
+        param_name = 'norm_param'
+    elif len(re.findall("[A-Z]", layer_type)) >= 2:
+        # A new word starts at every capital that follows a lower-case
+        # letter, e.g. 'PriorBox' -> 'prior_box'.
+        words = []
+        word = layer_type[0].lower()
+        for prev, ch in zip(layer_type, layer_type[1:]):
+            if ch.isupper() and prev.islower():
+                words.append(word)
+                word = ''
+            word += ch.lower()
+        words.append(word)
+        param_name = '_'.join(words + ['param'])
+    else:
+        param_name = lowered + '_param'
+    return getattr(layer, param_name, None)
+def compute_output_shape(node):
+    """ run the registered 'shape' function of a custom layer on a node
+    The keyword arguments come from the node's layer parameters; the
+    node's input_shape is passed as the first positional argument.
+    """
+    kind = node.layer_type
+    assert kind in custom_layers, "layer[%s] not exist in custom layers" % (
+        kind)
+    shape_func = custom_layers[kind]['shape']
+    _, kwargs = set_args(shape_func, get_params(node.layer, kind))
+    return shape_func(node.input_shape, **kwargs)
+def make_custom_layer(node):
+    """ return (source code, function object) of the node's 'layer' function
+    """
+    kind = node.layer_type
+    assert kind in custom_layers, "layer[%s] not exist in custom layers" % (
+        kind)
+    layer_func = custom_layers[kind]['layer']
+    import inspect
+    source = inspect.getsource(layer_func)
+    return source, layer_func
+def deal_weights(node, data=None):
+    """ call the registered 'weights' function with the node's layer name
+    """
+    weights_func = custom_layers[node.layer_type]['weights']
+    return weights_func(node.layer_name, data)
--- a/x2paddle/x2paddle/op_mapper/caffe_custom_layer/axpy.py
+++ b/x2paddle/x2paddle/op_mapper/caffe_custom_layer/axpy.py
+from .register import register
+from x2paddle.core.util import *
+def axpy_shape(input_shapes):
+    """ output shape of Axpy: the shape of the second input
+    Expects three input shapes; the first two must have the same number of
+    dimensions and the last two must be equal.
+    """
+    assert len(input_shapes) == 3, "not valid input shape for axpy layer"
+    alpha_shape, x_shape = input_shapes[0], input_shapes[1]
+    assert len(alpha_shape) == len(x_shape), 'should have same dims'
+    y_shape = input_shapes[2]
+    assert (y_shape == x_shape),\
+            "shape not consistent for axpy[%s <--> %s]" \
+            % (str(x_shape), str(y_shape))
+    return [x_shape]
+def axpy_layer(inputs, input_shape=None, name=None):
+    """ compute inputs[1] * inputs[0] + inputs[2] with fluid element-wise ops
+    The multiplication is done with axis=0; 'name' is given to the addition.
+    """
+    alpha, x, y = inputs[0], inputs[1], inputs[2]
+    scaled = fluid.layers.elementwise_mul(x, alpha, axis=0)
+    return fluid.layers.elementwise_add(scaled, y, name=name)
+def axpy_weights(name, data=None):
+    """ Axpy has no weights: always return an empty list
+    """
+    return []
+# Register the three Axpy functions under the layer type 'Axpy'.
+register(kind='Axpy', shape=axpy_shape, layer=axpy_layer, weights=axpy_weights)
--- a/x2paddle/x2paddle/op_mapper/caffe_custom_layer/convolutiondepthwise.py
+++ b/x2paddle/x2paddle/op_mapper/caffe_custom_layer/convolutiondepthwise.py
+from .register import register
+from x2paddle.core.util import *
+import numbers
+def convolutiondepthwise_shape(input_shape,
+                               num_output=None,
+                               pad=None,
+                               kernel_size=None,
+                               stride=None,
+                               dilation=None,
+                               pad_h=None,
+                               pad_w=None,
+                               kernel_h=None,
+                               kernel_w=None,
+                               stride_h=None,
+                               stride_w=None):
+    """ compute the output shape of a ConvolutionDepthwise layer
+    Args:
+        input_shape (list): shapes of the inputs; only input_shape[0], read
+            as [N, C, H, W], is used
+        num_output (int): number of output channels; the input channel
+            count is used when None
+        pad, kernel_size, stride: a number, or a sequence whose first item
+            applies to the height and whose last item applies to the width
+        dilation: sequence holding zero, one or two dilation factors
+        pad_h, pad_w, kernel_h, kernel_w, stride_h, stride_w: per-axis
+            values; a positive one takes precedence over the sequence
+    Returns:
+        list: one-item list holding [N, C_out, H_out, W_out]
+    """
+    import math
+    def _resolve_pair(value, value_h, value_w, default):
+        # Arguments left at None count as "not given".
+        value_h = 0 if value_h is None else value_h
+        value_w = 0 if value_w is None else value_w
+        if isinstance(value, numbers.Number):
+            return value, value
+        if value is not None and len(value) > 0:
+            return (value_h if value_h > 0 else value[0],
+                    value_w if value_w > 0 else value[len(value) - 1])
+        if value_h > 0 or value_w > 0:
+            return value_h, value_w
+        return default, default
+    k_h, k_w = _resolve_pair(kernel_size, kernel_h, kernel_w, 1)
+    s_h, s_w = _resolve_pair(stride, stride_h, stride_w, 1)
+    p_h, p_w = _resolve_pair(pad, pad_h, pad_w, 0)
+    dila_len = 0 if dilation is None else len(dilation)
+    dila_h = 1
+    dila_w = 1
+    if dila_len == 2:
+        dila_h = dilation[0]
+        dila_w = dilation[1]
+    elif dila_len == 1:
+        dila_h = dila_w = dilation[0]
+    else:
+        assert dila_len == 0, "invalid length[%s] of dilation in convolution" % (
+            dila_len)
+    # Height is dimension 2 and width is dimension 3, matching the order
+    # of the returned shape.
+    i_h = input_shape[0][2]
+    i_w = input_shape[0][3]
+    o_h = (i_h + 2 * p_h - (dila_h * (k_h - 1) + 1)) / float(s_h) + 1
+    o_w = (i_w + 2 * p_w - (dila_w * (k_w - 1) + 1)) / float(s_w) + 1
+    o_h = int(math.floor(o_h))
+    o_w = int(math.floor(o_w))
+    c = num_output if num_output is not None else input_shape[0][1]
+    return [[input_shape[0][0], c, o_h, o_w]]
+def convolutiondepthwise_layer(inputs,
+                               num_output=None,
+                               pad=None,
+                               kernel_size=None,
+                               stride=None,
+                               dilation=None,
+                               pad_h=None,
+                               pad_w=None,
+                               kernel_h=None,
+                               kernel_w=None,
+                               stride_h=None,
+                               stride_w=None,
+                               input_shape=None,
+                               name=None):
+    """ build the fluid conv2d that implements ConvolutionDepthwise
+    Kernel size, stride and padding are each resolved to a [height, width]
+    pair; the group count is derived from the input and output channel
+    counts. The parameters are named '<name>_weights' and '<name>_bias'.
+    """
+    import numbers
+    def _pick_hw(both, along_h, along_w, fallback):
+        # A plain number applies to both axes. A non-empty sequence supplies
+        # its first/last item unless the per-axis value is positive.
+        # Otherwise positive per-axis values are taken as given.
+        if isinstance(both, numbers.Number):
+            return [both] * 2
+        if len(both) > 0:
+            return [
+                along_h if along_h > 0 else both[0],
+                along_w if along_w > 0 else both[len(both) - 1]
+            ]
+        if along_h > 0 or along_w > 0:
+            return [along_h, along_w]
+        return [fallback] * 2
+    k_h, k_w = _pick_hw(kernel_size, kernel_h, kernel_w, 1)
+    s_h, s_w = _pick_hw(stride, stride_h, stride_w, 1)
+    p_h, p_w = _pick_hw(pad, pad_h, pad_w, 0)
+    data = inputs[0]
+    dila_len = len(dilation)
+    if dila_len == 2:
+        dila_h, dila_w = dilation[0], dilation[1]
+    elif dila_len == 1:
+        dila_h = dila_w = dilation[0]
+    else:
+        assert dila_len == 0, "invalid length[%s] of dilation in convolution" % (
+            dila_len)
+        dila_h = dila_w = 1
+    c_in = input_shape[0][1]
+    c_out = input_shape[0][1] if num_output is None else num_output
+    if c_in > c_out:
+        group = int(c_in / (c_in / c_out))
+    else:
+        group = int(c_in / (c_out / c_in))
+    return fluid.layers.conv2d(
+        data,
+        dilation=[dila_h, dila_w],
+        filter_size=[k_h, k_w],
+        stride=[s_h, s_w],
+        padding=[p_h, p_w],
+        groups=group,
+        num_filters=c_out,
+        param_attr=name + '_weights',
+        bias_attr=name + '_bias',
+        name=name)
+def convolutiondepthwise_weights(name, data=None):
+    """ return the parameter names '<name>_weights' and '<name>_bias'
+    """
+    return [name + suffix for suffix in ('_weights', '_bias')]
+# Register the three ConvolutionDepthwise functions under that layer type.
+register(
+    kind='ConvolutionDepthwise',
+    shape=convolutiondepthwise_shape,
+    layer=convolutiondepthwise_layer,
+    weights=convolutiondepthwise_weights)
--- a/x2paddle/x2paddle/op_mapper/caffe_custom_layer/detectionoutput.py
+++ b/x2paddle/x2paddle/op_mapper/caffe_custom_layer/detectionoutput.py
+from .register import register
+from x2paddle.core.util import *
+def detectionoutput_shape(input_shape):
+    """ output shape of DetectionOutput: always [[-1, 6]]
+    """
+    output_shape = [-1, 6]
+    return [output_shape]
+def detectionoutput_layer(inputs,
+                          nms_param=None,
+                          background_label_id=0,
+                          share_location=True,
+                          keep_top_k=100,
+                          confidence_threshold=0.1,
+                          input_shape=None,
+                          name=None):
+    """ build the fluid detection_output op for a DetectionOutput layer
+    Args:
+        inputs (list): [location predictions, flattened confidences,
+            prior boxes]; the prior-box input is split in two along
+            dimension 1 into boxes and variances
+        nms_param (str): comma separated 'key: value' items; 'top_k' is read
+            as int and every other key as float. Missing keys, or None,
+            fall back to nms_threshold=0.3, top_k=10, eta=1.0
+        background_label_id, keep_top_k, confidence_threshold: forwarded to
+            fluid.layers.detection_output
+        share_location, input_shape, name: accepted but not used
+    Returns:
+        the output of fluid.layers.detection_output
+    """
+    parsed = {}
+    if nms_param is not None:
+        for item in nms_param.split(','):
+            # Parsing stops at the first empty item (e.g. a trailing comma).
+            if item == '':
+                break
+            # 'key' rather than 'name' so the 'name' argument stays intact.
+            key, obj = item.split(': ')
+            parsed[key] = int(obj) if key == 'top_k' else float(obj)
+    default = {"nms_threshold": 0.3, "top_k": 10, "eta": 1.0}
+    for key, value in default.items():
+        parsed.setdefault(key, value)
+    mbox_conf_flatten = inputs[1]
+    mbox_priorbox = inputs[2]
+    mbox_priorbox_list = fluid.layers.split(mbox_priorbox, 2, dim=1)
+    pb = mbox_priorbox_list[0]
+    pbv = mbox_priorbox_list[1]
+    pb = fluid.layers.reshape(x=pb, shape=[-1, 4])
+    pbv = fluid.layers.reshape(x=pbv, shape=[-1, 4])
+    mbox_loc = inputs[0]
+    mbox_loc = fluid.layers.reshape(x=mbox_loc, shape=[-1, pb.shape[0], 4])
+    mbox_conf_flatten = fluid.layers.reshape(
+        x=mbox_conf_flatten, shape=[0, pb.shape[0], -1])
+    out = fluid.layers.detection_output(
+        scores=mbox_conf_flatten,
+        loc=mbox_loc,
+        prior_box=pb,
+        prior_box_var=pbv,
+        background_label=background_label_id,
+        nms_threshold=parsed["nms_threshold"],
+        nms_top_k=parsed["top_k"],
+        keep_top_k=keep_top_k,
+        score_threshold=confidence_threshold,
+        nms_eta=parsed["eta"])
+    return out
+def detectionoutput_weights(name, data=None):
+    """ DetectionOutput has no weights: always return an empty list
+    """
+    return []
+# Register the three DetectionOutput functions under that layer type.
+register(
+    kind='DetectionOutput',
+    shape=detectionoutput_shape,
+    layer=detectionoutput_layer,
+    weights=detectionoutput_weights)
--- a/x2paddle/x2paddle/op_mapper/caffe_custom_layer/normalize.py
+++ b/x2paddle/x2paddle/op_mapper/caffe_custom_layer/normalize.py
+from .register import register
+from x2paddle.core.util import *
+def normalize_shape(input_shape):
+    """ Normalize keeps the shape: return 'input_shape' unchanged
+    """
+    output_shape = input_shape
+    return output_shape
+def normalize_layer(inputs,
+                    across_spatial=None,
+                    channel_shared=None,
+                    input_shape=None,
+                    name=None):
+    """ L2-normalise inputs[0] along axis 1 and multiply by a scale parameter
+    The scale parameter is named '<name>_scale'. It has one element when
+    'channel_shared' is truthy, otherwise input_shape[0][1] elements.
+    Only across_spatial == False is supported.
+    """
+    assert across_spatial == False, "Only support across_spatial == False for Normalize"
+    data = inputs[0]
+    l2_norm = fluid.layers.l2_normalize(data, axis=1, name=name + '_l2')
+    if channel_shared:
+        param_shape, flat_shape, mul_axis = [1], [1], -1
+    else:
+        param_shape = [1, 1, 1, input_shape[0][1]]
+        flat_shape = [input_shape[0][1]]
+        mul_axis = 1
+    scale_param = fluid.layers.create_parameter(
+        shape=param_shape,
+        dtype=data.dtype,
+        attr=fluid.ParamAttr(name=name + '_scale'))
+    scale_param = fluid.layers.reshape(x=scale_param, shape=flat_shape)
+    return fluid.layers.elementwise_mul(
+        x=l2_norm, y=scale_param, axis=mul_axis)
+def normalize_weights(name, data=None):
+    """ return the single parameter name '<name>_scale' in a list
+    """
+    return [name + '_scale']
+# Register the three Normalize functions under the layer type 'Normalize'.
+register(
+    kind='Normalize',
+    shape=normalize_shape,
+    layer=normalize_layer,
+    weights=normalize_weights)
--- a/x2paddle/x2paddle/op_mapper/caffe_custom_layer/permute.py
+++ b/x2paddle/x2paddle/op_mapper/caffe_custom_layer/permute.py
+from .register import register
+from x2paddle.core.util import *
+def permute_shape(input_shape, order=None):
+    """ compute the output shape of a Permute layer
+    Args:
+        input_shape (list): shapes of the inputs; only input_shape[0] is used
+        order (sequence): for each output dimension, the index of the input
+            dimension it takes its size from
+    Returns:
+        list: one-item list holding the permuted shape
+    Raises:
+        AssertionError: if an index in 'order' is not below the number of
+            input dimensions
+    """
+    inshape = input_shape[0]
+    output_shape = []
+    for ii in order:
+        # The message reports the offending order; the undefined 'name'
+        # used before turned this check into a NameError.
+        assert ii < len(inshape), "invalid order for permute[%s]" % (
+            str(list(order)))
+        output_shape.append(inshape[ii])
+    return [output_shape]
+def permute_layer(inputs, order=None, input_shape=None, name=None):
+    """ transpose inputs[0] with fluid, using 'order' as the permutation
+    """
+    perm = list(order)
+    return fluid.layers.transpose(inputs[0], perm=perm, name=name)
+def permute_weights(name, data=None):
+    """ Permute has no weights: always return an empty list
+    """
+    return []
+# Register the three Permute functions under the layer type 'Permute'.
+register(
+    kind='Permute',
+    shape=permute_shape,
+    layer=permute_layer,
+    weights=permute_weights)
--- a/x2paddle/x2paddle/op_mapper/caffe_custom_layer/priorbox.py
+++ b/x2paddle/x2paddle/op_mapper/caffe_custom_layer/priorbox.py
+from .register import register
+from x2paddle.core.util import *
+def priorbox_shape(input_shape, max_size=None, aspect_ratio=None):
+    """ compute the output shape of a PriorBox layer
+    Args:
+        input_shape (list): shapes of the inputs; dimensions 2 and 3 of
+            input_shape[0] give the number of grid positions
+        max_size: when not None, one more box per position is counted
+        aspect_ratio: when not None, two more boxes per position are counted
+            for each entry
+    Returns:
+        list: one-item list holding [1, 2, 4 * number_of_boxes]
+    """
+    fc_shape = input_shape[0]
+    # Boxes per grid position. Identity tests against None also work for
+    # array-like arguments, where '== None' compares element-wise.
+    N = 1
+    if max_size is not None:
+        N += 1
+    if aspect_ratio is not None:
+        N += 2 * len(aspect_ratio)
+    N_bbx = fc_shape[2] * fc_shape[3] * N
+    output_shape = [1, 2, 4 * N_bbx]
+    return [output_shape]
+def priorbox_layer(inputs,
+                   step=0.0,
+                   offset=0.5,
+                   min_size=None,
+                   max_size=[],
+                   aspect_ratio=[1.0],
+                   flip=False,
+                   clip=False,
+                   variance=[0.1, 0.1, 0.2, 0.2],
+                   input_shape=None,
+                   name=None):
+    """ build the fluid prior_box op for a PriorBox layer
+    inputs[0] and inputs[1] are handed to fluid.layers.prior_box as its
+    first two arguments. The boxes and variances it returns are each
+    reshaped to [1, 1, -1] and concatenated along axis 1.
+    """
+    feature_map = inputs[0]
+    image = inputs[1]
+    # A list or tuple 'step' is used as given; a single value is repeated.
+    if type(step) is list or type(step) is tuple:
+        steps = tuple(step)
+    else:
+        steps = (step, step)
+    boxes, variances = fluid.layers.prior_box(
+        feature_map,
+        image,
+        min_sizes=min_size,
+        max_sizes=max_size,
+        aspect_ratios=aspect_ratio,
+        variance=variance,
+        flip=flip,
+        clip=clip,
+        steps=steps,
+        offset=offset,
+        name=name,
+        min_max_aspect_ratios_order=True)
+    flat_boxes = fluid.layers.reshape(boxes, [1, 1, -1])
+    flat_variances = fluid.layers.reshape(variances, [1, 1, -1])
+    return fluid.layers.concat([flat_boxes, flat_variances], axis=1)
+def priorbox_weights(name, data=None):
+    """ PriorBox has no weights: always return an empty list
+    """
+    return []
+# Register the PriorBox callbacks defined above under the layer kind 'PriorBox'.
+register(
+    kind='PriorBox',
+    shape=priorbox_shape,
+    layer=priorbox_layer,
+    weights=priorbox_weights)
--- a/x2paddle/x2paddle/op_mapper/caffe_custom_layer/register.py
+++ b/x2paddle/x2paddle/op_mapper/caffe_custom_layer/register.py
+""" this module provides 'register' for registering customized layers
+"""
+# Registry of custom layers: layer kind -> {'shape', 'layer', 'weights'}.
+g_custom_layers = {}
+def register(kind, shape, layer, weights):
+    """Register the callbacks of one custom layer under one or more kinds.
+
+    Args:
+        kind (str or list): layer type name, or a list of type names that
+            all share the same callbacks.
+        shape (function): computes the shape of the layer's output.
+        layer (function): generates the paddle code of the layer.
+        weights (function): deals with the layer's weights data.
+
+    Returns:
+        None
+
+    Raises:
+        AssertionError: if ``shape`` or ``layer`` is not a plain function,
+            ``kind`` is neither a str nor a list of str, or a kind has
+            already been registered.
+    """
+    assert type(shape).__name__ == 'function', 'shape should be a function'
+    assert type(layer).__name__ == 'function', 'layer should be a function'
+    if type(kind) is not str:
+        assert type(
+            kind) is list, 'invalid param "kind" for register, not a list or str'
+        kinds = kind
+    else:
+        kinds = [kind]
+    for layer_kind in kinds:
+        assert type(
+            layer_kind
+        ) is str, 'invalid param "kind" for register, not a list of str'
+        assert layer_kind not in g_custom_layers, 'this type[%s] has already been registered' % (
+            layer_kind)
+        g_custom_layers[layer_kind] = {
+            'shape': shape,
+            'layer': layer,
+            'weights': weights
+        }
+def get_registered_layers():
+    """Return the module-level registry dict itself (not a copy)."""
+    return g_custom_layers
--- a/x2paddle/x2paddle/op_mapper/caffe_custom_layer/relu6.py
+++ b/x2paddle/x2paddle/op_mapper/caffe_custom_layer/relu6.py
+from .register import register
+from x2paddle.core.util import *
+def relu6_shape(input_shape):
+    """Return ``input_shape`` unchanged as the output shape of ReLU6."""
+    return input_shape
+def relu6_layer(inputs, input_shape=None, name=None):
+    """Apply ``fluid.layers.relu6`` to the first input.
+
+    ``input_shape`` and ``name`` are accepted but not used.
+    """
+    return fluid.layers.relu6(x=inputs[0])
+def relu6_weights(name, data=None):
+    """Return the weight names owned by a ReLU6 layer.
+
+    The list is always empty; ``name`` and ``data`` are not read.
+    """
+    return []
+# Register the ReLU6 callbacks defined above under the layer kind 'ReLU6'.
+register(
+    kind='ReLU6', shape=relu6_shape, layer=relu6_layer, weights=relu6_weights)
--- a/x2paddle/x2paddle/op_mapper/caffe_custom_layer/roipooling.py
+++ b/x2paddle/x2paddle/op_mapper/caffe_custom_layer/roipooling.py
+from .register import register
+from x2paddle.core.util import *
+def roipooling_shape(input_shape, pooled_w=None, pooled_h=None):
+    """Compute the output shape of a ROIPooling layer.
+
+    Args:
+        input_shape (list): ``input_shape[0]`` is the shape of the feature
+            map and ``input_shape[1]`` the shape of the ROI input.
+        pooled_w: stored as dimension 3 of the output shape.
+        pooled_h: stored as dimension 2 of the output shape.
+
+    Returns:
+        list: one shape, a copy of the feature-map shape with dimension 0
+        replaced by ``rois_shape[0]`` and dimensions 2/3 replaced by
+        ``pooled_h``/``pooled_w``.
+    """
+    feature_shape, rois_shape = input_shape[0], input_shape[1]
+    # Work on a copy so the caller's recorded input shape is left intact.
+    output_shape = list(feature_shape)
+    output_shape[0] = rois_shape[0]
+    output_shape[2] = pooled_h
+    output_shape[3] = pooled_w
+    return [output_shape]
+def roipooling_layer(inputs,
+                     pooled_w=None,
+                     pooled_h=None,
+                     spatial_scale=None,
+                     input_shape=None,
+                     name=None):
+    """Build the fluid graph for a ROIPooling layer.
+
+    Args:
+        inputs (list): ``inputs[0]`` is the feature map, ``inputs[1]`` the
+            ROI tensor.
+        pooled_w, pooled_h, spatial_scale: forwarded to
+            ``fluid.layers.roi_pool``.
+        input_shape, name: accepted but not used here.
+
+    Returns:
+        The result of ``fluid.layers.roi_pool``.
+    """
+    feature_map = inputs[0]
+    # Keep only indices 1..4 along axis 1 of the ROI tensor.
+    roi_boxes = fluid.layers.slice(inputs[1], axes=[1], starts=[1], ends=[5])
+    return fluid.layers.roi_pool(
+        feature_map,
+        roi_boxes,
+        pooled_height=pooled_h,
+        pooled_width=pooled_w,
+        spatial_scale=spatial_scale)
+def roipooling_weights(name, data=None):
+    """Return the weight names owned by a ROIPooling layer.
+
+    The list is always empty; ``name`` and ``data`` are not read.
+    """
+    return []
+# Register the ROIPooling callbacks defined above under the layer kind
+# 'ROIPooling'.
+register(
+    kind='ROIPooling',
+    shape=roipooling_shape,
+    layer=roipooling_layer,
+    weights=roipooling_weights)
--- a/x2paddle/x2paddle/op_mapper/caffe_custom_layer/select.py
+++ b/x2paddle/x2paddle/op_mapper/caffe_custom_layer/select.py
+from .register import register
+from x2paddle.core.util import *
+def select_shape(input_shape, axis=None, slice_point=None):
+    """Compute the output shape of a Select layer.
+
+    Args:
+        input_shape (list): shapes of the layer inputs; only the first one
+            is used.
+        axis (int): dimension that is sliced.
+        slice_point (list): ``[start]`` or ``[start, end]``. With any length
+            other than two, the end defaults to the size of ``axis``.
+
+    Returns:
+        list: one shape, a copy of the first input shape with dimension
+        ``axis`` replaced by ``end - start``.
+
+    Raises:
+        AssertionError: if ``end`` is not greater than ``start``.
+    """
+    inshape = input_shape[0]
+    start = slice_point[0]
+    if len(slice_point) == 2:
+        end = slice_point[1]
+    else:
+        # Default end is the extent of the sliced dimension of the input.
+        end = inshape[axis]
+    assert end > start, "invalid slice_point with [start:%d, end:%d]" % (start,
+                                                                         end)
+    # Copy so the caller's recorded input shape is left intact.
+    output_shape = list(inshape)
+    output_shape[axis] = end - start
+    return [output_shape]
+def select_layer(inputs,
+                 axis=None,
+                 slice_point=None,
+                 input_shape=None,
+                 name=None):
+    """Slice the first input into consecutive pieces along ``axis``.
+
+    The boundaries are ``0``, every entry of ``slice_point`` and the largest
+    int32 value; one ``fluid.layers.slice`` is emitted for each pair of
+    neighbouring boundaries, named ``name + '_' + str(i)``.
+
+    Args:
+        inputs (list): only ``inputs[0]`` is used.
+        axis (int): axis to slice along.
+        slice_point (list): interior boundaries; the list itself is not
+            modified.
+        input_shape: accepted but not used here.
+        name (str): prefix for the slice layer names.
+
+    Returns:
+        list: the slice outputs, in boundary order.
+    """
+    source = inputs[0]
+    int32_max = 2147483647
+    bounds = [0] + slice_point + [int32_max]
+    pieces = []
+    for i, (begin, finish) in enumerate(zip(bounds[:-1], bounds[1:])):
+        pieces.append(
+            fluid.layers.slice(
+                source,
+                axes=[axis],
+                starts=[begin],
+                ends=[finish],
+                name=name + '_' + str(i)))
+    return pieces
+def select_weights(name, data=None):
+    """Return the weight names owned by a Select layer.
+
+    The list is always empty; ``name`` and ``data`` are not read.
+    """
+    return []
+# Register the Select callbacks defined above under the layer kind 'Select'.
+register(
+    kind='Select',
+    shape=select_shape,
+    layer=select_layer,
+    weights=select_weights)
--- a/x2paddle/x2paddle/op_mapper/caffe_custom_layer/shufflechannel.py
+++ b/x2paddle/x2paddle/op_mapper/caffe_custom_layer/shufflechannel.py
+from .register import register
+from x2paddle.core.util import *
+def shufflechannel_shape(input_shape):
+    """Return ``input_shape`` unchanged as the output shape of ShuffleChannel."""
+    return input_shape
+def shufflechannel_layer(inputs, group=None, input_shape=None, name=None):
+    """Apply ``fluid.layers.shuffle_channel`` to the first input.
+
+    ``group`` is forwarded; ``input_shape`` and ``name`` are not used.
+    """
+    return fluid.layers.shuffle_channel(x=inputs[0], group=group)
+def shufflechannel_weights(name, data=None):
+    """Return the weight names owned by a ShuffleChannel layer.
+
+    The list is always empty; ``name`` and ``data`` are not read.
+    """
+    return []
+# Register the ShuffleChannel callbacks defined above under the layer kind
+# 'ShuffleChannel'.
+register(
+    kind='ShuffleChannel',
+    shape=shufflechannel_shape,
+    layer=shufflechannel_layer,
+    weights=shufflechannel_weights)
--- a/x2paddle/x2paddle/op_mapper/caffe_custom_layer/upsample.py
+++ b/x2paddle/x2paddle/op_mapper/caffe_custom_layer/upsample.py
+# -*- coding: utf-8 -*-
+################################################################################
+#
+# Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved
+#
+################################################################################
+"""
+Author: Drift
+Email:  wutuobang@baidu.com
+Date:   2020/04/22 18:45
+"""
+from .register import register
+from x2paddle.core.util import *
+def upsample_shape(input_shapes, scale):
+    """Compute the output shape of an Upsample layer.
+
+    :param input_shapes: list holding exactly one input shape
+    :param scale: int factor applied to dimensions 2 and 3
+    :return: list with one shape; dimensions 0 and 1 are copied from the
+        input, dimensions 2 and 3 are multiplied by ``scale``
+    """
+    assert len(input_shapes) == 1, "not valid input shape for upsample layer"
+    assert type(scale) is int
+    source = input_shapes[0]
+    scaled_h, scaled_w = scale * source[2], scale * source[3]
+    return [[source[0], source[1], scaled_h, scaled_w]]
+def upsample_layer(inputs, scale, input_shape=None, name=None):
+    """Resize the first input with ``fluid.layers.resize_nearest``.
+
+    :param inputs: list of inputs; only ``inputs[0]`` is used
+    :param scale: forwarded as the ``scale`` argument
+    :param input_shape: accepted but not used here
+    :param name: forwarded as the layer name
+    :return: the resized output (``align_corners`` is fixed to False)
+    """
+    return fluid.layers.resize_nearest(
+        inputs[0], align_corners=False, scale=scale, name=name)
+def upsample_weights(name, data=None):
+    """Return the weight names owned by an Upsample layer.
+
+    :param name: not read
+    :param data: not read
+    :return: always a new empty list
+    """
+    return []
+# Register the Upsample callbacks defined above under the layer kind
+# 'Upsample'.
+register(
+    kind='Upsample',
+    shape=upsample_shape,
+    layer=upsample_layer,
+    weights=upsample_weights)
--- a/x2paddle/x2paddle/op_mapper/caffe_op_mapper.py
+++ b/x2paddle/x2paddle/op_mapper/caffe_op_mapper.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numbers
+import numpy as np
+from x2paddle.decoder.caffe_decoder import CaffeGraph
+from x2paddle.core.op_mapper import OpMapper
+from x2paddle.core.util import *
+from x2paddle.op_mapper import caffe_shape
+from x2paddle.op_mapper.caffe_custom_layer import *
+class CaffeOpMapper(OpMapper):
+    # Caffe layer types that __init__ hands to self.directly_map(), keyed to
+    # a lower-case name (presumably the fluid layer to emit; directly_map is
+    # defined outside this view).
+    directly_map_ops = {
+        'AbsVal': 'abs',
+        'Sigmoid': 'sigmoid',
+        'TanH': 'tanh',
+    }
+    def __init__(self, decoder):
+        """Translate every node of the decoded Caffe graph.
+
+        Nodes are visited in ``graph.topo_sort`` order. For each node the
+        handler is looked up in this order: a method of this class named
+        after the layer type, a registered custom layer, then
+        ``directly_map_ops``.
+
+        Args:
+            decoder: object providing ``caffe_graph`` and ``resolver``.
+
+        Raises:
+            Exception: if a layer type has no handler.
+        """
+        super(CaffeOpMapper, self).__init__()
+        self.graph = decoder.caffe_graph
+        self.weights = dict()
+        resolver = decoder.resolver  # not used further in this method
+        self.used_custom_layers = {}
+        print("Total nodes: {}".format(len(self.graph.topo_sort)))
+        for node_name in self.graph.topo_sort:
+            node = self.graph.get_node(node_name)
+            # Normalise the alternative spelling of the depthwise layer type.
+            if node.layer_type == 'DepthwiseConvolution':
+                node.layer_type = 'ConvolutionDepthwise'
+            op = node.layer_type
+            if hasattr(self, op):
+                self.set_node_shape(node)
+                getattr(self, op)(node)
+                continue
+            if op in custom_layers:
+                self.set_node_shape(node, is_fluid_op=False)
+                self.deal_custom_layer(node)
+                continue
+            if op not in self.directly_map_ops:
+                raise Exception(
+                    "The op {} in model is not supported yet.".format(op))
+            self.set_node_shape(node)
+            self.directly_map(node)
+    def op_checker(self):
+        """Report whether every layer type in the graph can be converted.
+
+        A layer type counts as supported when this class has an attribute of
+        that name, when it is listed in ``directly_map_ops``, or when it is a
+        registered custom layer, the same three cases ``__init__``
+        dispatches on.
+
+        Returns:
+            bool: True when all layer types are supported. Otherwise the
+            unsupported types are printed and False is returned.
+        """
+        unsupported_ops = set()
+        for node_name in self.graph.topo_sort:
+            op = self.graph.get_node(node_name).layer_type
+            supported = (hasattr(self, op) or op in self.directly_map_ops or
+                         op in custom_layers)
+            if not supported:
+                unsupported_ops.add(op)
+        if not unsupported_ops:
+            return True
+        print("There are {} ops not supported yet, list as below".format(
+            len(unsupported_ops)))
+        for op in unsupported_ops:
+            print(op)
+        return False
+    def set_node_shape(self, node, is_fluid_op=True):
+        """Fill in ``node.input_shape`` and ``node.output_shape``.
+
+        For every input, the producing node's output shape is selected by
+        the position of the consumed blob name in the producer's ``top``
+        list.
+
+        Args:
+            node: graph node to annotate.
+            is_fluid_op (bool): when True the output shape comes from
+                ``caffe_shape.shape_<layer type in lower case>``; otherwise
+                from ``compute_output_shape``.
+        """
+        shapes = []
+        for pos, producer_name in enumerate(node.inputs):
+            producer = self.graph.get_node(producer_name)
+            blob_name = node.layer.bottom[pos]
+            top_idx = list(producer.layer.top).index(blob_name)
+            shapes.append(producer.output_shape[top_idx])
+        node.input_shape = shapes
+        func_name = 'shape_' + node.layer_type.lower()
+        if not is_fluid_op:
+            node.output_shape = compute_output_shape(node)
+            return
+        shape_func = getattr(caffe_shape, func_name)
+        node.output_shape = shape_func(node.layer, shapes)
+    def adjust_parameters(self, node):
+        """Return ``node.data`` as a list with singleton dimensions removed.
+
+        When using the protobuf backend each parameter initially has four
+        dimensions. The bias (blob 1) is squeezed over axes (0, 1, 2); for
+        InnerProduct layers the weight (blob 0) is also squeezed over axes
+        (0, 1). This covers the common cases only.
+
+        Args:
+            node: graph node providing ``data`` and ``layer_type``.
+
+        Returns:
+            list: a new list of blobs; blobs that are not squeezed are the
+            original objects.
+
+        Raises:
+            AssertionError: if a blob that is about to be squeezed is not 4-D.
+        """
+        blobs = list(node.data)
+        # (blob index, axes to squeeze), processed in this order.
+        squeeze_plan = [(1, (0, 1, 2))]
+        if node.layer_type == 'InnerProduct':
+            squeeze_plan.append((0, (0, 1)))
+        for idx, axes in squeeze_plan:
+            if idx >= len(blobs):
+                continue
+            blob = blobs[idx]
+            assert len(
+                blob.shape
+            ) == 4, 'invalid shape[%s] from caffe when adjust_parameters' % (
+                str(blob.shape))
+            blobs[idx] = np.squeeze(blob, axis=axes)
+        return blobs
+    def get_kernel_parameters(self, kind, params):
+        """Extract kernel, stride, pad, dilation and group from a layer param.
+
+        ``kernel_size``, ``stride`` and ``pad`` may each be a scalar or a
+        sequence. Explicit ``*_h`` / ``*_w`` fields that are greater than
+        zero take precedence, including when the generic field is a scalar.
+
+        Args:
+            kind (str): 'Convolution', 'Pooling' or 'Deconvolution'.
+            params: the layer's parameter message.
+
+        Returns:
+            tuple: ``(num_output, [k_h, k_w], [s_h, s_w], [p_h, p_w],
+            [dila_h, dila_w], group)``. For Pooling, ``num_output`` and
+            ``group`` are 1 and the dilation is ``[1, 1]``.
+
+        Raises:
+            AssertionError: on an unknown ``kind`` or a dilation list longer
+                than two.
+        """
+        assert kind in ['Convolution', 'Pooling', 'Deconvolution']
+
+        def resolve_hw(field, prefix, default):
+            # Return [h, w] for one of kernel_size / stride / pad.
+            explicit_h = getattr(params, prefix + '_h', 0)
+            explicit_w = getattr(params, prefix + '_w', 0)
+            value = getattr(params, field)
+            if isinstance(value, numbers.Number):
+                # Scalar field: explicit h/w win when either is set.
+                if explicit_h > 0 or explicit_w > 0:
+                    return [explicit_h, explicit_w]
+                return [value, value]
+            if len(value) > 0:
+                return [
+                    explicit_h if explicit_h > 0 else value[0],
+                    explicit_w if explicit_w > 0 else value[len(value) - 1]
+                ]
+            if explicit_h > 0 or explicit_w > 0:
+                return [explicit_h, explicit_w]
+            return [default, default]
+
+        kernel = resolve_hw('kernel_size', 'kernel', 1)
+        stride = resolve_hw('stride', 'stride', 1)
+        pad = resolve_hw('pad', 'pad', 0)
+        dila_h = dila_w = 1
+        group = 1
+        c_o = 1
+        if kind in ['Convolution', 'Deconvolution']:
+            c_o = params.num_output
+            dila_len = len(params.dilation)
+            if dila_len == 2:
+                dila_h = params.dilation[0]
+                dila_w = params.dilation[1]
+            elif dila_len == 1:
+                dila_h = dila_w = params.dilation[0]
+            else:
+                assert dila_len == 0, "invalid length[%s] of dilation in convolution" % (
+                    dila_len)
+            group = params.group
+        dilation = [dila_h, dila_w]
+        return c_o, kernel, stride, pad, dilation, group
+    def get_input_name(self, node):
+        """Return the expression naming ``node`` in the generated code.
+
+        Nodes that carry an ``index`` attribute yield ``layer_name[index]``;
+        all others yield ``layer_name``.
+        """
+        if not hasattr(node, "index"):
+            return node.layer_name
+        return node.layer_name + "[{}]".format(node.index)
+    def Input(self, node):
+        """Emit a float32 ``data`` layer for an Input node.
+
+        The shape is the first entry of ``input_param.shape`` without its
+        leading dimension.
+        """
+        full_dims = list(node.layer.input_param.shape[0].dim)
+        attr = {
+            'dtype': string('float32'),
+            'shape': full_dims[1:],
+            'name': string(node.layer_name)
+        }
+        node.fluid_code.add_layer(
+            "data", inputs=None, output=node, param_attr=attr)
+    def MemoryData(self, node):
+        """Emit a float32 ``data`` layer for a MemoryData node.
+
+        The data layer is written to ``<layer_name>0`` and a note line
+        ``<layer_name> = [<layer_name>0]`` is added after it.
+        """
+        # TODO(syf): Paddlepaddle can't fully support
+        data_name = node.layer_name + '0'
+        attr = {
+            'dtype': string('float32'),
+            # First output shape without its leading dimension.
+            'shape': node.output_shape[0][1:],
+            'name': string(node.layer_name)
+        }
+        node.fluid_code.add_layer(
+            "data", inputs=None, output=data_name, param_attr=attr)
+        node.fluid_code.add_note('{} = [{}]'.format(node.layer_name,
+                                                    data_name))
+    def Convolution(self, node):
+        """Emit a ``conv2d`` layer and record its weights.
+
+        When the node has no data, zero-filled float32 weights and bias are
+        recorded instead. The bias is recorded only when exactly two blobs
+        are present; with a single blob ``bias_attr`` is False.
+
+        Raises:
+            AssertionError: if the node does not have exactly one input.
+        """
+        conv_param = node.layer.convolution_param
+        channel, kernel, stride, pad, dilation, group = self.get_kernel_parameters(
+            node.layer_type, conv_param)
+        if node.data is not None:
+            data = self.adjust_parameters(node)
+        else:
+            print(
+                'The parameter of {} (type is {}) is not set. So we set the parameters as 0'
+                .format(node.layer_name, node.layer_type))
+            # NOTE(review): the placeholder filter is [out, in, kh, kw]
+            # regardless of `group` - confirm this is intended.
+            filter_shape = [
+                channel, node.input_shape[0][1], kernel[0], kernel[1]
+            ]
+            data = [
+                np.zeros(filter_shape).astype('float32'),
+                np.zeros([channel, ]).astype('float32'),
+            ]
+        weights_key = node.layer_name + '_weights'
+        bias_key = node.layer_name + '_bias'
+        self.weights[weights_key] = data[0]
+        if len(data) == 2:
+            self.weights[bias_key] = data[1]
+        assert len(node.inputs
+                   ) == 1, 'The count of Convolution node\'s input is not 1.'
+        input = self.graph.get_bottom_node(node, idx=0, copy=True)
+        if len(data) == 1:
+            bias_attr = False
+        else:
+            bias_attr = string(bias_key)
+        attr = {
+            'filter_size': kernel,
+            'num_filters': channel,
+            'stride': stride,
+            'padding': pad,
+            'dilation': dilation,
+            'groups': group,
+            'name': string(node.layer_name),
+            'param_attr': string(weights_key),
+            'bias_attr': bias_attr,
+        }
+        node.fluid_code.add_layer(
+            "conv2d", inputs=input, output=node, param_attr=attr)
+    def Deconvolution(self, node):
+        """Emit a ``conv2d_transpose`` layer and record its weights.
+
+        When the node has no data, zero-filled float32 weights and bias are
+        recorded instead. The bias is recorded only when exactly two blobs
+        are present; with a single blob ``bias_attr`` is False.
+
+        Raises:
+            AssertionError: if the node does not have exactly one input.
+        """
+        conv_param = node.layer.convolution_param
+        channel, kernel, stride, pad, dilation, group = self.get_kernel_parameters(
+            node.layer_type, conv_param)
+        if node.data is not None:
+            data = self.adjust_parameters(node)
+        else:
+            print(
+                'The parameter of {} (type is {}) is not set. So we set the parameters as 0'
+                .format(node.layer_name, node.layer_type))
+            # NOTE(review): the placeholder filter is laid out as
+            # [out, in, kh, kw] - confirm this matches conv2d_transpose.
+            filter_shape = [
+                channel, node.input_shape[0][1], kernel[0], kernel[1]
+            ]
+            data = [
+                np.zeros(filter_shape).astype('float32'),
+                np.zeros([channel, ]).astype('float32'),
+            ]
+        weights_key = node.layer_name + '_weights'
+        bias_key = node.layer_name + '_bias'
+        self.weights[weights_key] = data[0]
+        if len(data) == 2:
+            self.weights[bias_key] = data[1]
+        assert len(node.inputs
+                   ) == 1, 'The count of Deconvolution node\'s input is not 1.'
+        input = self.graph.get_bottom_node(node, idx=0, copy=True)
+        if len(data) == 1:
+            bias_attr = False
+        else:
+            bias_attr = string(bias_key)
+        attr = {
+            'output_size': None,
+            'filter_size': kernel,
+            'num_filters': channel,
+            'stride': stride,
+            'padding': pad,
+            'dilation': dilation,
+            'groups': group,
+            'name': string(node.layer_name),
+            'param_attr': string(weights_key),
+            'bias_attr': bias_attr
+        }
+        node.fluid_code.add_layer(
+            "conv2d_transpose", inputs=input, output=node, param_attr=attr)
+    def Pooling(self, node):
+        """Emit a ``pool2d`` layer for a Pooling node.
+
+        ``pool == 0`` selects max pooling; every other value selects average
+        pooling. ``exclusive`` is always False.
+
+        Raises:
+            AssertionError: if the node does not have exactly one input.
+        """
+        pool_param = node.layer.pooling_param
+        ceil_mode = getattr(pool_param, 'ceil_mode', True)
+        global_pool = getattr(pool_param, 'global_pooling', False)
+        _, kernel, stride, pad, _, _ = self.get_kernel_parameters(
+            node.layer_type, pool_param)
+        pool_type = 'max' if pool_param.pool == 0 else 'avg'
+        assert len(
+            node.inputs) == 1, 'The count of Pooling node\'s input is not 1.'
+        input = self.graph.get_bottom_node(node, idx=0, copy=True)
+        attr = {
+            'pool_size': kernel,
+            'pool_stride': stride,
+            'pool_padding': pad,
+            'ceil_mode': ceil_mode,
+            'pool_type': string(pool_type),
+            'exclusive': False,
+            'global_pooling': global_pool,
+            'name': string(node.layer_name)
+        }
+        node.fluid_code.add_layer(
+            "pool2d", inputs=input, output=node, param_attr=attr)
+    def LRN(self, node):
+        """Emit an ``lrn`` layer for an LRN node.
+
+        Raises:
+            AssertionError: if the node does not have exactly one input or
+                ``local_size`` is even.
+        """
+        assert len(node.inputs) == 1, 'The count of LRN node\'s input is not 1.'
+        lrn_param = node.layer.lrn_param
+        # The window size must be odd; it is forwarded unchanged as `n`.
+        assert lrn_param.local_size % 2 == 1
+        # Caffe scales by (alpha / local_size), whereas Paddle just scales
+        # by alpha (as does Krizhevsky's paper), so pre-divide here.
+        scaled_alpha = lrn_param.alpha / float(lrn_param.local_size)
+        input = self.graph.get_bottom_node(node, idx=0, copy=True)
+        attr = {
+            'n': lrn_param.local_size,
+            'k': lrn_param.k,
+            'alpha': scaled_alpha,
+            'beta': lrn_param.beta,
+            'name': string(node.layer_name)
+        }
+        node.fluid_code.add_layer(
+            "lrn", inputs=input, output=node, param_attr=attr)
+    def InnerProduct(self, node):
+        """Emit an ``fc`` layer and record its weights.
+
+        Existing weights are flattened to ``(num_output, -1)`` and transposed
+        to Paddle's ordering. When the node has no data, zero-filled float32
+        weights and bias are recorded instead.
+
+        Raises:
+            AssertionError: if the node does not have exactly one input,
+                ``axis`` is not 1 or ``bias_term`` is not set.
+        """
+        fc_param = node.layer.inner_product_param
+        if node.data is not None:
+            data = self.adjust_parameters(node)
+            # Reshape the parameters to Paddle's ordering.
+            weight = data[0]
+            out_channels = weight.shape[0]
+            data[0] = weight.reshape((out_channels, -1)).transpose((1, 0))
+        else:
+            print(
+                'The parameter of {} (type is {}) is not set. So we set the parameters as 0.'
+                .format(node.layer_name, node.layer_type))
+            input_c = node.input_shape[0][1]
+            output_c = fc_param.num_output
+            data = [
+                np.zeros([input_c, output_c]).astype('float32'),
+                np.zeros([output_c]).astype('float32'),
+            ]
+        weights_key = node.layer_name + '_weights'
+        bias_key = node.layer_name + '_bias'
+        self.weights[weights_key] = data[0]
+        if len(data) == 2:
+            self.weights[bias_key] = data[1]
+        assert len(node.inputs
+                   ) == 1, 'The count of InnerProduct node\'s input is not 1.'
+        assert fc_param.axis == 1
+        assert fc_param.bias_term == True
+        input = self.graph.get_bottom_node(node, idx=0, copy=True)
+        if len(data) == 1:
+            bias_attr = False
+        else:
+            bias_attr = string(bias_key)
+        attr = {
+            'size': fc_param.num_output,
+            'name': string(node.layer_name),
+            'act': None,
+            'param_attr': string(weights_key),
+            'bias_attr': bias_attr
+        }
+        node.fluid_code.add_layer(
+            "fc", inputs=input, output=node, param_attr=attr)
+    def Softmax(self, node):
+        """Emit a ``softmax`` layer; a negative axis is made non-negative.
+
+        Raises:
+            AssertionError: if the node does not have exactly one input.
+        """
+        assert len(
+            node.inputs) == 1, 'The count of Softmax node\'s input is not 1.'
+        input = self.graph.get_bottom_node(node, idx=0, copy=True)
+        axis = node.layer.softmax_param.axis
+        rank = len(node.input_shape[0])
+        if axis < 0:
+            # Count from the end of the input shape.
+            axis = axis + rank
+        attr = {'axis': axis, 'name': string(node.layer_name + '_softmax')}
+        node.fluid_code.add_layer(
+            "softmax", inputs=input, output=node, param_attr=attr)
+    def Slice(self, node):
+        """Emit a ``split`` layer for a Slice node.
+
+        The section sizes are read from the node's output shapes along the
+        slice axis. ``slice_dim`` replaces ``axis`` when ``axis`` is 1 and
+        ``slice_dim`` is not.
+
+        Raises:
+            AssertionError: if the node does not have exactly one input.
+        """
+        assert len(
+            node.inputs) == 1, 'The count of Slice node\'s input is not 1.'
+        input = self.graph.get_bottom_node(node, idx=0, copy=True)
+        slice_param = node.layer.slice_param
+        axis = slice_param.axis
+        slice_dim = slice_param.slice_dim
+        if axis == 1 and slice_dim != 1:
+            axis = slice_dim
+        sections = [shape[axis] for shape in node.output_shape]
+        attr = {
+            'num_or_sections': sections,
+            'dim': axis,
+            'name': string(node.layer_name)
+        }
+        node.fluid_code.add_layer(
+            "split", inputs=input, output=node.layer_name, param_attr=attr)
+    def Concat(self, node):
+        """Emit a ``concat`` layer over all inputs of the node.
+
+        Raises:
+            AssertionError: if the node has no input.
+        """
+        assert len(
+            node.inputs
+        ) >= 1, 'The count of Concat node\'s input is not more than 1.'
+        bottoms = [
+            self.graph.get_bottom_node(node, idx=pos, copy=True)
+            for pos in range(len(node.inputs))
+        ]
+        attr = {
+            'axis': node.layer.concat_param.axis,
+            'name': string(node.layer_name)
+        }
+        node.fluid_code.add_layer(
+            "concat", inputs=bottoms, output=node, param_attr=attr)
+    def ReLU(self, node):
+        """Emit ``relu``, or ``leaky_relu`` when a non-zero slope is set.
+
+        :param node: graph node of type ReLU with exactly one input
+        :return: None
+        """
+        assert len(
+            node.inputs) == 1, 'The count of ReLU node\'s input is not 1.'
+        input = self.graph.get_bottom_node(node, idx=0, copy=True)
+        relu_param = node.layer.relu_param
+        is_leaky = (relu_param.HasField('negative_slope') and
+                    relu_param.negative_slope != 0)
+        if not is_leaky:
+            node.fluid_code.add_layer('relu', inputs=input, output=node)
+            return
+        attr = {'alpha': float(relu_param.negative_slope)}
+        node.fluid_code.add_layer(
+            'leaky_relu', inputs=input, output=node, param_attr=attr)
+    def PReLU(self, node):
+        """Emit a ``prelu`` layer and record its weights.
+
+        The mode is 'all' when ``channel_shared`` is set, else 'channel'.
+
+        Raises:
+            AssertionError: if the node does not have exactly one input or
+                has no data.
+        """
+        assert len(
+            node.inputs) == 1, 'The count of PReLU node\'s input is not 1.'
+        input = self.graph.get_bottom_node(node, idx=0, copy=True)
+        mode = 'all' if node.layer.prelu_param.channel_shared else 'channel'
+        blobs = node.data
+        assert blobs is not None, 'The parameter of {} (type is {}) is not set. You need to use python package of caffe to set the default value.'.format(
+            node.layer_name, node.layer_type)
+        weights_key = node.layer_name + '_weights'
+        self.weights[weights_key] = blobs[0]
+        attr = {
+            'mode': string(mode),
+            'param_attr': string(weights_key),
+            'name': string(node.layer_name)
+        }
+        node.fluid_code.add_layer(
+            "prelu", inputs=input, output=node, param_attr=attr)
+    def Accuracy(self, node):
+        """Emit an ``accuracy`` layer for an Accuracy node.
+
+        The input whose shape has 1 in dimension 1 is passed second (treated
+        as the label); the other input is passed first.
+
+        Raises:
+            AssertionError: if the node does not have exactly two inputs,
+                ``axis`` is not 1, or a non-negative ``ignore_label`` has
+                been set explicitly.
+        """
+        assert len(
+            node.inputs) == 2, 'The count of Accuracy node\'s input is not 2.'
+        # Slot 0: predictions, slot 1: label. Pre-sized so both slots can be
+        # assigned by index.
+        inputs = [None, None]
+        for i, shape in enumerate(node.input_shape):
+            bottom = self.graph.get_bottom_node(node, idx=i, copy=True)
+            if shape[1] == 1:
+                inputs[1] = bottom
+            else:
+                inputs[0] = bottom
+        params = node.layer.accuracy_param
+        top_k = params.top_k
+        axis = params.axis
+        # An unset ignore_label reads back as 0, so only an explicitly set
+        # field counts as "the model has an ignore label".
+        has_ignore_label = (params.HasField('ignore_label') and
+                            params.ignore_label >= 0)
+        assert axis == 1, 'PaddlePaddle can not support the situation when the axis is not 1.'
+        assert not has_ignore_label, 'PaddlePaddle can not support the situation when the model has ignore label.'
+        attr = {'k': top_k}
+        node.fluid_code.add_layer(
+            "accuracy", inputs=inputs, output=node, param_attr=attr)
+    def Eltwise(self, node):
+        """Emit the element-wise layer(s) for an Eltwise node.
+
+        ``operation`` 0 maps to ``elementwise_mul``, 1 to
+        ``elementwise_add`` and every other value to ``elementwise_max``.
+        For operation 1 with exactly two coefficients, each input is first
+        multiplied by its coefficient (a ``fill_constant`` followed by an
+        ``elementwise_mul``) and the two products are then added.
+
+        Raises:
+            AssertionError: if the node does not have exactly two inputs.
+        """
+        assert len(
+            node.inputs) == 2, 'The count of Eltwise node\'s input is not 2.'
+        params = node.layer.eltwise_param
+        mode = params.operation
+        inputs = [
+            self.graph.get_bottom_node(node, idx=0, copy=True),
+            self.graph.get_bottom_node(node, idx=1, copy=True),
+        ]
+        weighted_sum = (mode == 1 and hasattr(params, 'coeff') and
+                        len(params.coeff) == 2)
+        if weighted_sum:
+            coeff = params.coeff
+            for pos in (0, 1):
+                suffix = str(pos + 1)
+                input_name = self.get_input_name(inputs[pos])
+                const_name = node.layer_name + '_const' + suffix
+                mul_name = node.layer_name + '_mul' + suffix
+                # Constant holding this input's coefficient, in its dtype.
+                attr = {
+                    'shape': [1],
+                    'value': coeff[pos],
+                    'dtype': '{}.dtype'.format(input_name)
+                }
+                node.fluid_code.add_layer(
+                    "fill_constant",
+                    inputs=None,
+                    output=const_name,
+                    param_attr=attr)
+                attr = {'act': None, 'name': string(mul_name)}
+                node.fluid_code.add_layer(
+                    "elementwise_mul",
+                    inputs=input_name + ', ' + const_name,
+                    output=mul_name,
+                    param_attr=attr)
+            attr = {'act': None, 'name': string(node.layer_name)}
+            node.fluid_code.add_layer(
+                "elementwise_add",
+                inputs='{}_mul1, {}_mul2'.format(node.layer_name,
+                                                 node.layer_name),
+                output=node,
+                param_attr=attr)
+            return
+        if mode == 0:
+            fluid_op = "elementwise_mul"
+        elif mode == 1:
+            fluid_op = "elementwise_add"
+        else:
+            fluid_op = "elementwise_max"
+        inputs_dict = {}
+        inputs_dict['x'] = inputs[0]
+        inputs_dict['y'] = inputs[1]
+        attr = {'act': None, 'name': string(node.layer_name)}
+        node.fluid_code.add_layer(
+            fluid_op, inputs=inputs_dict, output=node, param_attr=attr)
+    def BatchNorm(self, node):
+        """Emit a ``batch_norm`` layer and record its mean and variance.
+
+        The stored mean and variance are multiplied by ``1 / scale`` (the
+        third blob), or by 0 when the scale is 0. When the node does not
+        carry exactly three blobs, zero-filled statistics are recorded.
+
+        Raises:
+            AssertionError: if the node does not have exactly one input.
+        """
+        assert len(
+            node.inputs) == 1, 'The count of BatchNorm node\'s input is not 1.'
+        input = self.graph.get_bottom_node(node, idx=0, copy=True)
+        eps = getattr(node.layer.batch_norm_param, 'eps', 1e-5)
+        if node.data is not None and len(node.data) == 3:
+            node.data = [
+                np.squeeze(blob).astype('float32') for blob in node.data
+            ]
+            mean, variance, scale = node.data
+        else:
+            print(
+                'The parameter of {} (type is {}) is not set. So we set the parameters as 0'
+                .format(node.layer_name, node.layer_type))
+            input_c = node.input_shape[0][1]
+            mean = np.zeros([input_c, ]).astype('float32')
+            variance = np.zeros([input_c, ]).astype('float32')
+            scale = 0
+        # Prescale the stats
+        if scale != 0:
+            scaling_factor = 1.0 / scale
+        else:
+            scaling_factor = 0
+        mean *= scaling_factor
+        variance *= scaling_factor
+        mean_key = node.layer_name + '_mean'
+        variance_key = node.layer_name + '_variance'
+        self.weights[mean_key] = mean
+        self.weights[variance_key] = variance
+        attr = {
+            'is_test': True,
+            'param_attr': None,
+            'bias_attr': None,
+            'moving_mean_name': string(mean_key),
+            'moving_variance_name': string(variance_key),
+            'epsilon': eps,
+            'name': string(node.layer_name)
+        }
+        node.fluid_code.add_layer(
+            "batch_norm", inputs=input, output=node, param_attr=attr)
+    def Scale(self, node):
+        """Emit ``elementwise_mul`` followed by ``elementwise_add`` for a Scale node.
+        The node's two blobs (or zeros when ``node.data`` is None) are stored in
+        ``self.weights`` as ``<layer_name>_scale`` and ``<layer_name>_offset``.
+        With two inputs the second input is the multiplier and the axis is
+        forced to 1; otherwise the multiplier is a created parameter. The
+        offset is always a created parameter added to ``<layer_name>_mul``.
+        """
+        scale_key = node.layer_name + '_scale'
+        offset_key = node.layer_name + '_offset'
+        if node.data is not None:
+            self.weights[scale_key] = np.squeeze(node.data[0]).astype(
+                'float32')
+            self.weights[offset_key] = np.squeeze(node.data[1]).astype(
+                'float32')
+        else:
+            print(
+                'The parameter of {} (type is {}) is not set. So we set the parameters as 0'
+                .format(node.layer_name, node.layer_type))
+            channels = node.input_shape[0][1]
+            self.weights[scale_key] = np.zeros([channels, ]).astype('float32')
+            self.weights[offset_key] = np.zeros([channels, ]).astype('float32')
+        scale_param = node.layer.scale_param
+        axis = scale_param.axis
+        num_axes = scale_param.num_axes
+        mul_name = node.layer_name + '_mul'
+        if len(node.inputs) == 2:
+            # With two tensors the axis is reset to 1 here; this may be wrong
+            # for unknown cases.
+            axis = 1
+            bias_shape = node.input_shape[0][axis:axis + num_axes]
+            input0 = self.graph.get_bottom_node(node, idx=0, copy=True)
+            input1 = self.graph.get_bottom_node(node, idx=1, copy=True)
+            mul_inputs = {'x': input0, 'y': input1}
+            mul_attr = {'axis': axis, 'name': string(mul_name)}
+            node.fluid_code.add_layer(
+                "elementwise_mul",
+                inputs=mul_inputs,
+                output=mul_name,
+                param_attr=mul_attr)
+        else:
+            bias_shape = node.input_shape[0][axis:axis + num_axes]
+            input0 = self.graph.get_bottom_node(node, idx=0, copy=True)
+            bottom_name = self.get_input_name(input0)
+            # The multiplier parameter is written to the node's own output name.
+            scale_attr = {
+                'dtype': '{}.dtype'.format(bottom_name),
+                'shape': bias_shape,
+                'name': string(node.layer_name + '_cparam1'),
+                'attr': string(scale_key),
+                'is_bias': True,
+                'default_initializer': 'Constant(value=1.0)'
+            }
+            node.fluid_code.add_layer(
+                "create_parameter",
+                inputs=None,
+                output=node,
+                param_attr=scale_attr)
+            mul_inputs = {'x': input0, 'y': node}
+            mul_attr = {'axis': axis, 'name': string(mul_name)}
+            node.fluid_code.add_layer(
+                "elementwise_mul",
+                inputs=mul_inputs,
+                output=mul_name,
+                param_attr=mul_attr)
+        bottom_name = self.get_input_name(input0)
+        offset_attr = {
+            'dtype': '{}.dtype'.format(bottom_name),
+            'shape': bias_shape,
+            'name': string(node.layer_name + '_cparam2'),
+            'attr': string(offset_key),
+            'is_bias': True,
+            'default_initializer': 'Constant(value=1.0)'
+        }
+        node.fluid_code.add_layer(
+            "create_parameter",
+            inputs=None,
+            output=node.layer_name + '_offset_param',
+            param_attr=offset_attr)
+        add_attr = {'axis': axis, 'name': string(node.layer_name + '_add')}
+        node.fluid_code.add_layer(
+            "elementwise_add",
+            inputs='{0}_mul, {0}_offset_param'.format(node.layer_name),
+            output=node,
+            param_attr=add_attr)
+    def Reshape(self, node):
+        """Emit a ``reshape`` layer targeting the node's first output shape.
+        ``inplace`` is set whenever the bottom layer does not have exactly one
+        top.
+        """
+        bottom = self.graph.get_bottom_node(node, idx=0, copy=True)
+        use_inplace = len(bottom.layer.top) != 1
+        target_shape = node.output_shape[0]
+        layer_attr = {
+            'shape': target_shape,
+            'inplace': use_inplace,
+            'act': None,
+            'name': string(node.layer_name)
+        }
+        node.fluid_code.add_layer(
+            "reshape", inputs=bottom, output=node, param_attr=layer_attr)
+    def ArgMax(self, node):
+        """Emit ``topk`` for an ArgMax node with one input and one output.
+        Reads ``out_max_val`` (default False), ``top_k`` (default 1) and
+        ``axis`` (default -1) from ``node.layer.argmax_param``. When
+        ``out_max_val`` is True, the top-k values and the indices (cast to the
+        values' dtype) are concatenated along ``axis``; otherwise only the
+        index output of ``topk`` is bound to the node's name.
+        """
+        assert len(node.inputs) == 1 and len(
+            node.outputs
+        ) == 1, 'The count of ArgMax node\'s input and output is not 1.'
+        bottom = self.graph.get_bottom_node(node, idx=0, copy=True)
+        input_shape = node.input_shape[0]
+        params = node.layer.argmax_param
+        # Attribute names must be passed as strings; the previous code passed
+        # undefined bare names (and misspelled ``params``), raising NameError.
+        out_max_val = getattr(params, 'out_max_val', False)
+        top_k = getattr(params, 'top_k', 1)
+        axis = getattr(params, 'axis', -1)
+        if axis < 0:
+            axis += len(input_shape)
+        if out_max_val is True:
+            topk_var = '{}_topk_var'.format(node.layer_name)
+            index_var = '{}_index_var'.format(node.layer_name)
+            attr = {'k': top_k, 'name': string(node.layer_name + '_topk')}
+            node.fluid_code.add_layer(
+                "topk",
+                inputs=bottom,
+                output='{}, {}'.format(topk_var, index_var),
+                param_attr=attr)
+            # Cast the indices to the values' dtype so both can be concatenated.
+            attr = {'dtype': '{}.dtype'.format(topk_var)}
+            node.fluid_code.add_layer(
+                "cast", inputs=index_var, output=index_var, param_attr=attr)
+            attr = {'axis': axis, 'name': string(node.layer_name)}
+            node.fluid_code.add_layer(
+                "concat",
+                inputs='{}, {}'.format(topk_var, index_var),
+                output=node,
+                param_attr=attr)
+        else:
+            attr = {'k': top_k, 'name': string(node.layer_name)}
+            # The values output of topk is discarded ("_").
+            node.fluid_code.add_layer(
+                "topk",
+                inputs=bottom,
+                output='_, {}'.format(node.layer_name),
+                param_attr=attr)
+    def Crop(self, node):
+        """Emit a ``crop`` layer for a node with exactly two inputs.
+        The first input is cropped to the shape of the second input
+        (``node.input_shape[1]``). Offsets from ``crop_param.offset`` apply to
+        the dimensions starting at ``axis``; all other offsets are 0.
+        """
+        assert len(node.inputs) == 2, \
+            "The count of Crop node's input is not 2."
+        bottom = self.graph.get_bottom_node(node, idx=0, copy=True)
+        # The second bottom is fetched as before; only its shape is used below.
+        reference = self.graph.get_bottom_node(node, idx=1, copy=True)
+        crop_param = node.layer.crop_param
+        axis = crop_param.axis
+        in_shape = node.input_shape[0]
+        rank = len(in_shape)
+        if axis < 0:
+            axis += rank
+        offsets = [0] * rank
+        if hasattr(crop_param, "offset") and len(crop_param.offset) > 0:
+            given = list(crop_param.offset)
+            assert (rank - axis) == len(
+                given), "invalid offset[%s] in crop layer" % (str(given))
+            offsets = [0] * axis + given
+        layer_attr = {
+            'offsets': list(offsets),
+            'name': string(node.layer_name)
+        }
+        crop_inputs = {'x': bottom, 'shape': node.input_shape[1]}
+        node.fluid_code.add_layer(
+            "crop", inputs=crop_inputs, output=node, param_attr=layer_attr)
+    def Flatten(self, node):
+        """Emit a ``reshape`` layer that reshapes the single input to
+        ``node.output_shape[0]``.
+        """
+        # The message previously named "DetectionOutput" (copy-paste slip).
+        assert len(
+            node.inputs) == 1, 'The count of Flatten node\'s input is not 1.'
+        bottom = self.graph.get_bottom_node(node, idx=0, copy=True)
+        shape = node.output_shape[0]
+        attr = {'shape': shape, 'name': string(node.layer_name)}
+        node.fluid_code.add_layer(
+            "reshape", inputs=bottom, output=node, param_attr=attr)
+    def Power(self, node):
+        """Emit ``scale`` followed by ``pow`` for a Power node with one input.
+        ``power_param.scale`` and ``power_param.shift`` feed the ``scale``
+        layer (bias applied after scaling); ``power_param.power`` is the
+        ``pow`` factor applied to the scaled result.
+        """
+        # The message previously named "Permute" (copy-paste slip).
+        assert len(
+            node.inputs) == 1, 'The count of Power node\'s input is not 1.'
+        bottom = self.graph.get_bottom_node(node, idx=0, copy=True)
+        params = node.layer.power_param
+        power = params.power
+        scale = params.scale
+        shift = params.shift
+        attr = {
+            'scale': scale,
+            'bias': shift,
+            'bias_after_scale': True,
+            'name': string(node.layer_name + '_scale')
+        }
+        node.fluid_code.add_layer(
+            "scale", inputs=bottom, output=node, param_attr=attr)
+        attr = {'factor': power, 'name': string(node.layer_name)}
+        node.fluid_code.add_layer(
+            "pow", inputs=node, output=node, param_attr=attr)
+    def Reduction(self, node):
+        """Emit the layers for a Reduction node with exactly one input.
+        ``reduction_param.operation`` selects the reduction: 1 = SUM,
+        2 = ASUM (``abs`` then sum), 3 = SUMSQ (``pow`` 2.0 then sum),
+        4 = MEAN. All dimensions from ``axis`` onward are reduced without
+        keeping them, and the result is multiplied by ``coeff`` through a
+        final ``scale`` layer.
+        """
+        assert len(
+            node.inputs) == 1, 'The count of Reduction node\'s input is not 1.'
+        bottom = self.graph.get_bottom_node(node, idx=0, copy=True)
+        params = node.layer.reduction_param
+        operation = params.operation
+        axis = params.axis
+        coeff = params.coeff
+        assert 1 <= operation <= 4, "reduction operation [%s] error" % (
+            operation)
+        input_len = len(node.input_shape[0])
+        if axis < 0:
+            # NOTE(review): with the "+ 1", axis=-1 maps to input_len, so no
+            # dimension is selected below. Kept as is because shape_reduction
+            # uses the same rule -- confirm against Caffe's axis semantics.
+            axis += input_len + 1
+        # Every branch uses the same reduce attributes exactly once.
+        reduce_attr = {
+            'dim': list(range(input_len))[axis:],
+            'keep_dim': False,
+            'name': string(node.layer_name)
+        }
+        if operation == 1:  ## operation = SUM
+            node.fluid_code.add_layer(
+                "reduce_sum", inputs=bottom, output=node, param_attr=reduce_attr)
+        elif operation == 2:  ## operation = ASUM
+            attr = {'name': string(node.layer_name + '_abs')}
+            node.fluid_code.add_layer(
+                "abs", inputs=bottom, output=node, param_attr=attr)
+            node.fluid_code.add_layer(
+                "reduce_sum", inputs=node, output=node, param_attr=reduce_attr)
+        elif operation == 3:  ## operation = SUMSQ
+            attr = {'factor': 2.0, 'name': string(node.layer_name + '_pow')}
+            node.fluid_code.add_layer(
+                "pow", inputs=bottom, output=node, param_attr=attr)
+            node.fluid_code.add_layer(
+                "reduce_sum", inputs=node, output=node, param_attr=reduce_attr)
+        else:  ## operation = MEAN
+            # Read from the bottom node: the node's own output does not exist
+            # yet at this point (the previous code passed inputs=node).
+            node.fluid_code.add_layer(
+                "reduce_mean",
+                inputs=bottom,
+                output=node,
+                param_attr=reduce_attr)
+        attr = {'scale': coeff}
+        node.fluid_code.add_layer(
+            "scale", inputs=node, output=node, param_attr=attr)
+    def deal_custom_layer(self, node):
+        """Emit a custom-layer call for ``node`` and record its source code.
+        Keyword arguments come from the layer's parameter message plus
+        ``name`` and ``input_shape``. Blobs, when present, are adjusted and
+        stored in ``self.weights`` under the names given by ``deal_weights``.
+        For ``DetectionOutput`` the second input is replaced by the bottom of
+        the nearest upstream Softmax/Sigmoid node.
+        """
+        layer_type = node.layer_type
+        source_code, layer_func = make_custom_layer(node)
+        layer_params = get_params(node.layer, node.layer_type)
+        _, call_kwargs = set_args(layer_func, layer_params)
+        call_kwargs['name'] = string(node.layer_name)
+        call_kwargs['input_shape'] = node.input_shape
+        if node.data is not None:
+            blobs = self.adjust_parameters(node)
+            blob_names = deal_weights(node)
+            for idx in range(len(blobs)):
+                self.weights[blob_names[idx]] = blobs[idx]
+        bottoms = []
+        for idx in range(len(node.inputs)):
+            bottom = self.graph.get_bottom_node(node, idx=idx, copy=True)
+            if idx == 1 and layer_type == 'DetectionOutput':
+                bottom = self.graph.get_bottom_node(node, idx=idx, copy=True)
+                # Walk up the first-input chain to a Softmax/Sigmoid node.
+                while bottom is not None and bottom.layer_type not in (
+                        'Softmax', 'Sigmoid'):
+                    bottom = self.graph.get_bottom_node(
+                        bottom, idx=0, copy=True)
+                assert bottom is not None, 'This kind of DetectionOutput is not supported!'
+                # Use the input of that activation node, not the node itself.
+                bottom = self.graph.get_bottom_node(bottom, idx=0, copy=True)
+            bottoms.append(bottom)
+        node.fluid_code.add_layer(
+            layer_func.__code__.co_name,
+            inputs=bottoms,
+            output=node,
+            param_attr=call_kwargs,
+            is_custom_layer=True)
+        # Keep the source of each custom layer type only once.
+        if layer_type not in self.used_custom_layers:
+            self.used_custom_layers[layer_type] = source_code
+    def directly_map(self, node):
+        """Emit the op registered for the node's type in ``self.directly_map_ops``,
+        taking the node's first bottom as input and passing only ``name``.
+        """
+        layer_type = node.layer_type
+        assert layer_type in self.directly_map_ops
+        paddle_op = self.directly_map_ops[layer_type]
+        bottom = self.graph.get_bottom_node(node, idx=0, copy=True)
+        layer_attr = {'name': string(node.layer_name)}
+        node.fluid_code.add_layer(
+            paddle_op, inputs=bottom, output=node, param_attr=layer_attr)
--- a/x2paddle/x2paddle/op_mapper/caffe_shape.py
+++ b/x2paddle/x2paddle/op_mapper/caffe_shape.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import numbers
+from functools import reduce
+def _resolve_hw_pair(params, size_field, prefix, default):
+    """Return the (height, width) pair for a kernel/stride/pad setting.
+    ``size_field`` names the combined field (a number or a sequence) and
+    ``prefix`` names the explicit ``<prefix>_h`` / ``<prefix>_w`` fields.
+    A positive explicit value wins over the combined field.
+    """
+    size = getattr(params, size_field)
+    explicit_h = getattr(params, prefix + '_h', 0)
+    explicit_w = getattr(params, prefix + '_w', 0)
+    if isinstance(size, numbers.Number):
+        # Scalar field: previously the explicit _h/_w values were ignored
+        # here, so a layer configured only through them got the scalar's
+        # unset value (e.g. a 0x0 kernel).
+        return (explicit_h if explicit_h > 0 else size,
+                explicit_w if explicit_w > 0 else size)
+    if len(size) > 0:
+        return (explicit_h if explicit_h > 0 else size[0],
+                explicit_w if explicit_w > 0 else size[len(size) - 1])
+    if explicit_h > 0 or explicit_w > 0:
+        return explicit_h, explicit_w
+    return default, default
+def get_kernel_parameters(params):
+    """Extract dilation, padding, kernel and stride sizes from ``params``.
+    Args:
+        params (object): parameter message exposing ``kernel_size``,
+            ``stride`` and ``pad`` (number or sequence), optionally the
+            matching ``*_h`` / ``*_w`` fields and a ``dilation`` sequence.
+    Returns:
+        tuple: ``(dila_h, dila_w, p_h, p_w, k_h, k_w, s_h, s_w)``. Defaults
+        are 1 for kernel, stride and dilation, and 0 for padding.
+    Raises:
+        AssertionError: if ``dilation`` has more than two entries.
+    """
+    k_h, k_w = _resolve_hw_pair(params, 'kernel_size', 'kernel', 1)
+    s_h, s_w = _resolve_hw_pair(params, 'stride', 'stride', 1)
+    p_h, p_w = _resolve_hw_pair(params, 'pad', 'pad', 0)
+    dila_h = dila_w = 1
+    if hasattr(params, 'dilation'):
+        dila_len = len(params.dilation)
+        if dila_len == 2:
+            dila_h = params.dilation[0]
+            dila_w = params.dilation[1]
+        elif dila_len == 1:
+            dila_h = dila_w = params.dilation[0]
+        else:
+            assert dila_len == 0, "invalid length[%s] of dilation in convolution" % (
+                dila_len)
+    return dila_h, dila_w, p_h, p_w, k_h, k_w, s_h, s_w
+def get_strided_kernel_output_shape(params, input_shape, round_func):
+    """Compute the output shape of a strided-kernel layer.
+    Args:
+        params (object): parameter message passed to ``get_kernel_parameters``.
+        input_shape (list): a single shape indexed as [0]..[3].
+        round_func (callable): rounding applied to each spatial extent.
+    Returns:
+        list: one shape ``[input_shape[0], c, o_h, o_w]`` where ``c`` is
+        ``params.num_output`` when present, else ``input_shape[1]``.
+    """
+    in_h = input_shape[2]
+    in_w = input_shape[3]
+    (dila_h, dila_w, pad_h, pad_w, kernel_h, kernel_w, stride_h,
+     stride_w) = get_kernel_parameters(params)
+    # Effective kernel extent once dilation is taken into account.
+    span_h = dila_h * (kernel_h - 1) + 1
+    span_w = dila_w * (kernel_w - 1) + 1
+    raw_h = (in_h + 2 * pad_h - span_h) / float(stride_h) + 1
+    raw_w = (in_w + 2 * pad_w - span_w) / float(stride_w) + 1
+    out_h = int(round_func(raw_h))
+    out_w = int(round_func(raw_w))
+    if hasattr(params, 'num_output'):
+        channels = params.num_output
+    else:
+        channels = input_shape[1]
+    return [[input_shape[0], channels, out_h, out_w]]
+def shape_convolution(layer, input_shape):
+    """Return the output shape of a convolution over the first input,
+    rounding spatial extents down.
+    """
+    conv_param = layer.convolution_param
+    first_input = input_shape[0]
+    return get_strided_kernel_output_shape(conv_param, first_input, math.floor)
+def shape_deconvolution(layer, input_shape):
+    """Return the output shape of a deconvolution over the first input.
+    Each spatial extent is
+    ``(in - 1) * stride - 2 * pad + dilation * (kernel - 1) + 1``. The channel
+    count is ``num_output`` when the parameter message has it, else the
+    input's second dimension.
+    """
+    first_input = input_shape[0]
+    h_i = first_input[2]
+    w_i = first_input[3]
+    params = layer.convolution_param
+    dila_h, dila_w, pad_h, pad_w, kernel_h, kernel_w, stride_h, stride_w = get_kernel_parameters(
+        params)
+    h_o = (h_i - 1) * stride_h - 2 * pad_h + dila_h * (kernel_h - 1) + 1
+    w_o = (w_i - 1) * stride_w - 2 * pad_w + dila_w * (kernel_w - 1) + 1
+    # input_shape is a list of shapes, so the fallback is read by index; the
+    # previous ``input_shape.channels`` raised AttributeError.
+    c = params.num_output if hasattr(params, 'num_output') else first_input[1]
+    return [[first_input[0], c, h_o, w_o]]
+def shape_pooling(layer, input_shape):
+    """Return the output shape of a pooling layer over the first input.
+    Global pooling yields a 1x1 spatial size. Otherwise extents are rounded
+    up when ``ceil_mode`` is True (the default when the field is missing)
+    and rounded down in every other case.
+    """
+    pool_param = layer.pooling_param
+    if getattr(pool_param, 'global_pooling', False):
+        first_input = input_shape[0]
+        return [[first_input[0], first_input[1], 1, 1]]
+    use_ceil = getattr(pool_param, 'ceil_mode', True) is True
+    rounding = math.ceil if use_ceil else math.floor
+    return get_strided_kernel_output_shape(pool_param, input_shape[0],
+                                           rounding)
+def shape_innerproduct(layer, input_shape):
+    """Return ``[[batch, num_output]]`` with batch taken from the first input."""
+    batch = input_shape[0][0]
+    num_output = layer.inner_product_param.num_output
+    return [[batch, num_output]]
+def shape_lrn(layer, input_shape):
+    """Return the input shapes unchanged; ``layer`` is not used."""
+    output_shape = input_shape
+    return output_shape
+def shape_relu(layer, input_shape):
+    """Return the input shapes unchanged; ``layer`` is not used."""
+    output_shape = input_shape
+    return output_shape
+def shape_softmax(layer, input_shape):
+    """Return the input shapes unchanged; ``layer`` is not used."""
+    output_shape = input_shape
+    return output_shape
+def shape_input(layer, input_shape):
+    """Return the dims of the first shape in ``layer.input_param`` as a list;
+    ``input_shape`` is not used.
+    """
+    declared = layer.input_param.shape[0]
+    return [list(declared.dim)]
+def shape_memorydata(layer, input_shape):
+    """Return ``[[batch_size, channels, height, width]]`` as ints read from
+    ``layer.memory_data_param``; ``input_shape`` is not used.
+    """
+    data_param = layer.memory_data_param
+    dims = [
+        int(data_param.batch_size),
+        int(data_param.channels),
+        int(data_param.height),
+        int(data_param.width),
+    ]
+    return [dims]
+def shape_concat(layer, input_shape):
+    """Return the shape obtained by concatenating all inputs along
+    ``concat_param.axis``: a copy of the first shape whose ``axis`` entry is
+    the sum over all inputs. With no inputs the result is ``[None]``.
+    """
+    axis = layer.concat_param.axis
+    merged = None
+    for position, shape in enumerate(input_shape):
+        if position == 0:
+            # Copy so the first input's shape is not modified below.
+            merged = list(shape)
+            continue
+        merged[axis] += shape[axis]
+    return [merged]
+def shape_slice(layer, input_shape):
+    """Return one output shape per slice of the first input.
+    The slice axis is ``slice_param.axis``, replaced by ``slice_dim`` when
+    ``slice_dim`` is not 1 and ``axis`` is 1. Without explicit
+    ``slice_point`` values the axis is split evenly over ``len(layer.top)``.
+    """
+    source = input_shape[0]
+    num_tops = len(layer.top)
+    slice_param = layer.slice_param
+    axis = slice_param.axis
+    if slice_param.slice_dim != 1 and axis == 1:
+        axis = slice_param.slice_dim
+    cuts = list(slice_param.slice_point)
+    extent = source[axis]
+    if not cuts:
+        assert extent % num_tops == 0, "the parameter of Slice is wrong"
+        step = extent // num_tops
+        cursor = step
+        while cursor < extent:
+            cuts.append(cursor)
+            cursor += step
+    bounds = [0] + cuts + [extent]
+    output_shape = []
+    for begin, end in zip(bounds[:-1], bounds[1:]):
+        piece = list(source)
+        piece[axis] = end - begin
+        output_shape.append(piece)
+    return output_shape
+def shape_prelu(layer, input_shape):
+    """Return the input shapes unchanged; ``layer`` is not used."""
+    output_shape = input_shape
+    return output_shape
+def shape_sigmoid(layer, input_shape):
+    """Return the input shapes unchanged; ``layer`` is not used."""
+    output_shape = input_shape
+    return output_shape
+def shape_absval(layer, input_shape):
+    """Return the input shapes unchanged; ``layer`` is not used."""
+    output_shape = input_shape
+    return output_shape
+def shape_accuracy(layer, input_shape):
+    """Return the fixed shape ``[[1]]``; both arguments are ignored."""
+    scalar_shape = [1]
+    return [scalar_shape]
+def shape_tanh(layer, input_shape):
+    """Return the input shapes unchanged; ``layer`` is not used."""
+    output_shape = input_shape
+    return output_shape
+def shape_eltwise(layer, input_shape):
+    """Return a one-element list holding the first input's shape."""
+    first_input = input_shape[0]
+    return [first_input]
+def shape_batchnorm(layer, input_shape):
+    """Return the input shapes unchanged; ``layer`` is not used."""
+    output_shape = input_shape
+    return output_shape
+def shape_scale(layer, input_shape):
+    """Return the input shapes unchanged; ``layer`` is not used."""
+    output_shape = input_shape
+    return output_shape
+def shape_reshape(layer, input_shape):
+    """Compute the output shape of a Reshape layer.
+    Dimensions ``[axis, axis + num_axes)`` of the first input are replaced by
+    ``reshape_param.shape.dim``, where 0 copies the matching input dimension
+    and a single -1 is inferred from the element count. A leading -1 in the
+    input counts as 1 for the arithmetic, and the first output dimension is
+    always reported as -1.
+    Args:
+        layer (object): layer exposing ``reshape_param``.
+        input_shape (list): list of input shapes; only the first is used and
+            it is not modified.
+    Returns:
+        list: a single-element list holding the output shape.
+    Raises:
+        AssertionError: on out-of-range axes, multiple -1 dims, or element
+            counts that do not match.
+    """
+    def count(num_list):
+        return reduce(lambda a, b: a * b, num_list)
+    # Work on a copy: the -1 -> 1 substitution below used to rewrite the
+    # caller's shape list in place.
+    inshape = list(input_shape[0])
+    params = layer.reshape_param
+    axis = getattr(params, 'axis', 0)
+    num_axes = getattr(params, 'num_axes', -1)
+    if inshape[0] == -1:
+        inshape[0] = 1
+    input_count = count(inshape)
+    input_num_axes = len(inshape)
+    # A negative axis counts from one past the last dimension.
+    start_axis = axis if axis >= 0 else input_num_axes + axis + 1
+    assert start_axis >= 0, "[Reshape]axis %d out of range" % (axis)
+    assert start_axis <= input_num_axes, "[Reshape]axis %d out of range for %d-D input data"\
+            % (axis, input_num_axes)
+    assert num_axes >= -1, "[Reshape]num_axes must be >= 0, or -1 for all"
+    end_axis = input_num_axes if num_axes == -1 else start_axis + num_axes
+    assert end_axis <= input_num_axes, "end_axis[%d] = axis[%d] + num_axes[%d] is out of range"\
+            % (end_axis, start_axis, num_axes)
+    num_axes_replaced = end_axis - start_axis
+    num_axes_retained = input_num_axes - num_axes_replaced
+    new_dims = list(params.shape.dim)
+    num_new_axes = len(new_dims)
+    outshape = inshape[:start_axis] + new_dims + inshape[end_axis:]
+    assert len(outshape) == num_axes_retained + num_new_axes,\
+            "[Reshape]invalid dims of output shape[%s]" % (str(outshape))
+    inferred_axis = -1
+    copy_axes = []
+    constant_count = 1
+    for i, top_dim in enumerate(new_dims):
+        if top_dim == 0:
+            # 0 means "copy the input dimension at the same position".
+            copy_axes.append(i)
+            outshape[start_axis + i] = inshape[start_axis + i]
+        elif top_dim == -1:
+            assert inferred_axis == -1, "[Reshape]new shape contains multiple -1 dims"
+            inferred_axis = i
+        else:
+            constant_count *= top_dim
+    if inferred_axis >= 0:
+        explicit_count = constant_count
+        for kept in (inshape[:start_axis], inshape[end_axis:]):
+            if len(kept) > 0:
+                explicit_count *= count(kept)
+        for i in copy_axes:
+            explicit_count *= outshape[start_axis + i]
+        assert input_count % explicit_count == 0, "[Reshape]botom count[%d] "\
+                "must be divisible by product of the specified dimensions[%d] "\
+                % (input_count, explicit_count)
+        # Integer division: divisibility was just asserted.
+        outshape[start_axis + inferred_axis] = input_count // explicit_count
+    output_count = count(outshape)
+    assert output_count == input_count, "[Reshape]output count[%d] must match input count[%d]" % (
+        output_count, input_count)
+    outshape[0] = -1
+    return [outshape]
+def shape_argmax(layer, input_shape):
+    """Compute the output shape of an ArgMax layer.
+    Only the last dimension may be reduced: it becomes ``top_k``, doubled
+    when ``out_max_val`` is True. Missing fields default to
+    ``out_max_val=False``, ``top_k=1`` and ``axis=-1``.
+    Args:
+        layer (object): layer exposing ``argmax_param``.
+        input_shape (list): list of input shapes; only the first is used and
+            it is not modified.
+    Returns:
+        list: a single-element list holding the output shape.
+    Raises:
+        AssertionError: if ``axis`` is not the last dimension.
+    """
+    inshape = input_shape[0]
+    params = layer.argmax_param
+    # Attribute names must be strings; the previous code passed undefined bare
+    # names (and misspelled ``params``), which raised NameError.
+    out_max_val = getattr(params, 'out_max_val', False)
+    top_k = getattr(params, 'top_k', 1)
+    axis = getattr(params, 'axis', -1)
+    if axis < 0:
+        axis += len(inshape)
+    assert (axis + 1 == len(inshape)
+            ), 'only can be applied on the last dimension[axis:%d, %s] now,'\
+                    'make sure you have set axis param in xxx.prototxt file' \
+                    % (axis, str(inshape))
+    # Copy so the caller's shape list is left untouched.
+    outshape = list(inshape)
+    outshape[-1] = top_k
+    if out_max_val is True:
+        outshape[-1] *= 2
+    return [outshape]
+def shape_crop(layer, input_shape):
+    """Return the second input's shape as the only output shape; exactly two
+    inputs are required.
+    """
+    num_inputs = len(input_shape)
+    assert num_inputs == 2, "the number of crop's inputs must be 2"
+    reference_shape = input_shape[1]
+    return [reference_shape]
+def shape_flatten(layer, input_shape):
+    """Compute the output shape of a Flatten layer with exactly one input.
+    Dimensions ``[axis, end_axis)`` are collapsed into their product. A
+    negative ``axis`` is offset by the rank and a negative ``end_axis`` by
+    rank + 1. The first output dimension is always set to -1.
+    """
+    assert len(input_shape) == 1, "the number of flatten's inputs must be 1"
+    dims = input_shape[0]
+    rank = len(dims)
+    flatten_param = layer.flatten_param
+    first = flatten_param.axis
+    last = flatten_param.end_axis
+    if first < 0:
+        first += rank
+    if last < 0:
+        last += rank + 1
+    assert first <= last, 'invalid axis[%d] or end_axis[%d] params'\
+            % (first, last)
+    collapsed = dims[first:last]
+    # Slicing yields a new list, so the input shape itself is not modified.
+    result = dims[0:first]
+    if len(collapsed) != 0:
+        result += [reduce(lambda a, b: a * b, collapsed)]
+    result += dims[last:rank]
+    result[0] = -1
+    return [result]
+def shape_power(layer, input_shape):
+    """Return the input shapes unchanged; ``layer`` is not used."""
+    output_shape = input_shape
+    return output_shape
+def shape_reduction(layer, input_shape):
+    """Compute the output shape of a Reduction layer.
+    The output keeps the dimensions of the first input that come before
+    ``reduction_param.axis``.
+    Args:
+        layer (object): layer exposing ``reduction_param``.
+        input_shape (list): list of input shapes; only the first is used.
+    Returns:
+        list: a single-element list holding the output shape.
+    Raises:
+        AssertionError: if the axis exceeds the rank of the first input.
+    """
+    params = layer.reduction_param
+    inshape = input_shape[0]
+    axis = params.axis
+    if axis < 0:
+        # NOTE(review): with the "+ 1", axis=-1 maps to the rank, so nothing
+        # is reduced. Kept as is because the op mapper's Reduction uses the
+        # same rule -- confirm against Caffe's axis semantics.
+        axis += len(inshape) + 1
+    assert axis <= len(inshape), 'invalid axis[%d] error' % (axis)
+    # Slice the first shape; the previous code sliced the list of shapes and
+    # returned a wrongly nested result.
+    return [inshape[0:axis]]
--- a/x2paddle/x2paddle/op_mapper/onnx2paddle/__init__.py
+++ b/x2paddle/x2paddle/op_mapper/onnx2paddle/__init__.py
--- a/x2paddle/x2paddle/op_mapper/onnx2paddle/onnx_op_mapper.py
+++ b/x2paddle/x2paddle/op_mapper/onnx2paddle/onnx_op_mapper.py
+# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from x2paddle.op_mapper.onnx2paddle.opset9 import OpSet9, custom_layers
+from x2paddle.core.op_mapper import OpMapper
+from x2paddle.decoder.onnx_decoder import ONNXGraph, ONNXGraphNode, ONNXGraphDataNode
+class ONNXOpMapper(OpMapper):
+    """Convert every node of a decoded ONNX graph through an opset object.
+    On construction an opset implementation is chosen for the decoder's
+    ``op_set``, all ops are checked for support, and each node in
+    topological order is dispatched to that implementation. Weights,
+    omitted nodes and used custom layers are then copied from it.
+    """
+    def __init__(self, decoder):
+        """Build the mapper from ``decoder`` and convert its graph.
+        Raises:
+            Exception: if the graph contains an unsupported op.
+        """
+        super(ONNXOpMapper, self).__init__()
+        self.support_op_sets = [9, ]
+        self.default_op_set = 9
+        self.graph = decoder.graph
+        self.opset = self.create_opset(decoder)
+        if not self.op_checker():
+            raise Exception("Model are not supported yet.")
+        #mapping op
+        print("Total nodes: {}".format(
+            sum(
+                isinstance(node, ONNXGraphNode)
+                for node in self.graph.node_map.values())))
+        print("Nodes converting ...")
+        for node_name in self.graph.topo_sort:
+            node = self.graph.get_node(node_name)
+            op = node.layer_type
+            # Dispatch order: dedicated method, default mapping, custom
+            # layer, then element-wise op.
+            if hasattr(self.opset, op):
+                func = getattr(self.opset, op)
+                func(node)
+            elif op in self.opset.default_op_mapping:
+                self.opset.directly_map(node)
+            elif op in custom_layers:
+                self.opset.deal_custom_layer(node)
+            elif op in self.opset.elementwise_ops:
+                self.opset.elementwise_map(node)
+        print("Nodes converted.")
+        self.weights = self.opset.weights
+        self.omit_nodes = self.opset.omit_nodes
+        self.used_custom_layers = self.opset.used_custom_layers
+    def op_checker(self):
+        """Return True when every op in the graph can be converted.
+        Unsupported op types are printed, each once, before returning False.
+        """
+        unsupported_ops = set()
+        for node_name in self.graph.topo_sort:
+            node = self.graph.get_node(node_name)
+            op = node.layer_type
+            if not hasattr(self.opset, op) and \
+                op not in self.opset.default_op_mapping and \
+                op not in custom_layers and \
+                op not in self.opset.elementwise_ops:
+                unsupported_ops.add(op)
+        if len(unsupported_ops) == 0:
+            return True
+        else:
+            print("There are {} ops not supported yet, list as below".format(
+                len(unsupported_ops)))
+            for op in unsupported_ops:
+                print(op)
+            return False
+    def create_opset(self, decoder):
+        """Instantiate the opset implementation used for ``decoder``.
+        A supported ``decoder.op_set`` is used as is. An older one falls back
+        to the default, and a newer one to the highest supported version
+        below it.
+        """
+        # Explicit registry instead of eval() on a built-up class name.
+        opset_classes = {9: OpSet9}
+        if decoder.op_set in self.support_op_sets:
+            run_op_set = decoder.op_set
+        else:
+            run_op_set = self.default_op_set
+            if decoder.op_set >= self.default_op_set:
+                for op_set in self.support_op_sets:
+                    if decoder.op_set > op_set:
+                        run_op_set = op_set
+                    else:
+                        break
+        print(
+            'Now, onnx2paddle support convert onnx model opset_verison {},'
+            'opset_verison of your onnx model is {}, automatically treated as op_set: {}.'
+            .format(self.support_op_sets, decoder.op_set, run_op_set))
+        return opset_classes[run_op_set](decoder)
--- a/x2paddle/x2paddle/op_mapper/onnx2paddle/opset9/__init__.py
+++ b/x2paddle/x2paddle/op_mapper/onnx2paddle/opset9/__init__.py
+from .opset import OpSet9
+from .custom_layer import custom_layers
--- a/x2paddle/x2paddle/op_mapper/onnx2paddle/opset9/custom_layer/__init__.py
+++ b/x2paddle/x2paddle/op_mapper/onnx2paddle/opset9/custom_layer/__init__.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .register import get_registered_layers
+custom_layers = get_registered_layers()
+def set_args(f, params):
+    """ collect keyword arguments for function 'f' from the attributes of 'params'
+    Args:
+        f (function): a python function object
+        params (object): object whose attributes supply f's arguments; may be None
+    Returns:
+        arg_names (tuple): names of f's positional arguments
+        kwargs (dict): the arguments that 'params' has an attribute for
+    """
+    code = f.__code__
+    arg_list = code.co_varnames[:code.co_argcount]
+    if params is None:
+        return arg_list, {}
+    kwargs = {
+        arg_name: getattr(params, arg_name)
+        for arg_name in arg_list if hasattr(params, arg_name)
+    }
+    return arg_list, kwargs
+def has_layer(layer_type):
+    """ tell whether 'layer_type' is among the registered custom layers
+    """
+    is_registered = layer_type in custom_layers
+    return is_registered
+def get_params(layer, layer_type):
+    """ fetch the '<name>_param' attribute of 'layer' for 'layer_type'
+    Deconvolution and ConvolutionDepthwise use 'convolution_param' and
+    Normalize uses 'norm_param'. A type with at least two capital letters is
+    split with '_' before each capital that follows a lower-case letter;
+    any other type is simply lower-cased. Returns None when the attribute
+    does not exist.
+    """
+    import re
+    lowered = layer_type.lower()
+    if lowered in ("deconvolution", "convolutiondepthwise"):
+        param_name = 'convolution_param'
+    elif lowered == "normalize":
+        param_name = 'norm_param'
+    elif len(re.findall("[A-Z]", layer_type)) >= 2:
+        words = []
+        current = layer_type[0].lower()
+        for previous, ch in zip(layer_type, layer_type[1:]):
+            if ch.isupper() and previous.islower():
+                words.append(current)
+                current = ''
+            current += ch.lower()
+        words.append(current)
+        param_name = '_'.join(words + ['param'])
+    else:
+        param_name = lowered + '_param'
+    return getattr(layer, param_name, None)
+def compute_output_shape(node):
+    """ run the registered 'shape' function of a custom layer
+    Args:
+        node (object): graph node providing layer_type, layer and input_shape
+    Returns:
+        whatever the registered shape function returns for node.input_shape
+    """
+    kind = node.layer_type
+    assert kind in custom_layers, "layer[%s] not exist in custom layers" % (
+        kind)
+    infer_shape = custom_layers[kind]['shape']
+    # only the layer parameters that the shape function declares are passed on
+    _, kwargs = set_args(infer_shape, get_params(node.layer, kind))
+    return infer_shape(node.input_shape, **kwargs)
+def make_custom_layer(node):
+    """ look up the registered 'layer' function of a custom layer
+    Args:
+        node (object): graph node providing layer_type
+    Returns:
+        (str, function): source code of the layer function and the function
+    """
+    import inspect
+    kind = node.layer_type
+    assert kind in custom_layers, "layer[%s] not exist in custom layers" % (
+        kind)
+    impl = custom_layers[kind]['layer']
+    source = inspect.getsource(impl)
+    return source, impl
+def make_custom_child_func(node):
+    """ look up the optional 'child_func' helper of a custom layer
+    Args:
+        node (object): graph node providing layer_type
+    Returns:
+        (str, function): source code of the helper and the helper itself,
+        or (None, None) when no helper was registered
+    """
+    helper = custom_layers[node.layer_type]['child_func']
+    if helper is not None:
+        import inspect
+        return inspect.getsource(helper), helper
+    return None, helper
+def deal_weights(node, data=None):
+    """ hand the weights of a custom layer to its registered 'weights' function
+    Args:
+        node (object): graph node providing layer_type and layer_name
+        data: weights data forwarded unchanged to the registered function
+    Returns:
+        the result of the registered weights function
+    """
+    handler = custom_layers[node.layer_type]['weights']
+    return handler(node.layer_name, data)
--- a/x2paddle/x2paddle/op_mapper/onnx2paddle/opset9/custom_layer/register.py
+++ b/x2paddle/x2paddle/op_mapper/onnx2paddle/opset9/custom_layer/register.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" this module provides 'register' for registering customized layers
+"""
+g_custom_layers = {}
+def register(kind, shape, layer, child_func, weights):
+    """ register a custom layer or a list of custom layers
+    Args:
+        @kind (str or list): type name (or list of type names) of the layer
+        @shape (function): a function to generate the shape of layer's output
+        @layer (function): a function to generate the paddle code of layer
+        @child_func (function or None): optional helper stored with the layer
+        @weights (function): a function to deal with weights data
+    Returns:
+        None
+    """
+    assert type(shape).__name__ == 'function', 'shape should be a function'
+    assert type(layer).__name__ == 'function', 'layer should be a function'
+    # a single name is treated as a one-element list
+    kinds = [kind] if type(kind) is str else kind
+    assert type(
+        kinds) is list, 'invalid param "kind" for register, not a list or str'
+    for type_name in kinds:
+        assert type(
+            type_name
+        ) is str, 'invalid param "kind" for register, not a list of str'
+        assert type_name not in g_custom_layers, 'this type[%s] has already been registered' % (
+            type_name)
+        # every type name gets its own entry dict
+        entry = dict(
+            shape=shape, layer=layer, child_func=child_func, weights=weights)
+        g_custom_layers[type_name] = entry
+def get_registered_layers():
+    """ return the module-level registry filled by 'register'
+    Returns:
+        dict: 'g_custom_layers' itself (not a copy)
+    """
+    return g_custom_layers
--- a/x2paddle/x2paddle/op_mapper/onnx2paddle/opset9/opset.py
+++ b/x2paddle/x2paddle/op_mapper/onnx2paddle/opset9/opset.py
+# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from x2paddle.decoder.onnx_decoder import ONNXGraph, ONNXGraphNode, ONNXGraphDataNode
+from x2paddle.core.graph import GraphNode
+from x2paddle.core.fluid_code import Layer
+from x2paddle.core.fluid_code import FluidCode
+from x2paddle.core.util import string
+from x2paddle.op_mapper.onnx2paddle.opset9.custom_layer import *
+from functools import reduce
+import numpy as np
+import onnx
+import onnx.numpy_helper as numpy_helper
+from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE
+import logging as _logging
+from collections import OrderedDict
+import math
+import os
+import shutil
+_logger = _logging.getLogger(__name__)
+def _const_weight_or_none(node, necessary=False):
+    """ return the constant data carried by 'node', if there is any
+    Args:
+        node (object): graph node; Constant operators expose 'value',
+            ONNXGraphDataNode instances expose 'weight'
+        necessary (bool): when True, a node without constant data is an error
+    Returns:
+        the constant value / weight of the node, or None when the node is
+        neither and 'necessary' is False
+    Raises:
+        AssertionError: if 'necessary' is True and the node holds no constant
+    """
+    if 'Constant' in node.layer_type:
+        return node.value
+    if isinstance(node, ONNXGraphDataNode):
+        return node.weight
+    # The previous `assert '<message>'` tested a non-empty string and could
+    # never fail; the condition below makes 'necessary' actually enforced.
+    assert not necessary, '{} should be an initializer or Constant operator.'.format(
+        node.layer_name)
+    return None
+def _is_static_shape(shape):
+    """ tell whether 'shape' can be used as a static shape
+    Args:
+        shape (iterable of int): dimension sizes
+    Returns:
+        bool: False when more than one dimension is negative or when any
+        dimension is smaller than -1, True otherwise
+    """
+    unknown_dims = 0
+    has_invalid_dim = False
+    for extent in shape:
+        if extent < 0:
+            unknown_dims += 1
+            if extent < -1:
+                has_invalid_dim = True
+    return unknown_dims <= 1 and not has_invalid_dim
+def _get_same_padding(in_size, kernel_size, stride):
+    """ compute the two paddings of one axis for 'SAME'-style padding
+    Args:
+        in_size (int): input extent along the axis
+        kernel_size (int): kernel extent along the axis
+        stride (int): stride along the axis
+    Returns:
+        list: [pad_begin, pad_end]; pad_end receives the extra element when
+        the total padding is odd
+    """
+    out_size = int(math.ceil(in_size * 1.0 / stride))
+    total = (out_size - 1) * stride + kernel_size - in_size
+    head = int(total / 2)
+    return [head, total - head]
+def print_mapping_info(func):
+    """ decorator that reports which node failed when a mapper raises
+    Args:
+        func (function): mapper called as func(self, node, ...)
+    Returns:
+        function: wrapper that returns func's result unchanged; on failure it
+        prints the node name and op type and re-raises the original exception
+    """
+    import functools
+    # wraps() keeps the name and docstring of the decorated mapper
+    @functools.wraps(func)
+    def run_mapping(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except Exception:
+            # 'node' is the first argument after 'self'; accept it by keyword
+            # too, so the report cannot hide the real error behind an IndexError
+            node = args[1] if len(args) > 1 else kwargs.get('node')
+            # the first 9 characters of layer_name are dropped, as before
+            # (presumably a generated 'x2paddle_' prefix -- TODO confirm)
+            print("convert failed node:{}, op_type is {}".format(
+                str(getattr(node, 'layer_name', ''))[9:],
+                getattr(node, 'layer_type', None)))
+            raise
+    return run_mapping
+class OpSet9():
+    # ONNX binary op type -> fluid elementwise layer, used by elementwise_map
+    elementwise_ops = {
+        'Add': 'elementwise_add',
+        'Div': 'elementwise_div',
+        'Sub': 'elementwise_sub',
+        'Mul': 'elementwise_mul',
+        'Pow': 'elementwise_pow',
+    }
+    # Default value of every field of a default_op_mapping row. The order
+    # matters: directly_map unpacks the fields positionally and fills the
+    # fields a row leaves out from the tail of this ordered dict.
+    default_op_mapping_field_values = OrderedDict()
+    default_op_mapping_field_values['FLUID_OP'] = ''
+    default_op_mapping_field_values['FLUID_INPUT_ARGS'] = None
+    default_op_mapping_field_values['FLUID_OUTPUT_ARGS'] = None
+    default_op_mapping_field_values['ATTR_MAPPING'] = dict()
+    default_op_mapping_field_values['DEFAULTS'] = dict()
+    default_op_mapping_field_values['INPUT_PERM'] = None
+    default_op_mapping_field_values['OUTPUT_PERM'] = None
+    default_op_mapping_field_values['FILL_NAME_FIELD'] = True
+    # ONNX op type -> [fluid op, input args, output args,
+    #                  attribute renames, default attributes]
+    default_op_mapping = {
+        'Shape': ['shape', ['X'], ['Out']],
+        # The Clip defaults reinterpret four raw bytes as one float32; on a
+        # little-endian host they are 0xFF7FFFFF and 0x7F7FFFFF, i.e. the
+        # lowest and highest finite float32 values.
+        'Clip': [
+            'clip', ['X'], ['Out'], dict(), dict(
+                min=(np.asarray(
+                    [255, 255, 127, 255], dtype=np.uint8).view(np.float32)[0]),
+                max=(np.asarray(
+                    [255, 255, 127, 127], dtype=np.uint8).view(np.float32)[0]),
+            )
+        ],
+        'Erf': ['erf', ['X'], ['Out']],
+        'Ceil': ['ceil', ['X'], ['Out']],
+        'ReduceMean': [
+            'reduce_mean', ['X'], ['Out'], dict(
+                axes='dim', keepdims='keep_dim'), dict(keep_dim=1)
+        ],
+        'ReduceSum': [
+            'reduce_sum', ['X'], ['Out'], dict(
+                axes='dim', keepdims='keep_dim'), dict(keep_dim=1)
+        ],
+        'ReduceMin': [
+            'reduce_min', ['X'], ['Out'], dict(
+                axes='dim', keepdims='keep_dim'), dict(keep_dim=1)
+        ],
+        'ReduceMax': [
+            'reduce_max', ['X'], ['Out'], dict(
+                axes='dim', keepdims='keep_dim'), dict(keep_dim=1)
+        ],
+        # activation functions
+        'Relu': ['relu', ['X'], ['Out']],
+        'LeakyRelu': ['leaky_relu', ['X'], ['Out'], dict(), dict(alpha=.01)],
+        'Elu': ['elu', ['X'], ['Out'], dict(), dict(alpha=1.)],
+        'ThresholdedRelu': [
+            'thresholded_relu', ['X'], ['Out'], dict(alpha='threshold'),
+            dict(alpha=1.)
+        ],
+        'Tanh': ['tanh', ['X'], ['Out']],
+        'Sigmoid': ['sigmoid', ['X'], ['Out']],
+        'HardSigmoid': [
+            'hard_sigmoid', ['X'], ['Out'], dict(
+                alpha='slope', beta='offset'), dict(
+                    slope=.2, offset=.5)
+        ],
+        'Softsign': ['softsign', ['X'], ['Out']],
+        'Softplus': ['softplus', ['X'], ['Out']],
+        'Exp': ['exp', ['X'], ['Out']],
+        'Softmax': ['softmax', ['X'], ['Out'], dict(), dict(axis=1)],
+        'Sqrt': ['sqrt', ['X'], ['Out']],
+        'Floor': ['floor', ['X'], ['Out']],
+        'Abs': ['abs', ['X'], ['Out']],
+    }
+    # fluid op -> list of (predicate(inputs, outputs, attrs), message) pairs
+    # asserted by directly_map; empty here
+    default_ioa_constraint = {}
+    def __init__(self, decoder):
+        """ set up an empty conversion state for the graph of 'decoder'
+        Args:
+            decoder (object): decoder whose 'graph' attribute is converted
+        """
+        super(OpSet9, self).__init__()
+        self.graph = decoder.graph
+        # shapes recorded by place_holder, one per graph input
+        self.input_shapes = []
+        # layer name -> weight data collected while mapping
+        self.weights = {}
+        # names of nodes recorded by mappers such as Expand
+        self.omit_nodes = []
+        # op type -> source code of the custom layers used so far
+        self.used_custom_layers = {}
+    @print_mapping_info
+    def directly_map(self, node, name='', *args, **kwargs):
+        """ map an op listed in default_op_mapping onto a single fluid layer
+        Args:
+            node (object): single-input graph node to convert
+            name, *args, **kwargs: accepted for call compatibility, unused
+        Raises:
+            KeyError: if node.layer_type is not in default_op_mapping
+            AssertionError: if a constraint fails or the node does not have
+                exactly one input
+        """
+        inputs = node.layer.input
+        outputs = node.layer.output
+        op_type = node.layer_type
+        attrs = node.attr_map
+        # Pad a copy of the table row with the default field values; extending
+        # the row itself would modify the shared class-level table in place.
+        info = list(self.default_op_mapping[op_type])
+        info.extend(
+            list(self.default_op_mapping_field_values.values())[len(info):])
+        (
+            fluid_op,
+            fluid_input_args,
+            fluid_output_args,
+            attr_mapping,
+            default_attrs,
+            input_perm,
+            output_perm,
+            fill_name_field, ) = info
+        if fluid_op in self.default_ioa_constraint:
+            for predicate, message in self.default_ioa_constraint[fluid_op]:
+                assert predicate(inputs, outputs, attrs), message
+        # rename the ONNX attributes; names mapped to '' or '_' are dropped
+        mapped_attrs = {
+            attr_mapping.get(key, key): value
+            for key, value in attrs.items()
+        }
+        mapped_attrs.pop('', None)
+        mapped_attrs.pop('_', None)
+        # explicit attributes override the defaults of the table row
+        fluid_attrs = default_attrs.copy()
+        fluid_attrs.update(mapped_attrs)
+        inputs = inputs if input_perm is None else list(
+            map(lambda i: inputs[i], input_perm))
+        val_inps = [
+            self.graph.get_input_node(
+                node, idx=idx, copy=True) for idx, _ in enumerate(inputs)
+        ]
+        val_outs = outputs if output_perm is None else list(
+            map(lambda i: outputs[i], output_perm))
+        attr = fluid_attrs
+        assert len(val_inps) == 1, 'directly_map error with multi inputs'
+        # 'shape' and 'erf' are emitted without a 'name' argument
+        if fluid_op not in ['shape', 'erf']:
+            attr['name'] = string(node.layer_name)
+        node.fluid_code.add_layer(
+            fluid_op, inputs=val_inps[0], output=val_outs[0], param_attr=attr)
+        if fluid_op in ['shape']:
+            # the shape result is cast to int64 in place
+            node.fluid_code.add_layer(
+                'cast',
+                inputs=val_outs[0],
+                output=val_outs[0],
+                param_attr={'dtype': string('int64')})
+    @print_mapping_info
+    def deal_custom_layer(self, node):
+        """ emit the call of a registered custom layer and remember its source
+        Args:
+            node (object): graph node whose layer_type is a custom layer
+        """
+        op = node.layer_type
+        custom_code, func = make_custom_layer(node)
+        child_func_code, child_func = make_custom_child_func(node)
+        params = get_params(node.layer, node.layer_type)
+        arg_names, kwargs = set_args(func, params)
+        kwargs['name'] = string(node.layer_name)
+        node.fluid_code.add_layer(
+            func.__code__.co_name,
+            inputs=node.inputs,
+            output=node,
+            param_attr=kwargs,
+            is_custom_layer=True)
+        # the source of each custom layer is recorded once
+        if op in self.used_custom_layers:
+            return
+        self.used_custom_layers[op] = custom_code
+        child_key = op + '_child_func'
+        if child_func_code is not None and child_key not in self.used_custom_layers:
+            self.used_custom_layers[child_key] = child_func_code
+    @print_mapping_info
+    def elementwise_map(self, node):
+        """ map a binary ONNX op onto the fluid layer named in elementwise_ops
+        Args:
+            node (object): graph node with two inputs
+        """
+        assert node.layer_type in self.elementwise_ops
+        fluid_op = self.elementwise_ops[node.layer_type]
+        operands = {
+            'x': self.graph.get_input_node(
+                node, idx=0, copy=True),
+            'y': self.graph.get_input_node(
+                node, idx=1, copy=True),
+        }
+        node.fluid_code.add_layer(
+            fluid_op, inputs=operands, output=node, param_attr=None)
+    @print_mapping_info
+    def place_holder(self, node):
+        """ emit a fluid 'data' layer for a graph input
+        Args:
+            node (object): graph input node providing out_shapes, dtype and
+                layer_name
+        Raises:
+            AssertionError: if a dimension other than the first one is 0
+        """
+        # out_shapes is read twice on purpose, exactly as before: whether both
+        # reads return the same list object is not visible here (TODO confirm)
+        self.input_shapes.append(node.out_shapes[0])
+        shape = node.out_shapes[0]
+        for i, dim_shape in enumerate(shape):
+            if dim_shape != 0:
+                continue
+            # Only the leading dimension may be 0; it is replaced by 1. The
+            # previous `assert '<message>'` tested a non-empty string and
+            # therefore never reported the problem.
+            assert i == 0, 'shape of input is not assigned'
+            shape[i] = 1
+        attr = {
+            "dtype": string(node.dtype),
+            "shape": shape,
+            "name": string(node.layer_name),
+            "append_batch_size": 'False'
+        }
+        node.fluid_code.add_layer(
+            "data", inputs=None, output=node, param_attr=attr)
+    @print_mapping_info
+    def create_parameter(self, node, parameter=None):
+        """ emit a 'create_parameter' layer and record the weight data
+        Args:
+            node (object): graph node holding the weight
+            parameter (object): when given, it is used in place of 'node'
+        """
+        node = parameter if parameter is not None else node
+        dtype = node.dtype
+        shape = node.out_shapes[0]
+        # a 0-d weight is declared with shape [1]
+        if len(node.weight.shape) == 0:
+            shape = [1]
+        self.weights[node.layer_name] = node.weight
+        attr = {
+            'dtype': string(dtype),
+            'shape': shape,
+            'name': string(node.layer_name),
+            'default_initializer': 'Constant(0.0)'
+        }
+        is_bool = dtype == 'bool'
+        if is_bool:
+            # bool parameters are declared as int64 and cast back afterwards
+            attr['dtype'] = string('int64')
+        elif dtype == 'uint8':
+            # uint8 parameters are declared as float32
+            attr['dtype'] = string('float32')
+        node.fluid_code.add_layer(
+            "create_parameter", inputs=None, output=node, param_attr=attr)
+        if is_bool:
+            node.fluid_code.add_layer(
+                "cast",
+                inputs=node,
+                output=node,
+                param_attr={'dtype': string('bool')})
+    def _pad_if_asymmetric(self, node, pads, val_name):  # pads: SSEE
+        """ insert an explicit Pad layer when begin and end pads differ
+        Args:
+            node (object): graph node the pads belong to
+            pads (sequence): all begin pads followed by all end pads
+            val_name: value returned unchanged when no Pad layer is needed
+        Returns:
+            (pads, value): the begin pads and 'val_name' for symmetric pads,
+            otherwise zero pads and the output of the inserted Pad layer
+        """
+        assert len(pads) % 2 == 0
+        ndims = len(pads) // 2
+        is_symmetric = all(pads[dim] == pads[ndims + dim]
+                           for dim in range(ndims))
+        if not is_symmetric:
+            return [0] * ndims, self.Pad(node, op_independent=False)
+        return pads[:ndims], val_name
+    def _interpolate(self, node):
+        """ shared implementation of the Resize and Upsample mappers
+        Args:
+            node (object): Resize or Upsample graph node
+        """
+        val_x = self.graph.get_input_node(node, idx=0, copy=True)
+        inputs = {'input': val_x}
+        op = node.layer_type
+        if op == 'Upsample':
+            inputs['scale'] = self.graph.get_input_node(node, idx=1, copy=True)
+        elif op == 'Resize':
+            n_inputs = len(node.layer.input)
+            # input index of the scales: 1 with two inputs (opset 10),
+            # 2 with three inputs (opset 11)
+            scale_idx = {2: 1, 3: 2}.get(n_inputs)
+            if scale_idx is not None:
+                inputs['scale'] = self.graph.get_input_node(
+                    node, idx=scale_idx, copy=True)
+            elif n_inputs == 4:
+                # opset 11 with explicit sizes: split them into two halves and
+                # pass the second half as int32 output shape
+                val_sizes = self.graph.get_input_node(node, idx=3, copy=True)
+                var_nc = val_sizes.layer_name + '_nc'
+                var_hw = val_sizes.layer_name + '_hw'
+                node.fluid_code.add_layer(
+                    'split',
+                    inputs=val_sizes,
+                    output=var_nc + ',' + var_hw,
+                    param_attr={
+                        'dim': 0,
+                        'num_or_sections': [2, 2],
+                    })
+                node.fluid_code.add_layer(
+                    "cast",
+                    inputs=var_hw,
+                    output=var_hw,
+                    param_attr={'dtype': string('int32')})
+                inputs['out_shape'] = var_hw
+        attr = {'name': string(node.layer_name)}
+        mode = node.get_attr('mode', 'nearest')
+        if 'linear' in mode:
+            print(
+                'Warnning: paddle not support op:resize wiht mode: linear, we use bilinear replace linear'
+            )
+            fluid_op = 'resize_bilinear'
+        else:
+            fluid_op = 'resize_{}'.format(mode)
+        node.fluid_code.add_layer(
+            fluid_op, inputs=inputs, output=node, param_attr=attr)
+    @print_mapping_info
+    def RoiAlign(self, node):
+        """ map RoiAlign onto the fluid 'roi_align' layer
+        Args:
+            node (object): graph node with the feature map and the rois as
+                first and second input
+        """
+        feature = self.graph.get_input_node(node, idx=0, copy=True)
+        rois = self.graph.get_input_node(node, idx=1, copy=True)
+        # (fluid argument, ONNX attribute) pairs
+        renames = (
+            ('pooled_height', 'output_height'),
+            ('pooled_width', 'output_width'),
+            ('spatial_scale', 'spatial_scale'),
+            ('sampling_ratio', 'sampling_ratio'), )
+        attr = {
+            fluid_key: node.get_attr(onnx_key)
+            for fluid_key, onnx_key in renames
+        }
+        node.fluid_code.add_layer(
+            'roi_align',
+            inputs={'input': feature,
+                    'rois': rois},
+            output=node,
+            param_attr=attr)
+    @print_mapping_info
+    def MaxRoiPool(self, node):
+        """ map MaxRoiPool onto the fluid 'roi_pool' layer
+        Args:
+            node (object): graph node with the feature map and the rois as
+                first and second input
+        """
+        feature = self.graph.get_input_node(node, idx=0, copy=True)
+        rois = self.graph.get_input_node(node, idx=1, copy=True)
+        scale = node.get_attr('spatial_scale')
+        # 'pooled_shape' holds (height, width)
+        height, width = node.get_attr('pooled_shape')
+        node.fluid_code.add_layer(
+            'roi_pool',
+            inputs={'input': feature,
+                    'rois': rois},
+            output=node,
+            param_attr={
+                'pooled_height': height,
+                'pooled_width': width,
+                'spatial_scale': scale,
+            })
+    @print_mapping_info
+    def Pad(self, node, op_independent=True):
+        """ map Pad onto the fluid 'pad' or 'pad2d' layer
+        Args:
+            node (object): graph node carrying the 'pads' attribute, laid out
+                as all begin pads followed by all end pads
+            op_independent (bool): True when the node itself is a Pad op;
+                False when the padding is inserted in front of another op
+        Returns:
+            None when op_independent is True, otherwise the name of the padded
+            output (node.layer_name + '_paded')
+        """
+        val_x = self.graph.get_input_node(node, idx=0, copy=True)
+        pads = node.get_attr('pads')
+        mode = node.get_attr('mode', 'constant')
+        value = node.get_attr('value', 0.)
+        data_shape = val_x.out_shapes[0]
+        output_shape = node.out_shapes[0]
+        assume_pad2d = False
+        if len(pads) == 4:
+            # four pads are treated as padding of H and W when the mode is not
+            # 'constant' or when input/output are 4-D
+            assume_pad2d |= mode != 'constant'
+            if data_shape:
+                assume_pad2d |= len(data_shape) == 4  # NCHW
+            if output_shape:
+                assume_pad2d |= len(output_shape) == 4  # NCHW
+        if assume_pad2d:
+            fluid_op = 'pad2d'
+            # pad_value is passed here as well, otherwise a non-zero constant
+            # fill value would be lost
+            attr = {
+                'data_format': string('NCHW'),
+                'mode': string(mode),
+                'pad_value': value,
+            }
+        else:
+            fluid_op = 'pad'
+            attr = {'pad_value': value}
+        paddings = []
+        if len(pads) > 0 and len(pads) % 2 == 0:
+            # SSEE -> SESE for any rank; identical to the former special cases
+            # for 4 and 8 pads, which left every other length empty
+            paddings = np.array(pads).reshape(
+                (2, -1)).transpose().flatten().tolist()
+        if len(pads) == 8 and sum(paddings[:4]) == 0:
+            # 4-D input padded only on its last two axes
+            fluid_op = 'pad2d'
+            paddings = paddings[4:]
+            attr['mode'] = string(mode)
+        attr['paddings'] = paddings
+        if op_independent:
+            attr['name'] = string(node.layer_name)
+            node.fluid_code.add_layer(
+                fluid_op, inputs=val_x, output=node, param_attr=attr)
+        else:
+            padded_name = node.layer_name + '_paded'
+            attr['name'] = string(padded_name)
+            node.fluid_code.add_layer(
+                fluid_op, inputs=val_x, output=padded_name, param_attr=attr)
+            return padded_name
+    @print_mapping_info
+    def Unsqueeze(self, node):
+        """ map Unsqueeze onto the fluid 'unsqueeze' (or 'reshape') layer
+        Args:
+            node (object): graph node carrying the 'axes' attribute
+        """
+        val_x = self.graph.get_input_node(node, idx=0, copy=True)
+        axes = node.get_attr('axes')
+        attr = {'axes': axes, 'name': string(node.layer_name)}
+        if len(val_x.out_shapes[0]) == 0:
+            # an input with an empty shape is reshaped to [1] instead
+            if node.layer_name:
+                node.fluid_code.add_layer(
+                    'reshape',
+                    inputs=val_x,
+                    output=node,
+                    param_attr={'shape': [1]})
+            return
+        source = val_x
+        if str(val_x.dtype) == 'bool':
+            # bool inputs are cast to int64 before the unsqueeze
+            source = val_x.layer_name + '_cast'
+            node.fluid_code.add_layer(
+                'cast',
+                inputs=val_x,
+                output=source,
+                param_attr={'dtype': string('int64')})
+        node.fluid_code.add_layer(
+            'unsqueeze', inputs=source, output=node, param_attr=attr)
+    @print_mapping_info
+    def Shrink(self, node):
+        """ map Shrink (with zero bias) onto the fluid 'hard_shrink' layer
+        Args:
+            node (object): graph node with optional 'bias' and 'lambd'
+        Raises:
+            AssertionError: if 'bias' is not 0
+        """
+        val_x = self.graph.get_input_node(node, idx=0, copy=True)
+        # ONNX defaults: bias = 0.0, lambd = 0.5
+        bias = node.get_attr('bias', 0.0)
+        lambd = node.get_attr('lambd', 0.5)
+        assert bias == 0.0, 'not support bias!=0'
+        # the name is quoted with string() like in every other mapper
+        attr = {'threshold': lambd, 'name': string(node.layer_name)}
+        node.fluid_code.add_layer(
+            'hard_shrink', inputs=val_x, output=node, param_attr=attr)
+    @print_mapping_info
+    def Constant(self, node):
+        """ map Constant onto 'fill_constant' or 'create_parameter'
+        Args:
+            node (object): graph node carrying the 'value' attribute
+        Raises:
+            AssertionError: if the value dtype differs from the output dtype
+        """
+        val_output = self.graph.get_node(node.layer.output[0], copy=True)
+        value = node.get_attr('value')
+        dtype = np.dtype(value.dtype)
+        output_dtype = val_output.dtype
+        if output_dtype:
+            assert dtype == output_dtype, 'tensor dtype unmatches storage dtype'
+        # shape lookup order: attribute, inferred output shape, value shape
+        shape = node.get_attr('shape', None)
+        if shape is None:
+            shape = val_output.out_shapes[0]
+        if shape is None:
+            shape = list(value.shape)
+            _logger.warning('in (Constant -> %s): '
+                            'attribute "shape" of %s not inferred, '
+                            'using value as 1-D tensor may lead to fails',
+                            val_output.layer_name, val_output.layer_name)
+        if len(value) == 1:
+            # single element: emit fill_constant, int64 is declared as int32
+            scalar = value.tolist()[0]
+            fill_dtype = 'int32' if dtype.name == 'int64' else dtype
+            node.fluid_code.add_layer(
+                'fill_constant',
+                inputs=None,
+                output=node,
+                param_attr={
+                    'shape': [1],
+                    'dtype': string(fill_dtype),
+                    'value': scalar
+                })
+            return
+        # several elements: store the data as weight, uint8 is declared as int64
+        param_dtype = 'int64' if dtype.name == 'uint8' else dtype
+        self.weights[node.layer_name] = np.reshape(value, shape)
+        node.fluid_code.add_layer(
+            "create_parameter",
+            inputs=None,
+            output=node,
+            param_attr={
+                'dtype': string(param_dtype),
+                'shape': shape,
+                'name': string(node.layer_name),
+                'default_initializer': 'Constant(0.0)'
+            })
+    @print_mapping_info
+    def Resize(self, node):
+        """ map Resize; all work is done by _interpolate
+        """
+        self._interpolate(node)
+    @print_mapping_info
+    def Upsample(self, node):
+        """ map Upsample; all work is done by _interpolate
+        """
+        self._interpolate(node)
+    @print_mapping_info
+    def InstanceNormalization(self, node):
+        """ map InstanceNormalization onto the fluid 'instance_norm' layer
+        Args:
+            node (object): graph node with data, scale and bias as inputs
+        """
+        val_x, val_scale, val_b = (self.graph.get_input_node(
+            node, idx=idx, copy=True) for idx in range(3))
+        # scale and bias are referenced by the names of their nodes
+        attr = {
+            'epsilon': node.get_attr('epsilon', 1e-5),
+            'param_attr': string(val_scale.layer_name),
+            'bias_attr': string(val_b.layer_name)
+        }
+        node.fluid_code.add_layer(
+            "instance_norm", inputs=val_x, output=node, param_attr=attr)
+    @print_mapping_info
+    def Expand(self, node):
+        """ map Expand as a multiplication with a tensor of ones
+        Args:
+            node (object): graph node with the data and the target shape as
+                first and second input
+        """
+        val_x = self.graph.get_input_node(node, idx=0, copy=True)
+        val_shape = self.graph.get_input_node(node, idx=1, copy=True)
+        # a shape node with a single output is added to omit_nodes
+        if len(val_shape.outputs) == 1:
+            self.omit_nodes.append(val_shape.layer_name)
+        ones_name = node.layer_name + '_ones'
+        node.fluid_code.add_layer(
+            'fill_constant',
+            inputs=None,
+            output=ones_name,
+            param_attr={
+                'shape': val_shape.layer_name,
+                'dtype': string(val_x.dtype),
+                'value': 1
+            })
+        # multiplying the ones by the input yields the expanded tensor
+        node.fluid_code.add_layer(
+            'elementwise_mul',
+            inputs={'x': ones_name,
+                    'y': val_x},
+            output=node.layer_name,
+            param_attr=None)
+    @print_mapping_info
+    def Gather(self, node):
+        """ map Gather onto fluid 'gather' plus the needed transposes/reshapes
+        The ONNX result has the shape
+        data_shape[:axis] + indices_shape + data_shape[axis + 1:].
+        Since the emitted 'gather' selects along axis 0, other axes are moved
+        to the front first and moved back afterwards; indices with more than
+        one dimension are flattened for the gather and restored by a reshape.
+        Args:
+            node (object): graph node with the data and the indices as first
+                and second input and an optional 'axis' attribute
+        """
+        val_x = self.graph.get_input_node(node, idx=0, copy=True)
+        indices = self.graph.get_input_node(node, idx=1, copy=True)
+        indices_shape = indices.out_shapes[0]
+        val_x_shape = val_x.out_shapes[0]
+        axis = node.get_attr('axis', 0)
+        # a negative axis counts from the back; without this no branch below
+        # matched and no code was emitted at all
+        if axis < 0 and val_x_shape:
+            axis += len(val_x_shape)
+        if axis == 0 and len(indices_shape) <= 1:
+            if len(val_x_shape) > 1 and len(indices_shape) == 0:
+                # scalar index into data of rank > 1: drop the gathered axis
+                gather_ = node.layer_name + '_1'
+                node.fluid_code.add_layer(
+                    'gather',
+                    inputs={'input': val_x,
+                            'index': indices},
+                    output=gather_,
+                    param_attr=None)
+                node.fluid_code.add_layer(
+                    'squeeze',
+                    inputs={'input': gather_,
+                            'axes': [0]},
+                    output=node,
+                    param_attr=None)
+            else:
+                node.fluid_code.add_layer(
+                    'gather',
+                    inputs={'input': val_x,
+                            'index': indices},
+                    output=node,
+                    param_attr=None)
+        elif axis > 0 and len(indices_shape) <= 1:
+            perm = list(range(len(val_x_shape)))
+            perm = [axis] + perm[:axis] + perm[axis + 1:]
+            # The way back needs the inverse permutation; reusing 'perm' is
+            # only correct for axis == 1, where it is its own inverse.
+            perm_back = [perm.index(i) for i in range(len(perm))]
+            name_trans = val_x.layer_name + '_trans'
+            node.fluid_code.add_layer(
+                'transpose',
+                inputs=val_x,
+                output=name_trans,
+                param_attr={'perm': perm})
+            node.fluid_code.add_layer(
+                'gather',
+                inputs={'input': name_trans,
+                        'index': indices},
+                output=node,
+                param_attr=None)
+            node.fluid_code.add_layer(
+                'transpose',
+                inputs=node,
+                output=node,
+                param_attr={'perm': perm_back})
+            if len(indices_shape) < 1:
+                node.fluid_code.add_layer(
+                    'squeeze',
+                    inputs={'input': node,
+                            'axes': [axis]},
+                    output=node,
+                    param_attr=None)
+        elif axis == 0 and len(indices_shape) > 1:
+            if val_x_shape is not None and isinstance(val_x,
+                                                      ONNXGraphDataNode):
+                # data is a weight: emit an embedding lookup
+                indices_cast = indices.layer_name + '_cast'
+                node.fluid_code.add_layer(
+                    'cast',
+                    inputs=indices,
+                    output=indices_cast,
+                    param_attr={'dtype': string('int64')})
+                node.fluid_code.add_layer(
+                    'embedding',
+                    inputs=indices_cast,
+                    output=node,
+                    use_fluid=True,
+                    param_attr={
+                        'param_attr': string(val_x.layer_name),
+                        'size': val_x_shape
+                    })
+            else:
+                reshape_shape = reduce(lambda x, y: x * y, indices_shape)
+                indices_reshape = indices.layer_name + '_shape'
+                node.fluid_code.add_layer(
+                    'reshape',
+                    inputs=indices,
+                    output=indices_reshape,
+                    param_attr={'shape': [reshape_shape, ]})
+                node.fluid_code.add_layer(
+                    'gather',
+                    inputs={'input': val_x,
+                            'index': indices_reshape},
+                    output=node,
+                    param_attr=None)
+                # Restore the index dimensions in front of the remaining data
+                # dimensions. The old code indexed indices_shape with
+                # range(rank of data), which is wrong whenever both ranks
+                # differ.
+                reshaped_shape = list(indices_shape) + list(val_x_shape[1:])
+                node.fluid_code.add_layer(
+                    'reshape',
+                    inputs=node,
+                    output=node,
+                    param_attr={'shape': reshaped_shape})
+        elif axis > 0 and len(indices_shape) > 1:
+            reshape_shape = reduce(lambda x, y: x * y, indices_shape)
+            indices_reshape = indices.layer_name + '_shape'
+            node.fluid_code.add_layer(
+                'reshape',
+                inputs=indices,
+                output=indices_reshape,
+                param_attr={'shape': [reshape_shape, ]})
+            perm = list(range(len(val_x_shape)))
+            perm = [axis] + perm[:axis] + perm[axis + 1:]
+            perm_back = [perm.index(i) for i in range(len(perm))]
+            name_trans = val_x.layer_name + '_transpose'
+            node.fluid_code.add_layer(
+                'transpose',
+                inputs=val_x,
+                output=name_trans,
+                param_attr={'perm': perm})
+            node.fluid_code.add_layer(
+                'gather',
+                inputs={'input': name_trans,
+                        'index': indices_reshape},
+                output=node,
+                param_attr=None)
+            # move the gathered (flattened) axis back to position 'axis' ...
+            input_transpose = node.layer_name + '_transpose'
+            node.fluid_code.add_layer(
+                'transpose',
+                inputs=node,
+                output=input_transpose,
+                param_attr={'perm': perm_back})
+            # ... and unfold it into the index dimensions at that position
+            reshaped_shape = list(val_x_shape[:axis]) + list(
+                indices_shape) + list(val_x_shape[axis + 1:])
+            node.fluid_code.add_layer(
+                'reshape',
+                inputs=input_transpose,
+                output=node,
+                param_attr={'shape': reshaped_shape})
+    @print_mapping_info
+    def ScatterND(self, node):
+        """Emit the fluid layers that reproduce ONNX ScatterND.
+        Rank-1 indices are forwarded to a single ``scatter`` layer.  Otherwise
+        the output is assembled as
+        ``scatter_nd(shape, indices, updates) + data * (scatter_nd(shape, indices, -1) + 1)``
+        where ``shape`` is the recorded shape of the data input.
+        """
+        data = self.graph.get_input_node(node, idx=0, copy=True)
+        index = self.graph.get_input_node(node, idx=1, copy=True)
+        new_values = self.graph.get_input_node(node, idx=2, copy=True)
+        if len(index.out_shapes[0]) == 1:
+            scatter_inputs = {
+                'input': data,
+                'index': index,
+                'updates': new_values
+            }
+            node.fluid_code.add_layer(
+                'scatter', inputs=scatter_inputs, output=node, param_attr=None)
+            return
+        # Step 1: the updates scattered into a tensor of the data's shape.
+        scattered = node.layer_name + '_input_inner_indices'
+        node.fluid_code.add_layer(
+            'scatter_nd',
+            inputs={
+                'shape': data.out_shapes[0],
+                'index': index,
+                'updates': new_values
+            },
+            output=scattered,
+            param_attr=None)
+        # Step 2: a tensor of -1 shaped like the updates, scattered the same way.
+        minus_one = node.layer_name + '_constant_minus_one'
+        minus_one_attr = {
+            'shape': new_values.out_shapes[0],
+            'dtype': string(new_values.dtype),
+            'value': -1
+        }
+        node.fluid_code.add_layer(
+            'fill_constant',
+            inputs=None,
+            output=minus_one,
+            param_attr=minus_one_attr)
+        scattered_mask = node.layer_name + '_indices_mask'
+        node.fluid_code.add_layer(
+            'scatter_nd',
+            inputs={
+                'shape': data.out_shapes[0],
+                'index': index,
+                'updates': minus_one
+            },
+            output=scattered_mask,
+            param_attr=None)
+        # Step 3: add 1 to the scattered -1 tensor to obtain the keep-mask.
+        ones = node.layer_name + '_constant_1'
+        ones_attr = {
+            'shape': data.out_shapes[0],
+            'dtype': string(data.dtype),
+            'value': 1
+        }
+        node.fluid_code.add_layer(
+            'fill_constant', inputs=None, output=ones, param_attr=ones_attr)
+        keep_mask = node.layer_name + '_input_out_indices_mask'
+        node.fluid_code.add_layer(
+            "elementwise_add",
+            inputs={"x": scattered_mask,
+                    "y": ones},
+            output=keep_mask,
+            param_attr=None)
+        # Step 4: mask the original data and add the scattered updates.
+        kept = node.layer_name + '_input_out_indices'
+        node.fluid_code.add_layer(
+            "elementwise_mul",
+            inputs={"x": data,
+                    "y": keep_mask},
+            output=kept,
+            param_attr=None)
+        node.fluid_code.add_layer(
+            "elementwise_add",
+            inputs={"x": scattered,
+                    "y": kept},
+            output=node,
+            param_attr=None)
+    @print_mapping_info
+    def Range(self, node):
+        """Emit a fluid ``range`` layer from the start, limit and delta inputs.
+        The ``dtype`` attribute is taken from the start input.
+        """
+        start, limit, delta = (self.graph.get_input_node(
+            node, idx=position, copy=True) for position in range(3))
+        range_attr = {'dtype': string(start.dtype)}
+        node.fluid_code.add_layer(
+            'range',
+            inputs={'start': start,
+                    'end': limit,
+                    'step': delta},
+            output=node,
+            param_attr=range_attr)
+    @print_mapping_info
+    def Slice(self, node):
+        """Emit a fluid ``slice`` layer for ONNX Slice.
+        Both forms are handled: starts/ends/axes/steps supplied as inputs, and
+        starts/ends/axes supplied as attributes.  Constant ``ends`` greater
+        than ``2**31 - 1`` are clamped to that value.  When ``axes`` is omitted
+        and the number of starts is known, it defaults to
+        ``[0, ..., len(starts) - 1]`` as the ONNX specification prescribes.
+        Raises:
+            AssertionError: if a constant ``steps`` input contains anything
+                other than 1.
+        """
+        val_x = self.graph.get_input_node(node, idx=0, copy=True)
+        int32_max = 2**31 - 1
+        axes = None
+        if len(node.inputs) > 1:
+            starts = self.graph.get_input_node(node, idx=1, copy=True)
+            ends = self.graph.get_input_node(node, idx=2, copy=True)
+            if len(node.inputs) > 3:
+                axes = self.graph.get_input_node(node, idx=3, copy=True)
+                axes = _const_weight_or_none(axes, necessary=True)
+            if len(node.inputs) > 4:
+                steps = self.graph.get_input_node(node, idx=4, copy=True)
+                steps = _const_weight_or_none(steps)
+                if steps is not None:
+                    # ``steps`` may hold one entry per axis; a bare
+                    # ``steps == 1`` has no single truth value for a
+                    # multi-element array, so reduce it explicitly.
+                    assert np.all(
+                        steps == 1
+                    ), "Only support convert op:Slice, which attribute:steps == 1"
+            starts_value = _const_weight_or_none(starts)
+            ends_value = _const_weight_or_none(ends)
+            if starts_value is not None and ends_value is not None:
+                # Constant bounds are inlined, so their producers are omitted.
+                self.omit_nodes.append(starts.layer_name)
+                self.omit_nodes.append(ends.layer_name)
+                # Clamp on a copy rather than on the value returned above.
+                ends_value = ends_value.copy()
+                for idx in range(len(ends_value)):
+                    if ends_value[idx] > int32_max:
+                        ends_value[idx] = int32_max
+                if axes is None:
+                    axes = list(range(len(starts_value)))
+                attr = {
+                    "axes": axes,
+                    "starts": starts_value,
+                    "ends": ends_value
+                }
+            else:
+                attr = {
+                    "axes": axes,
+                    "starts": starts.layer_name,
+                    "ends": ends.layer_name
+                }
+                # Runtime bounds are cast to int32 before being passed on.
+                if starts.dtype != 'int32':
+                    starts_cast = starts.layer_name + '_cast'
+                    node.fluid_code.add_layer(
+                        'cast',
+                        inputs=starts,
+                        output=starts_cast,
+                        param_attr={'dtype': string('int32')})
+                    attr['starts'] = starts_cast
+                if ends.dtype != 'int32':
+                    ends_cast = ends.layer_name + '_cast'
+                    node.fluid_code.add_layer(
+                        'cast',
+                        inputs=ends,
+                        output=ends_cast,
+                        param_attr={'dtype': string('int32')})
+                    attr['ends'] = ends_cast
+        else:
+            starts = node.get_attr('starts')
+            # Build a clamped list instead of mutating the node's attribute.
+            ends = [min(end, int32_max) for end in node.get_attr('ends')]
+            axes = node.get_attr('axes')
+            if axes is None:
+                axes = list(range(len(starts)))
+            attr = {"axes": axes, "starts": starts, "ends": ends}
+        node.fluid_code.add_layer(
+            'slice', inputs=val_x, output=node, param_attr=attr)
+    @print_mapping_info
+    def ConstantOfShape(self, node):
+        """Emit a fluid ``fill_constant`` layer for ONNX ConstantOfShape.
+        The fill value and dtype come from the one-element ``value`` attribute.
+        When the attribute is absent, the ONNX default (a float32 zero) is used.
+        Raises:
+            AssertionError: if ``value`` holds more than one element.
+        """
+        val_shape = self.graph.get_input_node(node, idx=0, copy=True)
+        value = node.get_attr('value')
+        if value is None:
+            # ``value`` is optional in ONNX and defaults to float32 zero.
+            value = np.zeros([1], dtype='float32')
+        dtype = value.dtype
+        value = value.tolist()
+        assert len(value) == 1, ('given value not Scalar, shape of value > 1, '
+                                 'this is not supported')
+        attr = {
+            'shape': val_shape.layer_name,
+            'dtype': string(dtype),
+            'value': value[0]
+        }
+        node.fluid_code.add_layer(
+            'fill_constant', inputs=None, output=node, param_attr=attr)
+    @print_mapping_info
+    def Split(self, node):
+        """Emit a fluid ``split`` layer for ONNX Split.
+        ``split`` supplies the section sizes and ``axis`` (default 0) the
+        dimension; the layer output is the node looked up from the first
+        output name.
+        """
+        source = self.graph.get_input_node(node, idx=0, copy=True)
+        target = self.graph.get_node(node.layer.output[0], copy=True)
+        sections = node.get_attr('split')
+        split_axis = node.get_attr('axis', 0)
+        node.fluid_code.add_layer(
+            'split',
+            inputs=source,
+            output=target,
+            param_attr={
+                'num_or_sections': sections,
+                'dim': split_axis,
+                'name': string(node.layer_name)
+            })
+    @print_mapping_info
+    def Reshape(self, node):
+        """Emit a fluid ``reshape`` layer for ONNX Reshape.
+        The target shape is chosen in this order: the constant value of the
+        shape input, the node's own static output shape, the shape input cast
+        from int64 to int32, and finally the shape input as it is.
+        """
+        data = self.graph.get_input_node(node, idx=0, copy=True)
+        shape_node = self.graph.get_input_node(node, idx=1, copy=True)
+        # Looked up exactly as before; the result is not used further here.
+        self.graph.get_node(node.layer.output[0], copy=True)
+        attr = {}
+        const_shape = _const_weight_or_none(shape_node)
+        shape_rank = len(shape_node.out_shapes[0])
+        if const_shape is not None:
+            node.fluid_code.add_layer(
+                'reshape',
+                inputs={'x': data},
+                output=node,
+                param_attr={'shape': const_shape.tolist()})
+            return
+        if len(node.out_shapes[0]) > 0 and _is_static_shape(node.out_shapes[
+                0]):
+            shape_input = node.out_shapes[0]
+        elif shape_node.dtype == 'int64':
+            shape_input = shape_node.layer_name + '_cast'
+            node.fluid_code.add_layer(
+                'cast',
+                inputs=shape_node,
+                output=shape_input,
+                param_attr={'dtype': string('int32')})
+            # The shape may be [] when it comes from a Gather by scalar indices.
+            if shape_rank > 0:
+                node.fluid_code.add_layer(
+                    'reshape',
+                    inputs=shape_input,
+                    output=shape_input,
+                    param_attr={'shape': shape_node.out_shapes[0]})
+        else:
+            shape_input = shape_node
+            # The shape may be [] when it comes from a Gather by scalar indices.
+            if shape_rank > 0:
+                node.fluid_code.add_layer(
+                    'reshape',
+                    inputs=shape_node,
+                    output=shape_node,
+                    param_attr={'shape': shape_node.out_shapes[0]})
+        node.fluid_code.add_layer(
+            'reshape',
+            inputs={'x': data,
+                    'shape': shape_input},
+            output=node,
+            param_attr=attr)
+    @print_mapping_info
+    def Cast(self, node):
+        """Emit a fluid ``cast`` layer for ONNX Cast.
+        The ``to`` attribute is translated through ``TENSOR_TYPE_TO_NP_TYPE``
+        unless it is already a numpy dtype.  When the output node carries a
+        dtype, it must agree with the requested one.
+        """
+        source = self.graph.get_input_node(node, idx=0, copy=True)
+        target = self.graph.get_node(node.layer.output[0], copy=True)
+        to_dtype = node.get_attr('to')
+        if not isinstance(to_dtype, np.dtype):
+            to_dtype = TENSOR_TYPE_TO_NP_TYPE[to_dtype]
+        declared = target.dtype
+        assert (not declared
+                ) or to_dtype == declared, 'dtype of to unmatches output'
+        node.fluid_code.add_layer(
+            'cast',
+            inputs=source,
+            output=node,
+            param_attr={'dtype': string(to_dtype)})
+    @print_mapping_info
+    def Not(self, node):
+        """Emit a fluid ``logical_not`` layer over the single input."""
+        operand = self.graph.get_input_node(node, idx=0, copy=True)
+        emit = node.fluid_code.add_layer
+        emit('logical_not', inputs=operand, output=node)
+    @print_mapping_info
+    def AveragePool(self, node):
+        """Emit a fluid ``pool2d``/``pool3d`` average-pooling layer.
+        ``strides`` defaults to 1 along every spatial axis when the attribute
+        is absent, matching how ``Conv`` reads its strides.  Explicit ``pads``
+        are routed through ``_pad_if_asymmetric``; for ``SAME_UPPER`` and
+        ``SAME_LOWER`` the padding is recomputed from the first two spatial
+        dimensions.
+        Raises:
+            AssertionError: if the kernel is not 2-D or 3-D.
+        """
+        val_x = self.graph.get_input_node(node, idx=0, copy=True)
+        auto_pad = node.get_attr('auto_pad', 'NOTSET')
+        kernel_shape = node.get_attr("kernel_shape")
+        poolnd = len(kernel_shape)
+        # Without a default, a missing attribute would break the SAME padding
+        # computation below and emit ``pool_stride=None``.
+        strides = node.get_attr("strides", [1] * poolnd)
+        ceil_mode = bool(node.get_attr('ceil_mode', 0))
+        pads = node.get_attr('pads', [0] * (poolnd * 2))
+        fluid_op = 'pool{}d'.format(poolnd)
+        assert 2 <= poolnd <= 3, 'only pool2d and pool3d is supported'
+        paddings, val_x = self._pad_if_asymmetric(node, pads, val_x)
+        if auto_pad in ("SAME_UPPER", "SAME_LOWER"):
+            input_shape = val_x.out_shapes[0]
+            pad_h = _get_same_padding(input_shape[2], kernel_shape[0],
+                                      strides[0])
+            pad_w = _get_same_padding(input_shape[3], kernel_shape[1],
+                                      strides[1])
+            paddings = pad_h + pad_w
+        attr = {
+            "pool_size": kernel_shape,
+            "pool_type": string('avg'),
+            "pool_stride": strides,
+            "pool_padding": paddings,
+            "ceil_mode": ceil_mode,
+            "exclusive": 'True',
+            "name": string(node.layer_name)
+        }
+        node.fluid_code.add_layer(
+            fluid_op, inputs=val_x, output=node, param_attr=attr)
+    @print_mapping_info
+    def Concat(self, node):
+        """Emit a fluid ``concat`` layer over all inputs of the node.
+        Inputs that resolve to plain strings are used as names directly; for
+        the others the layer name is used and the dtype is recorded.
+        Raises:
+            AssertionError: if the recorded input dtypes are not all equal.
+        """
+        inputs = []
+        dtypes = set()
+        for i in range(len(node.layer.input)):
+            ipt = self.graph.get_input_node(node, idx=i, copy=True)
+            if isinstance(ipt, str):
+                inputs.append(ipt)
+            else:
+                inputs.append(ipt.layer_name)
+                dtypes.add(ipt.dtype)
+        # The previous ``assert '<message>'`` asserted a non-empty string and
+        # so could never fail; check the actual condition instead.
+        assert len(
+            dtypes
+        ) <= 1, 'Unspported situation happened, please create issue on https://github.com/PaddlePaddle/X2Paddle/issues.'
+        axis = node.get_attr('axis')
+        attr = {'axis': axis}
+        node.fluid_code.add_layer(
+            'concat', inputs=inputs, output=node, param_attr=attr)
+    @print_mapping_info
+    def Flatten(self, node):
+        """Emit a fluid ``flatten`` layer; ``axis`` defaults to 1."""
+        source = self.graph.get_input_node(node, idx=0, copy=True)
+        flatten_axis = node.get_attr('axis', 1)
+        node.fluid_code.add_layer(
+            'flatten',
+            inputs=source,
+            output=node,
+            param_attr={
+                "axis": str(flatten_axis),
+                "name": string(node.layer_name)
+            })
+    @print_mapping_info
+    def Gemm(self, node):
+        """Emit fluid layers computing ``alpha * A' * B' + beta * C``.
+        ``alpha`` and the two transpose flags are folded into a ``matmul``
+        layer.  C is then added as it is when ``beta == 1``, added after a
+        ``scale`` layer when ``beta`` has another non-zero value, and left out
+        when ``beta == 0``.
+        """
+        val_a = self.graph.get_input_node(node, idx=0, copy=True)
+        val_b = self.graph.get_input_node(node, idx=1, copy=True)
+        val_c = self.graph.get_input_node(node, idx=2, copy=True)
+        alpha = node.get_attr('alpha', 1.)  # optional
+        beta = node.get_attr('beta', 1.)  # optional
+        trans_a = bool(node.get_attr('transA', 0))  # optional
+        trans_b = bool(node.get_attr('transB', 0))  # optional
+        val_mm = node.layer_name + '_mm'
+        matmul_inputs = {"x": val_a, "y": val_b}
+        attr_matmul = {
+            "transpose_x": trans_a,
+            "transpose_y": trans_b,
+            "alpha": alpha,
+            "name": string(val_mm)
+        }
+        node.fluid_code.add_layer(
+            'matmul',
+            inputs=matmul_inputs,
+            output=val_mm,
+            param_attr=attr_matmul)
+        if beta == 0:
+            # C contributes nothing, but the product must still be bound to
+            # this node's output name for downstream layers.
+            node.fluid_code.add_layer("assign", inputs=val_mm, output=node)
+            return
+        if beta == 1.:
+            bias_term = val_c
+        else:
+            # Scale C by beta before adding it; the former "Constant" layer
+            # has no fluid counterpart and never multiplied C.
+            bias_term = node.layer_name + '_beta'
+            node.fluid_code.add_layer(
+                "scale",
+                inputs=val_c,
+                output=bias_term,
+                param_attr={'scale': beta})
+        add_inputs = {"x": val_mm, "y": bias_term}
+        attr = {"name": string(node.layer_name)}
+        node.fluid_code.add_layer(
+            "elementwise_add",
+            inputs=add_inputs,
+            output=node,
+            param_attr=attr)
+    @print_mapping_info
+    def Sum(self, node):
+        """Emit a chain of fluid ``elementwise_add`` layers for ONNX Sum.
+        The first two inputs are added into the node's output, and each further
+        input is accumulated onto that output in turn.  A single-input Sum is
+        emitted as an ``assign``.
+        """
+        val_inps = node.layer.input
+        first = self.graph.get_input_node(node, idx=0, copy=True)
+        if len(val_inps) == 1:
+            node.fluid_code.add_layer("assign", inputs=first, output=node)
+            return
+        inputs = {
+            "x": first,
+            "y": self.graph.get_input_node(
+                node, idx=1, copy=True),
+        }
+        node.fluid_code.add_layer("elementwise_add", inputs=inputs, output=node)
+        # Continue from the third input; restarting at index 0 would add the
+        # first two inputs a second time and skip the remaining ones.
+        for idx in range(2, len(val_inps)):
+            y = self.graph.get_input_node(node, idx=idx, copy=True)
+            inputs = {
+                "x": node.layer_name,
+                "y": y,
+            }
+            node.fluid_code.add_layer(
+                "elementwise_add", inputs=inputs, output=node)
+    @print_mapping_info
+    def MatMul(self, node):
+        """Emit a fluid ``matmul`` layer for ONNX MatMul.
+        When the right operand has a leading dimension of 1 while the left
+        operand's first and last dimensions are both different from 1, the
+        right operand is squeezed on axis 0 first.
+        """
+        lhs = self.graph.get_input_node(node, idx=0, copy=True)
+        rhs = self.graph.get_input_node(node, idx=1, copy=True)
+        lhs_shape = lhs.out_shapes[0]
+        rhs_shape = rhs.out_shapes[0]
+        operands = {"x": lhs, "y": rhs}
+        squeeze_rhs = (rhs_shape[0] == 1 and lhs_shape[-1] != 1 and
+                       lhs_shape[0] != 1)
+        if squeeze_rhs:
+            squeezed = rhs.layer_name + '_squeeze'
+            node.fluid_code.add_layer(
+                "squeeze",
+                inputs=rhs,
+                output=squeezed,
+                param_attr={'axes': [0]})
+            operands['y'] = squeezed
+        node.fluid_code.add_layer(
+            "matmul", inputs=operands, output=node, param_attr=None)
+    @print_mapping_info
+    def BatchNormalization(self, node):
+        """Emit a fluid ``batch_norm`` layer in test mode.
+        Scale, bias, mean and variance inputs are referenced by name as layer
+        parameters, and their own nodes are added to ``omit_nodes``.
+        """
+        data = self.graph.get_input_node(node, idx=0, copy=True)
+        scale, bias, mean, variance = (self.graph.get_input_node(
+            node, idx=position, copy=True) for position in range(1, 5))
+        for parameter in (scale, bias, mean, variance):
+            self.omit_nodes.append(parameter.layer_name)
+        momentum = node.get_attr('momentum', .9)
+        epsilon = node.get_attr('epsilon', 1e-5)
+        # The ``spatial`` attribute exists in BatchNormalization-1, 6 and 7.
+        use_global_stats = bool(node.get_attr('spatial'))
+        node.fluid_code.add_layer(
+            "batch_norm",
+            inputs=data,
+            output=node,
+            param_attr={
+                "momentum": momentum,
+                "epsilon": epsilon,
+                "data_layout": string('NCHW'),
+                "is_test": True,
+                "param_attr": string(scale.layer_name),
+                "bias_attr": string(bias.layer_name),
+                "moving_mean_name": string(mean.layer_name),
+                "moving_variance_name": string(variance.layer_name),
+                "use_global_stats": use_global_stats,
+                "name": string(node.layer_name)
+            })
+    @print_mapping_info
+    def Transpose(self, node):
+        """Emit a fluid ``transpose`` layer using the ``perm`` attribute."""
+        source = self.graph.get_input_node(node, idx=0, copy=True)
+        permutation = node.get_attr('perm')
+        node.fluid_code.add_layer(
+            "transpose",
+            inputs=source,
+            output=node,
+            param_attr={'perm': permutation,
+                        "name": string(node.layer_name)})
+    @print_mapping_info
+    def Relu(self, node):
+        """Emit a fluid ``relu`` layer named after this node."""
+        source = self.graph.get_input_node(node, idx=0, copy=True)
+        node.fluid_code.add_layer(
+            "relu",
+            inputs=source,
+            output=node,
+            param_attr={"name": string(node.layer_name)})
+    @print_mapping_info
+    def PRelu(self, node):
+        """Emit a fluid ``prelu`` layer for ONNX PRelu.
+        The mode follows the rank of the slope: rank 1 gives ``all``, rank
+        above 2 gives ``element``, anything else gives ``channel``.
+        """
+        source = self.graph.get_input_node(node, idx=0, copy=True)
+        slope = self.graph.get_input_node(node, idx=1, copy=True)
+        slope_rank = len(slope.out_shapes[0])
+        if slope_rank == 1:
+            mode = 'all'
+        elif slope_rank > 2:
+            mode = 'element'
+        else:
+            mode = 'channel'
+        node.fluid_code.add_layer(
+            "prelu",
+            inputs=source,
+            output=node,
+            param_attr={
+                "param_attr": string(slope.layer_name),
+                'mode': string(mode)
+            })
+    @print_mapping_info
+    def Squeeze(self, node):
+        """Emit a fluid ``squeeze`` layer for ONNX Squeeze.
+        A rank-1 input is not squeezed; it is passed through a ``cast`` to its
+        own dtype instead.
+        """
+        source = self.graph.get_input_node(node, idx=0, copy=True)
+        squeeze_attr = {
+            'axes': node.get_attr('axes'),
+            "name": string(node.layer_name)
+        }
+        if len(source.out_shapes[0]) != 1:
+            node.fluid_code.add_layer(
+                "squeeze", inputs=source, output=node, param_attr=squeeze_attr)
+            return
+        node.fluid_code.add_layer(
+            "cast",
+            inputs=source,
+            output=node,
+            param_attr={'dtype': string(source.dtype)})
+    @print_mapping_info
+    def Equal(self, node):
+        """Emit a fluid ``equal`` layer comparing the two inputs."""
+        lhs, rhs = (self.graph.get_input_node(
+            node, idx=position, copy=True) for position in (0, 1))
+        operands = {'x': lhs, 'y': rhs}
+        node.fluid_code.add_layer(
+            "equal", inputs=operands, output=node, param_attr=None)
+    @print_mapping_info
+    def Greater(self, node):
+        """Emit a fluid ``greater_than`` layer comparing the two inputs."""
+        lhs, rhs = (self.graph.get_input_node(
+            node, idx=position, copy=True) for position in (0, 1))
+        operands = {'x': lhs, 'y': rhs}
+        node.fluid_code.add_layer(
+            "greater_than", inputs=operands, output=node, param_attr=None)
+    @print_mapping_info
+    def Where(self, node):
+        """Emit fluid layers computing ``x * cast(cond) + y * cast(not cond)``.
+        Both masks are cast to the dtype of the ``x`` input before the
+        multiplications.
+        """
+        cond = self.graph.get_input_node(node, idx=0, copy=True)
+        when_true = self.graph.get_input_node(node, idx=1, copy=True)
+        when_false = self.graph.get_input_node(node, idx=2, copy=True)
+        # Inverted condition and its numeric mask.
+        inverted = cond.layer_name + '_not'
+        node.fluid_code.add_layer(
+            "logical_not", inputs=cond, output=inverted, param_attr=None)
+        inverted_mask = inverted + '_cast'
+        node.fluid_code.add_layer(
+            "cast",
+            inputs=inverted,
+            output=inverted_mask,
+            param_attr={'dtype': string(when_true.dtype)})
+        # Numeric mask of the condition itself.
+        cond_mask = cond.layer_name + '_cast'
+        node.fluid_code.add_layer(
+            "cast",
+            inputs=cond,
+            output=cond_mask,
+            param_attr={'dtype': string(when_true.dtype)})
+        # Mask each branch, then add the two masked tensors.
+        true_part = when_true.layer_name + '_mul'
+        node.fluid_code.add_layer(
+            "elementwise_mul",
+            inputs={'x': when_true,
+                    'y': cond_mask},
+            output=true_part,
+            param_attr=None)
+        false_part = when_false.layer_name + '_mul'
+        node.fluid_code.add_layer(
+            "elementwise_mul",
+            inputs={'x': when_false,
+                    'y': inverted_mask},
+            output=false_part,
+            param_attr=None)
+        node.fluid_code.add_layer(
+            "elementwise_add",
+            inputs={'x': true_part,
+                    'y': false_part},
+            output=node,
+            param_attr=None)
+    @print_mapping_info
+    def NonZero(self, node):
+        """Emit fluid layers for ONNX NonZero.
+        A rank-1 input gets ``nonzero`` followed by a ``[1, 0]`` transpose.  A
+        higher-rank input gets ``nonzero``, ``split`` and ``concat``.  The
+        intermediate layers write back into the input node.  A rank-0 input
+        emits nothing.
+        """
+        source = self.graph.get_input_node(node, idx=0, copy=True)
+        rank = len(source.out_shapes[0])
+        if rank == 1:
+            node.fluid_code.add_layer("nonzero", inputs=source, output=source)
+            node.fluid_code.add_layer(
+                "transpose",
+                inputs=source,
+                output=node,
+                param_attr={'perm': [1, 0]})
+        elif rank > 1:
+            node.fluid_code.add_layer("nonzero", inputs=source, output=source)
+            split_attr = {'num_or_sections': 1, 'dim': rank}
+            node.fluid_code.add_layer(
+                "split", inputs=source, output=source, param_attr=split_attr)
+            node.fluid_code.add_layer("concat", inputs=source, output=node)
+    @print_mapping_info
+    def Identity(self, node):
+        """Emit a fluid ``assign`` layer copying the input to this node."""
+        source = self.graph.get_input_node(node, idx=0, copy=True)
+        emit = node.fluid_code.add_layer
+        emit("assign", inputs=source, output=node)
+    @print_mapping_info
+    def Tile(self, node):
+        """Emit a fluid ``expand`` layer for ONNX Tile.
+        Constant repeats are inlined, with a bare int wrapped in a list.
+        Otherwise the repeats input is referenced by name and cast to int32
+        first when its dtype differs.
+        """
+        source = self.graph.get_input_node(node, idx=0, copy=True)
+        repeats_node = self.graph.get_input_node(node, idx=1, copy=True)
+        times = _const_weight_or_none(repeats_node)
+        if times is None:
+            times = repeats_node.layer_name
+            if repeats_node.dtype != 'int32':
+                cast_attr = {"dtype": string("int32")}
+                casted = "{}.tmp".format(times)
+                node.fluid_code.add_layer(
+                    "cast", inputs=times, output=casted, param_attr=cast_attr)
+                times = casted
+        elif isinstance(times, int):
+            times = [times]
+        node.fluid_code.add_layer(
+            "expand",
+            inputs=source,
+            output=node,
+            param_attr={
+                'expand_times': times,
+                "name": string(node.layer_name),
+            })
+    @print_mapping_info
+    def MaxPool(self, node):
+        """Emit a fluid ``pool2d``/``pool3d`` max-pooling layer.
+        ``strides`` defaults to 1 along every spatial axis when the attribute
+        is absent, matching how ``Conv`` reads its strides.  Explicit ``pads``
+        are routed through ``_pad_if_asymmetric``; for ``SAME_UPPER`` and
+        ``SAME_LOWER`` the padding is recomputed from the first two spatial
+        dimensions.
+        Raises:
+            AssertionError: if ``dilations`` is set or the kernel is not 2-D
+                or 3-D.
+        """
+        val_x = self.graph.get_input_node(node, idx=0, copy=True)
+        auto_pad = node.get_attr('auto_pad', 'NOTSET')
+        assert node.get_attr(
+            "dilations") is None, 'only dilations = 0 is supported'  # optional
+        kernel_shape = node.get_attr("kernel_shape")
+        poolnd = len(kernel_shape)
+        # Without a default, a missing attribute would break the SAME padding
+        # computation below and emit ``pool_stride=None``.
+        strides = node.get_attr("strides", [1] * poolnd)
+        ceil_mode = bool(node.get_attr('ceil_mode', 0))  # optional
+        pads = node.get_attr('pads', [0] * (poolnd * 2))  # optional
+        fluid_op = 'pool{}d'.format(poolnd)
+        assert 2 <= poolnd <= 3, 'only pool2d and pool3d is supported'
+        paddings, val_x = self._pad_if_asymmetric(node, pads, val_x)
+        if auto_pad in ("SAME_UPPER", "SAME_LOWER"):
+            input_shape = val_x.out_shapes[0]
+            pad_h = _get_same_padding(input_shape[2], kernel_shape[0],
+                                      strides[0])
+            pad_w = _get_same_padding(input_shape[3], kernel_shape[1],
+                                      strides[1])
+            paddings = pad_h + pad_w
+        attr = {
+            "pool_size": kernel_shape,
+            "pool_type": string("max"),
+            "pool_stride": strides,
+            "pool_padding": paddings,
+            "ceil_mode": ceil_mode,
+            "name": string(node.layer_name),
+            "exclusive": False
+        }
+        node.fluid_code.add_layer(
+            fluid_op, inputs=val_x, output=node, param_attr=attr)
+    def _global_pool(self, node):
+        """Emit a global ``pool2d`` layer for the global pooling ops.
+        The pool type is ``max`` for GlobalMaxPool and ``avg`` for
+        GlobalAveragePool; any other op type leaves it as ``None``.
+        """
+        source = self.graph.get_input_node(node, idx=0, copy=True)
+        # Looked up exactly as before; the result is not used further here.
+        self.graph.get_node(node.layer.output[0], copy=True)
+        op_type = node.layer.op_type
+        if op_type == 'GlobalMaxPool':
+            pool_type = 'max'
+        elif op_type == 'GlobalAveragePool':
+            pool_type = 'avg'
+        else:
+            pool_type = None
+        node.fluid_code.add_layer(
+            'pool2d',
+            inputs=source,
+            output=node,
+            param_attr={
+                "pool_type": string(pool_type),
+                "global_pooling": True,
+                "name": string(node.layer_name)
+            })
+    @print_mapping_info
+    def GlobalMaxPool(self, node):
+        """Map ONNX GlobalMaxPool by delegating to ``_global_pool``."""
+        self._global_pool(node)
+    @print_mapping_info
+    def GlobalAveragePool(self, node):
+        """Map ONNX GlobalAveragePool by delegating to ``_global_pool``."""
+        self._global_pool(node)
+    @print_mapping_info
+    def Conv(self, node):
+        """Emit a fluid ``conv2d``/``conv3d`` layer for ONNX Conv.
+        The weight input, and the bias input when present, are referenced by
+        name as layer parameters and added to ``omit_nodes``.  Explicit
+        ``pads`` go through ``_pad_if_asymmetric``; for ``SAME_UPPER`` and
+        ``SAME_LOWER`` the padding is recomputed from the input shape recorded
+        before that call.
+        """
+        data = self.graph.get_input_node(node, idx=0, copy=True)
+        weight = self.graph.get_input_node(node, idx=1, copy=True)
+        # Looked up exactly as before; the result is not used further here.
+        self.graph.get_node(node.layer.output[0], copy=True)
+        self.omit_nodes.append(weight.layer_name)
+        has_bias = len(node.layer.input) == 3
+        if has_bias:
+            bias = self.graph.get_input_node(node, idx=2, copy=True)
+            self.omit_nodes.append(bias.layer_name)
+        auto_pad = node.get_attr('auto_pad', 'NOTSET')
+        kernel = node.get_attr('kernel_shape')
+        rank = len(kernel)
+        assert 2 <= rank <= 3, 'only conv2d and conv3d is supported'
+        out_channels = weight.out_shapes[0][0]
+        layer_type = 'conv{}d'.format(rank)
+        groups = node.get_attr('group', 1)
+        strides = node.get_attr('strides', [1] * rank)
+        dilations = node.get_attr('dilations', [1] * rank)
+        pads = node.get_attr('pads', [0] * (rank * 2))
+        # Captured before the input is possibly replaced by a padded one.
+        original_shape = data.out_shapes[0]
+        paddings, data = self._pad_if_asymmetric(node, pads, data)
+        if auto_pad in ("SAME_UPPER", "SAME_LOWER"):
+            pad_h = _get_same_padding(original_shape[2], kernel[0], strides[0])
+            pad_w = _get_same_padding(original_shape[3], kernel[1], strides[1])
+            paddings = pad_h + pad_w
+        conv_attr = {
+            "num_filters": out_channels,
+            "filter_size": kernel,
+            "stride": strides,
+            "padding": paddings,
+            "dilation": dilations,
+            "groups": groups,
+            'param_attr': string(weight.layer_name),
+            "name": string(node.layer_name),
+            "bias_attr": string(bias.layer_name) if has_bias else False
+        }
+        node.fluid_code.add_layer(
+            layer_type, inputs=data, output=node, param_attr=conv_attr)
+    @print_mapping_info
+    def ConvTranspose(self, node):
+        """Emit a fluid ``conv2d_transpose``/``conv3d_transpose`` layer.
+        The weight input, and the bias input when present, are referenced by
+        name as layer parameters and added to ``omit_nodes``.  The output size
+        is computed here from the input shape, strides, paddings, dilations,
+        kernel shape and ``output_padding``.
+        """
+        val_x = self.graph.get_input_node(node, idx=0, copy=True)
+        val_w = self.graph.get_input_node(node, idx=1, copy=True)
+        val_b = None
+        if len(node.layer.input) > 2:
+            val_b = self.graph.get_input_node(node, idx=2, copy=True)
+            self.omit_nodes.append(val_b.layer_name)
+        self.omit_nodes.append(val_w.layer_name)
+        # val_y and auto_pad are read but not used below.
+        val_y = self.graph.get_node(node.layer.output[0], copy=True)
+        auto_pad = node.get_attr('auto_pad', 'NOTSET')
+        out_padding = node.get_attr('output_padding', [0, 0])
+        kernel_shape = node.get_attr('kernel_shape')
+        assert kernel_shape, 'kernel_shape not inferred'
+        convnd = len(kernel_shape)
+        assert 2 <= convnd <= 3, 'only conv2d_transpose and conv3d_transpose supported'
+        # NOTE(review): dimension 1 of the weight is used as the filter count
+        # regardless of `group` - confirm this is right for grouped cases.
+        num_out_channels = val_w.out_shapes[0][1]
+        fluid_op = 'conv{}d_transpose'.format(convnd)
+        num_groups = node.get_attr('group', 1)
+        strides = node.get_attr('strides', [1] * convnd)
+        dilations = node.get_attr('dilations', [1] * convnd)
+        # NOTE(review): this value is overwritten a few lines below, so an
+        # explicit `output_shape` attribute is currently ignored - confirm
+        # whether that is intended.
+        output_size = node.get_attr('output_shape', [])
+        pads = node.get_attr('pads', [0] * (convnd * 2))
+        # NOTE(review): the second result is bound to `var_x` (not `val_x`)
+        # and never used; the layer below still consumes the original `val_x`.
+        # Conv assigns it back to `val_x` - confirm which is intended here.
+        paddings, var_x = self._pad_if_asymmetric(node, pads, val_x)
+        # Only two spatial sizes are computed, even when convnd == 3.
+        output_size = [0, 0]
+        output_size[0] = (val_x.out_shapes[0][2] - 1
+                          ) * strides[0] - 2 * paddings[0] + dilations[0] * (
+                              kernel_shape[0] - 1) + 1 + out_padding[0]
+        output_size[1] = (val_x.out_shapes[0][3] - 1
+                          ) * strides[1] - 2 * paddings[1] + dilations[1] * (
+                              kernel_shape[1] - 1) + 1 + out_padding[1]
+        attr = {
+            'num_filters': num_out_channels,
+            # output_size is always a non-empty list here, so `or None` never
+            # takes effect.
+            'output_size': output_size or None,
+            'filter_size': kernel_shape,
+            'padding': paddings,
+            'stride': strides,
+            'dilation': dilations,
+            'groups': num_groups,
+            'param_attr': string(val_w.layer_name),
+            'bias_attr': None if val_b is None else string(val_b.layer_name),
+            'name': string(node.layer_name),
+        }
+        node.fluid_code.add_layer(
+            fluid_op, inputs=val_x, output=node, param_attr=attr)
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/__init__.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/__init__.py
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset10/__init__.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset10/__init__.py
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset10/opset.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset10/opset.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import sys
+import x2paddle
+import os
+import numpy as np
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+import onnx
+from onnx import helper, onnx_pb
+from x2paddle.op_mapper.paddle2onnx.opset9.opset import OpSet9
+class OpSet10(OpSet9):
+    def __init__(self):
+        super(OpSet10, self).__init__()
+    def slice(self, op, block):
+        axes = op.attr('axes')
+        starts = op.attr('starts')
+        ends = op.attr('ends')
+        axes_name = self.get_name(op.type, 'axes')
+        starts_name = self.get_name(op.type, 'starts')
+        ends_name = self.get_name(op.type, 'ends')
+        axes_node = self.make_constant_node(axes_name,
+                                            onnx_pb.TensorProto.INT64, axes)
+        starts_node = self.make_constant_node(starts_name,
+                                              onnx_pb.TensorProto.INT64, starts)
+        ends_node = self.make_constant_node(ends_name,
+                                            onnx_pb.TensorProto.INT64, ends)
+        node = helper.make_node(
+            "Slice",
+            inputs=[op.input('Input')[0], starts_name, ends_name, axes_name],
+            outputs=op.output('Out'), )
+        return [starts_node, ends_node, axes_node, node]
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset10/paddle_custom_layer/__init__.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset10/paddle_custom_layer/__init__.py
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset11/__init__.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset11/__init__.py
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset11/opset.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset11/opset.py
+# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import sys
+import x2paddle
+import os
+import numpy as np
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+import onnx
+from onnx import helper, onnx_pb
+from x2paddle.op_mapper.paddle2onnx.opset10.opset import OpSet10
+class OpSet11(OpSet10):
+    def __init__(self):
+        """Initialise the opset-11 mapper by delegating to the OpSet10 constructor."""
+        super(OpSet11, self).__init__()
+    def relu6(self, op, block):
+        min_name = self.get_name(op.type, 'min')
+        max_name = self.get_name(op.type, 'max')
+        min_node = self.make_constant_node(min_name, onnx_pb.TensorProto.FLOAT,
+                                           0)
+        max_node = self.make_constant_node(max_name, onnx_pb.TensorProto.FLOAT,
+                                           op.attr('threshold'))
+        node = helper.make_node(
+            'Clip',
+            inputs=[op.input('X')[0], min_name, max_name],
+            outputs=op.output('Out'), )
+        return [min_node, max_node, node]
+    def pad2d(self, op, block):
+        x_shape = block.var(op.input('X')[0]).shape
+        paddings = op.attr('paddings')
+        onnx_pads = []
+        #TODO support pads is Variable
+        if op.attr('data_format') == 'NCHW':
+            pads = [
+                0, 0, paddings[0], paddings[2], 0, 0, paddings[1], paddings[3]
+            ]
+        else:
+            pads = [
+                0, paddings[0], paddings[2], 0, 0, paddings[1], paddings[3], 0
+            ]
+        pads_name = self.get_name(op.type, 'pads')
+        pads_node = self.make_constant_node(pads_name,
+                                            onnx_pb.TensorProto.INT64, pads)
+        constant_value_name = self.get_name(op.type, 'constant_value')
+        constant_value_node = self.make_constant_node(constant_value_name,
+                                                      onnx_pb.TensorProto.FLOAT,
+                                                      op.attr('pad_value'))
+        node = helper.make_node(
+            'Pad',
+            inputs=op.input('X') + [pads_name, constant_value_name],
+            outputs=op.output('Out'),
+            mode=op.attr('mode'))
+        return [pads_node, constant_value_node, node]
+    def clip(self, op, block):
+        min_name = self.get_name(op.type, 'min')
+        max_name = self.get_name(op.type, 'max')
+        min_node = self.make_constant_node(min_name, onnx_pb.TensorProto.FLOAT,
+                                           op.attr('min'))
+        max_node = self.make_constant_node(max_name, onnx_pb.TensorProto.FLOAT,
+                                           op.attr('max'))
+        node = helper.make_node(
+            'Clip',
+            inputs=[op.input('X')[0], min_name, max_name],
+            outputs=op.output('Out'))
+        return [min_node, max_node, node]
+    def bilinear_interp(self, op, block):
+        input_names = op.input_names
+        coordinate_transformation_mode = ''
+        align_corners = op.attr('align_corners')
+        align_mode = op.attr('align_mode')
+        if align_corners:
+            coordinate_transformation_mode = 'align_corners'
+        elif align_mode == 1:
+            coordinate_transformation_mode = 'asymmetric'
+        else:
+            coordinate_transformation_mode = 'half_pixel'
+        roi_name = self.get_name(op.type, 'roi')
+        roi_node = self.make_constant_node(roi_name, onnx_pb.TensorProto.FLOAT,
+                                           [1, 1, 1, 1, 1, 1, 1, 1])
+        if ('OutSize' in input_names and len(op.input('OutSize')) > 0) or (
+                'SizeTensor' in input_names and
+                len(op.input('SizeTensor')) > 0):
+            node_list = list()
+            empty_name = self.get_name(op.type, 'empty')
+            empty_tensor = helper.make_tensor(
+                empty_name,
+                onnx_pb.TensorProto.FLOAT, (0, ),
+                np.array([]).astype('float32'),
+                raw=False)
+            empty_node = helper.make_node(
+                'Constant', [], outputs=[empty_name], value=empty_tensor)
+            shape_name0 = self.get_name(op.type, 'shape')
+            shape_node0 = helper.make_node(
+                'Shape', inputs=op.input('X'), outputs=[shape_name0])
+            starts_name = self.get_name(op.type, 'slice.starts')
+            starts_node = self.make_constant_node(
+                starts_name, onnx_pb.TensorProto.INT64, [0])
+            ends_name = self.get_name(op.type, 'slice.ends')
+            ends_node = self.make_constant_node(ends_name,
+                                                onnx_pb.TensorProto.INT64, [2])
+            shape_name1 = self.get_name(op.type, 'shape')
+            shape_node1 = helper.make_node(
+                'Slice',
+                inputs=[shape_name0, starts_name, ends_name],
+                outputs=[shape_name1])
+            node_list.extend([
+                roi_node, empty_node, shape_node0, starts_node, ends_node,
+                shape_node1
+            ])
+            if 'OutSize' in input_names and len(op.input('OutSize')) > 0:
+                cast_shape_name = self.get_name(op.type, "shape.cast")
+                cast_shape_node = helper.make_node(
+                    'Cast',
+                    inputs=op.input('OutSize'),
+                    outputs=[cast_shape_name],
+                    to=onnx_pb.TensorProto.INT64)
+                node_list.append(cast_shape_node)
+            else:
+                concat_shape_name = self.get_name(op.type, "shape.concat")
+                concat_shape_node = helper.make_node(
+                    "Concat",
+                    inputs=op.input('SizeTensor'),
+                    outputs=[concat_shape_name],
+                    axis=0)
+                cast_shape_name = self.get_name(op.type, "shape.cast")
+                cast_shape_node = helper.make_node(
+                    'Cast',
+                    inputs=[concat_shape_name],
+                    outputs=[cast_shape_name],
+                    to=onnx_pb.TensorProto.INT64)
+                node_list.extend([concat_shape_node, cast_shape_node])
+            shape_name3 = self.get_name(op.type, "shape.concat")
+            shape_node3 = helper.make_node(
+                'Concat',
+                inputs=[shape_name1, cast_shape_name],
+                outputs=[shape_name3],
+                axis=0)
+            result_node = helper.make_node(
+                'Resize',
+                inputs=[op.input('X')[0], roi_name, empty_name, shape_name3],
+                outputs=op.output('Out'),
+                mode='linear',
+                coordinate_transformation_mode=coordinate_transformation_mode)
+            node_list.extend([shape_node3, result_node])
+            return node_list
+        elif 'Scale' in input_names and len(op.input('Scale')) > 0:
+            node = helper.make_node(
+                'Resize',
+                inputs=[op.input('X')[0], roi_name, op.input('Scale')[0]],
+                outputs=op.output('Out'),
+                mode='linear',
+                coordinate_transformation_mode=coordinate_transformation_mode)
+        else:
+            out_shape = [op.attr('out_h'), op.attr('out_w')]
+            scale = op.attr('scale')
+            if out_shape.count(-1) > 0:
+                scale_name = self.get_name(op.type, 'scale')
+                scale_node = self.make_constant_node(scale_name,
+                                                     onnx_pb.TensorProto.FLOAT,
+                                                     [1, 1, scale, scale])
+                node = helper.make_node(
+                    'Resize',
+                    inputs=[op.input('X')[0], roi_name, scale_name],
+                    outputs=op.output('Out'),
+                    mode='nearest',
+                    coordinate_transformation_mode=coordinate_transformation_mode
+                )
+                return [scale_node, roi_node, node]
+            else:
+                raise Exception("Unexpected situation happend")
+        return [roi_node, node]
+    def nearest_interp(self, op, block):
+        input_names = op.input_names
+        coordinate_transformation_mode = ''
+        align_corners = op.attr('align_corners')
+        if align_corners:
+            coordinate_transformation_mode = 'align_corners'
+        else:
+            coordinate_transformation_mode = 'half_pixel'
+        roi_name = self.get_name(op.type, 'roi')
+        roi_node = self.make_constant_node(roi_name, onnx_pb.TensorProto.FLOAT,
+                                           [1, 1, 1, 1, 1, 1, 1, 1])
+        if 'OutSize' in input_names and len(op.input('OutSize')) > 0:
+            node = helper.make_node(
+                'Resize',
+                inputs=[op.input('X')[0], roi_name, op.input('OutSize')[0]],
+                outputs=op.output('Out'),
+                mode='nearest',
+                coordinate_transformation_mode=coordinate_transformation_mode)
+        elif 'Scale' in input_names and len(op.input('Scale')) > 0:
+            node = helper.make_node(
+                'Resize',
+                inputs=[op.input('X')[0], roi_name, op.input('Scale')[0]],
+                outputs=op.output('Out'),
+                mode='nearest',
+                coordinate_transformation_mode=coordinate_transformation_mode)
+        else:
+            out_shape = [op.attr('out_h'), op.attr('out_w')]
+            scale = op.attr('scale')
+            if out_shape.count(-1) > 0:
+                scale_name = self.get_name(op.type, 'scale')
+                scale_node = self.make_constant_node(scale_name,
+                                                     onnx_pb.TensorProto.FLOAT,
+                                                     [1, 1, scale, scale])
+                node = helper.make_node(
+                    'Resize',
+                    inputs=[op.input('X')[0], roi_name, scale_name],
+                    outputs=op.output('Out'),
+                    mode='nearest',
+                    coordinate_transformation_mode=coordinate_transformation_mode
+                )
+                return [scale_node, roi_node, node]
+            else:
+                raise Exception("Unexpected situation happend")
+        return [roi_node, node]
+    def hard_swish(self, op, block):
+        min_name = self.get_name(op.type, 'min')
+        max_name = self.get_name(op.type, 'max')
+        scale_name = self.get_name(op.type, 'scale')
+        offset_name = self.get_name(op.type, 'offset')
+        min_node = self.make_constant_node(min_name, onnx_pb.TensorProto.FLOAT,
+                                           0)
+        max_node = self.make_constant_node(max_name, onnx_pb.TensorProto.FLOAT,
+                                           op.attr('threshold'))
+        scale_node = self.make_constant_node(scale_name,
+                                             onnx_pb.TensorProto.FLOAT,
+                                             op.attr('scale'))
+        offset_node = self.make_constant_node(offset_name,
+                                              onnx_pb.TensorProto.FLOAT,
+                                              op.attr('offset'))
+        name0 = self.get_name(op.type, 'add')
+        node0 = helper.make_node(
+            'Add', inputs=[op.input('X')[0], offset_name], outputs=[name0])
+        name1 = self.get_name(op.type, 'relu')
+        node1 = helper.make_node(
+            'Clip',
+            inputs=[name0, min_name, max_name],
+            outputs=[name1], )
+        name2 = self.get_name(op.type, 'mul')
+        node2 = helper.make_node(
+            'Mul', inputs=[op.input('X')[0], name1], outputs=[name2])
+        node3 = helper.make_node(
+            'Div', inputs=[name2, scale_name], outputs=op.output('Out'))
+        return [
+            min_node, max_node, scale_node, offset_node, node0, node1, node2,
+            node3
+        ]
+    def yolo_box(self, op, block):
+        from .paddle_custom_layer.yolo_box import yolo_box
+        return yolo_box(op, block)
+    def multiclass_nms(self, op, block):
+        from .paddle_custom_layer.multiclass_nms import multiclass_nms
+        return multiclass_nms(op, block)
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset11/paddle_custom_layer/__init__.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset11/paddle_custom_layer/__init__.py
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset11/paddle_custom_layer/multiclass_nms.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset11/paddle_custom_layer/multiclass_nms.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import sys
+import os
+import numpy as np
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+import onnx
+import logging
+from onnx import helper, onnx_pb
+def multiclass_nms(op, block):
+    """
+    Convert the paddle multiclass_nms to onnx op.
+    This op is get the select boxes from origin boxes.
+    """
+    inputs = dict()
+    outputs = dict()
+    attrs = dict()
+    for name in op.input_names:
+        inputs[name] = op.input(name)
+    for name in op.output_names:
+        outputs[name] = op.output(name)
+    for name in op.attr_names:
+        attrs[name] = op.attr(name)
+    result_name = outputs['Out'][0]
+    background = attrs['background_label']
+    normalized = attrs['normalized']
+    if normalized == False:
+        logging.warn(
+                    "The parameter normalized of multiclass_nms OP of Paddle is False, which has diff with ONNX." \
+                    " Please set normalized=True in multiclass_nms of Paddle, see doc Q4 in https://github.com/PaddlePaddle/X2Paddle/blob/develop/FAQ.md")
+    #convert the paddle attribute to onnx tensor
+    name_score_threshold = [outputs['Out'][0] + "@score_threshold"]
+    name_iou_threshold = [outputs['Out'][0] + "@iou_threshold"]
+    name_keep_top_k = [outputs['Out'][0] + '@keep_top_k']
+    name_keep_top_k_2D = [outputs['Out'][0] + '@keep_top_k_1D']
+    node_score_threshold = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_score_threshold,
+        value=onnx.helper.make_tensor(
+            name=name_score_threshold[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[float(attrs['score_threshold'])]))
+    node_iou_threshold = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_iou_threshold,
+        value=onnx.helper.make_tensor(
+            name=name_iou_threshold[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[float(attrs['nms_threshold'])]))
+    node_keep_top_k = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_keep_top_k,
+        value=onnx.helper.make_tensor(
+            name=name_keep_top_k[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=(),
+            vals=[np.int64(attrs['keep_top_k'])]))
+    node_keep_top_k_2D = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_keep_top_k_2D,
+        value=onnx.helper.make_tensor(
+            name=name_keep_top_k_2D[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[1, 1],
+            vals=[np.int64(attrs['keep_top_k'])]))
+    # the paddle data format is x1,y1,x2,y2
+    kwargs = {'center_point_box': 0}
+    name_select_nms = [outputs['Out'][0] + "@select_index"]
+    node_select_nms= onnx.helper.make_node(
+        'NonMaxSuppression',
+        inputs=inputs['BBoxes'] + inputs['Scores'] + name_keep_top_k +\
+            name_iou_threshold + name_score_threshold,
+        outputs=name_select_nms)
+    # step 1 nodes select the nms class
+    node_list = [
+        node_score_threshold, node_iou_threshold, node_keep_top_k,
+        node_keep_top_k_2D, node_select_nms
+    ]
+    # create some const value to use
+    name_const_value = [result_name+"@const_0",
+        result_name+"@const_1",\
+        result_name+"@const_2",\
+        result_name+"@const_-1"]
+    value_const_value = [0, 1, 2, -1]
+    for name, value in zip(name_const_value, value_const_value):
+        node = onnx.helper.make_node(
+            'Constant',
+            inputs=[],
+            outputs=[name],
+            value=onnx.helper.make_tensor(
+                name=name + "@const",
+                data_type=onnx.TensorProto.INT64,
+                dims=[1],
+                vals=[value]))
+        node_list.append(node)
+    # In this code block, we will deocde the raw score data, reshape N * C * M to 1 * N*C*M
+    # and the same time, decode the select indices to 1 * D, gather the select_indices
+    outputs_gather_1 = [result_name + "@gather_1"]
+    node_gather_1 = onnx.helper.make_node(
+        'Gather',
+        inputs=name_select_nms + [result_name + "@const_1"],
+        outputs=outputs_gather_1,
+        axis=1)
+    node_list.append(node_gather_1)
+    outputs_squeeze_gather_1 = [result_name + "@sequeeze_gather_1"]
+    node_squeeze_gather_1 = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_gather_1,
+        outputs=outputs_squeeze_gather_1,
+        axes=[1])
+    node_list.append(node_squeeze_gather_1)
+    outputs_gather_2 = [result_name + "@gather_2"]
+    node_gather_2 = onnx.helper.make_node(
+        'Gather',
+        inputs=name_select_nms + [result_name + "@const_2"],
+        outputs=outputs_gather_2,
+        axis=1)
+    node_list.append(node_gather_2)
+    #slice the class is not 0
+    if background == 0:
+        outputs_nonzero = [result_name + "@nonzero"]
+        node_nonzero = onnx.helper.make_node(
+            'NonZero', inputs=outputs_squeeze_gather_1, outputs=outputs_nonzero)
+        node_list.append(node_nonzero)
+    else:
+        name_thresh = [result_name + "@thresh"]
+        node_thresh = onnx.helper.make_node(
+            'Constant',
+            inputs=[],
+            outputs=name_thresh,
+            value=onnx.helper.make_tensor(
+                name=name_thresh[0] + "@const",
+                data_type=onnx.TensorProto.INT32,
+                dims=[1],
+                vals=[-1]))
+        node_list.append(node_thresh)
+        outputs_cast = [result_name + "@cast"]
+        node_cast = onnx.helper.make_node(
+            'Cast', inputs=outputs_squeeze_gather_1, outputs=outputs_cast, to=6)
+        node_list.append(node_cast)
+        outputs_greater = [result_name + "@greater"]
+        node_greater = onnx.helper.make_node(
+            'Greater',
+            inputs=outputs_cast + name_thresh,
+            outputs=outputs_greater)
+        node_list.append(node_greater)
+        outputs_nonzero = [result_name + "@nonzero"]
+        node_nonzero = onnx.helper.make_node(
+            'NonZero', inputs=outputs_greater, outputs=outputs_nonzero)
+        node_list.append(node_nonzero)
+    outputs_gather_1_nonzero = [result_name + "@gather_1_nonzero"]
+    node_gather_1_nonzero = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_gather_1 + outputs_nonzero,
+        outputs=outputs_gather_1_nonzero,
+        axis=0)
+    node_list.append(node_gather_1_nonzero)
+    outputs_gather_2_nonzero = [result_name + "@gather_2_nonzero"]
+    node_gather_2_nonzero = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_gather_2 + outputs_nonzero,
+        outputs=outputs_gather_2_nonzero,
+        axis=0)
+    node_list.append(node_gather_2_nonzero)
+    # reshape scores N * C * M to (N*C*M) * 1
+    outputs_reshape_scores_rank1 = [result_name + "@reshape_scores_rank1"]
+    node_reshape_scores_rank1 = onnx.helper.make_node(
+        "Reshape",
+        inputs=inputs['Scores'] + [result_name + "@const_-1"],
+        outputs=outputs_reshape_scores_rank1)
+    node_list.append(node_reshape_scores_rank1)
+    # get the shape of scores
+    outputs_shape_scores = [result_name + "@shape_scores"]
+    node_shape_scores = onnx.helper.make_node(
+        'Shape', inputs=inputs['Scores'], outputs=outputs_shape_scores)
+    node_list.append(node_shape_scores)
+    # gather the index: 2 shape of scores
+    outputs_gather_scores_dim1 = [result_name + "@gather_scores_dim1"]
+    node_gather_scores_dim1 = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_shape_scores + [result_name + "@const_2"],
+        outputs=outputs_gather_scores_dim1,
+        axis=0)
+    node_list.append(node_gather_scores_dim1)
+    # mul class * M
+    outputs_mul_classnum_boxnum = [result_name + "@mul_classnum_boxnum"]
+    node_mul_classnum_boxnum = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_gather_1_nonzero + outputs_gather_scores_dim1,
+        outputs=outputs_mul_classnum_boxnum)
+    node_list.append(node_mul_classnum_boxnum)
+    # add class * M * index
+    outputs_add_class_M_index = [result_name + "@add_class_M_index"]
+    node_add_class_M_index = onnx.helper.make_node(
+        'Add',
+        inputs=outputs_mul_classnum_boxnum + outputs_gather_2_nonzero,
+        outputs=outputs_add_class_M_index)
+    node_list.append(node_add_class_M_index)
+    # Squeeze the indices to 1 dim
+    outputs_squeeze_select_index = [result_name + "@squeeze_select_index"]
+    node_squeeze_select_index = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_add_class_M_index,
+        outputs=outputs_squeeze_select_index,
+        axes=[0, 2])
+    node_list.append(node_squeeze_select_index)
+    # gather the data from flatten scores
+    outputs_gather_select_scores = [result_name + "@gather_select_scores"]
+    node_gather_select_scores = onnx.helper.make_node('Gather',
+        inputs=outputs_reshape_scores_rank1 + \
+            outputs_squeeze_select_index,
+        outputs=outputs_gather_select_scores,
+        axis=0)
+    node_list.append(node_gather_select_scores)
+    # get nums to input TopK
+    outputs_shape_select_num = [result_name + "@shape_select_num"]
+    node_shape_select_num = onnx.helper.make_node(
+        'Shape',
+        inputs=outputs_gather_select_scores,
+        outputs=outputs_shape_select_num)
+    node_list.append(node_shape_select_num)
+    outputs_gather_select_num = [result_name + "@gather_select_num"]
+    node_gather_select_num = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_shape_select_num + [result_name + "@const_0"],
+        outputs=outputs_gather_select_num,
+        axis=0)
+    node_list.append(node_gather_select_num)
+    outputs_unsqueeze_select_num = [result_name + "@unsqueeze_select_num"]
+    node_unsqueeze_select_num = onnx.helper.make_node(
+        'Unsqueeze',
+        inputs=outputs_gather_select_num,
+        outputs=outputs_unsqueeze_select_num,
+        axes=[0])
+    node_list.append(node_unsqueeze_select_num)
+    outputs_concat_topK_select_num = [result_name + "@conat_topK_select_num"]
+    node_conat_topK_select_num = onnx.helper.make_node(
+        'Concat',
+        inputs=outputs_unsqueeze_select_num + name_keep_top_k_2D,
+        outputs=outputs_concat_topK_select_num,
+        axis=0)
+    node_list.append(node_conat_topK_select_num)
+    outputs_cast_concat_topK_select_num = [
+        result_name + "@concat_topK_select_num"
+    ]
+    node_outputs_cast_concat_topK_select_num = onnx.helper.make_node(
+        'Cast',
+        inputs=outputs_concat_topK_select_num,
+        outputs=outputs_cast_concat_topK_select_num,
+        to=6)
+    node_list.append(node_outputs_cast_concat_topK_select_num)
+    # get min(topK, num_select)
+    outputs_compare_topk_num_select = [result_name + "@compare_topk_num_select"]
+    node_compare_topk_num_select = onnx.helper.make_node(
+        'ReduceMin',
+        inputs=outputs_cast_concat_topK_select_num,
+        outputs=outputs_compare_topk_num_select,
+        keepdims=0)
+    node_list.append(node_compare_topk_num_select)
+    # unsqueeze the indices to 1D tensor
+    outputs_unsqueeze_topk_select_indices = [
+        result_name + "@unsqueeze_topk_select_indices"
+    ]
+    node_unsqueeze_topk_select_indices = onnx.helper.make_node(
+        'Unsqueeze',
+        inputs=outputs_compare_topk_num_select,
+        outputs=outputs_unsqueeze_topk_select_indices,
+        axes=[0])
+    node_list.append(node_unsqueeze_topk_select_indices)
+    # cast the indices to INT64
+    outputs_cast_topk_indices = [result_name + "@cast_topk_indices"]
+    node_cast_topk_indices = onnx.helper.make_node(
+        'Cast',
+        inputs=outputs_unsqueeze_topk_select_indices,
+        outputs=outputs_cast_topk_indices,
+        to=7)
+    node_list.append(node_cast_topk_indices)
+    # select topk scores  indices
+    outputs_topk_select_topk_indices = [result_name + "@topk_select_topk_values",\
+        result_name + "@topk_select_topk_indices"]
+    node_topk_select_topk_indices = onnx.helper.make_node(
+        'TopK',
+        inputs=outputs_gather_select_scores + outputs_cast_topk_indices,
+        outputs=outputs_topk_select_topk_indices)
+    node_list.append(node_topk_select_topk_indices)
+    # gather topk label, scores, boxes
+    outputs_gather_topk_scores = [result_name + "@gather_topk_scores"]
+    node_gather_topk_scores = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_gather_select_scores +
+        [outputs_topk_select_topk_indices[1]],
+        outputs=outputs_gather_topk_scores,
+        axis=0)
+    node_list.append(node_gather_topk_scores)
+    outputs_gather_topk_class = [result_name + "@gather_topk_class"]
+    node_gather_topk_class = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_gather_1_nonzero +
+        [outputs_topk_select_topk_indices[1]],
+        outputs=outputs_gather_topk_class,
+        axis=1)
+    node_list.append(node_gather_topk_class)
+    # gather the boxes need to gather the boxes id, then get boxes
+    outputs_gather_topk_boxes_id = [result_name + "@gather_topk_boxes_id"]
+    node_gather_topk_boxes_id = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_gather_2_nonzero +
+        [outputs_topk_select_topk_indices[1]],
+        outputs=outputs_gather_topk_boxes_id,
+        axis=1)
+    node_list.append(node_gather_topk_boxes_id)
+    # squeeze the gather_topk_boxes_id to 1 dim
+    outputs_squeeze_topk_boxes_id = [result_name + "@squeeze_topk_boxes_id"]
+    node_squeeze_topk_boxes_id = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_gather_topk_boxes_id,
+        outputs=outputs_squeeze_topk_boxes_id,
+        axes=[0, 2])
+    node_list.append(node_squeeze_topk_boxes_id)
+    outputs_gather_select_boxes = [result_name + "@gather_select_boxes"]
+    node_gather_select_boxes = onnx.helper.make_node(
+        'Gather',
+        inputs=inputs['BBoxes'] + outputs_squeeze_topk_boxes_id,
+        outputs=outputs_gather_select_boxes,
+        axis=1)
+    node_list.append(node_gather_select_boxes)
+    # concat the final result
+    # before concat need to cast the class to float
+    outputs_cast_topk_class = [result_name + "@cast_topk_class"]
+    node_cast_topk_class = onnx.helper.make_node(
+        'Cast',
+        inputs=outputs_gather_topk_class,
+        outputs=outputs_cast_topk_class,
+        to=1)
+    node_list.append(node_cast_topk_class)
+    outputs_unsqueeze_topk_scores = [result_name + "@unsqueeze_topk_scores"]
+    node_unsqueeze_topk_scores = onnx.helper.make_node(
+        'Unsqueeze',
+        inputs=outputs_gather_topk_scores,
+        outputs=outputs_unsqueeze_topk_scores,
+        axes=[0, 2])
+    node_list.append(node_unsqueeze_topk_scores)
+    inputs_concat_final_results = outputs_cast_topk_class + outputs_unsqueeze_topk_scores +\
+        outputs_gather_select_boxes
+    outputs_sort_by_socre_results = [result_name + "@concat_topk_scores"]
+    node_sort_by_socre_results = onnx.helper.make_node(
+        'Concat',
+        inputs=inputs_concat_final_results,
+        outputs=outputs_sort_by_socre_results,
+        axis=2)
+    node_list.append(node_sort_by_socre_results)
+    # select topk classes indices
+    outputs_squeeze_cast_topk_class = [result_name + "@squeeze_cast_topk_class"]
+    node_squeeze_cast_topk_class = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_cast_topk_class,
+        outputs=outputs_squeeze_cast_topk_class,
+        axes=[0, 2])
+    node_list.append(node_squeeze_cast_topk_class)
+    outputs_topk_select_classes_indices = [result_name + "@topk_select_topk_classes_scores",\
+        result_name + "@topk_select_topk_classes_indices"]
+    node_topk_select_topk_indices = onnx.helper.make_node(
+        'TopK',
+        inputs=outputs_squeeze_cast_topk_class + outputs_cast_topk_indices,
+        outputs=outputs_topk_select_classes_indices,
+        largest=0)
+    node_list.append(node_topk_select_topk_indices)
+    outputs_concat_final_results = outputs['Out']
+    node_concat_final_results = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_sort_by_socre_results +
+        [outputs_topk_select_classes_indices[1]],
+        outputs=outputs_concat_final_results,
+        axis=1)
+    node_list.append(node_concat_final_results)
+    return node_list
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset11/paddle_custom_layer/yolo_box.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset11/paddle_custom_layer/yolo_box.py
+# Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import onnx
+import numpy as np
+from onnx import onnx_pb, helper
+from x2paddle.op_mapper.paddle2onnx.opset9.paddle_custom_layer.yolo_box import is_static_shape
+from x2paddle.op_mapper.paddle2onnx.opset9.paddle_custom_layer.yolo_box import get_old_name
+from x2paddle.op_mapper.paddle2onnx.opset9.paddle_custom_layer.yolo_box import MAX_FLOAT32
+def yolo_box(op, block):
+    inputs = dict()
+    outputs = dict()
+    attrs = dict()
+    for name in op.input_names:
+        inputs[name] = op.input(name)
+    for name in op.output_names:
+        outputs[name] = op.output(name)
+    for name in op.attr_names:
+        attrs[name] = op.attr(name)
+    model_name = outputs['Boxes'][0]
+    input_shape = block.vars[get_old_name(inputs['X'][0])].shape
+    is_static_shape(input_shape)
+    image_size = inputs['ImgSize']
+    input_height = input_shape[2]
+    input_width = input_shape[3]
+    class_num = attrs['class_num']
+    anchors = attrs['anchors']
+    num_anchors = int(len(anchors)) // 2
+    downsample_ratio = attrs['downsample_ratio']
+    input_size = input_height * downsample_ratio
+    conf_thresh = attrs['conf_thresh']
+    conf_thresh_mat = np.ones([num_anchors * input_height *
+                               input_width]) * conf_thresh
+    node_list = []
+    im_outputs = []
+    x_shape = [1, num_anchors, 5 + class_num, input_height, input_width]
+    name_x_shape = [model_name + "@x_shape"]
+    node_x_shape = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_x_shape,
+        value=onnx.helper.make_tensor(
+            name=name_x_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[5],
+            vals=x_shape))
+    node_list.append(node_x_shape)
+    outputs_x_reshape = [model_name + "@reshape"]
+    node_x_reshape = onnx.helper.make_node(
+        'Reshape', inputs=inputs['X'] + name_x_shape, outputs=outputs_x_reshape)
+    node_list.append(node_x_reshape)
+    outputs_x_transpose = [model_name + "@x_transpose"]
+    node_x_transpose = onnx.helper.make_node(
+        'Transpose',
+        inputs=outputs_x_reshape,
+        outputs=outputs_x_transpose,
+        perm=[0, 1, 3, 4, 2])
+    node_list.append(node_x_transpose)
+    range_x = []
+    range_y = []
+    for i in range(0, input_width):
+        range_x.append(i)
+    for j in range(0, input_height):
+        range_y.append(j)
+    name_range_x = [model_name + "@range_x"]
+    node_range_x = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_range_x,
+        value=onnx.helper.make_tensor(
+            name=name_range_x[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=[input_width],
+            vals=range_x))
+    node_list.append(node_range_x)
+    name_range_y = [model_name + "@range_y"]
+    node_range_y = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_range_y,
+        value=onnx.helper.make_tensor(
+            name=name_range_y[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=[input_height],
+            vals=range_y))
+    node_list.append(node_range_y)
+    range_x_new_shape = [1, input_width]
+    range_y_new_shape = [input_height, 1]
+    name_range_x_new_shape = [model_name + "@range_x_new_shape"]
+    node_range_x_new_shape = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_range_x_new_shape,
+        value=onnx.helper.make_tensor(
+            name=name_range_x_new_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[len(range_x_new_shape)],
+            vals=range_x_new_shape))
+    node_list.append(node_range_x_new_shape)
+    name_range_y_new_shape = [model_name + "@range_y_new_shape"]
+    node_range_y_new_shape = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_range_y_new_shape,
+        value=onnx.helper.make_tensor(
+            name=name_range_y_new_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[len(range_y_new_shape)],
+            vals=range_y_new_shape))
+    node_list.append(node_range_y_new_shape)
+    outputs_range_x_reshape = [model_name + "@range_x_reshape"]
+    node_range_x_reshape = onnx.helper.make_node(
+        'Reshape',
+        inputs=name_range_x + name_range_x_new_shape,
+        outputs=outputs_range_x_reshape)
+    node_list.append(node_range_x_reshape)
+    outputs_range_y_reshape = [model_name + "@range_y_reshape"]
+    node_range_y_reshape = onnx.helper.make_node(
+        'Reshape',
+        inputs=name_range_y + name_range_y_new_shape,
+        outputs=outputs_range_y_reshape)
+    node_list.append(node_range_y_reshape)
+    outputs_grid_x = [model_name + "@grid_x"]
+    node_grid_x = onnx.helper.make_node(
+        "Tile",
+        inputs=outputs_range_x_reshape + name_range_y_new_shape,
+        outputs=outputs_grid_x)
+    node_list.append(node_grid_x)
+    outputs_grid_y = [model_name + "@grid_y"]
+    node_grid_y = onnx.helper.make_node(
+        "Tile",
+        inputs=outputs_range_y_reshape + name_range_x_new_shape,
+        outputs=outputs_grid_y)
+    node_list.append(node_grid_y)
+    outputs_box_x = [model_name + "@box_x"]
+    outputs_box_y = [model_name + "@box_y"]
+    outputs_box_w = [model_name + "@box_w"]
+    outputs_box_h = [model_name + "@box_h"]
+    outputs_conf = [model_name + "@conf"]
+    outputs_prob = [model_name + "@prob"]
+    node_split_input = onnx.helper.make_node(
+        "Split",
+        inputs=outputs_x_transpose,
+        outputs=outputs_box_x + outputs_box_y + outputs_box_w\
+                + outputs_box_h + outputs_conf + outputs_prob,
+        axis=-1,
+        split=[1, 1, 1, 1, 1, class_num])
+    node_list.append(node_split_input)
+    outputs_box_x_sigmoid = [model_name + "@box_x_sigmoid"]
+    outputs_box_y_sigmoid = [model_name + "@box_y_sigmoid"]
+    node_box_x_sigmoid = onnx.helper.make_node(
+        "Sigmoid", inputs=outputs_box_x, outputs=outputs_box_x_sigmoid)
+    node_list.append(node_box_x_sigmoid)
+    node_box_y_sigmoid = onnx.helper.make_node(
+        "Sigmoid", inputs=outputs_box_y, outputs=outputs_box_y_sigmoid)
+    node_list.append(node_box_y_sigmoid)
+    outputs_box_x_squeeze = [model_name + "@box_x_squeeze"]
+    outputs_box_y_squeeze = [model_name + "@box_y_squeeze"]
+    node_box_x_squeeze = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_box_x_sigmoid,
+        outputs=outputs_box_x_squeeze,
+        axes=[4])
+    node_list.append(node_box_x_squeeze)
+    node_box_y_squeeze = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_box_y_sigmoid,
+        outputs=outputs_box_y_squeeze,
+        axes=[4])
+    node_list.append(node_box_y_squeeze)
+    outputs_box_x_add_grid = [model_name + "@box_x_add_grid"]
+    outputs_box_y_add_grid = [model_name + "@box_y_add_grid"]
+    node_box_x_add_grid = onnx.helper.make_node(
+        "Add",
+        inputs=outputs_grid_x + outputs_box_x_squeeze,
+        outputs=outputs_box_x_add_grid)
+    node_list.append(node_box_x_add_grid)
+    node_box_y_add_grid = onnx.helper.make_node(
+        "Add",
+        inputs=outputs_grid_y + outputs_box_y_squeeze,
+        outputs=outputs_box_y_add_grid)
+    node_list.append(node_box_y_add_grid)
+    name_input_h = [model_name + "@input_h"]
+    name_input_w = [model_name + "@input_w"]
+    node_input_h = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_input_h,
+        value=onnx.helper.make_tensor(
+            name=name_input_w[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[input_height]))
+    node_list.append(node_input_h)
+    node_input_w = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_input_w,
+        value=onnx.helper.make_tensor(
+            name=name_input_w[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[input_width]))
+    node_list.append(node_input_w)
+    outputs_box_x_encode = [model_name + "@box_x_encode"]
+    outputs_box_y_encode = [model_name + "@box_y_encode"]
+    node_box_x_encode = onnx.helper.make_node(
+        'Div',
+        inputs=outputs_box_x_add_grid + name_input_w,
+        outputs=outputs_box_x_encode)
+    node_list.append(node_box_x_encode)
+    node_box_y_encode = onnx.helper.make_node(
+        'Div',
+        inputs=outputs_box_y_add_grid + name_input_h,
+        outputs=outputs_box_y_encode)
+    node_list.append(node_box_y_encode)
+    name_anchor_tensor = [model_name + "@anchor_tensor"]
+    node_anchor_tensor = onnx.helper.make_node(
+        "Constant",
+        inputs=[],
+        outputs=name_anchor_tensor,
+        value=onnx.helper.make_tensor(
+            name=name_anchor_tensor[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=[len(anchors)],
+            vals=anchors))
+    node_list.append(node_anchor_tensor)
+    anchor_shape = [int(num_anchors), 2]
+    name_anchor_shape = [model_name + "@anchor_shape"]
+    node_anchor_shape = onnx.helper.make_node(
+        "Constant",
+        inputs=[],
+        outputs=name_anchor_shape,
+        value=onnx.helper.make_tensor(
+            name=name_anchor_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[2],
+            vals=anchor_shape))
+    node_list.append(node_anchor_shape)
+    outputs_anchor_tensor_reshape = [model_name + "@anchor_tensor_reshape"]
+    node_anchor_tensor_reshape = onnx.helper.make_node(
+        "Reshape",
+        inputs=name_anchor_tensor + name_anchor_shape,
+        outputs=outputs_anchor_tensor_reshape)
+    node_list.append(node_anchor_tensor_reshape)
+    name_input_size = [model_name + "@input_size"]
+    node_input_size = onnx.helper.make_node(
+        "Constant",
+        inputs=[],
+        outputs=name_input_size,
+        value=onnx.helper.make_tensor(
+            name=name_input_size[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[input_size]))
+    node_list.append(node_input_size)
+    outputs_anchors_div_input_size = [model_name + "@anchors_div_input_size"]
+    node_anchors_div_input_size = onnx.helper.make_node(
+        "Div",
+        inputs=outputs_anchor_tensor_reshape + name_input_size,
+        outputs=outputs_anchors_div_input_size)
+    node_list.append(node_anchors_div_input_size)
+    outputs_anchor_w = [model_name + "@anchor_w"]
+    outputs_anchor_h = [model_name + "@anchor_h"]
+    node_anchor_split = onnx.helper.make_node(
+        'Split',
+        inputs=outputs_anchors_div_input_size,
+        outputs=outputs_anchor_w + outputs_anchor_h,
+        axis=1,
+        split=[1, 1])
+    node_list.append(node_anchor_split)
+    new_anchor_shape = [1, int(num_anchors), 1, 1]
+    name_new_anchor_shape = [model_name + "@new_anchor_shape"]
+    node_new_anchor_shape = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_new_anchor_shape,
+        value=onnx.helper.make_tensor(
+            name=name_new_anchor_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[len(new_anchor_shape)],
+            vals=new_anchor_shape))
+    node_list.append(node_new_anchor_shape)
+    outputs_anchor_w_reshape = [model_name + "@anchor_w_reshape"]
+    outputs_anchor_h_reshape = [model_name + "@anchor_h_reshape"]
+    node_anchor_w_reshape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_anchor_w + name_new_anchor_shape,
+        outputs=outputs_anchor_w_reshape)
+    node_list.append(node_anchor_w_reshape)
+    node_anchor_h_reshape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_anchor_h + name_new_anchor_shape,
+        outputs=outputs_anchor_h_reshape)
+    node_list.append(node_anchor_h_reshape)
+    outputs_box_w_squeeze = [model_name + "@box_w_squeeze"]
+    node_box_w_squeeze = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_box_w,
+        outputs=outputs_box_w_squeeze,
+        axes=[4])
+    node_list.append(node_box_w_squeeze)
+    outputs_box_h_squeeze = [model_name + "@box_h_squeeze"]
+    node_box_h_squeeze = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_box_h,
+        outputs=outputs_box_h_squeeze,
+        axes=[4])
+    node_list.append(node_box_h_squeeze)
+    outputs_box_w_exp = [model_name + "@box_w_exp"]
+    node_box_w_exp = onnx.helper.make_node(
+        "Exp", inputs=outputs_box_w_squeeze, outputs=outputs_box_w_exp)
+    node_list.append(node_box_w_exp)
+    outputs_box_h_exp = [model_name + "@box_h_exp"]
+    node_box_h_exp = onnx.helper.make_node(
+        "Exp", inputs=outputs_box_h_squeeze, outputs=outputs_box_h_exp)
+    node_list.append(node_box_h_exp)
+    outputs_box_w_encode = [model_name + "box_w_encode"]
+    outputs_box_h_encode = [model_name + "box_h_encode"]
+    node_box_w_encode = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_box_w_exp + outputs_anchor_w_reshape,
+        outputs=outputs_box_w_encode)
+    node_list.append(node_box_w_encode)
+    node_box_h_encode = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_box_h_exp + outputs_anchor_h_reshape,
+        outputs=outputs_box_h_encode)
+    node_list.append(node_box_h_encode)
+    outputs_conf_sigmoid = [model_name + "@conf_sigmoid"]
+    node_conf_sigmoid = onnx.helper.make_node(
+        'Sigmoid', inputs=outputs_conf, outputs=outputs_conf_sigmoid)
+    node_list.append(node_conf_sigmoid)
+    name_conf_thresh = [model_name + "@conf_thresh"]
+    node_conf_thresh = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_conf_thresh,
+        value=onnx.helper.make_tensor(
+            name=name_conf_thresh[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=[num_anchors * input_height * input_width],
+            vals=conf_thresh_mat))
+    node_list.append(node_conf_thresh)
+    conf_shape = [1, int(num_anchors), input_height, input_width, 1]
+    name_conf_shape = [model_name + "@conf_shape"]
+    node_conf_shape = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_conf_shape,
+        value=onnx.helper.make_tensor(
+            name=name_conf_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[len(conf_shape)],
+            vals=conf_shape))
+    node_list.append(node_conf_shape)
+    outputs_conf_thresh_reshape = [model_name + "@conf_thresh_reshape"]
+    node_conf_thresh_reshape = onnx.helper.make_node(
+        'Reshape',
+        inputs=name_conf_thresh + name_conf_shape,
+        outputs=outputs_conf_thresh_reshape)
+    node_list.append(node_conf_thresh_reshape)
+    outputs_conf_sub = [model_name + "@conf_sub"]
+    node_conf_sub = onnx.helper.make_node(
+        'Sub',
+        inputs=outputs_conf_sigmoid + outputs_conf_thresh_reshape,
+        outputs=outputs_conf_sub)
+    node_list.append(node_conf_sub)
+    outputs_conf_clip = [model_name + "@conf_clip"]
+    node_conf_clip = onnx.helper.make_node(
+        'Clip', inputs=outputs_conf_sub, outputs=outputs_conf_clip)
+    node_list.append(node_conf_clip)
+    zeros = [0]
+    name_zeros = [model_name + "@zeros"]
+    node_zeros = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_zeros,
+        value=onnx.helper.make_tensor(
+            name=name_zeros[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=zeros))
+    node_list.append(node_zeros)
+    outputs_conf_clip_bool = [model_name + "@conf_clip_bool"]
+    node_conf_clip_bool = onnx.helper.make_node(
+        'Greater',
+        inputs=outputs_conf_clip + name_zeros,
+        outputs=outputs_conf_clip_bool)
+    node_list.append(node_conf_clip_bool)
+    outputs_conf_clip_cast = [model_name + "@conf_clip_cast"]
+    node_conf_clip_cast = onnx.helper.make_node(
+        'Cast',
+        inputs=outputs_conf_clip_bool,
+        outputs=outputs_conf_clip_cast,
+        to=1)
+    node_list.append(node_conf_clip_cast)
+    outputs_conf_set_zero = [model_name + "@conf_set_zero"]
+    node_conf_set_zero = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_conf_sigmoid + outputs_conf_clip_cast,
+        outputs=outputs_conf_set_zero)
+    node_list.append(node_conf_set_zero)
+    outputs_prob_sigmoid = [model_name + "@prob_sigmoid"]
+    node_prob_sigmoid = onnx.helper.make_node(
+        'Sigmoid', inputs=outputs_prob, outputs=outputs_prob_sigmoid)
+    node_list.append(node_prob_sigmoid)
+    new_shape = [1, int(num_anchors), input_height, input_width, 1]
+    name_new_shape = [model_name + "@new_shape"]
+    node_new_shape = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_new_shape,
+        value=onnx.helper.make_tensor(
+            name=name_new_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[len(new_shape)],
+            vals=new_shape))
+    node_list.append(node_new_shape)
+    outputs_conf_new_shape = [model_name + "@_conf_new_shape"]
+    node_conf_new_shape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_conf_set_zero + name_new_shape,
+        outputs=outputs_conf_new_shape)
+    node_list.append(node_conf_new_shape)
+    outputs_score = [model_name + "@score"]
+    node_score = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_prob_sigmoid + outputs_conf_new_shape,
+        outputs=outputs_score)
+    node_list.append(node_score)
+    outputs_conf_bool = [model_name + "@conf_bool"]
+    node_conf_bool = onnx.helper.make_node(
+        'Greater',
+        inputs=outputs_conf_new_shape + name_zeros,
+        outputs=outputs_conf_bool)
+    node_list.append(node_conf_bool)
+    outputs_box_x_new_shape = [model_name + "@box_x_new_shape"]
+    node_box_x_new_shape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_box_x_encode + name_new_shape,
+        outputs=outputs_box_x_new_shape)
+    node_list.append(node_box_x_new_shape)
+    outputs_box_y_new_shape = [model_name + "@box_y_new_shape"]
+    node_box_y_new_shape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_box_y_encode + name_new_shape,
+        outputs=outputs_box_y_new_shape)
+    node_list.append(node_box_y_new_shape)
+    outputs_box_w_new_shape = [model_name + "@box_w_new_shape"]
+    node_box_w_new_shape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_box_w_encode + name_new_shape,
+        outputs=outputs_box_w_new_shape)
+    node_list.append(node_box_w_new_shape)
+    outputs_box_h_new_shape = [model_name + "@box_h_new_shape"]
+    node_box_h_new_shape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_box_h_encode + name_new_shape,
+        outputs=outputs_box_h_new_shape)
+    node_list.append(node_box_h_new_shape)
+    outputs_pred_box = [model_name + "@pred_box"]
+    node_pred_box = onnx.helper.make_node(
+        'Concat',
+        inputs=outputs_box_x_new_shape + outputs_box_y_new_shape + \
+               outputs_box_w_new_shape + outputs_box_h_new_shape,
+        outputs=outputs_pred_box,
+        axis=4)
+    node_list.append(node_pred_box)
+    outputs_conf_cast = [model_name + "conf_cast"]
+    node_conf_cast = onnx.helper.make_node(
+        'Cast', inputs=outputs_conf_bool, outputs=outputs_conf_cast, to=1)
+    node_list.append(node_conf_cast)
+    outputs_pred_box_mul_conf = [model_name + "@pred_box_mul_conf"]
+    node_pred_box_mul_conf = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_pred_box + outputs_conf_cast,
+        outputs=outputs_pred_box_mul_conf)
+    node_list.append(node_pred_box_mul_conf)
+    box_shape = [1, int(num_anchors) * input_height * input_width, 4]
+    name_box_shape = [model_name + "@box_shape"]
+    node_box_shape = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_box_shape,
+        value=onnx.helper.make_tensor(
+            name=name_box_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[len(box_shape)],
+            vals=box_shape))
+    node_list.append(node_box_shape)
+    outputs_pred_box_new_shape = [model_name + "@pred_box_new_shape"]
+    node_pred_box_new_shape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_pred_box_mul_conf + name_box_shape,
+        outputs=outputs_pred_box_new_shape)
+    node_list.append(node_pred_box_new_shape)
+    outputs_pred_box_x = [model_name + "@_pred_box_x"]
+    outputs_pred_box_y = [model_name + "@_pred_box_y"]
+    outputs_pred_box_w = [model_name + "@_pred_box_w"]
+    outputs_pred_box_h = [model_name + "@_pred_box_h"]
+    node_pred_box_split = onnx.helper.make_node(
+        'Split',
+        inputs=outputs_pred_box_new_shape,
+        outputs=outputs_pred_box_x + outputs_pred_box_y + outputs_pred_box_w +
+        outputs_pred_box_h,
+        axis=2)
+    node_list.append(node_pred_box_split)
+    name_number_two = [model_name + "@number_two"]
+    node_number_two = onnx.helper.make_node(
+        "Constant",
+        inputs=[],
+        outputs=name_number_two,
+        value=onnx.helper.make_tensor(
+            name=name_number_two[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[2]))
+    node_list.append(node_number_two)
+    outputs_half_w = [model_name + "@half_w"]
+    node_half_w = onnx.helper.make_node(
+        "Div",
+        inputs=outputs_pred_box_w + name_number_two,
+        outputs=outputs_half_w)
+    node_list.append(node_half_w)
+    outputs_half_h = [model_name + "@half_h"]
+    node_half_h = onnx.helper.make_node(
+        "Div",
+        inputs=outputs_pred_box_h + name_number_two,
+        outputs=outputs_half_h)
+    node_list.append(node_half_h)
+    outputs_pred_box_x1 = [model_name + "@pred_box_x1"]
+    node_pred_box_x1 = onnx.helper.make_node(
+        'Sub',
+        inputs=outputs_pred_box_x + outputs_half_w,
+        outputs=outputs_pred_box_x1)
+    node_list.append(node_pred_box_x1)
+    outputs_pred_box_y1 = [model_name + "@pred_box_y1"]
+    node_pred_box_y1 = onnx.helper.make_node(
+        'Sub',
+        inputs=outputs_pred_box_y + outputs_half_h,
+        outputs=outputs_pred_box_y1)
+    node_list.append(node_pred_box_y1)
+    outputs_pred_box_x2 = [model_name + "@pred_box_x2"]
+    node_pred_box_x2 = onnx.helper.make_node(
+        'Add',
+        inputs=outputs_pred_box_x + outputs_half_w,
+        outputs=outputs_pred_box_x2)
+    node_list.append(node_pred_box_x2)
+    outputs_pred_box_y2 = [model_name + "@pred_box_y2"]
+    node_pred_box_y2 = onnx.helper.make_node(
+        'Add',
+        inputs=outputs_pred_box_y + outputs_half_h,
+        outputs=outputs_pred_box_y2)
+    node_list.append(node_pred_box_y2)
+    outputs_sqeeze_image_size = [model_name + "@sqeeze_image_size"]
+    node_sqeeze_image_size = onnx.helper.make_node(
+        "Squeeze",
+        axes=[0],
+        inputs=image_size,
+        outputs=outputs_sqeeze_image_size)
+    node_list.append(node_sqeeze_image_size)
+    output_img_height = [model_name + "@img_height"]
+    output_img_width = [model_name + "@img_width"]
+    node_image_size_split = onnx.helper.make_node(
+        "Split",
+        inputs=outputs_sqeeze_image_size,
+        outputs=output_img_height + output_img_width,
+        axis=-1,
+        split=[1, 1])
+    node_list.append(node_image_size_split)
+    output_img_width_cast = [model_name + "@img_width_cast"]
+    node_img_width_cast = onnx.helper.make_node(
+        'Cast', inputs=output_img_width, outputs=output_img_width_cast, to=1)
+    node_list.append(node_img_width_cast)
+    output_img_height_cast = [model_name + "@img_height_cast"]
+    node_img_height_cast = onnx.helper.make_node(
+        'Cast', inputs=output_img_height, outputs=output_img_height_cast, to=1)
+    node_list.append(node_img_height_cast)
+    outputs_pred_box_x1_decode = [model_name + "@pred_box_x1_decode"]
+    outputs_pred_box_y1_decode = [model_name + "@pred_box_y1_decode"]
+    outputs_pred_box_x2_decode = [model_name + "@pred_box_x2_decode"]
+    outputs_pred_box_y2_decode = [model_name + "@pred_box_y2_decode"]
+    node_pred_box_x1_decode = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_pred_box_x1 + output_img_width_cast,
+        outputs=outputs_pred_box_x1_decode)
+    node_list.append(node_pred_box_x1_decode)
+    node_pred_box_y1_decode = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_pred_box_y1 + output_img_height_cast,
+        outputs=outputs_pred_box_y1_decode)
+    node_list.append(node_pred_box_y1_decode)
+    node_pred_box_x2_decode = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_pred_box_x2 + output_img_width_cast,
+        outputs=outputs_pred_box_x2_decode)
+    node_list.append(node_pred_box_x2_decode)
+    node_pred_box_y2_decode = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_pred_box_y2 + output_img_height_cast,
+        outputs=outputs_pred_box_y2_decode)
+    node_list.append(node_pred_box_y2_decode)
+    name_number_one = [model_name + "@one"]
+    node_number_one = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_number_one,
+        value=onnx.helper.make_tensor(
+            name=name_number_one[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[1]))
+    node_list.append(node_number_one)
+    output_new_img_height = [model_name + "@new_img_height"]
+    node_new_img_height = onnx.helper.make_node(
+        'Sub',
+        inputs=output_img_height_cast + name_number_one,
+        outputs=output_new_img_height)
+    node_list.append(node_new_img_height)
+    output_new_img_width = [model_name + "@new_img_width"]
+    node_new_img_width = onnx.helper.make_node(
+        'Sub',
+        inputs=output_img_width_cast + name_number_one,
+        outputs=output_new_img_width)
+    node_list.append(node_new_img_width)
+    outputs_pred_box_x2_sub_w = [model_name + "@pred_box_x2_sub_w"]
+    node_pred_box_x2_sub_w = onnx.helper.make_node(
+        'Sub',
+        inputs=outputs_pred_box_x2_decode + output_new_img_width,
+        outputs=outputs_pred_box_x2_sub_w)
+    node_list.append(node_pred_box_x2_sub_w)
+    outputs_pred_box_y2_sub_h = [model_name + "@pred_box_y2_sub_h"]
+    node_pred_box_y2_sub_h = onnx.helper.make_node(
+        'Sub',
+        inputs=outputs_pred_box_y2_decode + output_new_img_height,
+        outputs=outputs_pred_box_y2_sub_h)
+    node_list.append(node_pred_box_y2_sub_h)
+    outputs_pred_box_x1_clip = [model_name + "@pred_box_x1_clip"]
+    outputs_pred_box_y1_clip = [model_name + "@pred_box_y1_clip"]
+    outputs_pred_box_x2_clip = [model_name + "@pred_box_x2_clip"]
+    outputs_pred_box_y2_clip = [model_name + "@pred_box_y2_clip"]
+    min_const_name = model_name + "@pred_box_min_const"
+    max_const_name = model_name + "@pred_box_max_const"
+    min_const = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=[min_const_name],
+        value=onnx.helper.make_tensor(
+            name=min_const_name,
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[0.0]))
+    node_list.append(min_const)
+    max_const = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=[max_const_name],
+        value=onnx.helper.make_tensor(
+            name=max_const_name,
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[MAX_FLOAT32]))
+    node_list.append(max_const)
+    node_pred_box_x1_clip = onnx.helper.make_node(
+        'Clip',
+        inputs=outputs_pred_box_x1_decode + [min_const_name, max_const_name],
+        outputs=outputs_pred_box_x1_clip)
+    node_list.append(node_pred_box_x1_clip)
+    node_pred_box_y1_clip = onnx.helper.make_node(
+        'Clip',
+        inputs=outputs_pred_box_y1_decode + [min_const_name, max_const_name],
+        outputs=outputs_pred_box_y1_clip)
+    node_list.append(node_pred_box_y1_clip)
+    node_pred_box_x2_clip = onnx.helper.make_node(
+        'Clip',
+        inputs=outputs_pred_box_x2_sub_w + [min_const_name, max_const_name],
+        outputs=outputs_pred_box_x2_clip)
+    node_list.append(node_pred_box_x2_clip)
+    node_pred_box_y2_clip = onnx.helper.make_node(
+        'Clip',
+        inputs=outputs_pred_box_y2_sub_h + [min_const_name, max_const_name],
+        outputs=outputs_pred_box_y2_clip)
+    node_list.append(node_pred_box_y2_clip)
+    outputs_pred_box_x2_res = [model_name + "@box_x2_res"]
+    node_pred_box_x2_res = onnx.helper.make_node(
+        'Sub',
+        inputs=outputs_pred_box_x2_decode + outputs_pred_box_x2_clip,
+        outputs=outputs_pred_box_x2_res)
+    node_list.append(node_pred_box_x2_res)
+    outputs_pred_box_y2_res = [model_name + "@box_y2_res"]
+    node_pred_box_y2_res = onnx.helper.make_node(
+        'Sub',
+        inputs=outputs_pred_box_y2_decode + outputs_pred_box_y2_clip,
+        outputs=outputs_pred_box_y2_res)
+    node_list.append(node_pred_box_y2_res)
+    node_pred_box_result = onnx.helper.make_node(
+        'Concat',
+        inputs=outputs_pred_box_x1_clip + outputs_pred_box_y1_clip +
+        outputs_pred_box_x2_res + outputs_pred_box_y2_res,
+        outputs=outputs['Boxes'],
+        axis=-1)
+    node_list.append(node_pred_box_result)
+    score_shape = [1, input_height * input_width * int(num_anchors), class_num]
+    name_score_shape = [model_name + "@score_shape"]
+    node_score_shape = onnx.helper.make_node(
+        "Constant",
+        inputs=[],
+        outputs=name_score_shape,
+        value=onnx.helper.make_tensor(
+            name=name_score_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[len(score_shape)],
+            vals=score_shape))
+    node_list.append(node_score_shape)
+    node_score_new_shape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_score + name_score_shape,
+        outputs=outputs['Scores'])
+    node_list.append(node_score_new_shape)
+    return node_list
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/__init__.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/__init__.py
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/opset.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/opset.py
+# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import sys
+import x2paddle
+import os
+import numpy as np
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+import onnx
+from onnx import helper, onnx_pb
+class OpSet9(object):
+    def __init__(self):
+        """Set up the Paddle-to-ONNX dtype table and the name counter."""
+        # Each Paddle VarType must appear exactly once as a key: a repeated
+        # INT16 key would silently replace the INT16 -> INT16 entry with
+        # UINT16, because later keys win in a dict literal.
+        self.paddle_onnx_dtype_map = {
+            core.VarDesc.VarType.FP32: onnx_pb.TensorProto.FLOAT,
+            core.VarDesc.VarType.FP64: onnx_pb.TensorProto.DOUBLE,
+            core.VarDesc.VarType.INT32: onnx_pb.TensorProto.INT32,
+            core.VarDesc.VarType.INT16: onnx_pb.TensorProto.INT16,
+            core.VarDesc.VarType.INT64: onnx_pb.TensorProto.INT64,
+            core.VarDesc.VarType.BOOL: onnx_pb.TensorProto.BOOL
+        }
+        # Occurrence counters keyed by name prefix; used by get_name().
+        self.name_counter = dict()
+    def get_name(self, op_name, var_name):
+        """Return a unique name of the form ``p2o.<op_name>.<var_name>.<k>``.
+
+        ``k`` starts at 0 for a given ``op_name``/``var_name`` pair and grows
+        by one on every further request for the same pair.
+        """
+        prefix = 'p2o.{}.{}'.format(op_name, var_name)
+        # A prefix never seen before behaves as if its last index were -1.
+        index = self.name_counter.get(prefix, -1) + 1
+        self.name_counter[prefix] = index
+        return '{}.{}'.format(prefix, index)
+    def make_constant_node(self, name, dtype, value=None):
+        """Build an ONNX ``Constant`` node whose single output is ``name``.
+
+        A list ``value`` becomes a 1-D tensor of that length, ``None``
+        becomes a tensor with empty dims and no values, and any other value
+        is wrapped as a single element with empty dims.
+        """
+        if value is None:
+            dims, vals = (), []
+        elif isinstance(value, list):
+            dims, vals = (len(value), ), value
+        else:
+            dims, vals = (), [value]
+        const_tensor = helper.make_tensor(
+            name=name, data_type=dtype, dims=dims, vals=vals)
+        return helper.make_node(
+            'Constant', inputs=[], outputs=[name], value=const_tensor)
+    def convert_weights(self, program, scope=None):
+        """Export every persistable variable of ``program`` as a Constant node.
+
+        Variables whose names end in ``feed`` or ``fetch`` and variables that
+        are not persistable are skipped. Returns the list of created nodes.
+
+        NOTE(review): ``scope`` defaults to None but is dereferenced
+        unconditionally, so callers have to pass a real scope.
+        """
+        global_block = program.global_block()
+        weight_nodes = []
+        for name in global_block.vars:
+            var = global_block.var(name)
+            if name.endswith(('feed', 'fetch')) or not var.persistable:
+                continue
+            weight = np.array(scope.find_var(name).get_tensor())
+            weight_tensor = helper.make_tensor(
+                name=name,
+                dims=var.shape,
+                data_type=self.paddle_onnx_dtype_map[var.dtype],
+                vals=weight.flatten().tolist())
+            weight_nodes.append(
+                helper.make_node(
+                    'Constant', inputs=[], outputs=[name],
+                    value=weight_tensor))
+        return weight_nodes
+    def conv2d(self, op, block):
+        """Translate a Paddle ``conv2d`` op into one ONNX ``Conv`` node.
+
+        ``kernel_shape`` is the last two dims of the ``Filter`` variable and
+        ``pads`` is the Paddle ``paddings`` list repeated twice.
+        """
+        filter_shape = block.var(op.input('Filter')[0]).shape
+        paddings = op.attr('paddings')
+        conv_attrs = {
+            'dilations': op.attr('dilations'),
+            'kernel_shape': filter_shape[-2:],
+            'strides': op.attr('strides'),
+            'group': op.attr('groups'),
+            'pads': paddings + paddings,
+        }
+        return helper.make_node(
+            'Conv',
+            inputs=op.input('Input') + op.input('Filter'),
+            outputs=op.output('Output'),
+            **conv_attrs)
+    def conv2d_transpose(self, op, block):
+        """Translate ``conv2d_transpose`` into one ONNX ``ConvTranspose`` node.
+
+        ``kernel_shape`` is the last two dims of the ``Filter`` variable and
+        ``pads`` is the Paddle ``paddings`` list repeated twice.
+        """
+        filter_shape = block.var(op.input('Filter')[0]).shape
+        paddings = op.attr('paddings')
+        deconv_attrs = {
+            'dilations': op.attr('dilations'),
+            'kernel_shape': filter_shape[-2:],
+            'strides': op.attr('strides'),
+            # NOTE(review): group is fixed to 1 here, unlike conv2d which
+            # forwards the op's 'groups' attribute -- confirm intent.
+            'group': 1,
+            'pads': paddings + paddings,
+        }
+        return helper.make_node(
+            'ConvTranspose',
+            inputs=op.input('Input') + op.input('Filter'),
+            outputs=op.output('Output'),
+            **deconv_attrs)
+    def relu(self, op, block):
+        """Emit an ONNX ``Relu`` node from the op's ``X`` to its ``Out``."""
+        src, dst = op.input('X'), op.output('Out')
+        return helper.make_node('Relu', inputs=src, outputs=dst)
+    def tanh(self, op, block):
+        """Emit an ONNX ``Tanh`` node from the op's ``X`` to its ``Out``."""
+        src, dst = op.input('X'), op.output('Out')
+        return helper.make_node('Tanh', inputs=src, outputs=dst)
+    def log(self, op, block):
+        """Emit an ONNX ``Log`` node from the op's ``X`` to its ``Out``."""
+        src, dst = op.input('X'), op.output('Out')
+        return helper.make_node('Log', inputs=src, outputs=dst)
+    def sigmoid(self, op, block):
+        """Emit an ONNX ``Sigmoid`` node from the op's ``X`` to its ``Out``."""
+        src, dst = op.input('X'), op.output('Out')
+        return helper.make_node('Sigmoid', inputs=src, outputs=dst)
+    def clip(self, op, block):
+        """Emit an ONNX ``Clip`` node with attribute-style ``min``/``max``.
+
+        The bounds are read from the Paddle op's ``min`` and ``max``
+        attributes; only the first ``X`` input is forwarded.
+        """
+        bounds = {'min': op.attr('min'), 'max': op.attr('max')}
+        return helper.make_node(
+            'Clip',
+            inputs=[op.input('X')[0]],
+            outputs=op.output('Out'),
+            **bounds)
+    def exp(self, op, block):
+        """Emit an ONNX ``Exp`` node from the op's ``X`` to its ``Out``."""
+        src, dst = op.input('X'), op.output('Out')
+        return helper.make_node('Exp', inputs=src, outputs=dst)
+    def abs(self, op, block):
+        """Emit an ONNX ``Abs`` node from the op's ``X`` to its ``Out``."""
+        src, dst = op.input('X'), op.output('Out')
+        return helper.make_node('Abs', inputs=src, outputs=dst)
+    def leaky_relu(self, op, block):
+        """Emit an ONNX ``LeakyRelu`` node using the op's ``alpha`` attribute."""
+        src, dst = op.input('X'), op.output('Out')
+        negative_slope = op.attr('alpha')
+        return helper.make_node(
+            'LeakyRelu', inputs=src, outputs=dst, alpha=negative_slope)
+    def elementwise_add(self, op, block):
+        """Translate ``elementwise_add`` into ONNX ``Add`` (plus a reshape).
+
+        When ``Y`` is 1-D and ``axis`` is 1, ``Y`` is first reshaped to an
+        all-ones shape of ``X``'s rank with ``Y``'s length at position 1, and
+        a list ``[Constant, Reshape, Add]`` is returned. When ``axis`` is -1
+        or the last axis of ``X``, or both inputs have equal rank, a single
+        ``Add`` node is returned. Any other combination raises ``Exception``.
+        """
+        axis = op.attr('axis')
+        x_rank = len(block.var(op.input('X')[0]).shape)
+        y_shape = block.var(op.input('Y')[0]).shape
+        y_rank = len(y_shape)
+
+        if y_rank == 1 and axis == 1:
+            shape_name = self.get_name(op.type, 'shape')
+            target_shape = [1] * x_rank
+            target_shape[axis] = y_shape[0]
+            shape_node = self.make_constant_node(
+                shape_name, onnx_pb.TensorProto.INT64, target_shape)
+            reshaped_y = self.get_name(op.type, 'temp')
+            reshape_node = helper.make_node(
+                'Reshape',
+                inputs=[op.input('Y')[0], shape_name],
+                outputs=[reshaped_y])
+            add_node = helper.make_node(
+                'Add',
+                inputs=[op.input('X')[0], reshaped_y],
+                outputs=op.output('Out'))
+            return [shape_node, reshape_node, add_node]
+
+        if axis in (-1, x_rank - 1) or x_rank == y_rank:
+            return helper.make_node(
+                'Add',
+                inputs=[op.input('X')[0], op.input('Y')[0]],
+                outputs=op.output('Out'))
+
+        raise Exception("Unexpected situation happend in elementwise_add")
+    def elementwise_sub(self, op, block):
+        """Translate ``elementwise_sub`` into ONNX ``Sub`` (plus a reshape).
+
+        When ``Y`` is 1-D and ``axis`` is 1, ``Y`` is first reshaped to an
+        all-ones shape of ``X``'s rank with ``Y``'s length at position 1, and
+        a list ``[Constant, Reshape, Sub]`` is returned. When ``axis`` is -1
+        or the last axis of ``X``, or both inputs have equal rank, a single
+        ``Sub`` node is returned. Any other combination raises ``Exception``.
+        """
+        axis = op.attr('axis')
+        x_rank = len(block.var(op.input('X')[0]).shape)
+        y_shape = block.var(op.input('Y')[0]).shape
+        y_rank = len(y_shape)
+
+        if y_rank == 1 and axis == 1:
+            shape_name = self.get_name(op.type, 'shape')
+            target_shape = [1] * x_rank
+            target_shape[axis] = y_shape[0]
+            shape_node = self.make_constant_node(
+                shape_name, onnx_pb.TensorProto.INT64, target_shape)
+            reshaped_y = self.get_name(op.type, 'temp')
+            reshape_node = helper.make_node(
+                'Reshape',
+                inputs=[op.input('Y')[0], shape_name],
+                outputs=[reshaped_y])
+            sub_node = helper.make_node(
+                'Sub',
+                inputs=[op.input('X')[0], reshaped_y],
+                outputs=op.output('Out'))
+            return [shape_node, reshape_node, sub_node]
+
+        if axis in (-1, x_rank - 1) or x_rank == y_rank:
+            return helper.make_node(
+                'Sub',
+                inputs=[op.input('X')[0], op.input('Y')[0]],
+                outputs=op.output('Out'))
+
+        raise Exception("Unexpected situation happend in elementwise_sub")
+    def pool2d(self, op, block):
+        """Translate a Paddle ``pool2d`` op into an ONNX pooling node.
+
+        Global pooling maps to ``GlobalMaxPool``/``GlobalAveragePool``;
+        otherwise ``MaxPool``/``AveragePool`` is emitted with the op's kernel
+        size, strides and paddings (paddings repeated twice for ONNX
+        ``pads``).
+
+        Raises:
+            Exception: if the op is an adaptive (non-global) pool.
+        """
+        pool_type = {
+            'max': ('MaxPool', 'GlobalMaxPool'),
+            'avg': ('AveragePool', 'GlobalAveragePool')
+        }
+        if op.attr('global_pooling'):
+            node = helper.make_node(
+                pool_type[op.attr('pooling_type')][1],
+                inputs=op.input('X'),
+                outputs=op.output('Out'), )
+        elif op.attr('adaptive'):
+            # Previously spelled "Excpetion", which raised NameError instead
+            # of the intended error message.
+            raise Exception("ONNX cannot support adaptive pool")
+        else:
+            input_shape = block.var(op.input('X')[0]).shape
+            # Work on a copy so the clamping below never touches a list
+            # owned by the op.
+            k_size = list(op.attr('ksize'))
+            paddings = op.attr('paddings')
+            # Shrink the kernel when it is larger than the (known) padded
+            # spatial extent of the input.
+            if input_shape[2] > 0 and input_shape[2] + paddings[0] < k_size[0]:
+                k_size[0] = input_shape[2] + paddings[0]
+            if input_shape[3] > 0 and input_shape[3] + paddings[1] < k_size[1]:
+                k_size[1] = input_shape[3] + paddings[1]
+            node = helper.make_node(
+                pool_type[op.attr('pooling_type')][0],
+                inputs=op.input('X'),
+                outputs=op.output('Out'),
+                kernel_shape=k_size,
+                strides=op.attr('strides'),
+                pads=op.attr('paddings') + op.attr('paddings'))
+        return node
+    def pad2d(self, op, block):
+        """Translate a Paddle ``pad2d`` op into one ONNX ``Pad`` node.
+
+        The four Paddle ``paddings`` entries are placed on the two spatial
+        axes of a 4-D ``pads`` list (begin values first, then end values);
+        which positions are spatial depends on ``data_format``.
+        """
+        paddings = op.attr('paddings')
+        if op.attr('data_format') == 'NCHW':
+            pads = [
+                0, 0, paddings[0], paddings[2], 0, 0, paddings[1], paddings[3]
+            ]
+        else:
+            pads = [
+                0, paddings[0], paddings[2], 0, 0, paddings[1], paddings[3], 0
+            ]
+        #TODO support pads is Variable
+        node = helper.make_node(
+            'Pad',
+            inputs=op.input('X'),
+            outputs=op.output('Out'),
+            mode=op.attr('mode'),
+            value=op.attr('pad_value'),
+            pads=pads)
+        return node
+    def softmax(self, op, block):
+        """Translate a Paddle ``softmax`` op into ONNX nodes.
+
+        If the (normalised) axis is the last one, a single ``Softmax`` node
+        is returned. Otherwise the target axis is swapped with the last axis,
+        ``Softmax`` is applied on axis -1, and the same swap is applied again;
+        the three nodes are returned as a list.
+        """
+        out_shape = block.var(op.output('Out')[0]).shape
+        rank = len(out_shape)
+        axis = op.attr('axis')
+        if axis < 0:
+            axis += rank
+
+        if axis != rank - 1:
+            # Permutation that exchanges `axis` with the last axis; applying
+            # it twice restores the original layout.
+            swap = list(range(rank))
+            swap[-1] = axis
+            swap[axis] = rank - 1
+            swapped_in = self.get_name(op.type, 'transpose')
+            to_last = helper.make_node(
+                'Transpose',
+                inputs=op.input('X'),
+                outputs=[swapped_in],
+                perm=swap)
+            softmax_out = self.get_name(op.type, 'softmax')
+            softmax_node = helper.make_node(
+                'Softmax',
+                inputs=[swapped_in],
+                outputs=[softmax_out],
+                axis=-1)
+            # Name is reserved (and the counter advanced) but not wired to
+            # any node, matching the established naming sequence.
+            transpose_name1 = self.get_name(op.type, 'transpose')
+            from_last = helper.make_node(
+                'Transpose',
+                inputs=[softmax_out],
+                outputs=op.output('Out'),
+                perm=swap)
+            return [to_last, softmax_node, from_last]
+
+        return helper.make_node(
+            'Softmax',
+            inputs=op.input('X'),
+            outputs=op.output('Out'),
+            axis=op.attr('axis'))
+    def scale(self, op, block):
+        """Translate a Paddle ``scale`` op into ONNX ``Mul``/``Add`` nodes.
+
+        A scale of 1 and bias of 0 (within 1e-06) collapses to a single
+        ``Identity`` node. Otherwise two Constant nodes plus a ``Mul`` and an
+        ``Add`` are returned as a list; ``bias_after_scale`` selects whether
+        the result is ``scale * X + bias`` or ``scale * (X + bias)``.
+        """
+        scale = op.attr('scale')
+        bias = op.attr('bias')
+        if math.fabs(scale - 1.0) < 1e-06 and math.fabs(bias - 0.0) < 1e-06:
+            node = helper.make_node(
+                'Identity', inputs=op.input('X'), outputs=op.output('Out'))
+            return node
+        else:
+            scale_name = self.get_name(op.type, 'scale')
+            bias_name = self.get_name(op.type, 'bias')
+            scale_node = self.make_constant_node(
+                scale_name, onnx_pb.TensorProto.FLOAT, scale)
+            bias_node = self.make_constant_node(bias_name,
+                                                onnx_pb.TensorProto.FLOAT, bias)
+            temp_tensor_name = self.get_name(op.type, 'temporary')
+            if op.attr('bias_after_scale'):
+                node1 = helper.make_node(
+                    'Mul',
+                    inputs=[scale_name, op.input('X')[0]],
+                    outputs=[temp_tensor_name])
+                node2 = helper.make_node(
+                    'Add',
+                    inputs=[bias_name, temp_tensor_name],
+                    outputs=op.output('Out'))
+            else:
+                # `outputs` must be a list of names: a bare string would be
+                # split into single characters, and wrapping the list that
+                # op.output() already returns would nest it.
+                node1 = helper.make_node(
+                    'Add',
+                    inputs=[bias_name, op.input('X')[0]],
+                    outputs=[temp_tensor_name])
+                node2 = helper.make_node(
+                    'Mul',
+                    inputs=[scale_name, temp_tensor_name],
+                    outputs=op.output('Out'))
+            return [scale_node, bias_node, node1, node2]
+    def mul(self, op, block):
+        """Translate a Paddle ``mul`` op into Flatten/MatMul/Reshape nodes.
+
+        ``X`` and ``Y`` are flattened at ``x_num_col_dims`` and
+        ``y_num_col_dims`` respectively, multiplied with ``MatMul``, and the
+        product is reshaped to the shape recorded for the op's ``Out``
+        variable. Returns the five nodes as a list.
+        """
+        out_shape = list(block.var(op.output('Out')[0]).shape)
+        x_num_col_dims = op.attr('x_num_col_dims')
+        y_num_col_dims = op.attr('y_num_col_dims')
+        # Intermediate tensor names are derived from the op's own
+        # input/output names rather than from get_name().
+        flatten_x_name = 'flatten_{}'.format(op.input('X')[0])
+        flatten_y_name = 'flatten_{}'.format(op.input('Y')[0])
+        shape_name = 'temp_shape_{}'.format(op.output('Out')[0])
+        temp_out_name = 'temp_{}'.format(op.output('Out')[0])
+        flatten_x = helper.make_node(
+            'Flatten',
+            inputs=op.input('X'),
+            outputs=[flatten_x_name],
+            axis=x_num_col_dims)
+        flatten_y = helper.make_node(
+            'Flatten',
+            inputs=op.input('Y'),
+            outputs=[flatten_y_name],
+            axis=y_num_col_dims)
+        shape_node = self.make_constant_node(
+            shape_name, onnx_pb.TensorProto.INT64, out_shape)
+        node = helper.make_node(
+            'MatMul',
+            inputs=[flatten_x_name, flatten_y_name],
+            outputs=[temp_out_name])
+        reshape_out = helper.make_node(
+            'Reshape',
+            inputs=[temp_out_name, shape_name],
+            outputs=op.output('Out'))
+        return [flatten_x, flatten_y, shape_node, node, reshape_out]
+    def batch_norm(self, op, block):
+        """Emit an ONNX ``BatchNormalization`` node.
+
+        Inputs are concatenated in the order X, Scale, Bias, Mean, Variance;
+        ``epsilon`` and ``momentum`` are copied from the op's attributes and
+        the node writes to the op's ``Y`` output.
+        """
+        bn_inputs = []
+        for slot in ('X', 'Scale', 'Bias', 'Mean', 'Variance'):
+            bn_inputs = bn_inputs + op.input(slot)
+        return helper.make_node(
+            'BatchNormalization',
+            inputs=bn_inputs,
+            outputs=op.output('Y'),
+            epsilon=op.attr('epsilon'),
+            momentum=op.attr('momentum'))
+    def instance_norm(self, op, block):
+        """Emit an ONNX ``InstanceNormalization`` node.
+
+        Inputs are concatenated in the order X, Scale, Bias; ``epsilon`` is
+        copied from the op's attribute and the node writes to ``Y``.
+        """
+        in_inputs = []
+        for slot in ('X', 'Scale', 'Bias'):
+            in_inputs = in_inputs + op.input(slot)
+        return helper.make_node(
+            'InstanceNormalization',
+            inputs=in_inputs,
+            outputs=op.output('Y'),
+            epsilon=op.attr('epsilon'))
+    def concat(self, op, block):
+        """Emit an ONNX ``Concat`` node along the op's ``axis`` attribute."""
+        src, dst = op.input('X'), op.output('Out')
+        concat_axis = op.attr('axis')
+        return helper.make_node(
+            'Concat', inputs=src, outputs=dst, axis=concat_axis)
+    def sum(self, op, block):
+        """Emit an ONNX ``Sum`` node over all of the op's ``X`` inputs."""
+        src, dst = op.input('X'), op.output('Out')
+        return helper.make_node('Sum', inputs=src, outputs=dst)
+    def floor(self, op, block):
+        """Emit an ONNX ``Floor`` node from the op's ``X`` to its ``Out``."""
+        src, dst = op.input('X'), op.output('Out')
+        return helper.make_node('Floor', inputs=src, outputs=dst)
+    def uniform_random_batch_size_like(self, op, block):
+        """Emit an ONNX ``RandomUniformLike`` node.
+
+        ``low``/``high`` come from the op's ``min``/``max`` attributes, the
+        dtype is looked up in ``paddle_onnx_dtype_map`` and the seed is
+        converted to float.
+        """
+        random_attrs = {
+            'high': op.attr('max'),
+            'dtype': self.paddle_onnx_dtype_map[op.attr('dtype')],
+            'low': op.attr('min'),
+            'seed': float(op.attr('seed')),
+        }
+        return helper.make_node(
+            'RandomUniformLike',
+            inputs=op.input('Input'),
+            outputs=op.output('Out'),
+            **random_attrs)
+    def depthwise_conv2d(self, op, block):
+        """Handle ``depthwise_conv2d`` by delegating to :meth:`conv2d`."""
+        conv_node = self.conv2d(op, block)
+        return conv_node
+    def relu6(self, op, block):
+        """Emit ``relu6`` as an ONNX ``Clip`` between 0.0 and ``threshold``.
+
+        Returns a one-element list holding the Clip node.
+        """
+        upper = op.attr('threshold')
+        clip_node = helper.make_node(
+            'Clip',
+            inputs=[op.input('X')[0]],
+            outputs=op.output('Out'),
+            min=0.0,
+            max=upper)
+        return [clip_node]
+    def shape(self, op, block):
+        """Emit an ONNX ``Shape`` node from the op's ``Input`` to ``Out``."""
+        src, dst = op.input('Input'), op.output('Out')
+        return helper.make_node('Shape', inputs=src, outputs=dst)
+    def split(self, op, block):
+        """Emit an ONNX ``Split`` node along the op's ``axis``.
+
+        The ``split`` attribute is only set when the op's ``sections``
+        attribute is non-empty.
+        """
+        sections = op.attr('sections')
+        split_attrs = {}
+        if len(sections) > 0:
+            split_attrs['split'] = sections
+        return helper.make_node(
+            'Split',
+            inputs=op.input('X'),
+            outputs=op.output('Out'),
+            axis=op.attr('axis'),
+            **split_attrs)
+    def slice(self, op, block):
+        """Emit an ONNX ``Slice`` node with attribute-style bounds.
+
+        ``axes``, ``starts`` and ``ends`` are copied from the op's
+        attributes. Returns a one-element list holding the node.
+        """
+        slice_attrs = {
+            key: op.attr(key) for key in ('axes', 'starts', 'ends')
+        }
+        slice_node = helper.make_node(
+            "Slice",
+            inputs=[op.input('Input')[0]],
+            outputs=op.output('Out'),
+            **slice_attrs)
+        return [slice_node]
+    def fill_constant(self, op, block):
+        """Translate ``fill_constant`` into an ONNX ``Constant`` node.
+
+        A tensor of the op's ``shape`` filled with its ``value`` attribute is
+        embedded in the node; the ONNX element type is looked up from the
+        op's ``dtype`` in ``paddle_onnx_dtype_map``.
+        """
+        value = op.attr('value')
+        dtype = op.attr('dtype')
+        shape = op.attr('shape')
+        value = np.ones(shape) * value
+        # presumably 2 is the integer code of Paddle's INT32 VarType --
+        # TODO confirm against core.VarDesc.VarType.
+        if dtype == 2:
+            value = value.astype('int32')
+        node = helper.make_node(
+            'Constant',
+            inputs=[],
+            outputs=op.output('Out'),
+            value=helper.make_tensor(
+                name=op.output('Out')[0],
+                data_type=self.paddle_onnx_dtype_map[dtype],
+                dims=shape,
+                # make_tensor expects a flat sequence of values; tolist() on
+                # a multi-dimensional array would yield nested lists.
+                vals=value.flatten().tolist()))
+        return node
+    def transpose2(self, op, block):
+        """Emit an ONNX ``Transpose`` whose ``perm`` is the op's ``axis``."""
+        src, dst = op.input('X'), op.output('Out')
+        permutation = op.attr('axis')
+        return helper.make_node(
+            'Transpose', inputs=src, outputs=dst, perm=permutation)
+    def flatten2(self, op, block):
+        """Emit an ONNX ``Flatten`` node at the op's ``axis`` attribute."""
+        src, dst = op.input('X'), op.output('Out')
+        flatten_axis = op.attr('axis')
+        return helper.make_node(
+            'Flatten', inputs=src, outputs=dst, axis=flatten_axis)
+    def reshape2(self, op, block):
+        """Translate a Paddle ``reshape2`` op into ONNX ``Reshape`` nodes.
+
+        Three sources for the target shape are tried in order:
+
+        * several ``ShapeTensor`` inputs: each is cast to INT64, the casts
+          are concatenated, and the result feeds ``Reshape``;
+        * exactly one ``ShapeTensor`` input: it is cast to INT64 and fed to
+          ``Reshape``;
+        * a non-empty ``shape`` attribute: it is emitted as an INT64
+          Constant and fed to ``Reshape``.
+
+        Returns the list of created nodes, or None when none of the three
+        cases applies.
+        """
+        shape_tensors = op.input('ShapeTensor')
+        if len(shape_tensors) > 1:
+            cast_shape_nodes = list()
+            cast_shape_names = list()
+            for dim in shape_tensors:
+                temp_name = self.get_name(op.type, 'shape.cast')
+                node = helper.make_node(
+                    'Cast',
+                    inputs=[dim],
+                    outputs=[temp_name],
+                    to=onnx_pb.TensorProto.INT64)
+                cast_shape_nodes.append(node)
+                cast_shape_names.append(temp_name)
+            temp_name = self.get_name(op.type, 'shape.concat')
+            shape_node = helper.make_node(
+                'Concat', inputs=cast_shape_names, outputs=[temp_name], axis=-1)
+            node = helper.make_node(
+                'Reshape',
+                inputs=[op.input('X')[0], temp_name],
+                outputs=op.output('Out'))
+            return cast_shape_nodes + [shape_node, node]
+        elif len(shape_tensors) == 1:
+            temp_name = self.get_name(op.type, 'shape.cast')
+            cast_shape_node = helper.make_node(
+                'Cast',
+                inputs=shape_tensors,
+                outputs=[temp_name],
+                to=onnx_pb.TensorProto.INT64)
+            node = helper.make_node(
+                'Reshape',
+                inputs=[op.input('X')[0], temp_name],
+                outputs=op.output('Out'))
+            return [cast_shape_node, node]
+        elif op.attr('shape') is not None and len(op.attr('shape')) > 0:
+            shape_name = self.get_name(op.type, 'shape')
+            shape_node = self.make_constant_node(shape_name,
+                                                 onnx_pb.TensorProto.INT64,
+                                                 op.attr('shape'))
+            reshape_node = helper.make_node(
+                'Reshape',
+                inputs=[op.input('X')[0], shape_name],
+                outputs=op.output('Out'))
+            return [shape_node, reshape_node]
+        # NOTE(review): no usable shape source -- callers receive None here;
+        # confirm whether this should be an error instead.
+        return None
+    def dropout(self, op, block):
+        """Translate a Paddle ``dropout`` op for inference-time export.
+
+        ``upscale_in_train`` becomes a single ``Identity`` node;
+        ``downgrade_in_infer`` becomes a Constant holding
+        ``1 - dropout_prob`` followed by a ``Mul`` (returned as a list).
+        Any other ``dropout_implementation`` raises ``Exception``.
+        """
+        mode = op.attr('dropout_implementation')
+        prob = op.attr('dropout_prob')
+
+        if mode == 'upscale_in_train':
+            return helper.make_node(
+                'Identity', inputs=op.input('X'), outputs=op.output('Out'))
+
+        if mode != 'downgrade_in_infer':
+            raise Exception("Unexpected situation happend")
+
+        keep_name = self.get_name(op.type, 'scale')
+        keep_node = self.make_constant_node(
+            keep_name, onnx_pb.TensorProto.FLOAT, 1 - prob)
+        mul_node = helper.make_node(
+            "Mul",
+            inputs=[op.input('X')[0], keep_name],
+            outputs=op.output('Out'))
+        return [keep_node, mul_node]
+    def reduce_mean(self, op, block):
+        """Emit an ONNX ``ReduceMean`` node.
+
+        ``axes`` and ``keepdims`` are taken from the op's ``dim`` and
+        ``keep_dim`` attributes.
+        """
+        reduce_attrs = {
+            'axes': op.attr('dim'),
+            'keepdims': op.attr('keep_dim'),
+        }
+        return helper.make_node(
+            'ReduceMean',
+            inputs=op.input('X'),
+            outputs=op.output('Out'),
+            **reduce_attrs)
+    def bilinear_interp(self, op, block):
+        """Translate a Paddle ``bilinear_interp`` op into ONNX ``Resize``.
+
+        Three ways of specifying the output size are handled:
+
+        * ``OutSize`` or ``SizeTensor`` inputs: the scales are computed in
+          the graph (target shape divided by input shape) and fed to
+          ``Resize``; a list of nodes is returned.
+        * a ``Scale`` input: it is passed to ``Resize`` directly and a single
+          node is returned.
+        * attributes only: when ``out_h`` or ``out_w`` is -1 the ``scale``
+          attribute is used for both spatial axes and ``[Constant, Resize]``
+          is returned.
+
+        Raises:
+            Exception: if ``align_corners`` is set or ``align_mode`` is 0, or
+                if only attributes are given and neither ``out_h`` nor
+                ``out_w`` is -1.
+        """
+        input_names = op.input_names
+        input_shape = block.vars[op.input('X')[0]].shape
+        if op.attr('align_corners') or op.attr('align_mode') == 0:
+            raise Exception(
+                "Resize in onnx(opset<=10) only support coordinate_transformation_mode: 'asymmetric', Try converting with --onnx_opset 11"
+            )
+        if ('OutSize' in input_names and len(op.input('OutSize')) > 0) or (
+                'SizeTensor' in input_names and
+                len(op.input('SizeTensor')) > 0):
+            node_list = list()
+            # Runtime shape of X, then its first two entries via Slice [0, 2).
+            shape_name0 = self.get_name(op.type, 'shape')
+            shape_node0 = helper.make_node(
+                'Shape', inputs=op.input('X'), outputs=[shape_name0])
+            starts_name = self.get_name(op.type, 'slice.starts')
+            starts_node = self.make_constant_node(
+                starts_name, onnx_pb.TensorProto.INT64, [0])
+            ends_name = self.get_name(op.type, 'slice.ends')
+            ends_node = self.make_constant_node(ends_name,
+                                                onnx_pb.TensorProto.INT64, [2])
+            shape_name1 = self.get_name(op.type, 'shape')
+            shape_node1 = helper.make_node(
+                'Slice',
+                inputs=[shape_name0, starts_name, ends_name],
+                outputs=[shape_name1])
+            node_list.extend([shape_node0, starts_node, ends_node, shape_node1])
+            # Requested output size as INT64, from OutSize or from the
+            # concatenated SizeTensor inputs.
+            if 'OutSize' in input_names and len(op.input('OutSize')) > 0:
+                cast_shape_name = self.get_name(op.type, "shape.cast")
+                cast_shape_node = helper.make_node(
+                    'Cast',
+                    inputs=op.input('OutSize'),
+                    outputs=[cast_shape_name],
+                    to=onnx_pb.TensorProto.INT64)
+                node_list.append(cast_shape_node)
+            else:
+                concat_shape_name = self.get_name(
+                    op.type, op.output('Out')[0] + "shape.concat")
+                concat_shape_node = helper.make_node(
+                    "Concat",
+                    inputs=op.input('SizeTensor'),
+                    outputs=[concat_shape_name],
+                    axis=0)
+                cast_shape_name = self.get_name(op.type, "shape.cast")
+                cast_shape_node = helper.make_node(
+                    'Cast',
+                    inputs=[concat_shape_name],
+                    outputs=[cast_shape_name],
+                    to=onnx_pb.TensorProto.INT64)
+                node_list.extend([concat_shape_node, cast_shape_node])
+            # Full target shape = sliced leading dims of X + requested size.
+            shape_name2 = self.get_name(op.type, "shape.concat")
+            shape_node2 = helper.make_node(
+                'Concat',
+                inputs=[shape_name1, cast_shape_name],
+                outputs=[shape_name2],
+                axis=0)
+            node_list.append(shape_node2)
+            # Both shapes are cast to FLOAT so their ratio can be computed.
+            cast_shape_name2 = self.get_name(op.type, "shape.cast")
+            cast_shape_node2 = helper.make_node(
+                'Cast',
+                inputs=[shape_name2],
+                outputs=[cast_shape_name2],
+                to=onnx_pb.TensorProto.FLOAT)
+            node_list.append(cast_shape_node2)
+            cast_shape_name0 = self.get_name(op.type, "shape.cast")
+            cast_shape_node0 = helper.make_node(
+                'Cast',
+                inputs=[shape_name0],
+                outputs=[cast_shape_name0],
+                to=onnx_pb.TensorProto.FLOAT)
+            node_list.append(cast_shape_node0)
+            # scales = target shape / input shape, consumed by Resize below.
+            outputs_h_w_scales = op.output('Out')[0] + "@out_hw_scales"
+            node_h_w_scales = helper.make_node(
+                'Div',
+                inputs=[cast_shape_name2, cast_shape_name0],
+                outputs=[outputs_h_w_scales])
+            node_list.append(node_h_w_scales)
+            result_node = helper.make_node(
+                'Resize',
+                inputs=[op.input('X')[0], outputs_h_w_scales],
+                outputs=op.output('Out'),
+                mode='linear')
+            node_list.extend([result_node])
+            return node_list
+        elif 'Scale' in input_names and len(op.input('Scale')) > 0:
+            # The Scale input is handed to Resize as its scales input as-is.
+            node = helper.make_node(
+                'Resize',
+                inputs=[op.input('X')[0], op.input('Scale')[0]],
+                outputs=op.output('Out'),
+                mode='linear')
+        else:
+            out_shape = [op.attr('out_h'), op.attr('out_w')]
+            scale = op.attr('scale')
+            if out_shape.count(-1) > 0:
+                # Scale factors of 1 on the first two axes, `scale` on the
+                # last two.
+                scale_name = self.get_name(op.type, 'scale')
+                scale_node = self.make_constant_node(scale_name,
+                                                     onnx_pb.TensorProto.FLOAT,
+                                                     [1, 1, scale, scale])
+                node = helper.make_node(
+                    'Resize',
+                    inputs=[op.input('X')[0], scale_name],
+                    outputs=op.output('Out'),
+                    mode='linear')
+                return [scale_node, node]
+            else:
+                raise Exception("Unexpected situation happend")
+        return node
+    def nearest_interp(self, op, block):
+        """Translate a Paddle ``nearest_interp`` op into ONNX ``Resize``.
+
+        Every branch emits ``Resize`` with ``mode='nearest'``:
+
+        * an ``OutSize`` input: the scales are computed in the graph (target
+          shape divided by input shape); a list of nodes is returned.
+        * a ``Scale`` input: it is passed to ``Resize`` directly and a single
+          node is returned.
+        * attributes only: when ``out_h`` or ``out_w`` is -1 the ``scale``
+          attribute is used for both spatial axes and ``[Constant, Resize]``
+          is returned.
+
+        Raises:
+            Exception: if ``align_corners`` is set, or if only attributes are
+                given and neither ``out_h`` nor ``out_w`` is -1.
+        """
+        input_names = op.input_names
+        if op.attr('align_corners'):
+            raise Exception(
+                "Resize in onnx(opset<=10) only support coordinate_transformation_mode: 'asymmetric', Try converting with --onnx_opset 11"
+            )
+        if 'OutSize' in input_names and len(op.input('OutSize')) > 0:
+            node_list = list()
+            # Runtime shape of X, then its first two entries via Slice [0, 2).
+            shape_name0 = self.get_name(op.type, 'shape')
+            shape_node0 = helper.make_node(
+                'Shape', inputs=op.input('X'), outputs=[shape_name0])
+            starts_name = self.get_name(op.type, 'slice.starts')
+            starts_node = self.make_constant_node(
+                starts_name, onnx_pb.TensorProto.INT64, [0])
+            ends_name = self.get_name(op.type, 'slice.ends')
+            ends_node = self.make_constant_node(ends_name,
+                                                onnx_pb.TensorProto.INT64, [2])
+            shape_name1 = self.get_name(op.type, 'shape')
+            shape_node1 = helper.make_node(
+                'Slice',
+                inputs=[shape_name0, starts_name, ends_name],
+                outputs=[shape_name1])
+            node_list.extend([shape_node0, starts_node, ends_node, shape_node1])
+            # NOTE(review): given the outer guard this test is always true,
+            # so the SizeTensor branch below is currently unreachable.
+            if 'OutSize' in input_names and len(op.input('OutSize')) > 0:
+                cast_shape_name = self.get_name(op.type, "shape.cast")
+                cast_shape_node = helper.make_node(
+                    'Cast',
+                    inputs=op.input('OutSize'),
+                    outputs=[cast_shape_name],
+                    to=onnx_pb.TensorProto.INT64)
+                node_list.append(cast_shape_node)
+            else:
+                concat_shape_name = self.get_name(
+                    op.type, op.output('Out')[0] + "shape.concat")
+                concat_shape_node = helper.make_node(
+                    "Concat",
+                    inputs=op.input('SizeTensor'),
+                    outputs=[concat_shape_name],
+                    axis=0)
+                cast_shape_name = self.get_name(op.type, "shape.cast")
+                cast_shape_node = helper.make_node(
+                    'Cast',
+                    inputs=[concat_shape_name],
+                    outputs=[cast_shape_name],
+                    to=onnx_pb.TensorProto.INT64)
+                node_list.extend([concat_shape_node, cast_shape_node])
+            # Full target shape = sliced leading dims of X + requested size.
+            shape_name2 = self.get_name(op.type, "shape.concat")
+            shape_node2 = helper.make_node(
+                'Concat',
+                inputs=[shape_name1, cast_shape_name],
+                outputs=[shape_name2],
+                axis=0)
+            node_list.append(shape_node2)
+            # Both shapes are cast to FLOAT so their ratio can be computed.
+            cast_shape_name2 = self.get_name(op.type, "shape.cast")
+            cast_shape_node2 = helper.make_node(
+                'Cast',
+                inputs=[shape_name2],
+                outputs=[cast_shape_name2],
+                to=onnx_pb.TensorProto.FLOAT)
+            node_list.append(cast_shape_node2)
+            cast_shape_name0 = self.get_name(op.type, "shape.cast")
+            cast_shape_node0 = helper.make_node(
+                'Cast',
+                inputs=[shape_name0],
+                outputs=[cast_shape_name0],
+                to=onnx_pb.TensorProto.FLOAT)
+            node_list.append(cast_shape_node0)
+            # scales = target shape / input shape, consumed by Resize below.
+            outputs_h_w_scales = op.output('Out')[0] + "@out_hw_scales"
+            node_h_w_scales = helper.make_node(
+                'Div',
+                inputs=[cast_shape_name2, cast_shape_name0],
+                outputs=[outputs_h_w_scales])
+            node_list.append(node_h_w_scales)
+            # Nearest-neighbour interpolation must use mode='nearest', as
+            # the other branches of this method do; 'linear' here would
+            # export bilinear resizing instead.
+            result_node = helper.make_node(
+                'Resize',
+                inputs=[op.input('X')[0], outputs_h_w_scales],
+                outputs=op.output('Out'),
+                mode='nearest')
+            node_list.extend([result_node])
+            return node_list
+        elif 'Scale' in input_names and len(op.input('Scale')) > 0:
+            # The Scale input is handed to Resize as its scales input as-is.
+            node = helper.make_node(
+                'Resize',
+                inputs=[op.input('X')[0], op.input('Scale')[0]],
+                outputs=op.output('Out'),
+                mode='nearest')
+        else:
+            out_shape = [op.attr('out_h'), op.attr('out_w')]
+            scale = op.attr('scale')
+            if out_shape.count(-1) > 0:
+                # Scale factors of 1 on the first two axes, `scale` on the
+                # last two.
+                scale_name = self.get_name(op.type, 'scale')
+                scale_node = self.make_constant_node(scale_name,
+                                                     onnx_pb.TensorProto.FLOAT,
+                                                     [1, 1, scale, scale])
+                node = helper.make_node(
+                    'Resize',
+                    inputs=[op.input('X')[0], scale_name],
+                    outputs=op.output('Out'),
+                    mode='nearest')
+                return [scale_node, node]
+            else:
+                raise Exception("Unexpected situation happend")
+        return node
+    def hard_sigmoid(self, op, block):
+        """Map Paddle ``hard_sigmoid`` onto a single ONNX ``HardSigmoid`` node.
+        The op's ``slope`` attribute is forwarded as ``alpha`` and its
+        ``offset`` attribute as ``beta``. ``block`` is unused.
+        """
+        onnx_attrs = {'alpha': op.attr('slope'), 'beta': op.attr('offset')}
+        return helper.make_node(
+            'HardSigmoid',
+            inputs=op.input('X'),
+            outputs=op.output('Out'),
+            **onnx_attrs)
+    def swish(self, op, block):
+        """Expand Paddle ``swish`` into ``X * Sigmoid(beta * X)``.
+        Returns a list of four ONNX nodes in evaluation order: the scalar
+        ``beta`` constant, ``Mul``, ``Sigmoid`` and the final ``Mul`` that
+        writes the op's ``Out``. ``block`` is unused.
+        """
+        make_node = onnx.helper.make_node
+        x_name = op.input('X')[0]
+        beta_value = op.attr('beta')
+        # Intermediate tensor names are requested in the order beta, beta_x, sigmoid.
+        beta_name = self.get_name(op.type, 'beta')
+        beta_x_name = self.get_name(op.type, 'beta_x')
+        sigmoid_name = self.get_name(op.type, 'sigmoid')
+        beta_tensor = onnx.helper.make_tensor(
+            name=beta_name,
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[beta_value])
+        return [
+            make_node(
+                'Constant',
+                name=beta_name,
+                inputs=[],
+                outputs=[beta_name],
+                value=beta_tensor),
+            make_node(
+                'Mul',
+                name=beta_x_name,
+                inputs=[x_name, beta_name],
+                outputs=[beta_x_name]),
+            make_node(
+                'Sigmoid',
+                name=sigmoid_name,
+                inputs=[beta_x_name],
+                outputs=[sigmoid_name]),
+            make_node(
+                'Mul',
+                inputs=[x_name, sigmoid_name],
+                outputs=op.output('Out')),
+        ]
+    def hard_swish(self, op, block):
+        """Expand Paddle ``hard_swish`` into ONNX ``Add``/``Clip``/``Mul``/``Div`` nodes.
+        The emitted graph computes ``X * Clip(X + offset, 0, threshold) / scale``.
+        Args:
+            op: Paddle operator; its ``offset``, ``threshold`` and ``scale``
+                attributes are read.
+            block: Paddle block owning ``op`` (unused).
+        Returns:
+            list: The two constant nodes followed by the four compute nodes,
+            in evaluation order.
+        """
+        x_name = op.input('X')[0]
+        scale_name = self.get_name(op.type, 'scale')
+        offset_name = self.get_name(op.type, 'offset')
+        scale_node = self.make_constant_node(scale_name,
+                                             onnx_pb.TensorProto.FLOAT,
+                                             op.attr('scale'))
+        offset_node = self.make_constant_node(offset_name,
+                                              onnx_pb.TensorProto.FLOAT,
+                                              op.attr('offset'))
+        shifted_name = self.get_name(op.type, 'add')
+        add_node = helper.make_node(
+            'Add', inputs=[x_name, offset_name], outputs=[shifted_name])
+        clipped_name = self.get_name(op.type, 'relu')
+        # Paddle's hard_swish exposes threshold/scale/offset (no 'min'/'max'
+        # attributes); the clamp range is [0, threshold].
+        clip_node = helper.make_node(
+            'Clip',
+            inputs=[shifted_name],
+            outputs=[clipped_name],
+            max=float(op.attr('threshold')),
+            min=0.0)
+        product_name = self.get_name(op.type, 'mul')
+        mul_node = helper.make_node(
+            'Mul', inputs=[x_name, clipped_name], outputs=[product_name])
+        div_node = helper.make_node(
+            'Div', inputs=[product_name, scale_name], outputs=op.output('Out'))
+        return [scale_node, offset_node, add_node, clip_node, mul_node, div_node]
+    def elementwise_mul(self, op, block):
+        """Convert Paddle ``elementwise_mul`` to an ONNX ``Mul``.
+        Args:
+            op: Paddle operator with inputs ``X`` and ``Y``, output ``Out`` and
+                an ``axis`` attribute.
+            block: Paddle block used to look up the shapes of ``X`` and ``Y``.
+        Returns:
+            A single ``Mul`` node when ``axis`` is -1 or the last axis of ``X``
+            or when both inputs have the same rank; a list
+            ``[shape constant, Reshape, Mul]`` when ``Y`` is 1-D and ``axis`` is 1.
+        Raises:
+            Exception: For any other combination of ``axis`` and input ranks.
+        """
+        axis = op.attr('axis')
+        x_name = op.input('X')[0]
+        y_name = op.input('Y')[0]
+        x_shape = block.var(x_name).shape
+        y_shape = block.var(y_name).shape
+        if len(y_shape) == 1 and axis == 1:
+            # Reshape Y to [1, C, 1, ...] so that it broadcasts along axis 1 of X.
+            shape_name = self.get_name(op.type, 'shape')
+            shape_value = [1] * len(x_shape)
+            shape_value[axis] = y_shape[0]
+            shape_node = self.make_constant_node(
+                shape_name, onnx_pb.TensorProto.INT64, shape_value)
+            reshaped_y = self.get_name(op.type, 'temp')
+            y_node = helper.make_node(
+                'Reshape', inputs=[y_name, shape_name], outputs=[reshaped_y])
+            mul_node = helper.make_node(
+                'Mul', inputs=[x_name, reshaped_y], outputs=op.output('Out'))
+            return [shape_node, y_node, mul_node]
+        if axis == -1 or axis == len(x_shape) - 1 or len(x_shape) == len(
+                y_shape):
+            # The inputs can be multiplied directly without reshaping Y.
+            return helper.make_node(
+                'Mul', inputs=[x_name, y_name], outputs=op.output('Out'))
+        raise Exception("Unexpected situation happend in elementwise_mul")
+    def feed(self, op, block):
+        """Describe the variable produced by a ``feed`` op as an ONNX value info.
+        The name comes from the op's first ``Out`` entry; shape and dtype are
+        read from the matching block variable, with the dtype translated
+        through ``self.paddle_onnx_dtype_map``.
+        """
+        var_name = op.output('Out')[0]
+        variable = block.var(var_name)
+        var_shape = variable.shape
+        onnx_dtype = self.paddle_onnx_dtype_map[variable.dtype]
+        return helper.make_tensor_value_info(
+            name=var_name, shape=var_shape, elem_type=onnx_dtype)
+    def fetch(self, op, block):
+        """Describe the variable consumed by a ``fetch`` op as an ONNX value info.
+        The name comes from the op's first ``X`` entry; shape and dtype are
+        read from the matching block variable, with the dtype translated
+        through ``self.paddle_onnx_dtype_map``.
+        """
+        var_name = op.input('X')[0]
+        variable = block.var(var_name)
+        var_shape = variable.shape
+        onnx_dtype = self.paddle_onnx_dtype_map[variable.dtype]
+        return helper.make_tensor_value_info(
+            name=var_name, shape=var_shape, elem_type=onnx_dtype)
+    def unsqueeze2(self, op, block):
+        """Map Paddle ``unsqueeze2`` to ONNX ``Unsqueeze``, forwarding the ``axes`` attribute."""
+        source, target = op.input('X'), op.output('Out')
+        return helper.make_node(
+            'Unsqueeze', inputs=source, outputs=target, axes=op.attr('axes'))
+    def cast(self, op, block):
+        """Map Paddle ``cast`` to ONNX ``Cast``.
+        The target type is the op's ``out_dtype`` attribute translated through
+        ``self.paddle_onnx_dtype_map``.
+        """
+        source, target = op.input('X'), op.output('Out')
+        onnx_dtype = self.paddle_onnx_dtype_map[op.attr('out_dtype')]
+        return helper.make_node(
+            'Cast', inputs=source, outputs=target, to=onnx_dtype)
+    def arg_max(self, op, block):
+        """Map Paddle ``arg_max`` to ONNX ``ArgMax`` over the op's ``axis`` with ``keepdims=0``."""
+        source, target = op.input('X'), op.output('Out')
+        reduce_axis = op.attr('axis')
+        return helper.make_node(
+            'ArgMax',
+            inputs=source,
+            outputs=target,
+            axis=reduce_axis,
+            keepdims=0)
+    def reciprocal(self, op, block):
+        """Map Paddle ``reciprocal`` to ONNX ``Reciprocal``.
+        The tensors are taken from the op's first declared input slot and
+        first declared output slot.
+        """
+        first_input_slot = op.input_names[0]
+        first_output_slot = op.output_names[0]
+        return helper.make_node(
+            'Reciprocal',
+            inputs=op.input(first_input_slot),
+            outputs=op.output(first_output_slot))
+    def im2sequence(self, op, block):
+        """Delegate ``im2sequence`` conversion to the custom-layer module (imported lazily)."""
+        from .paddle_custom_layer.im2sequence import im2sequence as convert
+        return convert(op, block)
+    def yolo_box(self, op, block):
+        """Delegate ``yolo_box`` conversion to the custom-layer module (imported lazily)."""
+        from .paddle_custom_layer.yolo_box import yolo_box as convert
+        return convert(op, block)
+    def multiclass_nms(self, op, block):
+        """Delegate ``multiclass_nms`` conversion to the custom-layer module (imported lazily)."""
+        from .paddle_custom_layer.multiclass_nms import multiclass_nms as convert
+        return convert(op, block)
+    def box_coder(self, op, block):
+        """Delegate ``box_coder`` conversion to the custom-layer module (imported lazily)."""
+        from .paddle_custom_layer.box_coder import box_coder as convert
+        return convert(op, block)
+    def prior_box(self, op, block):
+        """Delegate ``prior_box`` conversion to the custom-layer module (imported lazily)."""
+        from .paddle_custom_layer.prior_box import prior_box as convert
+        return convert(op, block)
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/paddle_custom_layer/__init__.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/paddle_custom_layer/__init__.py
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/paddle_custom_layer/box_coder.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/paddle_custom_layer/box_coder.py
+# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+import math
+import onnx
+import warnings
+import numpy as np
+from functools import partial
+from onnx import TensorProto
+from onnx.helper import make_node, make_tensor
+from onnx import onnx_pb
+from paddle.fluid.executor import _fetch_var as fetch_var
+from onnx import helper
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+def box_coder(op, block):
+    """Convert a Paddle ``box_coder`` op into ONNX nodes that decode target boxes.
+    Only the decode computation is emitted; the op's ``code_type`` attribute is
+    not inspected. With prior boxes ``(x1, y1, x2, y2)`` the generated graph
+    computes
+        pb_w = x2 - x1 (+ 1 when ``box_normalized`` is false), likewise pb_h
+        pb_x = x1 + 0.5 * pb_w, likewise pb_y
+        cx = var_x * t_x * pb_w + pb_x, likewise cy
+        w = exp(var_w * t_w) * pb_w, likewise h
+    and concatenates ``cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2`` (the
+    last two minus 1 when not normalized) along a new trailing axis.
+    Args:
+        op: Paddle operator with inputs ``PriorBox``, ``TargetBox`` and
+            optionally ``PriorBoxVar``, output ``OutputBox`` and attributes
+            ``axis``, ``box_normalized`` and optionally ``variance``.
+        block: Paddle block used to look up the shape of ``TargetBox``.
+    Returns:
+        list: ONNX nodes in evaluation order; the last one writes ``OutputBox``.
+    Raises:
+        Exception: If ``PriorBoxVar`` is supplied together with ``axis == 1``.
+    """
+    result_name = op.output('OutputBox')[0]
+    t_size = block.var(op.input('TargetBox')[0]).shape
+    axis = int(op.attr('axis'))
+    norm = bool(op.attr('box_normalized'))
+    node_list = []
+    def emit(op_type, inputs, outputs, **attrs):
+        # Build one ONNX node and append it to the result list.
+        node_list.append(
+            onnx.helper.make_node(
+                op_type, inputs=inputs, outputs=outputs, **attrs))
+    def emit_float_scalar(name, value):
+        # Scalar FLOAT constant; the embedded tensor is named after its output.
+        emit(
+            'Constant', [], [name],
+            value=onnx.helper.make_tensor(
+                name=name + "@const",
+                data_type=onnx.TensorProto.FLOAT,
+                dims=(),
+                vals=[value]))
+    # Scalar constants 1 and 2: the +1/-1 correction for un-normalized boxes
+    # and the divisor used to halve the decoded width/height.
+    name_one = result_name + "@slice_1"
+    name_two = result_name + "@slice_2"
+    emit_float_scalar(name_one, 1)
+    emit_float_scalar(name_two, 2)
+    # Split the prior boxes into their four coordinate columns.
+    name_slice_x1 = result_name + "@x1"
+    name_slice_y1 = result_name + "@y1"
+    name_slice_x2 = result_name + "@x2"
+    name_slice_y2 = result_name + "@y2"
+    emit(
+        'Split',
+        op.input('PriorBox'),
+        [name_slice_x1, name_slice_y1, name_slice_x2, name_slice_y2],
+        axis=1)
+    # Prior box width/height; un-normalized boxes use x2 + 1 and y2 + 1.
+    if norm:
+        far_x, far_y = name_slice_x2, name_slice_y2
+    else:
+        far_x = result_name + "@centor_w_tmp"
+        far_y = result_name + "@centor_h_tmp"
+        emit('Add', [name_one, name_slice_x2], [far_x])
+        emit('Add', [name_one, name_slice_y2], [far_y])
+    name_pb_w = result_name + "@pb_w"
+    name_pb_h = result_name + "@pb_h"
+    emit('Sub', [far_x, name_slice_x1], [name_pb_w])
+    emit('Sub', [far_y, name_slice_y1], [name_pb_h])
+    # Prior box center: x1 + 0.5 * width, y1 + 0.5 * height.
+    name_half_value = result_name + "@half_value"
+    emit_float_scalar(name_half_value, 0.5)
+    name_pb_w_half = result_name + "@pb_w_half"
+    name_pb_h_half = result_name + "@pb_h_half"
+    emit('Mul', [name_pb_w, name_half_value], [name_pb_w_half])
+    emit('Mul', [name_pb_h, name_half_value], [name_pb_h_half])
+    name_pb_x = result_name + "@pb_x"
+    name_pb_y = result_name + "@pb_y"
+    emit('Add', [name_pb_w_half, name_slice_x1], [name_pb_x])
+    emit('Add', [name_pb_h_half, name_slice_y1], [name_pb_y])
+    # With axis == 0 the prior terms are transposed before being combined with
+    # the target boxes; otherwise they are used as they are.
+    prior_terms = [name_pb_w, name_pb_h, name_pb_x, name_pb_y]
+    if axis == 0:
+        transposed_terms = [name + "_transpose" for name in prior_terms]
+        for source, target in zip(prior_terms, transposed_terms):
+            emit('Transpose', [source], [target])
+        prior_terms = transposed_terms
+    pb_w_in, pb_h_in, pb_x_in, pb_y_in = prior_terms
+    # Variance: either the PriorBoxVar input (transposed) or a constant built
+    # from the 'variance' attribute, defaulting to all ones.
+    if 'PriorBoxVar' in op.input_names and len(op.input('PriorBoxVar')) > 0:
+        if axis == 1:
+            raise Exception(
+                "The op box_coder has variable do not support aixs broadcast")
+        var_split_inputs_name = [result_name + "@variance_split"]
+        emit('Transpose', op.input('PriorBoxVar'), var_split_inputs_name)
+    else:
+        variances = [1.0, 1.0, 1.0, 1.0]
+        # op.attr is a method, so membership has to be tested on attr_names.
+        if 'variance' in op.attr_names and len(op.attr('variance')) > 0:
+            variances = [float(var) for var in op.attr('variance')]
+        var_split_inputs_name = [result_name + "@variance_raw"]
+        emit(
+            'Constant', [], var_split_inputs_name,
+            value=onnx.helper.make_tensor(
+                name=var_split_inputs_name[0] + "@const",
+                data_type=onnx.TensorProto.FLOAT,
+                dims=[len(variances)],
+                vals=variances))
+    # Split variance and target boxes into per-coordinate tensors.
+    outputs_split_variance = [
+        result_name + "@variance_split" + str(i) for i in range(0, 4)
+    ]
+    outputs_split_targebox = [
+        result_name + "@targebox_split" + str(i) for i in range(0, 4)
+    ]
+    emit('Split', var_split_inputs_name, outputs_split_variance, axis=0)
+    emit('Split', op.input('TargetBox'), outputs_split_targebox, axis=2)
+    outputs_squeeze_targebox = [
+        result_name + "@targebox_squeeze" + str(i) for i in range(0, 4)
+    ]
+    for source, target in zip(outputs_split_targebox,
+                              outputs_squeeze_targebox):
+        emit('Squeeze', [source], [target], axes=[2])
+    # Step 1: scale every target coordinate by its variance.
+    step1 = [
+        result_name + "@decode_" + coord + "_step1"
+        for coord in ("x", "y", "w", "h")
+    ]
+    for target_name, variance_name, out_name in zip(
+            outputs_squeeze_targebox, outputs_split_variance, step1):
+        emit('Mul', [target_name, variance_name], [out_name])
+    # Centers: step1 * prior size + prior center.
+    name_x_step2 = result_name + "@decode_x_step2"
+    name_y_step2 = result_name + "@decode_y_step2"
+    emit('Mul', [step1[0], pb_w_in], [name_x_step2])
+    emit('Mul', [step1[1], pb_h_in], [name_y_step2])
+    name_x_step3 = result_name + "@decode_x_step3"
+    name_y_step3 = result_name + "@decode_y_step3"
+    emit('Add', [name_x_step2, pb_x_in], [name_x_step3])
+    emit('Add', [name_y_step2, pb_y_in], [name_y_step3])
+    # Sizes: exp(step1) * prior size.
+    name_w_step2 = result_name + "@decode_w_step2"
+    name_h_step2 = result_name + "@decode_h_step2"
+    emit('Exp', [step1[2]], [name_w_step2])
+    emit('Exp', [step1[3]], [name_h_step2])
+    name_w_step3 = result_name + "@decode_w_step3"
+    name_h_step3 = result_name + "@decode_h_step3"
+    emit('Mul', [name_w_step2, pb_w_in], [name_w_step3])
+    emit('Mul', [name_h_step2, pb_h_in], [name_h_step3])
+    # Corners: center -/+ half of the decoded size.
+    name_half_w = result_name + "@decode_half_w_step4"
+    name_half_h = result_name + "@decode_half_h_step4"
+    emit('Div', [name_w_step3, name_two], [name_half_w])
+    emit('Div', [name_h_step3, name_two], [name_half_h])
+    name_out_x1 = result_name + "@ouput_x1"
+    name_out_y1 = result_name + "@output_y1"
+    emit('Sub', [name_x_step3, name_half_w], [name_out_x1])
+    emit('Sub', [name_y_step3, name_half_h], [name_out_y1])
+    name_out_x2 = result_name + "@ouput_x2"
+    name_out_y2 = result_name + "@output_y2"
+    emit('Add', [name_x_step3, name_half_w], [name_out_x2])
+    emit('Add', [name_y_step3, name_half_h], [name_out_y2])
+    if not norm:
+        # Un-normalized boxes: subtract 1 from the bottom-right corner.
+        name_unnorm_x2 = result_name + "@ouput_unnorm_x2"
+        name_unnorm_y2 = result_name + "@ouput_unnorm_y2"
+        emit('Sub', [name_out_x2, name_one], [name_unnorm_x2])
+        emit('Sub', [name_out_y2, name_one], [name_unnorm_y2])
+        name_out_x2, name_out_y2 = name_unnorm_x2, name_unnorm_y2
+    # Re-introduce a trailing coordinate axis and concatenate the four corners.
+    new_axis = len(list(t_size)[:-1])
+    corner_names = [name_out_x1, name_out_y1, name_out_x2, name_out_y2]
+    unsqueezed_names = [
+        result_name + "@points_unsqueeze_x1",
+        result_name + "points_unsqueeze_y1",
+        result_name + "points_unsqueeze_x2",
+        result_name + "points_unsqueeze_y2",
+    ]
+    for source, target in zip(corner_names, unsqueezed_names):
+        emit('Unsqueeze', [source], [target], axes=[new_axis])
+    emit('Concat', unsqueezed_names, op.output('OutputBox'), axis=new_axis)
+    return node_list
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/paddle_custom_layer/im2sequence.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/paddle_custom_layer/im2sequence.py
+# Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import onnx
+import numpy as np
+from onnx import onnx_pb, helper
+im2seq_counter = 0
+def im2sequence(op, block):
+    """Convert a Paddle ``im2sequence`` op into ONNX ``Slice``/``Flatten``/``Concat`` nodes.
+    One window is sliced out of the input per output position (row-major over
+    ``out_h`` x ``out_w``), flattened with ``axis=0``, and all windows are
+    concatenated along axis 0 into the op's ``Out`` tensor.
+    Args:
+        op: Paddle operator with input ``X``, output ``Out`` and attributes
+            ``strides``, ``paddings``, ``kernels`` and ``out_stride``.
+        block: Paddle block used to look up the 4-D shape of ``X``.
+    Returns:
+        list: ONNX nodes in evaluation order; the last one writes ``Out``.
+    Raises:
+        AssertionError: If the spatial shape of ``X`` is not fixed or
+            ``out_stride`` is not 1.
+    """
+    global im2seq_counter
+    _, _, h, w = block.var(op.input('X')[0]).shape
+    assert h > 0 and w > 0, "Only supported fixed input shape for im2sequence operator."
+    stride_h, stride_w = op.attr('strides')
+    paddings = op.attr('paddings')
+    # out_stride may be a scalar or a per-dimension list; every entry must be 1.
+    out_stride = op.attr('out_stride')
+    out_strides = out_stride if isinstance(out_stride,
+                                           (list, tuple)) else [out_stride]
+    assert all(
+        value == 1 for value in out_strides
+    ), "Only out_stride==1 is supported for im2sequence operator."
+    # Reserve a unique index so tensor names of different im2sequence ops in
+    # the same model do not collide.
+    op_index = im2seq_counter
+    im2seq_counter += 1
+    # paddings is (up, left, down, right): height grows by up + down, width by
+    # left + right.
+    # NOTE(review): no Pad node is emitted, so non-zero paddings only change the
+    # number of windows, not the data being sliced -- confirm this is intended.
+    h = h + paddings[0] + paddings[2]
+    w = w + paddings[1] + paddings[3]
+    kernel_h, kernel_w = op.attr('kernels')
+    out_h = 1 + (h - kernel_h + stride_h - 1) // stride_h
+    out_w = 1 + (w - kernel_w + stride_w - 1) // stride_w
+    h_steps = [[i * stride_h, i * stride_h + kernel_h] for i in range(out_h)]
+    w_steps = [[j * stride_w, j * stride_w + kernel_w] for j in range(out_w)]
+    nodes = list()
+    slice_blocks = list()
+    for i, (h_start, h_end) in enumerate(h_steps):
+        for j, (w_start, w_end) in enumerate(w_steps):
+            suffix = "{}.{}.{}".format(op_index, i, j)
+            starts_name = "im2sequence.starts." + suffix
+            ends_name = "im2sequence.ends." + suffix
+            starts_tensor = helper.make_tensor(
+                name=starts_name,
+                data_type=onnx_pb.TensorProto.INT64,
+                dims=[4],
+                vals=[0, 0, h_start, w_start])
+            # 999999 acts as "until the end" for the batch and channel axes.
+            ends_tensor = helper.make_tensor(
+                name=ends_name,
+                data_type=onnx_pb.TensorProto.INT64,
+                dims=[4],
+                vals=[999999, 999999, h_end, w_end])
+            starts_node = helper.make_node(
+                'Constant',
+                inputs=[],
+                outputs=[starts_name],
+                value=starts_tensor)
+            ends_node = helper.make_node(
+                'Constant', inputs=[], outputs=[ends_name], value=ends_tensor)
+            nodes.extend([starts_node, ends_node])
+            # NOTE(review): Slice accepts starts/ends as inputs only from ONNX
+            # opset 10 on -- confirm the opset the exported model declares.
+            slice_block_name = "im2sequence.slice." + suffix
+            slice_block_node = helper.make_node(
+                'Slice',
+                inputs=[op.input('X')[0], starts_name, ends_name],
+                outputs=[slice_block_name])
+            flatten_block_name = "im2sequence.flatten." + suffix
+            flatten_block_node = helper.make_node(
+                "Flatten",
+                inputs=[slice_block_name],
+                outputs=[flatten_block_name],
+                axis=0)
+            nodes.extend([slice_block_node, flatten_block_node])
+            slice_blocks.append(flatten_block_name)
+    concat_block_node = helper.make_node(
+        "Concat", inputs=slice_blocks, outputs=op.output('Out'), axis=0)
+    nodes.append(concat_block_node)
+    print("\n\n==========Importance Notice===========")
+    print(
+        "Since im2sequence operator is used in your paddlepaddle model, the translated onnx model only support input data with batch_size=1."
+    )
+    print("======================================\n")
+    return nodes
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/paddle_custom_layer/multiclass_nms.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/paddle_custom_layer/multiclass_nms.py
+# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import sys
+import os
+import numpy as np
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+import onnx
+import logging
+from onnx import helper, onnx_pb
+def multiclass_nms(op, block):
+    """
+    Convert the paddle multiclass_nms to onnx op.
+    This op is get the select boxes from origin boxes.
+    """
+    inputs = dict()
+    outputs = dict()
+    attrs = dict()
+    for name in op.input_names:
+        inputs[name] = op.input(name)
+    for name in op.output_names:
+        outputs[name] = op.output(name)
+    for name in op.attr_names:
+        attrs[name] = op.attr(name)
+    result_name = outputs['Out'][0]
+    background = attrs['background_label']
+    normalized = attrs['normalized']
+    if normalized == False:
+        logging.warn(
+                    "The parameter normalized of multiclass_nms OP of Paddle is False, which has diff with ONNX." \
+                    " Please set normalized=True in multiclass_nms of Paddle, see doc Q4 in https://github.com/PaddlePaddle/X2Paddle/blob/develop/FAQ.md")
+    #convert the paddle attribute to onnx tensor
+    name_score_threshold = [outputs['Out'][0] + "@score_threshold"]
+    name_iou_threshold = [outputs['Out'][0] + "@iou_threshold"]
+    name_keep_top_k = [outputs['Out'][0] + '@keep_top_k']
+    name_keep_top_k_2D = [outputs['Out'][0] + '@keep_top_k_1D']
+    node_score_threshold = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_score_threshold,
+        value=onnx.helper.make_tensor(
+            name=name_score_threshold[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[float(attrs['score_threshold'])]))
+    node_iou_threshold = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_iou_threshold,
+        value=onnx.helper.make_tensor(
+            name=name_iou_threshold[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[float(attrs['nms_threshold'])]))
+    node_keep_top_k = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_keep_top_k,
+        value=onnx.helper.make_tensor(
+            name=name_keep_top_k[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=(),
+            vals=[np.int64(attrs['keep_top_k'])]))
+    node_keep_top_k_2D = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_keep_top_k_2D,
+        value=onnx.helper.make_tensor(
+            name=name_keep_top_k_2D[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[1, 1],
+            vals=[np.int64(attrs['keep_top_k'])]))
+    # the paddle data format is x1,y1,x2,y2
+    kwargs = {'center_point_box': 0}
+    name_select_nms = [outputs['Out'][0] + "@select_index"]
+    node_select_nms= onnx.helper.make_node(
+        'NonMaxSuppression',
+        inputs=inputs['BBoxes'] + inputs['Scores'] + name_keep_top_k +\
+            name_iou_threshold + name_score_threshold,
+        outputs=name_select_nms)
+    # step 1 nodes select the nms class
+    node_list = [
+        node_score_threshold, node_iou_threshold, node_keep_top_k,
+        node_keep_top_k_2D, node_select_nms
+    ]
+    # create some const value to use
+    name_const_value = [result_name+"@const_0",
+        result_name+"@const_1",\
+        result_name+"@const_2",\
+        result_name+"@const_-1"]
+    value_const_value = [0, 1, 2, -1]
+    for name, value in zip(name_const_value, value_const_value):
+        node = onnx.helper.make_node(
+            'Constant',
+            inputs=[],
+            outputs=[name],
+            value=onnx.helper.make_tensor(
+                name=name + "@const",
+                data_type=onnx.TensorProto.INT64,
+                dims=[1],
+                vals=[value]))
+        node_list.append(node)
+    # In this code block, we decode the raw score data, reshaping N * C * M to 1 * N*C*M,
+    # and at the same time decode the select indices to 1 * D and gather the select_indices
+    outputs_gather_1 = [result_name + "@gather_1"]
+    node_gather_1 = onnx.helper.make_node(
+        'Gather',
+        inputs=name_select_nms + [result_name + "@const_1"],
+        outputs=outputs_gather_1,
+        axis=1)
+    node_list.append(node_gather_1)
+    outputs_squeeze_gather_1 = [result_name + "@sequeeze_gather_1"]
+    node_squeeze_gather_1 = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_gather_1,
+        outputs=outputs_squeeze_gather_1,
+        axes=[1])
+    node_list.append(node_squeeze_gather_1)
+    outputs_gather_2 = [result_name + "@gather_2"]
+    node_gather_2 = onnx.helper.make_node(
+        'Gather',
+        inputs=name_select_nms + [result_name + "@const_2"],
+        outputs=outputs_gather_2,
+        axis=1)
+    node_list.append(node_gather_2)
+    #slice the class is not 0
+    if background == 0:
+        outputs_nonzero = [result_name + "@nonzero"]
+        node_nonzero = onnx.helper.make_node(
+            'NonZero', inputs=outputs_squeeze_gather_1, outputs=outputs_nonzero)
+        node_list.append(node_nonzero)
+    else:
+        name_thresh = [result_name + "@thresh"]
+        node_thresh = onnx.helper.make_node(
+            'Constant',
+            inputs=[],
+            outputs=name_thresh,
+            value=onnx.helper.make_tensor(
+                name=name_thresh[0] + "@const",
+                data_type=onnx.TensorProto.INT32,
+                dims=[1],
+                vals=[-1]))
+        node_list.append(node_thresh)
+        outputs_cast = [result_name + "@cast"]
+        node_cast = onnx.helper.make_node(
+            'Cast', inputs=outputs_squeeze_gather_1, outputs=outputs_cast, to=6)
+        node_list.append(node_cast)
+        outputs_greater = [result_name + "@greater"]
+        node_greater = onnx.helper.make_node(
+            'Greater',
+            inputs=outputs_cast + name_thresh,
+            outputs=outputs_greater)
+        node_list.append(node_greater)
+        outputs_nonzero = [result_name + "@nonzero"]
+        node_nonzero = onnx.helper.make_node(
+            'NonZero', inputs=outputs_greater, outputs=outputs_nonzero)
+        node_list.append(node_nonzero)
+    outputs_gather_1_nonzero = [result_name + "@gather_1_nonzero"]
+    node_gather_1_nonzero = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_gather_1 + outputs_nonzero,
+        outputs=outputs_gather_1_nonzero,
+        axis=0)
+    node_list.append(node_gather_1_nonzero)
+    outputs_gather_2_nonzero = [result_name + "@gather_2_nonzero"]
+    node_gather_2_nonzero = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_gather_2 + outputs_nonzero,
+        outputs=outputs_gather_2_nonzero,
+        axis=0)
+    node_list.append(node_gather_2_nonzero)
+    # reshape scores N * C * M to (N*C*M) * 1
+    outputs_reshape_scores_rank1 = [result_name + "@reshape_scores_rank1"]
+    node_reshape_scores_rank1 = onnx.helper.make_node(
+        "Reshape",
+        inputs=inputs['Scores'] + [result_name + "@const_-1"],
+        outputs=outputs_reshape_scores_rank1)
+    node_list.append(node_reshape_scores_rank1)
+    # get the shape of scores
+    outputs_shape_scores = [result_name + "@shape_scores"]
+    node_shape_scores = onnx.helper.make_node(
+        'Shape', inputs=inputs['Scores'], outputs=outputs_shape_scores)
+    node_list.append(node_shape_scores)
+    # gather the index: 2 shape of scores
+    outputs_gather_scores_dim1 = [result_name + "@gather_scores_dim1"]
+    node_gather_scores_dim1 = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_shape_scores + [result_name + "@const_2"],
+        outputs=outputs_gather_scores_dim1,
+        axis=0)
+    node_list.append(node_gather_scores_dim1)
+    # mul class * M
+    outputs_mul_classnum_boxnum = [result_name + "@mul_classnum_boxnum"]
+    node_mul_classnum_boxnum = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_gather_1_nonzero + outputs_gather_scores_dim1,
+        outputs=outputs_mul_classnum_boxnum)
+    node_list.append(node_mul_classnum_boxnum)
+    # add class * M * index
+    outputs_add_class_M_index = [result_name + "@add_class_M_index"]
+    node_add_class_M_index = onnx.helper.make_node(
+        'Add',
+        inputs=outputs_mul_classnum_boxnum + outputs_gather_2_nonzero,
+        outputs=outputs_add_class_M_index)
+    node_list.append(node_add_class_M_index)
+    # Squeeze the indices to 1 dim
+    outputs_squeeze_select_index = [result_name + "@squeeze_select_index"]
+    node_squeeze_select_index = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_add_class_M_index,
+        outputs=outputs_squeeze_select_index,
+        axes=[0, 2])
+    node_list.append(node_squeeze_select_index)
+    # gather the data from flatten scores
+    outputs_gather_select_scores = [result_name + "@gather_select_scores"]
+    node_gather_select_scores = onnx.helper.make_node('Gather',
+        inputs=outputs_reshape_scores_rank1 + \
+            outputs_squeeze_select_index,
+        outputs=outputs_gather_select_scores,
+        axis=0)
+    node_list.append(node_gather_select_scores)
+    # get nums to input TopK
+    outputs_shape_select_num = [result_name + "@shape_select_num"]
+    node_shape_select_num = onnx.helper.make_node(
+        'Shape',
+        inputs=outputs_gather_select_scores,
+        outputs=outputs_shape_select_num)
+    node_list.append(node_shape_select_num)
+    outputs_gather_select_num = [result_name + "@gather_select_num"]
+    node_gather_select_num = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_shape_select_num + [result_name + "@const_0"],
+        outputs=outputs_gather_select_num,
+        axis=0)
+    node_list.append(node_gather_select_num)
+    outputs_unsqueeze_select_num = [result_name + "@unsqueeze_select_num"]
+    node_unsqueeze_select_num = onnx.helper.make_node(
+        'Unsqueeze',
+        inputs=outputs_gather_select_num,
+        outputs=outputs_unsqueeze_select_num,
+        axes=[0])
+    node_list.append(node_unsqueeze_select_num)
+    outputs_concat_topK_select_num = [result_name + "@conat_topK_select_num"]
+    node_conat_topK_select_num = onnx.helper.make_node(
+        'Concat',
+        inputs=outputs_unsqueeze_select_num + name_keep_top_k_2D,
+        outputs=outputs_concat_topK_select_num,
+        axis=0)
+    node_list.append(node_conat_topK_select_num)
+    outputs_cast_concat_topK_select_num = [
+        result_name + "@concat_topK_select_num"
+    ]
+    node_outputs_cast_concat_topK_select_num = onnx.helper.make_node(
+        'Cast',
+        inputs=outputs_concat_topK_select_num,
+        outputs=outputs_cast_concat_topK_select_num,
+        to=6)
+    node_list.append(node_outputs_cast_concat_topK_select_num)
+    # get min(topK, num_select)
+    outputs_compare_topk_num_select = [result_name + "@compare_topk_num_select"]
+    node_compare_topk_num_select = onnx.helper.make_node(
+        'ReduceMin',
+        inputs=outputs_cast_concat_topK_select_num,
+        outputs=outputs_compare_topk_num_select,
+        keepdims=0)
+    node_list.append(node_compare_topk_num_select)
+    # unsqueeze the indices to 1D tensor
+    outputs_unsqueeze_topk_select_indices = [
+        result_name + "@unsqueeze_topk_select_indices"
+    ]
+    node_unsqueeze_topk_select_indices = onnx.helper.make_node(
+        'Unsqueeze',
+        inputs=outputs_compare_topk_num_select,
+        outputs=outputs_unsqueeze_topk_select_indices,
+        axes=[0])
+    node_list.append(node_unsqueeze_topk_select_indices)
+    # cast the indices to INT64
+    outputs_cast_topk_indices = [result_name + "@cast_topk_indices"]
+    node_cast_topk_indices = onnx.helper.make_node(
+        'Cast',
+        inputs=outputs_unsqueeze_topk_select_indices,
+        outputs=outputs_cast_topk_indices,
+        to=7)
+    node_list.append(node_cast_topk_indices)
+    # select topk scores  indices
+    outputs_topk_select_topk_indices = [result_name + "@topk_select_topk_values",\
+        result_name + "@topk_select_topk_indices"]
+    node_topk_select_topk_indices = onnx.helper.make_node(
+        'TopK',
+        inputs=outputs_gather_select_scores + outputs_cast_topk_indices,
+        outputs=outputs_topk_select_topk_indices)
+    node_list.append(node_topk_select_topk_indices)
+    # gather topk label, scores, boxes
+    outputs_gather_topk_scores = [result_name + "@gather_topk_scores"]
+    node_gather_topk_scores = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_gather_select_scores +
+        [outputs_topk_select_topk_indices[1]],
+        outputs=outputs_gather_topk_scores,
+        axis=0)
+    node_list.append(node_gather_topk_scores)
+    outputs_gather_topk_class = [result_name + "@gather_topk_class"]
+    node_gather_topk_class = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_gather_1_nonzero +
+        [outputs_topk_select_topk_indices[1]],
+        outputs=outputs_gather_topk_class,
+        axis=1)
+    node_list.append(node_gather_topk_class)
+    # gather the boxes need to gather the boxes id, then get boxes
+    outputs_gather_topk_boxes_id = [result_name + "@gather_topk_boxes_id"]
+    node_gather_topk_boxes_id = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_gather_2_nonzero +
+        [outputs_topk_select_topk_indices[1]],
+        outputs=outputs_gather_topk_boxes_id,
+        axis=1)
+    node_list.append(node_gather_topk_boxes_id)
+    # squeeze the gather_topk_boxes_id to 1 dim
+    outputs_squeeze_topk_boxes_id = [result_name + "@squeeze_topk_boxes_id"]
+    node_squeeze_topk_boxes_id = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_gather_topk_boxes_id,
+        outputs=outputs_squeeze_topk_boxes_id,
+        axes=[0, 2])
+    node_list.append(node_squeeze_topk_boxes_id)
+    outputs_gather_select_boxes = [result_name + "@gather_select_boxes"]
+    node_gather_select_boxes = onnx.helper.make_node(
+        'Gather',
+        inputs=inputs['BBoxes'] + outputs_squeeze_topk_boxes_id,
+        outputs=outputs_gather_select_boxes,
+        axis=1)
+    node_list.append(node_gather_select_boxes)
+    # concat the final result
+    # before concat need to cast the class to float
+    outputs_cast_topk_class = [result_name + "@cast_topk_class"]
+    node_cast_topk_class = onnx.helper.make_node(
+        'Cast',
+        inputs=outputs_gather_topk_class,
+        outputs=outputs_cast_topk_class,
+        to=1)
+    node_list.append(node_cast_topk_class)
+    outputs_unsqueeze_topk_scores = [result_name + "@unsqueeze_topk_scores"]
+    node_unsqueeze_topk_scores = onnx.helper.make_node(
+        'Unsqueeze',
+        inputs=outputs_gather_topk_scores,
+        outputs=outputs_unsqueeze_topk_scores,
+        axes=[0, 2])
+    node_list.append(node_unsqueeze_topk_scores)
+    inputs_concat_final_results = outputs_cast_topk_class + outputs_unsqueeze_topk_scores +\
+        outputs_gather_select_boxes
+    outputs_sort_by_socre_results = [result_name + "@concat_topk_scores"]
+    node_sort_by_socre_results = onnx.helper.make_node(
+        'Concat',
+        inputs=inputs_concat_final_results,
+        outputs=outputs_sort_by_socre_results,
+        axis=2)
+    node_list.append(node_sort_by_socre_results)
+    # select topk classes indices
+    outputs_squeeze_cast_topk_class = [result_name + "@squeeze_cast_topk_class"]
+    node_squeeze_cast_topk_class = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_cast_topk_class,
+        outputs=outputs_squeeze_cast_topk_class,
+        axes=[0, 2])
+    node_list.append(node_squeeze_cast_topk_class)
+    outputs_neg_squeeze_cast_topk_class = [
+        result_name + "@neg_squeeze_cast_topk_class"
+    ]
+    node_neg_squeeze_cast_topk_class = onnx.helper.make_node(
+        'Neg',
+        inputs=outputs_squeeze_cast_topk_class,
+        outputs=outputs_neg_squeeze_cast_topk_class)
+    node_list.append(node_neg_squeeze_cast_topk_class)
+    outputs_topk_select_classes_indices = [result_name + "@topk_select_topk_classes_scores",\
+        result_name + "@topk_select_topk_classes_indices"]
+    node_topk_select_topk_indices = onnx.helper.make_node(
+        'TopK',
+        inputs=outputs_neg_squeeze_cast_topk_class + outputs_cast_topk_indices,
+        outputs=outputs_topk_select_classes_indices)
+    node_list.append(node_topk_select_topk_indices)
+    outputs_concat_final_results = outputs['Out']
+    node_concat_final_results = onnx.helper.make_node(
+        'Gather',
+        inputs=outputs_sort_by_socre_results +
+        [outputs_topk_select_classes_indices[1]],
+        outputs=outputs_concat_final_results,
+        axis=1)
+    node_list.append(node_concat_final_results)
+    return node_list
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/paddle_custom_layer/prior_box.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/paddle_custom_layer/prior_box.py
+# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+import math
+import onnx
+import warnings
+import numpy as np
+from functools import partial
+from onnx import TensorProto
+from onnx.helper import make_node, make_tensor
+from onnx import onnx_pb
+from paddle.fluid.executor import _fetch_var as fetch_var
+from onnx import helper
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+def ExpandAspectRations(input_aspect_ratior, flip):
+    expsilon = 1e-6
+    output_ratios = [1.0]
+    for input_ratio in input_aspect_ratior:
+        already_exis = False
+        for output_ratio in output_ratios:
+            if abs(input_ratio - output_ratio) < expsilon:
+                already_exis = True
+                break
+        if already_exis == False:
+            output_ratios.append(input_ratio)
+            if flip:
+                output_ratios.append(1.0 / input_ratio)
+    return output_ratios
+def prior_box(op, block):
+    """
+    In this function, use the attribute to get the prior box, because we do not use
+    the image data and feature map, wo could the python code to create the varaible,
+    and to create the onnx tensor as output.
+    """
+    flip = bool(op.attr('flip'))
+    clip = bool(op.attr('clip'))
+    min_max_aspect_ratios_order = bool(op.attr('min_max_aspect_ratios_order'))
+    min_sizes = [float(size) for size in op.attr('min_sizes')]
+    max_sizes = [float(size) for size in op.attr('max_sizes')]
+    if isinstance(op.attr('aspect_ratios'), list):
+        aspect_ratios = [float(ratio) for ratio in op.attr('aspect_ratios')]
+    else:
+        aspect_ratios = [float(op.attr('aspect_ratios'))]
+    variances = [float(var) for var in op.attr('variances')]
+    # set min_max_aspect_ratios_order = false
+    output_ratios = ExpandAspectRations(aspect_ratios, flip)
+    step_w = float(op.attr('step_w'))
+    step_h = float(op.attr('step_h'))
+    offset = float(op.attr('offset'))
+    input_shape = block.var(op.input('Input')[0]).shape
+    image_shape = block.var(op.input('Image')[0]).shape
+    img_width = image_shape[3]
+    img_height = image_shape[2]
+    feature_width = input_shape[3]
+    feature_height = input_shape[2]
+    step_width = 1.0
+    step_height = 1.0
+    if step_w == 0.0 or step_h == 0.0:
+        step_w = float(img_width / feature_width)
+        step_h = float(img_height / feature_height)
+    num_priors = len(output_ratios) * len(min_sizes)
+    if len(max_sizes) > 0:
+        num_priors += len(max_sizes)
+    out_dim = (feature_height, feature_width, num_priors, 4)
+    out_boxes = np.zeros(out_dim).astype('float32')
+    out_var = np.zeros(out_dim).astype('float32')
+    idx = 0
+    for h in range(feature_height):
+        for w in range(feature_width):
+            c_x = (w + offset) * step_w
+            c_y = (h + offset) * step_h
+            idx = 0
+            for s in range(len(min_sizes)):
+                min_size = min_sizes[s]
+                if not min_max_aspect_ratios_order:
+                    # rest of priors
+                    for r in range(len(output_ratios)):
+                        ar = output_ratios[r]
+                        c_w = min_size * math.sqrt(ar) / 2
+                        c_h = (min_size / math.sqrt(ar)) / 2
+                        out_boxes[h, w, idx, :] = [
+                            (c_x - c_w) / img_width, (c_y - c_h) / img_height,
+                            (c_x + c_w) / img_width, (c_y + c_h) / img_height
+                        ]
+                        idx += 1
+                    if len(max_sizes) > 0:
+                        max_size = max_sizes[s]
+                        # second prior: aspect_ratio = 1,
+                        c_w = c_h = math.sqrt(min_size * max_size) / 2
+                        out_boxes[h, w, idx, :] = [
+                            (c_x - c_w) / img_width, (c_y - c_h) / img_height,
+                            (c_x + c_w) / img_width, (c_y + c_h) / img_height
+                        ]
+                        idx += 1
+                else:
+                    c_w = c_h = min_size / 2.
+                    out_boxes[h, w, idx, :] = [
+                        (c_x - c_w) / img_width, (c_y - c_h) / img_height,
+                        (c_x + c_w) / img_width, (c_y + c_h) / img_height
+                    ]
+                    idx += 1
+                    if len(max_sizes) > 0:
+                        max_size = max_sizes[s]
+                        # second prior: aspect_ratio = 1,
+                        c_w = c_h = math.sqrt(min_size * max_size) / 2
+                        out_boxes[h, w, idx, :] = [
+                            (c_x - c_w) / img_width, (c_y - c_h) / img_height,
+                            (c_x + c_w) / img_width, (c_y + c_h) / img_height
+                        ]
+                        idx += 1
+                    # rest of priors
+                    for r in range(len(output_ratios)):
+                        ar = output_ratios[r]
+                        if abs(ar - 1.) < 1e-6:
+                            continue
+                        c_w = min_size * math.sqrt(ar) / 2
+                        c_h = (min_size / math.sqrt(ar)) / 2
+                        out_boxes[h, w, idx, :] = [
+                            (c_x - c_w) / img_width, (c_y - c_h) / img_height,
+                            (c_x + c_w) / img_width, (c_y + c_h) / img_height
+                        ]
+                        idx += 1
+    if clip:
+        out_boxes = np.clip(out_boxes, 0.0, 1.0)
+    # set the variance.
+    out_var = np.tile(variances, (feature_height, feature_width, num_priors, 1))
+    #make node that
+    node_boxes = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=op.output('Boxes'),
+        value=onnx.helper.make_tensor(
+            name=op.output('Boxes')[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=out_boxes.shape,
+            vals=out_boxes.flatten()))
+    node_vars = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=op.output('Variances'),
+        value=onnx.helper.make_tensor(
+            name=op.output('Variances')[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=out_var.shape,
+            vals=out_var.flatten()))
+    return [node_boxes, node_vars]
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/paddle_custom_layer/yolo_box.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/opset9/paddle_custom_layer/yolo_box.py
+# Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import onnx
+import numpy as np
+from onnx import onnx_pb, helper
+MAX_FLOAT32 = np.asarray(
+    [255, 255, 127, 127], dtype=np.uint8).view(np.float32)[0]
+def get_old_name(arg, name_prefix=''):
+    prefix_index = arg.find(name_prefix)
+    if prefix_index != -1:
+        last_prefix = arg[len(name_prefix):]
+    else:
+        last_prefix = arg
+    idx = last_prefix.find('@')
+    if idx != -1:
+        last_prefix = last_prefix[:idx]
+    return name_prefix + last_prefix
+def is_static_shape(shape):
+    if len(shape) > 1 and shape.count(-1) > 1:
+        raise Exception(
+            "Converting this model to ONNX need with static input shape, please fix input shape of this model, see doc Q5 in https://github.com/PaddlePaddle/X2Paddle/blob/develop/FAQ.md."
+        )
+def yolo_box(op, block):
+    inputs = dict()
+    outputs = dict()
+    attrs = dict()
+    for name in op.input_names:
+        inputs[name] = op.input(name)
+    for name in op.output_names:
+        outputs[name] = op.output(name)
+    for name in op.attr_names:
+        attrs[name] = op.attr(name)
+    model_name = outputs['Boxes'][0]
+    input_shape = block.vars[get_old_name(inputs['X'][0])].shape
+    is_static_shape(input_shape)
+    image_size = inputs['ImgSize']
+    input_height = input_shape[2]
+    input_width = input_shape[3]
+    class_num = attrs['class_num']
+    anchors = attrs['anchors']
+    num_anchors = int(len(anchors)) // 2
+    downsample_ratio = attrs['downsample_ratio']
+    input_size = input_height * downsample_ratio
+    conf_thresh = attrs['conf_thresh']
+    conf_thresh_mat = np.ones([num_anchors * input_height *
+                               input_width]) * conf_thresh
+    node_list = []
+    im_outputs = []
+    x_shape = [1, num_anchors, 5 + class_num, input_height, input_width]
+    name_x_shape = [model_name + "@x_shape"]
+    node_x_shape = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_x_shape,
+        value=onnx.helper.make_tensor(
+            name=name_x_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[5],
+            vals=x_shape))
+    node_list.append(node_x_shape)
+    outputs_x_reshape = [model_name + "@reshape"]
+    node_x_reshape = onnx.helper.make_node(
+        'Reshape', inputs=inputs['X'] + name_x_shape, outputs=outputs_x_reshape)
+    node_list.append(node_x_reshape)
+    outputs_x_transpose = [model_name + "@x_transpose"]
+    node_x_transpose = onnx.helper.make_node(
+        'Transpose',
+        inputs=outputs_x_reshape,
+        outputs=outputs_x_transpose,
+        perm=[0, 1, 3, 4, 2])
+    node_list.append(node_x_transpose)
+    range_x = []
+    range_y = []
+    for i in range(0, input_width):
+        range_x.append(i)
+    for j in range(0, input_height):
+        range_y.append(j)
+    name_range_x = [model_name + "@range_x"]
+    node_range_x = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_range_x,
+        value=onnx.helper.make_tensor(
+            name=name_range_x[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=[input_width],
+            vals=range_x))
+    node_list.append(node_range_x)
+    name_range_y = [model_name + "@range_y"]
+    node_range_y = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_range_y,
+        value=onnx.helper.make_tensor(
+            name=name_range_y[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=[input_height],
+            vals=range_y))
+    node_list.append(node_range_y)
+    range_x_new_shape = [1, input_width]
+    range_y_new_shape = [input_height, 1]
+    name_range_x_new_shape = [model_name + "@range_x_new_shape"]
+    node_range_x_new_shape = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_range_x_new_shape,
+        value=onnx.helper.make_tensor(
+            name=name_range_x_new_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[len(range_x_new_shape)],
+            vals=range_x_new_shape))
+    node_list.append(node_range_x_new_shape)
+    name_range_y_new_shape = [model_name + "@range_y_new_shape"]
+    node_range_y_new_shape = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_range_y_new_shape,
+        value=onnx.helper.make_tensor(
+            name=name_range_y_new_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[len(range_y_new_shape)],
+            vals=range_y_new_shape))
+    node_list.append(node_range_y_new_shape)
+    outputs_range_x_reshape = [model_name + "@range_x_reshape"]
+    node_range_x_reshape = onnx.helper.make_node(
+        'Reshape',
+        inputs=name_range_x + name_range_x_new_shape,
+        outputs=outputs_range_x_reshape)
+    node_list.append(node_range_x_reshape)
+    outputs_range_y_reshape = [model_name + "@range_y_reshape"]
+    node_range_y_reshape = onnx.helper.make_node(
+        'Reshape',
+        inputs=name_range_y + name_range_y_new_shape,
+        outputs=outputs_range_y_reshape)
+    node_list.append(node_range_y_reshape)
+    outputs_grid_x = [model_name + "@grid_x"]
+    node_grid_x = onnx.helper.make_node(
+        "Tile",
+        inputs=outputs_range_x_reshape + name_range_y_new_shape,
+        outputs=outputs_grid_x)
+    node_list.append(node_grid_x)
+    outputs_grid_y = [model_name + "@grid_y"]
+    node_grid_y = onnx.helper.make_node(
+        "Tile",
+        inputs=outputs_range_y_reshape + name_range_x_new_shape,
+        outputs=outputs_grid_y)
+    node_list.append(node_grid_y)
+    outputs_box_x = [model_name + "@box_x"]
+    outputs_box_y = [model_name + "@box_y"]
+    outputs_box_w = [model_name + "@box_w"]
+    outputs_box_h = [model_name + "@box_h"]
+    outputs_conf = [model_name + "@conf"]
+    outputs_prob = [model_name + "@prob"]
+    node_split_input = onnx.helper.make_node(
+        "Split",
+        inputs=outputs_x_transpose,
+        outputs=outputs_box_x + outputs_box_y + outputs_box_w\
+                + outputs_box_h + outputs_conf + outputs_prob,
+        axis=-1,
+        split=[1, 1, 1, 1, 1, class_num])
+    node_list.append(node_split_input)
+    outputs_box_x_sigmoid = [model_name + "@box_x_sigmoid"]
+    outputs_box_y_sigmoid = [model_name + "@box_y_sigmoid"]
+    node_box_x_sigmoid = onnx.helper.make_node(
+        "Sigmoid", inputs=outputs_box_x, outputs=outputs_box_x_sigmoid)
+    node_list.append(node_box_x_sigmoid)
+    node_box_y_sigmoid = onnx.helper.make_node(
+        "Sigmoid", inputs=outputs_box_y, outputs=outputs_box_y_sigmoid)
+    node_list.append(node_box_y_sigmoid)
+    outputs_box_x_squeeze = [model_name + "@box_x_squeeze"]
+    outputs_box_y_squeeze = [model_name + "@box_y_squeeze"]
+    node_box_x_squeeze = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_box_x_sigmoid,
+        outputs=outputs_box_x_squeeze,
+        axes=[4])
+    node_list.append(node_box_x_squeeze)
+    node_box_y_squeeze = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_box_y_sigmoid,
+        outputs=outputs_box_y_squeeze,
+        axes=[4])
+    node_list.append(node_box_y_squeeze)
+    outputs_box_x_add_grid = [model_name + "@box_x_add_grid"]
+    outputs_box_y_add_grid = [model_name + "@box_y_add_grid"]
+    node_box_x_add_grid = onnx.helper.make_node(
+        "Add",
+        inputs=outputs_grid_x + outputs_box_x_squeeze,
+        outputs=outputs_box_x_add_grid)
+    node_list.append(node_box_x_add_grid)
+    node_box_y_add_grid = onnx.helper.make_node(
+        "Add",
+        inputs=outputs_grid_y + outputs_box_y_squeeze,
+        outputs=outputs_box_y_add_grid)
+    node_list.append(node_box_y_add_grid)
+    name_input_h = [model_name + "@input_h"]
+    name_input_w = [model_name + "@input_w"]
+    node_input_h = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_input_h,
+        value=onnx.helper.make_tensor(
+            name=name_input_w[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[input_height]))
+    node_list.append(node_input_h)
+    node_input_w = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_input_w,
+        value=onnx.helper.make_tensor(
+            name=name_input_w[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[input_width]))
+    node_list.append(node_input_w)
+    outputs_box_x_encode = [model_name + "@box_x_encode"]
+    outputs_box_y_encode = [model_name + "@box_y_encode"]
+    node_box_x_encode = onnx.helper.make_node(
+        'Div',
+        inputs=outputs_box_x_add_grid + name_input_w,
+        outputs=outputs_box_x_encode)
+    node_list.append(node_box_x_encode)
+    node_box_y_encode = onnx.helper.make_node(
+        'Div',
+        inputs=outputs_box_y_add_grid + name_input_h,
+        outputs=outputs_box_y_encode)
+    node_list.append(node_box_y_encode)
+    name_anchor_tensor = [model_name + "@anchor_tensor"]
+    node_anchor_tensor = onnx.helper.make_node(
+        "Constant",
+        inputs=[],
+        outputs=name_anchor_tensor,
+        value=onnx.helper.make_tensor(
+            name=name_anchor_tensor[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=[len(anchors)],
+            vals=anchors))
+    node_list.append(node_anchor_tensor)
+    anchor_shape = [int(num_anchors), 2]
+    name_anchor_shape = [model_name + "@anchor_shape"]
+    node_anchor_shape = onnx.helper.make_node(
+        "Constant",
+        inputs=[],
+        outputs=name_anchor_shape,
+        value=onnx.helper.make_tensor(
+            name=name_anchor_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[2],
+            vals=anchor_shape))
+    node_list.append(node_anchor_shape)
+    outputs_anchor_tensor_reshape = [model_name + "@anchor_tensor_reshape"]
+    node_anchor_tensor_reshape = onnx.helper.make_node(
+        "Reshape",
+        inputs=name_anchor_tensor + name_anchor_shape,
+        outputs=outputs_anchor_tensor_reshape)
+    node_list.append(node_anchor_tensor_reshape)
+    name_input_size = [model_name + "@input_size"]
+    node_input_size = onnx.helper.make_node(
+        "Constant",
+        inputs=[],
+        outputs=name_input_size,
+        value=onnx.helper.make_tensor(
+            name=name_input_size[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[input_size]))
+    node_list.append(node_input_size)
+    outputs_anchors_div_input_size = [model_name + "@anchors_div_input_size"]
+    node_anchors_div_input_size = onnx.helper.make_node(
+        "Div",
+        inputs=outputs_anchor_tensor_reshape + name_input_size,
+        outputs=outputs_anchors_div_input_size)
+    node_list.append(node_anchors_div_input_size)
+    outputs_anchor_w = [model_name + "@anchor_w"]
+    outputs_anchor_h = [model_name + "@anchor_h"]
+    node_anchor_split = onnx.helper.make_node(
+        'Split',
+        inputs=outputs_anchors_div_input_size,
+        outputs=outputs_anchor_w + outputs_anchor_h,
+        axis=1,
+        split=[1, 1])
+    node_list.append(node_anchor_split)
+    new_anchor_shape = [1, int(num_anchors), 1, 1]
+    name_new_anchor_shape = [model_name + "@new_anchor_shape"]
+    node_new_anchor_shape = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_new_anchor_shape,
+        value=onnx.helper.make_tensor(
+            name=name_new_anchor_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[len(new_anchor_shape)],
+            vals=new_anchor_shape))
+    node_list.append(node_new_anchor_shape)
+    outputs_anchor_w_reshape = [model_name + "@anchor_w_reshape"]
+    outputs_anchor_h_reshape = [model_name + "@anchor_h_reshape"]
+    node_anchor_w_reshape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_anchor_w + name_new_anchor_shape,
+        outputs=outputs_anchor_w_reshape)
+    node_list.append(node_anchor_w_reshape)
+    node_anchor_h_reshape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_anchor_h + name_new_anchor_shape,
+        outputs=outputs_anchor_h_reshape)
+    node_list.append(node_anchor_h_reshape)
+    outputs_box_w_squeeze = [model_name + "@box_w_squeeze"]
+    node_box_w_squeeze = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_box_w,
+        outputs=outputs_box_w_squeeze,
+        axes=[4])
+    node_list.append(node_box_w_squeeze)
+    outputs_box_h_squeeze = [model_name + "@box_h_squeeze"]
+    node_box_h_squeeze = onnx.helper.make_node(
+        'Squeeze',
+        inputs=outputs_box_h,
+        outputs=outputs_box_h_squeeze,
+        axes=[4])
+    node_list.append(node_box_h_squeeze)
+    outputs_box_w_exp = [model_name + "@box_w_exp"]
+    node_box_w_exp = onnx.helper.make_node(
+        "Exp", inputs=outputs_box_w_squeeze, outputs=outputs_box_w_exp)
+    node_list.append(node_box_w_exp)
+    outputs_box_h_exp = [model_name + "@box_h_exp"]
+    node_box_h_exp = onnx.helper.make_node(
+        "Exp", inputs=outputs_box_h_squeeze, outputs=outputs_box_h_exp)
+    node_list.append(node_box_h_exp)
+    outputs_box_w_encode = [model_name + "box_w_encode"]
+    outputs_box_h_encode = [model_name + "box_h_encode"]
+    node_box_w_encode = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_box_w_exp + outputs_anchor_w_reshape,
+        outputs=outputs_box_w_encode)
+    node_list.append(node_box_w_encode)
+    node_box_h_encode = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_box_h_exp + outputs_anchor_h_reshape,
+        outputs=outputs_box_h_encode)
+    node_list.append(node_box_h_encode)
+    outputs_conf_sigmoid = [model_name + "@conf_sigmoid"]
+    node_conf_sigmoid = onnx.helper.make_node(
+        'Sigmoid', inputs=outputs_conf, outputs=outputs_conf_sigmoid)
+    node_list.append(node_conf_sigmoid)
+    name_conf_thresh = [model_name + "@conf_thresh"]
+    node_conf_thresh = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_conf_thresh,
+        value=onnx.helper.make_tensor(
+            name=name_conf_thresh[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=[num_anchors * input_height * input_width],
+            vals=conf_thresh_mat))
+    node_list.append(node_conf_thresh)
+    conf_shape = [1, int(num_anchors), input_height, input_width, 1]
+    name_conf_shape = [model_name + "@conf_shape"]
+    node_conf_shape = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_conf_shape,
+        value=onnx.helper.make_tensor(
+            name=name_conf_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[len(conf_shape)],
+            vals=conf_shape))
+    node_list.append(node_conf_shape)
+    outputs_conf_thresh_reshape = [model_name + "@conf_thresh_reshape"]
+    node_conf_thresh_reshape = onnx.helper.make_node(
+        'Reshape',
+        inputs=name_conf_thresh + name_conf_shape,
+        outputs=outputs_conf_thresh_reshape)
+    node_list.append(node_conf_thresh_reshape)
+    outputs_conf_sub = [model_name + "@conf_sub"]
+    node_conf_sub = onnx.helper.make_node(
+        'Sub',
+        inputs=outputs_conf_sigmoid + outputs_conf_thresh_reshape,
+        outputs=outputs_conf_sub)
+    node_list.append(node_conf_sub)
+    outputs_conf_clip = [model_name + "@conf_clip"]
+    node_conf_clip = onnx.helper.make_node(
+        'Clip', inputs=outputs_conf_sub, outputs=outputs_conf_clip)
+    node_list.append(node_conf_clip)
+    zeros = [0]
+    name_zeros = [model_name + "@zeros"]
+    node_zeros = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_zeros,
+        value=onnx.helper.make_tensor(
+            name=name_zeros[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=zeros))
+    node_list.append(node_zeros)
+    outputs_conf_clip_bool = [model_name + "@conf_clip_bool"]
+    node_conf_clip_bool = onnx.helper.make_node(
+        'Greater',
+        inputs=outputs_conf_clip + name_zeros,
+        outputs=outputs_conf_clip_bool)
+    node_list.append(node_conf_clip_bool)
+    outputs_conf_clip_cast = [model_name + "@conf_clip_cast"]
+    node_conf_clip_cast = onnx.helper.make_node(
+        'Cast',
+        inputs=outputs_conf_clip_bool,
+        outputs=outputs_conf_clip_cast,
+        to=1)
+    node_list.append(node_conf_clip_cast)
+    outputs_conf_set_zero = [model_name + "@conf_set_zero"]
+    node_conf_set_zero = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_conf_sigmoid + outputs_conf_clip_cast,
+        outputs=outputs_conf_set_zero)
+    node_list.append(node_conf_set_zero)
+    outputs_prob_sigmoid = [model_name + "@prob_sigmoid"]
+    node_prob_sigmoid = onnx.helper.make_node(
+        'Sigmoid', inputs=outputs_prob, outputs=outputs_prob_sigmoid)
+    node_list.append(node_prob_sigmoid)
+    new_shape = [1, int(num_anchors), input_height, input_width, 1]
+    name_new_shape = [model_name + "@new_shape"]
+    node_new_shape = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_new_shape,
+        value=onnx.helper.make_tensor(
+            name=name_new_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[len(new_shape)],
+            vals=new_shape))
+    node_list.append(node_new_shape)
+    outputs_conf_new_shape = [model_name + "@_conf_new_shape"]
+    node_conf_new_shape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_conf_set_zero + name_new_shape,
+        outputs=outputs_conf_new_shape)
+    node_list.append(node_conf_new_shape)
+    outputs_score = [model_name + "@score"]
+    node_score = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_prob_sigmoid + outputs_conf_new_shape,
+        outputs=outputs_score)
+    node_list.append(node_score)
+    outputs_conf_bool = [model_name + "@conf_bool"]
+    node_conf_bool = onnx.helper.make_node(
+        'Greater',
+        inputs=outputs_conf_new_shape + name_zeros,
+        outputs=outputs_conf_bool)
+    node_list.append(node_conf_bool)
+    outputs_box_x_new_shape = [model_name + "@box_x_new_shape"]
+    node_box_x_new_shape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_box_x_encode + name_new_shape,
+        outputs=outputs_box_x_new_shape)
+    node_list.append(node_box_x_new_shape)
+    outputs_box_y_new_shape = [model_name + "@box_y_new_shape"]
+    node_box_y_new_shape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_box_y_encode + name_new_shape,
+        outputs=outputs_box_y_new_shape)
+    node_list.append(node_box_y_new_shape)
+    outputs_box_w_new_shape = [model_name + "@box_w_new_shape"]
+    node_box_w_new_shape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_box_w_encode + name_new_shape,
+        outputs=outputs_box_w_new_shape)
+    node_list.append(node_box_w_new_shape)
+    outputs_box_h_new_shape = [model_name + "@box_h_new_shape"]
+    node_box_h_new_shape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_box_h_encode + name_new_shape,
+        outputs=outputs_box_h_new_shape)
+    node_list.append(node_box_h_new_shape)
+    outputs_pred_box = [model_name + "@pred_box"]
+    node_pred_box = onnx.helper.make_node(
+        'Concat',
+        inputs=outputs_box_x_new_shape + outputs_box_y_new_shape + \
+               outputs_box_w_new_shape + outputs_box_h_new_shape,
+        outputs=outputs_pred_box,
+        axis=4)
+    node_list.append(node_pred_box)
+    outputs_conf_cast = [model_name + "conf_cast"]
+    node_conf_cast = onnx.helper.make_node(
+        'Cast', inputs=outputs_conf_bool, outputs=outputs_conf_cast, to=1)
+    node_list.append(node_conf_cast)
+    outputs_pred_box_mul_conf = [model_name + "@pred_box_mul_conf"]
+    node_pred_box_mul_conf = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_pred_box + outputs_conf_cast,
+        outputs=outputs_pred_box_mul_conf)
+    node_list.append(node_pred_box_mul_conf)
+    box_shape = [1, int(num_anchors) * input_height * input_width, 4]
+    name_box_shape = [model_name + "@box_shape"]
+    node_box_shape = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_box_shape,
+        value=onnx.helper.make_tensor(
+            name=name_box_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[len(box_shape)],
+            vals=box_shape))
+    node_list.append(node_box_shape)
+    outputs_pred_box_new_shape = [model_name + "@pred_box_new_shape"]
+    node_pred_box_new_shape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_pred_box_mul_conf + name_box_shape,
+        outputs=outputs_pred_box_new_shape)
+    node_list.append(node_pred_box_new_shape)
+    outputs_pred_box_x = [model_name + "@_pred_box_x"]
+    outputs_pred_box_y = [model_name + "@_pred_box_y"]
+    outputs_pred_box_w = [model_name + "@_pred_box_w"]
+    outputs_pred_box_h = [model_name + "@_pred_box_h"]
+    node_pred_box_split = onnx.helper.make_node(
+        'Split',
+        inputs=outputs_pred_box_new_shape,
+        outputs=outputs_pred_box_x + outputs_pred_box_y + outputs_pred_box_w +
+        outputs_pred_box_h,
+        axis=2)
+    node_list.append(node_pred_box_split)
+    name_number_two = [model_name + "@number_two"]
+    node_number_two = onnx.helper.make_node(
+        "Constant",
+        inputs=[],
+        outputs=name_number_two,
+        value=onnx.helper.make_tensor(
+            name=name_number_two[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[2]))
+    node_list.append(node_number_two)
+    outputs_half_w = [model_name + "@half_w"]
+    node_half_w = onnx.helper.make_node(
+        "Div",
+        inputs=outputs_pred_box_w + name_number_two,
+        outputs=outputs_half_w)
+    node_list.append(node_half_w)
+    outputs_half_h = [model_name + "@half_h"]
+    node_half_h = onnx.helper.make_node(
+        "Div",
+        inputs=outputs_pred_box_h + name_number_two,
+        outputs=outputs_half_h)
+    node_list.append(node_half_h)
+    outputs_pred_box_x1 = [model_name + "@pred_box_x1"]
+    node_pred_box_x1 = onnx.helper.make_node(
+        'Sub',
+        inputs=outputs_pred_box_x + outputs_half_w,
+        outputs=outputs_pred_box_x1)
+    node_list.append(node_pred_box_x1)
+    outputs_pred_box_y1 = [model_name + "@pred_box_y1"]
+    node_pred_box_y1 = onnx.helper.make_node(
+        'Sub',
+        inputs=outputs_pred_box_y + outputs_half_h,
+        outputs=outputs_pred_box_y1)
+    node_list.append(node_pred_box_y1)
+    outputs_pred_box_x2 = [model_name + "@pred_box_x2"]
+    node_pred_box_x2 = onnx.helper.make_node(
+        'Add',
+        inputs=outputs_pred_box_x + outputs_half_w,
+        outputs=outputs_pred_box_x2)
+    node_list.append(node_pred_box_x2)
+    outputs_pred_box_y2 = [model_name + "@pred_box_y2"]
+    node_pred_box_y2 = onnx.helper.make_node(
+        'Add',
+        inputs=outputs_pred_box_y + outputs_half_h,
+        outputs=outputs_pred_box_y2)
+    node_list.append(node_pred_box_y2)
+    outputs_sqeeze_image_size = [model_name + "@sqeeze_image_size"]
+    node_sqeeze_image_size = onnx.helper.make_node(
+        "Squeeze",
+        axes=[0],
+        inputs=image_size,
+        outputs=outputs_sqeeze_image_size)
+    node_list.append(node_sqeeze_image_size)
+    output_img_height = [model_name + "@img_height"]
+    output_img_width = [model_name + "@img_width"]
+    node_image_size_split = onnx.helper.make_node(
+        "Split",
+        inputs=outputs_sqeeze_image_size,
+        outputs=output_img_height + output_img_width,
+        axis=-1,
+        split=[1, 1])
+    node_list.append(node_image_size_split)
+    output_img_width_cast = [model_name + "@img_width_cast"]
+    node_img_width_cast = onnx.helper.make_node(
+        'Cast', inputs=output_img_width, outputs=output_img_width_cast, to=1)
+    node_list.append(node_img_width_cast)
+    output_img_height_cast = [model_name + "@img_height_cast"]
+    node_img_height_cast = onnx.helper.make_node(
+        'Cast', inputs=output_img_height, outputs=output_img_height_cast, to=1)
+    node_list.append(node_img_height_cast)
+    outputs_pred_box_x1_decode = [model_name + "@pred_box_x1_decode"]
+    outputs_pred_box_y1_decode = [model_name + "@pred_box_y1_decode"]
+    outputs_pred_box_x2_decode = [model_name + "@pred_box_x2_decode"]
+    outputs_pred_box_y2_decode = [model_name + "@pred_box_y2_decode"]
+    node_pred_box_x1_decode = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_pred_box_x1 + output_img_width_cast,
+        outputs=outputs_pred_box_x1_decode)
+    node_list.append(node_pred_box_x1_decode)
+    node_pred_box_y1_decode = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_pred_box_y1 + output_img_height_cast,
+        outputs=outputs_pred_box_y1_decode)
+    node_list.append(node_pred_box_y1_decode)
+    node_pred_box_x2_decode = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_pred_box_x2 + output_img_width_cast,
+        outputs=outputs_pred_box_x2_decode)
+    node_list.append(node_pred_box_x2_decode)
+    node_pred_box_y2_decode = onnx.helper.make_node(
+        'Mul',
+        inputs=outputs_pred_box_y2 + output_img_height_cast,
+        outputs=outputs_pred_box_y2_decode)
+    node_list.append(node_pred_box_y2_decode)
+    name_number_one = [model_name + "@one"]
+    node_number_one = onnx.helper.make_node(
+        'Constant',
+        inputs=[],
+        outputs=name_number_one,
+        value=onnx.helper.make_tensor(
+            name=name_number_one[0] + "@const",
+            data_type=onnx.TensorProto.FLOAT,
+            dims=(),
+            vals=[1]))
+    node_list.append(node_number_one)
+    output_new_img_height = [model_name + "@new_img_height"]
+    node_new_img_height = onnx.helper.make_node(
+        'Sub',
+        inputs=output_img_height_cast + name_number_one,
+        outputs=output_new_img_height)
+    node_list.append(node_new_img_height)
+    output_new_img_width = [model_name + "@new_img_width"]
+    node_new_img_width = onnx.helper.make_node(
+        'Sub',
+        inputs=output_img_width_cast + name_number_one,
+        outputs=output_new_img_width)
+    node_list.append(node_new_img_width)
+    outputs_pred_box_x2_sub_w = [model_name + "@pred_box_x2_sub_w"]
+    node_pred_box_x2_sub_w = onnx.helper.make_node(
+        'Sub',
+        inputs=outputs_pred_box_x2_decode + output_new_img_width,
+        outputs=outputs_pred_box_x2_sub_w)
+    node_list.append(node_pred_box_x2_sub_w)
+    outputs_pred_box_y2_sub_h = [model_name + "@pred_box_y2_sub_h"]
+    node_pred_box_y2_sub_h = onnx.helper.make_node(
+        'Sub',
+        inputs=outputs_pred_box_y2_decode + output_new_img_height,
+        outputs=outputs_pred_box_y2_sub_h)
+    node_list.append(node_pred_box_y2_sub_h)
+    outputs_pred_box_x1_clip = [model_name + "@pred_box_x1_clip"]
+    outputs_pred_box_y1_clip = [model_name + "@pred_box_y1_clip"]
+    outputs_pred_box_x2_clip = [model_name + "@pred_box_x2_clip"]
+    outputs_pred_box_y2_clip = [model_name + "@pred_box_y2_clip"]
+    node_pred_box_x1_clip = onnx.helper.make_node(
+        'Clip',
+        inputs=outputs_pred_box_x1_decode,
+        outputs=outputs_pred_box_x1_clip,
+        min=0.0,
+        max=float(MAX_FLOAT32))
+    node_list.append(node_pred_box_x1_clip)
+    node_pred_box_y1_clip = onnx.helper.make_node(
+        'Clip',
+        inputs=outputs_pred_box_y1_decode,
+        outputs=outputs_pred_box_y1_clip,
+        min=0.0,
+        max=float(MAX_FLOAT32))
+    node_list.append(node_pred_box_y1_clip)
+    node_pred_box_x2_clip = onnx.helper.make_node(
+        'Clip',
+        inputs=outputs_pred_box_x2_sub_w,
+        outputs=outputs_pred_box_x2_clip,
+        min=0.0,
+        max=float(MAX_FLOAT32))
+    node_list.append(node_pred_box_x2_clip)
+    node_pred_box_y2_clip = onnx.helper.make_node(
+        'Clip',
+        inputs=outputs_pred_box_y2_sub_h,
+        outputs=outputs_pred_box_y2_clip,
+        min=0.0,
+        max=float(MAX_FLOAT32))
+    node_list.append(node_pred_box_y2_clip)
+    outputs_pred_box_x2_res = [model_name + "@box_x2_res"]
+    node_pred_box_x2_res = onnx.helper.make_node(
+        'Sub',
+        inputs=outputs_pred_box_x2_decode + outputs_pred_box_x2_clip,
+        outputs=outputs_pred_box_x2_res)
+    node_list.append(node_pred_box_x2_res)
+    outputs_pred_box_y2_res = [model_name + "@box_y2_res"]
+    node_pred_box_y2_res = onnx.helper.make_node(
+        'Sub',
+        inputs=outputs_pred_box_y2_decode + outputs_pred_box_y2_clip,
+        outputs=outputs_pred_box_y2_res)
+    node_list.append(node_pred_box_y2_res)
+    node_pred_box_result = onnx.helper.make_node(
+        'Concat',
+        inputs=outputs_pred_box_x1_clip + outputs_pred_box_y1_clip +
+        outputs_pred_box_x2_res + outputs_pred_box_y2_res,
+        outputs=outputs['Boxes'],
+        axis=-1)
+    node_list.append(node_pred_box_result)
+    score_shape = [1, input_height * input_width * int(num_anchors), class_num]
+    name_score_shape = [model_name + "@score_shape"]
+    node_score_shape = onnx.helper.make_node(
+        "Constant",
+        inputs=[],
+        outputs=name_score_shape,
+        value=onnx.helper.make_tensor(
+            name=name_score_shape[0] + "@const",
+            data_type=onnx.TensorProto.INT64,
+            dims=[len(score_shape)],
+            vals=score_shape))
+    node_list.append(node_score_shape)
+    node_score_new_shape = onnx.helper.make_node(
+        'Reshape',
+        inputs=outputs_score + name_score_shape,
+        outputs=outputs['Scores'])
+    node_list.append(node_score_new_shape)
+    return node_list
--- a/x2paddle/x2paddle/op_mapper/paddle2onnx/paddle_op_mapper.py
+++ b/x2paddle/x2paddle/op_mapper/paddle2onnx/paddle_op_mapper.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import math
+import sys
+import x2paddle
+import os
+import numpy as np
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+import onnx
+from onnx import helper, onnx_pb
+from x2paddle.op_mapper.paddle2onnx.opset9.opset import OpSet9
+from x2paddle.op_mapper.paddle2onnx.opset10.opset import OpSet10
+from x2paddle.op_mapper.paddle2onnx.opset11.opset import OpSet11
+class PaddleOpMapper(object):
+    """Convert a PaddlePaddle program into an ONNX model file.
+    Each conversion creates an opset-specific mapper (OpSet9/OpSet10/OpSet11)
+    and dispatches every Paddle op to the mapper method named after the op
+    type.
+    """
+    def __init__(self):
+        # Opset versions that have a mapper class. create_opset() relies on
+        # this list being in ascending order.
+        self.support_opsets = [9, 10, 11]
+        # Used by create_opset() when no supported version can be selected.
+        self.default_opset = 10
+        self.name_counter = dict()
+        # Opset mapper instance; assigned at the start of convert().
+        self.op_set = None
+    def convert(self, program, save_dir, scope=None, opset_version=10):
+        """Translate ``program`` and write ``x2paddle_model.onnx`` into ``save_dir``.
+        Args:
+            program: Paddle program; its ``blocks`` and each block's ``ops``
+                are iterated.
+            save_dir: output directory, created when it does not exist.
+            scope: forwarded unchanged to the mapper's ``convert_weights``.
+            opset_version: requested ONNX opset, resolved by create_opset().
+        Returns:
+            None. When at least one op type has no mapper method, the
+            unsupported types are printed and nothing is written.
+        """
+        self.op_set = self.create_opset(opset_version)
+        weight_nodes = self.op_set.convert_weights(program, scope=scope)
+        op_nodes = list()
+        input_nodes = list()
+        output_nodes = list()
+        unsupported_ops = set()
+        print("Translating PaddlePaddle to ONNX...\n")
+        for block in program.blocks:
+            for i, op in enumerate(block.ops):
+                sys.stdout.write("\rTotal:{}, Current:{} : {} ".format(
+                    len(block.ops), i + 1, op.type))
+                sys.stdout.flush()
+                if not hasattr(self.op_set, op.type):
+                    unsupported_ops.add(op.type)
+                    continue
+                # Once an unsupported op has been seen the model cannot be
+                # exported; keep scanning only to collect the full list.
+                if unsupported_ops:
+                    continue
+                node = getattr(self.op_set, op.type)(op, block)
+                if op.type == 'feed':
+                    print(node.name)
+                    input_nodes.append(node)
+                elif op.type == 'fetch':
+                    output_nodes.append(node)
+                elif isinstance(node, list):
+                    # A mapper method may return several ONNX nodes for one op.
+                    op_nodes.extend(node)
+                else:
+                    op_nodes.append(node)
+        if unsupported_ops:
+            print("\nThere's {} ops are not supported yet".format(
+                len(unsupported_ops)))
+            for op in unsupported_ops:
+                print("=========== {} ===========".format(op))
+            return
+        # Weight nodes are placed ahead of the operator nodes in the graph.
+        graph = helper.make_graph(
+            nodes=weight_nodes + op_nodes,
+            name='onnx_model_from_paddle',
+            initializer=[],
+            inputs=input_nodes,
+            outputs=output_nodes)
+        opset_imports = [helper.make_opsetid("", opset_version)]
+        model = helper.make_model(
+            graph, producer_name='X2Paddle', opset_imports=opset_imports)
+        onnx.checker.check_model(model)
+        if not os.path.isdir(save_dir):
+            os.makedirs(save_dir)
+        model_path = os.path.join(save_dir, 'x2paddle_model.onnx')
+        with open(model_path, 'wb') as f:
+            f.write(model.SerializeToString())
+        print("\nTranslated model saved in {}".format(model_path))
+    def create_opset(self, opset_version=10):
+        """Instantiate the opset mapper used for ``opset_version``.
+        A supported version is used as-is. Otherwise the highest supported
+        version below the requested one is chosen, and ``default_opset`` is
+        used when no supported version is lower.
+        Returns:
+            A new OpSet9, OpSet10 or OpSet11 instance.
+        """
+        run_opset = self.default_opset
+        if opset_version in self.support_opsets:
+            run_opset = opset_version
+        else:
+            for support_opset_version in self.support_opsets:
+                if support_opset_version < opset_version:
+                    run_opset = support_opset_version
+                else:
+                    break
+        print(
+            'Now, onnx2paddle support convert onnx model opset_verison {},'
+            'opset_verison of your onnx model is {}, automatically treated as op_set: {}.'
+            .format(self.support_opsets, opset_version, run_opset))
+        # Explicit table instead of eval() on a constructed class name.
+        opset_classes = {9: OpSet9, 10: OpSet10, 11: OpSet11}
+        return opset_classes[run_opset]()
--- a/x2paddle/x2paddle/op_mapper/tf_op_mapper.py
+++ b/x2paddle/x2paddle/op_mapper/tf_op_mapper.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from x2paddle.decoder.tf_decoder import TFGraph
+from x2paddle.core.op_mapper import OpMapper
+from x2paddle.core.util import *
+import inspect
+import numpy
+import sys
+# compute padding size for SAME mode
+def get_same_padding(in_size, kernel_size, stride):
+    """Return ``[pad_before, pad_after]`` for SAME-mode padding along one axis.
+    The output length is ``ceil(in_size / stride)``. The total padding needed
+    to reach it is clamped at zero, and when it is odd the extra unit goes to
+    the trailing side.
+    """
+    out_size = int(math.ceil(in_size * 1.0 / stride))
+    total = max((out_size - 1) * stride + kernel_size - in_size, 0)
+    leading = int(total / 2)
+    return [leading, total - leading]
+def nhwc_dim_to_nchw(node, dim):
+    """Translate axis indices from the node's TF layout to its paddle layout.
+    Each index is looked up in ``node.tf_data_format`` and replaced by the
+    position of the same axis letter in ``node.pd_data_format``.
+    Args:
+        node: object exposing ``tf_data_format`` and ``pd_data_format``
+            (presumably layout strings such as "NHWC" / "NCHW").
+        dim: a single axis index, or a list of axis indices. A list is
+            rewritten in place and the same list object is returned.
+    Returns:
+        The converted axis index, or the updated list.
+    Raises:
+        IndexError: if an index is out of range for ``tf_data_format``.
+        ValueError: if the axis letter is absent from ``pd_data_format``.
+    """
+    tf_axes = list(node.tf_data_format)
+    pd_axes = list(node.pd_data_format)
+    if isinstance(dim, list):
+        for i, axis in enumerate(dim):
+            dim[i] = pd_axes.index(tf_axes[axis])
+        return dim
+    return pd_axes.index(tf_axes[dim])
+class TFOpMapper(OpMapper):
+    # TF op type -> [paddle layer name, optional {tf attr name: paddle param
+    # name} dicts ...]. Consumed by directly_map(), which reads the first
+    # key/value pair of every dict after the layer name.
+    directly_map_ops = {
+        'Relu': ['relu'],
+        'Relu6': ['relu6'],
+        'Shape': ['shape'],
+        'Abs': ['abs'],
+        'Sigmoid': ['sigmoid'],
+        'Exp': ['exp'],
+        'Rsqrt': ['rsqrt'],
+        'swish_f32': ['swish'],
+        'Tanh': ['tanh'],
+        'LeakyRelu': ['leaky_relu', {
+            'alpha': 'alpha'
+        }]
+    }
+    # TF two-input op type -> paddle layer name emitted by elementwise_map().
+    elementwise_ops = {
+        'Add': 'elementwise_add',
+        'RealDiv': 'elementwise_div',
+        'Sub': 'elementwise_sub',
+        'Maximum': 'elementwise_max',
+        'Mul': 'elementwise_mul',
+        'FloorDiv': 'elementwise_floordiv'
+    }
+    def __init__(self, decoder):
+        """Convert every node of the decoded TensorFlow graph on construction.
+        Graph inputs whose layer type is neither ``Placeholder`` nor
+        ``OneShotIterator`` are removed from ``graph.input_nodes``. Nodes are
+        then visited in ``topo_sort`` order and handed to ``directly_map``,
+        ``elementwise_map`` or the method named after the op type. If any op
+        type has no handler, the unsupported types are written to stderr and
+        ``sys.exit(-1)`` is called.
+        """
+        super(TFOpMapper, self).__init__()
+        self.decoder = decoder
+        self.graph = decoder.tf_graph
+        self.batch_node = None
+        self.weights = dict()
+        self.omit_nodes = list()
+        self.used_custom_layers = dict()
+        kept_input_types = ("Placeholder", "OneShotIterator")
+        stale_inputs = [
+            name for name in self.graph.input_nodes
+            if self.graph.get_node(name).layer_type not in kept_input_types
+        ]
+        for name in stale_inputs:
+            self.graph.input_nodes.remove(name)
+        sys.stderr.write("Total nodes: {}\n".format(len(self.graph.topo_sort)))
+        unsupported_ops = set()
+        for position, node_name in enumerate(self.graph.topo_sort, start=1):
+            sys.stderr.write("\rConverting node {} ...    ".format(position))
+            node = self.graph.get_node(node_name)
+            op = node.layer_type
+            # Pick the handler first; conversion itself is skipped as soon as
+            # any unsupported op has been recorded.
+            if op in self.directly_map_ops:
+                handler = self.directly_map
+            elif op in self.elementwise_ops:
+                handler = self.elementwise_map
+            elif hasattr(self, op):
+                handler = getattr(self, op)
+            else:
+                unsupported_ops.add(op)
+                continue
+            if unsupported_ops:
+                continue
+            handler(node)
+        if unsupported_ops:
+            sys.stderr.write("=========={} Ops are not supported yet======\n".
+                             format(len(unsupported_ops)))
+            for op in unsupported_ops:
+                sys.stderr.write("========== {} ==========\n".format(op))
+            sys.exit(-1)
+        sys.stderr.write('\nDone!\n')
+    def add_omit_nodes(self, in_node_name, out_node_name):
+        """Record the producer of the edge ``in_node_name -> out_node_name`` as omitted.
+        The edge is looked up on both endpoints but left in place; only the
+        producer's ``layer_name`` is appended to ``self.omit_nodes``.
+        """
+        producer = self.graph.get_node(in_node_name)
+        consumer = self.graph.get_node(out_node_name)
+        # Results are discarded. The lookups are kept because they raise when
+        # the edge is missing (ValueError, assuming outputs/inputs are lists).
+        producer.outputs.index(out_node_name)
+        consumer.inputs.index(in_node_name)
+        self.omit_nodes.append(producer.layer_name)
+    def directly_map(self, node):
+        """Emit the single paddle layer registered for ``node`` in ``directly_map_ops``.
+        The first graph input of the node becomes the layer input. Every
+        ``{tf attr name: paddle param name}`` entry after the layer name is
+        resolved through ``node.get_attr`` and passed on as ``param_attr``.
+        """
+        assert node.layer_type in self.directly_map_ops
+        layer_name, *attr_maps = self.directly_map_ops[node.layer_type]
+        source = self.graph.get_node(node.layer.input[0], copy=True)
+        attr = dict()
+        for attr_map in attr_maps:
+            # Only the first key/value pair of each mapping is used.
+            tf_name, pd_name = list(attr_map.items())[0]
+            attr[pd_name] = node.get_attr(tf_name)
+        node.fluid_code.add_layer(
+            layer_name, inputs=source, output=node, param_attr=attr)
+    def elementwise_map(self, node):
+        assert node.layer_type in self.elementwise_ops
+        op_type = self.elementwise_ops[node.layer_type]
+        x = self.graph.get_node(node.layer.input[0], copy=True)
+        y = self.graph.get_node(node.layer.input[1], copy=True)
+        x_shape = x.out_shapes[0]
+        y_shape = y.out_shapes[0]
+        if len(x_shape) == 0:
+            x_shape = [1]
+        if len(y_shape) == 0:
+            y_shape = [1]
+        # incomplement broadcasting support for paddle
+        x_input = x
+        y_input = y
+        if len(x_shape) < len(y_shape):
+            unrevertable_ops = [
+                "elementwise_sub", "elementwise_div", "elementwise_floordiv",
+                "elementwise_mod", "elementwise_pow"
+            ]
+            if op_type not in unrevertable_ops:
+                x_input = y
+                y_input = x
+                x_shape = y.out_shapes[0]
+                if len(x_shape) == 0:
+                    x_shape = [1]
+                y_shape = x.out_shapes[0]
+                if len(y_shape) == 0:
+                    y_shape = [1]
+            else:
+                if len(x_shape) == 1 and len(y_shape) == 4 and x_shape[
+                        0] == y_shape[-1] and y_shape.count(-1) < 1:
+                    shape = [1, x_shape[0], 1, 1]
+                    attr = {"shape": shape}
+                    node.fluid_code.add_layer(
+                        "reshape",
+                        inputs=x_input,
+                        output="reshape_x",
+                        param_attr=attr)
+                    if y_shape[0] != 1:
+                        attr = {"expand_times": [y_shape[0], 1, 1, 1]}
+                        node.fluid_code.add_layer(
+                            "expand",
+                            inputs="reshape_x",
+                            output="reshape_x",
+                            param_attr=attr)
+                    inputs = {"x": "reshape_x", "y": y_input}
+                    node.fluid_code.add_layer(
+                        op_type, inputs=inputs, output=node, param_attr=None)
+                    return
+                else:
+                    raise Exception("Unexpected situation happend")
+        if len(x_shape) == 4 and len(y_shape) == 1:
+            if x_input.tf_data_format == "NHWC":
+                axis = 1
+            else:
+                axis = -1
+            attr = {"axis": axis}
+            inputs = {"x": x_input, "y": y_input}
+            node.fluid_code.add_layer(
+                op_type, inputs=inputs, output=node, param_attr=attr)
+            return
+        is_sub_seq = True
+        for i in range(len(y_shape)):
+            index = -1 * i - 1
+            if y_shape[index] != x_shape[index]:
+                is_sub_seq = False
+        if not is_sub_seq:
+            if x_shape.count(-1) > 2:
+                x_shape = self.decoder.infer_tensor_shape(x_input)
+            if y_shape.count(-1) > 2:
+                y_shape = self.decoder.infer_tensor_shape(y_input)
+            x_expand_times = [1] * len(x_shape)
+            y_expand_times = [1] * len(y_shape)
+            x_need_expand = False
+            y_need_expand = False
+            for i in range(len(y_shape)):
+                index = -1 * i - 1
+                if y_shape[index] != x_shape[index]:
+                    if y_shape[index] == 1:
+                        y_expand_times[index] = x_shape[index]
+                        y_need_expand = True
+                    elif x_shape[index] == 1:
+                        x_expand_times[index] = y_shape[index]
+                        x_need_expand = True
+                    else:
+                        raise Exception("Unexpected situation happend")
+            if x_need_expand:
+                if len(x_expand_times) == 3 and x.tf_data_format == "NHWC":
+                    x_expand_times = [x_expand_times[i] for i in [2, 0, 1]]
+                if len(x_expand_times) == 4 and x.tf_data_format == "NHWC":
+                    x_expand_times = [x_expand_times[i] for i in [0, 3, 1, 2]]
+                attr = {"expand_times": x_expand_times}
+                node.fluid_code.add_layer(
+                    "expand", inputs=x_input, output="x_tmp", param_attr=attr)
+                x_input = "x_tmp"
+            if y_need_expand:
+                if len(y_expand_times) == 3 and y.tf_data_format == "NHWC":
+                    y_expand_times = [y_expand_times[i] for i in [2, 0, 1]]
+                if len(y_expand_times) == 4 and y.tf_data_format == "NHWC":
+                    y_expand_times = [y_expand_times[i] for i in [0, 3, 1, 2]]
+                attr = {"expand_times": y_expand_times}
+                node.fluid_code.add_layer(
+                    "expand", inputs=y_input, output="y_tmp", param_attr=attr)
+                y_input = "y_tmp"
+        inputs = {"x": x_input, "y": y_input}
+        node.fluid_code.add_layer(
+            op_type, inputs=inputs, output=node, param_attr=None)
+    def Placeholder(self, node):
+        """Emit a fluid ``data`` layer for a graph input node.
+        A 4-D NHWC shape is permuted into NCHW order; a 4-D NCHW node instead
+        has its data format propagated through the graph. A node whose first
+        dimension is negative is remembered as ``self.batch_node``.
+        """
+        in_shape = node.out_shapes[0]
+        assert len(in_shape) != 0, "Unknown shape of input nodes[{}].".format(
+            node.layer_name)
+        if len(in_shape) == 4:
+            if node.tf_data_format == "NHWC":
+                in_shape = [in_shape[0], in_shape[3], in_shape[1], in_shape[2]]
+            elif node.tf_data_format == "NCHW":
+                self.graph.data_format_propagation(node)
+        layer_attr = {
+            'dtype': string(node.dtype),
+            'shape': in_shape,
+            'name': string(node.layer_name),
+            'append_batch_size': False
+        }
+        # A negative leading dimension marks this node as the batch input.
+        if in_shape[0] < 0:
+            self.batch_node = node
+        node.fluid_code.add_layer(
+            "data", inputs=None, output=node, param_attr=layer_attr)
+    def OneShotIterator(self, node):
+        """Handle a ``OneShotIterator`` node exactly like a ``Placeholder``."""
+        return self.Placeholder(node)
+    def Const(self, node):
+        """Emit a fluid ``create_parameter`` layer for a constant node.
+        The constant's value is recorded in ``self.weights`` under the node's
+        layer name. Under NHWC a 4-D shape is permuted to NCHW order, and a
+        3-D constant has both its shape and its stored value moved to
+        channel-first order. A 4-D NCHW constant propagates its data format.
+        """
+        const_value = node.value
+        param_shape = node.out_shapes[0]
+        init_expr = "Constant(0.0)"
+        if len(param_shape) == 0:
+            # Scalars become a one-element parameter initialised to the value.
+            assert const_value.size == 1, "Unexpected situation happend"
+            param_shape = [1]
+            init_expr = "Constant({})".format(const_value)
+        self.weights[node.layer_name] = node.value
+        rank = len(param_shape)
+        if node.tf_data_format == "NHWC":
+            if rank == 4:
+                param_shape = [param_shape[i] for i in (0, 3, 1, 2)]
+            elif rank == 3:
+                param_shape = [param_shape[i] for i in (2, 0, 1)]
+                self.weights[node.layer_name] = numpy.transpose(node.value,
+                                                                (2, 0, 1))
+        elif node.tf_data_format == "NCHW" and rank == 4:
+            self.graph.data_format_propagation(node)
+        layer_attr = {
+            'dtype': string(node.dtype),
+            'shape': param_shape,
+            'name': string(node.layer_name),
+            'default_initializer': init_expr
+        }
+        node.fluid_code.add_layer(
+            "create_parameter", inputs=None, output=node, param_attr=layer_attr)
+    def Transpose(self, node):
+        """Map a TF ``Transpose`` node whose permutation is a Const.
+        A transpose that only switches between NHWC and NCHW is emitted as a
+        plain alias of its input, and the node's data format is flipped and
+        propagated. Inputs with more than four dimensions get a fluid
+        ``transpose`` whose permutation is rewritten from the TF layout into
+        the Paddle layout. Outputs that are not 4-D use the permutation as is.
+        Raises:
+            Exception: for any other 4-D transpose.
+        """
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        perm = self.graph.get_node(node.layer.input[1], copy=True)
+        assert perm.layer_type == "Const", "Perm of transpose OP should be Const"
+        # The permutation is consumed here: drop its weight and generated code.
+        del self.weights[perm.layer_name.replace('/', '_')]
+        perm.fluid_code.clear()
+        perm = perm.value.tolist()
+        # Fixed: the first test used to read ``input.data_format``; the sibling
+        # branch and the rest of this mapper read ``tf_data_format``.
+        if perm == [0, 3, 1, 2] and input.tf_data_format == "NHWC":
+            switched_format = "NCHW"
+        elif perm == [0, 2, 3, 1] and input.tf_data_format == "NCHW":
+            switched_format = "NHWC"
+        else:
+            switched_format = None
+        if switched_format is not None:
+            input_name = input.layer_name
+            if hasattr(input, "index"):
+                input_name = input_name + "[{}]".format(input.index)
+            # Fixed: ``.format`` used to be applied to the return value of
+            # ``add_layer`` rather than to the template string.
+            # NOTE(review): assumes ``fluid_code.add_note`` appends a raw line
+            # of generated code -- confirm against FluidCode.
+            node.fluid_code.add_note("{} = {}".format(node.layer_name,
+                                                      input_name))
+            node.tf_data_format = switched_format
+            self.graph.data_format_propagation(node)
+        elif len(input.out_shapes[0]) > 4:
+            tf_data_format = list(input.tf_data_format)
+            pd_data_format = list(input.pd_data_format)
+            new_perm = [i for i in range(len(perm))]
+            # Re-express the TF-layout permutation in Paddle-layout positions.
+            for i in range(len(perm)):
+                char0 = tf_data_format[i]
+                char1 = tf_data_format[perm[i]]
+                index0 = pd_data_format.index(char0)
+                index1 = pd_data_format.index(char1)
+                new_perm[index0] = index1
+            node.tf_data_format = [tf_data_format[i] for i in perm]
+            node.pd_data_format = [pd_data_format[i] for i in perm]
+            attr = {'perm': new_perm}
+            node.fluid_code.add_layer(
+                "transpose", inputs=input, output=node, param_attr=attr)
+        elif len(node.out_shapes[0]) != 4:
+            attr = {'perm': perm}
+            node.fluid_code.add_layer(
+                "transpose", inputs=input, output=node, param_attr=attr)
+        else:
+            raise Exception("Unexpected situation happend in Transpose OP")
+    def MaxPool(self, node):
+        """Translate a TF ``MaxPool`` node into a fluid ``pool2d`` (max) layer.
+        For non-NCHW nodes the window and stride attributes are permuted into
+        NCHW order; NCHW nodes propagate their data format instead.
+        """
+        feature = self.graph.get_node(node.layer.input[0], copy=True)
+        feature_shape = feature.out_shapes[0]
+        if feature_shape.count(-1) > 2:
+            feature_shape = self.decoder.infer_tensor(feature).shape
+        window = node.get_attr("ksize")
+        steps = node.get_attr("strides")
+        layout = node.get_attr("data_format").decode()
+        padding = node.get_attr("padding").decode()
+        if layout == "NCHW":
+            self.graph.data_format_propagation(node)
+        else:
+            nhwc_to_nchw = (0, 3, 1, 2)
+            feature_shape = [feature_shape[i] for i in nhwc_to_nchw]
+            steps = [steps[i] for i in nhwc_to_nchw]
+            window = [window[i] for i in nhwc_to_nchw]
+        pool_attr = {
+            "pool_size": window[2:4],
+            "pool_type": string("max"),
+            "pool_padding": string(padding),
+            "pool_stride": steps[2:4]
+        }
+        node.fluid_code.add_layer(
+            "pool2d", inputs=feature, output=node, param_attr=pool_attr)
+    def Conv2D(self, node):
+        """Translate a TF ``Conv2D`` node into a bias-free fluid ``conv2d``.
+        The kernel must be a Const; its value is stored in ``self.weights``
+        transposed with axes (3, 2, 0, 1) and its node is registered through
+        ``add_omit_nodes``. Stride and dilation attributes are permuted into
+        NCHW order unless the node is already NCHW.
+        """
+        feature = self.graph.get_node(node.layer.input[0], copy=True)
+        kernel = self.graph.get_node(node.layer.input[1], copy=True)
+        assert kernel.layer_type == "Const", "Kernel of Conv2D should be Const"
+        self.add_omit_nodes(kernel.layer_name, node.layer_name)
+        feature_shape = feature.out_shapes[0]
+        if feature_shape.count(-1) > 2:
+            feature_shape = self.decoder.infer_tensor(feature).shape
+        kernel_shape = kernel.out_shapes[0]
+        if kernel_shape.count(-1) > 2:
+            kernel_shape = self.decoder.infer_tensor(kernel).shape
+        stride_attr = node.get_attr("strides")
+        dilation_attr = node.get_attr("dilations")
+        layout = node.get_attr("data_format").decode()
+        padding = node.get_attr("padding").decode()
+        weight_key = kernel.layer_name.replace('/', '_')
+        self.weights[weight_key] = numpy.transpose(kernel.value, (3, 2, 0, 1))
+        if layout == "NCHW":
+            self.graph.data_format_propagation(node)
+        else:
+            nhwc_to_nchw = (0, 3, 1, 2)
+            feature_shape = [feature_shape[i] for i in nhwc_to_nchw]
+            stride_attr = [stride_attr[i] for i in nhwc_to_nchw]
+            dilation_attr = [dilation_attr[i] for i in nhwc_to_nchw]
+        conv_attr = {
+            "bias_attr": False,
+            "param_attr": string(kernel.layer_name),
+            "num_filters": kernel_shape[3],
+            "filter_size": kernel_shape[0:2],
+            "stride": stride_attr[2:4],
+            "dilation": dilation_attr[2:4],
+            "padding": string(padding)
+        }
+        node.fluid_code.add_layer(
+            "conv2d", inputs=feature, output=node, param_attr=conv_attr)
+    def BiasAdd(self, node):
+        """Translate a TF ``BiasAdd`` node into a fluid ``elementwise_add``.
+        The bias is applied along axis 1 for 4-D NHWC inputs and along the
+        last axis otherwise.
+        """
+        feature = self.graph.get_node(node.layer.input[0], copy=True)
+        bias = self.graph.get_node(node.layer.input[1], copy=True)
+        is_nhwc_4d = (feature.tf_data_format == "NHWC" and
+                      len(feature.out_shapes[0]) == 4)
+        bias_axis = 1 if is_nhwc_4d else -1
+        node.fluid_code.add_layer(
+            "elementwise_add",
+            inputs={"x": feature,
+                    "y": bias},
+            output=node,
+            param_attr={"axis": bias_axis})
+    def FusedBatchNorm(self, node):
+        """Translate a TF ``FusedBatchNorm`` node into a fluid ``batch_norm``.
+        Scale, offset, moving mean and moving variance must all be Const
+        nodes; each is registered through ``add_omit_nodes`` and referenced
+        by name in the layer attributes. The layer is emitted with
+        ``is_test`` set to True.
+        """
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        gamma = self.graph.get_node(node.layer.input[1], copy=True)
+        beta = self.graph.get_node(node.layer.input[2], copy=True)
+        moving_mean = self.graph.get_node(node.layer.input[3], copy=True)
+        moving_var = self.graph.get_node(node.layer.input[4], copy=True)
+        data_format = node.get_attr("data_format").decode()
+        channel_first = data_format == "NCHW"
+        assert gamma.layer_type == "Const"
+        assert beta.layer_type == "Const"
+        assert moving_mean.layer_type == "Const"
+        assert moving_var.layer_type == "Const"
+        for stat_node in (gamma, beta, moving_mean, moving_var):
+            self.add_omit_nodes(stat_node.layer_name, node.layer_name)
+        if channel_first:
+            # Fixed: propagate through the graph, as every other mapper that
+            # propagates a data format in this class does, instead of calling
+            # ``self.data_format_propagation``.
+            self.graph.data_format_propagation(node)
+        attr = {
+            "epsilon": node.get_attr("epsilon"),
+            "param_attr": string(gamma.layer_name),
+            "bias_attr": string(beta.layer_name),
+            "moving_mean_name": string(moving_mean.layer_name),
+            "moving_variance_name": string(moving_var.layer_name),
+            "is_test": True
+        }
+        node.fluid_code.add_layer(
+            "batch_norm", inputs=input, output=node, param_attr=attr)
+    def FusedBatchNormV3(self, node):
+        """Handle ``FusedBatchNormV3`` exactly like ``FusedBatchNorm``."""
+        return self.FusedBatchNorm(node)
+    def DepthwiseConv2dNative(self, node):
+        """Translate a TF ``DepthwiseConv2dNative`` node into a grouped ``conv2d``.
+        The kernel must be a Const; its value is stored in ``self.weights``
+        transposed with axes (2, 3, 0, 1). ``num_filters`` is taken from
+        ``in_shape[1]`` and ``groups`` is ``k_size[3] * in_shape[1]``.
+        """
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        kernel = self.graph.get_node(node.layer.input[1], copy=True)
+        assert kernel.layer_type == "Const", "Kernel of DepthwiseConv2DNative should be Const"
+        self.add_omit_nodes(kernel.layer_name, node.layer_name)
+        in_shape = input.out_shapes[0]
+        if in_shape.count(-1) > 2:
+            in_shape = self.decoder.infer_tensor(input).shape
+        k_size = kernel.out_shapes[0]
+        if k_size.count(-1) > 2:
+            k_size = self.decoder.infer_tensor(kernel).shape
+        strides = node.get_attr("strides")
+        dilations = node.get_attr("dilations")
+        data_format = node.get_attr("data_format").decode()
+        pad_mode = node.get_attr("padding").decode()
+        channel_first = data_format == "NCHW"
+        self.weights[kernel.layer_name.replace('/', '_')] = numpy.transpose(
+            kernel.value, (2, 3, 0, 1))
+        if not channel_first:
+            # Bring shape and per-axis attributes into NCHW order.
+            in_shape = [in_shape[i] for i in [0, 3, 1, 2]]
+            strides = [strides[i] for i in [0, 3, 1, 2]]
+            dilations = [dilations[i] for i in [0, 3, 1, 2]]
+        else:
+            # Fixed: propagate through the graph, as Conv2D and the pooling
+            # mappers do, instead of calling ``self.data_format_propagation``.
+            self.graph.data_format_propagation(node)
+        attr = {
+            "bias_attr": False,
+            "param_attr": string(kernel.layer_name),
+            "num_filters": in_shape[1],
+            "filter_size": k_size[0:2],
+            "stride": strides[2:4],
+            "dilation": dilations[2:4],
+            "groups": k_size[3] * in_shape[1],
+            "use_cudnn": False,
+            "padding": string(pad_mode)
+        }
+        node.fluid_code.add_layer(
+            "conv2d", inputs=input, output=node, param_attr=attr)
+    def Reshape(self, node):
+        """Translate a TF ``Reshape`` node into fluid ``reshape`` (plus
+        ``transpose`` layers where the NHWC/NCHW layouts differ).
+        The target shape comes from a Const node when possible; otherwise it
+        is inferred through the decoder, and as a last resort the shape tensor
+        itself is split into elements and passed as a list expression.
+        """
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        param = self.graph.get_node(node.layer.input[1], copy=True)
+        # True when the shape is passed as a code string built from tensors.
+        is_variable = False
+        if param.layer_type == "Const":
+            attr = {"shape": param.value.tolist()}
+            self.add_omit_nodes(param.layer_name, node.layer_name)
+        else:
+            # Workaround for a shape that is given as a tensor in TensorFlow.
+            shape = self.decoder.infer_shape_tensor(param, node.out_shapes[0])
+            if shape.count(-1) <= 1:
+                attr = {"shape": shape}
+                self.add_omit_nodes(param.layer_name, node.layer_name)
+            elif shape.count(-1) == 2 and shape[0] == -1:
+                # Two unknown dims with the first one unknown: use 0 for the
+                # first (presumably "copy from input" in fluid -- confirm).
+                shape[0] = 0
+                attr = {"shape": shape}
+                self.add_omit_nodes(param.layer_name, node.layer_name)
+            else:
+                assert len(param.out_shapes[
+                    0]) == 1, "Unexpected situation of shape parameter"
+                # Flatten the shape tensor, split it into its elements and
+                # reference those elements by name in the generated code.
+                attr = {"shape": [-1]}
+                node.fluid_code.add_layer(
+                    "reshape",
+                    inputs=param,
+                    output="shape_param",
+                    param_attr=attr)
+                attr = {"num_or_sections": param.out_shapes[0][0], "dim": 0}
+                node.fluid_code.add_layer(
+                    "split", inputs="shape_param", output=node, param_attr=attr)
+                new_param = "["
+                for i in range(param.out_shapes[0][0]):
+                    new_param += (node.layer_name + "[{}]".format(i) + ", ")
+                new_param = new_param.strip(", ") + "]"
+                attr = {"shape": new_param}
+                is_variable = True
+        # To change [192, -1] -> [-1, 192], always put -1 in the first dimension.
+        # Optimization for Paddle-Lite.
+        in_shape = input.out_shapes[0]
+        if not is_variable and in_shape.count(-1) < 1:
+            # Input shape is fully known: resolve 0 entries from the input and
+            # compute the size of the remaining -1 entry.
+            total_size = 1
+            for i in range(len(in_shape)):
+                total_size *= in_shape[i]
+            for i in range(len(attr["shape"])):
+                if attr["shape"][i] == 0:
+                    attr["shape"][i] = in_shape[i]
+                if attr["shape"][i] != -1:
+                    total_size /= attr["shape"][i]
+            if attr["shape"].count(-1) > 0:
+                index = attr["shape"].index(-1)
+                attr["shape"][index] = int(total_size)
+                attr["shape"][0] = -1
+        if len(input.out_shapes[0]) == 4 and node.tf_data_format == "NHWC":
+            if len(attr["shape"]) < 3:
+                # 4-D input reshaped to fewer than 3 dims: transpose with
+                # [0, 2, 3, 1] first, then reshape.
+                perm = {"perm": [0, 2, 3, 1]}
+                node.fluid_code.add_layer(
+                    "transpose", inputs=input, output=node, param_attr=perm)
+                node.fluid_code.add_layer(
+                    "reshape", inputs=node, output=node, param_attr=attr)
+                return
+        if len(attr["shape"]) == 4 and node.tf_data_format == "NHWC":
+            input_shape = self.decoder.infer_tensor(input).shape
+            if input_shape[1] == attr["shape"][1]:
+                # Dimension 1 is unchanged: just permute the target shape.
+                attr["shape"] = [attr["shape"][i] for i in [0, 3, 1, 2]]
+            else:
+                # Otherwise transpose, reshape, then transpose back.
+                perm = {"perm": [0, 2, 3, 1]}
+                node.fluid_code.add_layer(
+                    "transpose", inputs=input, output=node, param_attr=perm)
+                node.fluid_code.add_layer(
+                    "reshape", inputs=node, output=node, param_attr=attr)
+                perm = {"perm": [0, 3, 1, 2]}
+                node.fluid_code.add_layer(
+                    "transpose", inputs=node, output=node, param_attr=perm)
+                return
+        if len(attr["shape"]) == 5:
+            # 5-D targets have their last axis moved to position 2.
+            attr["shape"] = [attr["shape"][i] for i in [0, 1, 4, 2, 3]]
+        node.fluid_code.add_layer(
+            "reshape", inputs=input, output=node, param_attr=attr)
+    def AvgPool(self, node):
+        """Translate a TF ``AvgPool`` node into a fluid ``pool2d`` (avg) layer.
+        For non-NCHW nodes the window and stride attributes are permuted into
+        NCHW order; NCHW nodes propagate their data format instead.
+        """
+        feature = self.graph.get_node(node.layer.input[0], copy=True)
+        feature_shape = feature.out_shapes[0]
+        if feature_shape.count(-1) > 2:
+            feature_shape = self.decoder.infer_tensor(feature).shape
+        window = node.get_attr("ksize")
+        steps = node.get_attr("strides")
+        layout = node.get_attr("data_format").decode()
+        padding = node.get_attr("padding").decode()
+        if layout == "NCHW":
+            self.graph.data_format_propagation(node)
+        else:
+            nhwc_to_nchw = (0, 3, 1, 2)
+            feature_shape = [feature_shape[i] for i in nhwc_to_nchw]
+            steps = [steps[i] for i in nhwc_to_nchw]
+            window = [window[i] for i in nhwc_to_nchw]
+        pool_attr = {
+            "pool_size": window[2:4],
+            "pool_type": string("avg"),
+            "pool_stride": steps[2:4],
+            "pool_padding": string(padding)
+        }
+        node.fluid_code.add_layer(
+            "pool2d", inputs=feature, output=node, param_attr=pool_attr)
+    def SplitV(self, node):
+        """Translate a TF ``SplitV`` node into a fluid ``split`` layer.
+        Both the section sizes and the split dimension must be Const nodes.
+        For 4-D NHWC inputs the dimension is converted with
+        ``nhwc_dim_to_nchw``.
+        """
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        num_sections = self.graph.get_node(node.layer.input[1], copy=True)
+        dim = self.graph.get_node(node.layer.input[2], copy=True)
+        assert num_sections.layer_type == "Const"
+        assert dim.layer_type == "Const"
+        self.add_omit_nodes(num_sections.layer_name, node.layer_name)
+        self.add_omit_nodes(dim.layer_name, node.layer_name)
+        dim = dim.value
+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            dim = nhwc_dim_to_nchw(input, dim)
+        attr = {
+            "num_or_sections": num_sections.value.tolist(),
+            # Fixed: ``dim`` already holds the unwrapped value here, so it is
+            # passed directly instead of reading ``dim.value`` a second time.
+            "dim": dim
+        }
+        node.fluid_code.add_layer(
+            "split", inputs=input, output=node, param_attr=attr)
+    def ConcatV2(self, node):
+        """Translate a TF ``ConcatV2`` node into a fluid ``concat`` layer.
+        The last input is the (Const) axis; all preceding inputs are the
+        tensors to join. The axis is converted with ``nhwc_dim_to_nchw`` when
+        the first tensor is 4-D NHWC.
+        """
+        operands = []
+        for name in node.layer.input[:-1]:
+            operands.append(self.graph.get_node(name, copy=True))
+        axis_node = self.graph.get_node(node.layer.input[-1], copy=True)
+        assert axis_node.layer_type == "Const"
+        self.add_omit_nodes(axis_node.layer_name, node.layer_name)
+        concat_axis = axis_node.value
+        first = operands[0]
+        if first.tf_data_format == "NHWC" and len(first.out_shapes[0]) == 4:
+            concat_axis = nhwc_dim_to_nchw(first, concat_axis)
+        node.fluid_code.add_layer(
+            "concat",
+            inputs=operands,
+            output=node,
+            param_attr={"axis": concat_axis})
+    def Tile(self, node):
+        """Translate a TF ``Tile`` node into a fluid ``expand`` layer.
+        The multiples come from a Const node or are inferred by the decoder.
+        They are permuted into channel-first order for 3-D/4-D NHWC inputs,
+        and any negative multiple is replaced by 1.
+        """
+        source = self.graph.get_node(node.layer.input[0], copy=True)
+        multiples = self.graph.get_node(node.layer.input[1], copy=True)
+        self.add_omit_nodes(multiples.layer_name, node.layer_name)
+        if multiples.layer_type != "Const":
+            multiples = self.decoder.infer_shape_tensor(multiples)
+        else:
+            multiples = multiples.value.tolist()
+        if source.tf_data_format == "NHWC":
+            rank = len(source.out_shapes[0])
+            if rank == 4:
+                multiples = [multiples[i] for i in (0, 3, 1, 2)]
+            elif rank == 3:
+                multiples = [multiples[i] for i in (2, 0, 1)]
+        # Negative multiples are replaced in place by a repeat count of 1.
+        for pos, times in enumerate(multiples):
+            if times < 0:
+                multiples[pos] = 1
+        node.fluid_code.add_layer(
+            "expand",
+            inputs=source,
+            output=node,
+            param_attr={"expand_times": multiples})
+    def Pack(self, node):
+        """Translate a TF ``Pack`` node into a fluid ``stack`` layer.
+        When the first input is 4-D NHWC, the stacking axis is converted with
+        ``nhwc_dim_to_nchw`` and the node's TF and Paddle data-format strings
+        gain a new character at the respective axis.
+        """
+        operands = []
+        for name in node.layer.input:
+            operands.append(self.graph.get_node(name, copy=True))
+        stack_axis = node.get_attr("axis")
+        first = operands[0]
+        if first.tf_data_format == "NHWC" and len(first.out_shapes[0]) == 4:
+            tf_layout = list(first.tf_data_format)
+            tf_layout.insert(stack_axis, str(len(tf_layout)))
+            stack_axis = nhwc_dim_to_nchw(first, stack_axis)
+            pd_layout = list(first.pd_data_format)
+            pd_layout.insert(stack_axis, str(len(pd_layout)))
+            node.tf_data_format = "".join(tf_layout)
+            node.pd_data_format = "".join(pd_layout)
+        node.fluid_code.add_layer(
+            "stack",
+            inputs=operands,
+            output=node,
+            param_attr={"axis": stack_axis})
+    def Pad(self, node):
+        """Translate a TF ``Pad`` node into a fluid ``pad`` or ``pad2d`` layer.
+        The Const paddings are flattened and, for 4-D NHWC inputs, reordered
+        into NCHW order. For 4-D inputs whose first four padding values sum
+        to zero, only the last four values are kept and ``pad2d`` is used.
+        """
+        source = self.graph.get_node(node.layer.input[0], copy=True)
+        pad_node = self.graph.get_node(node.layer.input[1], copy=True)
+        assert pad_node.layer_type == "Const", "Padding should be Const"
+        self.add_omit_nodes(pad_node.layer_name, node.layer_name)
+        pad_values = pad_node.value.flatten().tolist()
+        is_4d = len(source.out_shapes[0]) == 4
+        if source.tf_data_format == "NHWC" and is_4d:
+            pad_values = [pad_values[i] for i in (0, 1, 6, 7, 2, 3, 4, 5)]
+        layer_name = "pad"
+        if is_4d:
+            leading = (pad_values[0] + pad_values[1] + pad_values[2] +
+                       pad_values[3])
+            if leading == 0:
+                pad_values = pad_values[4:]
+                layer_name = "pad2d"
+        node.fluid_code.add_layer(
+            layer_name,
+            inputs=source,
+            output=node,
+            param_attr={"paddings": pad_values})
+    def MirrorPad(self, node):
+        """Translate a TF ``MirrorPad`` node (REFLECT mode only) into a fluid
+        ``pad`` or ``pad2d`` layer with ``mode`` set to reflect.
+        Paddings are handled as in ``Pad``: flattened, reordered for 4-D NHWC
+        inputs, and reduced to the last four values when the first four sum
+        to zero on a 4-D input.
+        """
+        source = self.graph.get_node(node.layer.input[0], copy=True)
+        pad_node = self.graph.get_node(node.layer.input[1], copy=True)
+        assert pad_node.layer_type == "Const", "Padding should be Const"
+        self.add_omit_nodes(pad_node.layer_name, node.layer_name)
+        pad_values = pad_node.value.flatten().tolist()
+        mirror_mode = node.get_attr("mode").decode()
+        assert mirror_mode == "REFLECT", "Only support 'REFLECT` mode in MirrorPad"
+        is_4d = len(source.out_shapes[0]) == 4
+        if source.tf_data_format == "NHWC" and is_4d:
+            pad_values = [pad_values[i] for i in (0, 1, 6, 7, 2, 3, 4, 5)]
+        layer_name = "pad"
+        if is_4d:
+            leading = (pad_values[0] + pad_values[1] + pad_values[2] +
+                       pad_values[3])
+            if leading == 0:
+                pad_values = pad_values[4:]
+                layer_name = "pad2d"
+        node.fluid_code.add_layer(
+            layer_name,
+            inputs=source,
+            output=node,
+            param_attr={"paddings": pad_values,
+                        "mode": string("reflect")})
+    def Range(self, node):
+        """Translate a TF ``Range`` node into a fluid ``range`` layer.
+        Each of start, limit and delta is taken from its Const value when
+        available and otherwise evaluated through the decoder.
+        """
+        start_node = self.graph.get_node(node.layer.input[0], copy=True)
+        limit_node = self.graph.get_node(node.layer.input[1], copy=True)
+        delta_node = self.graph.get_node(node.layer.input[2], copy=True)
+        for operand in (start_node, limit_node, delta_node):
+            self.add_omit_nodes(operand.layer_name, node.layer_name)
+
+        def resolve(operand):
+            # Const nodes carry their value; anything else is inferred.
+            if operand.layer_type == "Const":
+                return operand.value
+            return self.decoder.infer_tensor(operand)
+
+        start = resolve(start_node)
+        limit = resolve(limit_node)
+        delta = resolve(delta_node)
+        node.fluid_code.add_layer(
+            "range",
+            inputs={"start": start,
+                    "end": limit,
+                    "step": delta},
+            output=node,
+            param_attr={"dtype": string(node.dtype)})
+    def Mean(self, node):
+        """Translate a TF ``Mean`` node into a fluid ``reduce_mean`` layer.
+        The reduction indices must be a Const; for 4-D NHWC inputs each index
+        is converted with ``nhwc_dim_to_nchw``.
+        """
+        source = self.graph.get_node(node.layer.input[0], copy=True)
+        axes_node = self.graph.get_node(node.layer.input[1], copy=True)
+        assert axes_node.layer_type == "Const", "Only support Const parameter[reduce_idx]"
+        reduce_dims = axes_node.value.tolist()
+        keep = node.get_attr("keep_dims")
+        if source.tf_data_format == "NHWC" and len(source.out_shapes[0]) == 4:
+            for pos in range(len(reduce_dims)):
+                reduce_dims[pos] = nhwc_dim_to_nchw(source, reduce_dims[pos])
+        node.fluid_code.add_layer(
+            "reduce_mean",
+            inputs=source,
+            output=node,
+            param_attr={"dim": reduce_dims,
+                        "keep_dim": keep})
+    def MatMul(self, node):
+        """Translate a TF ``MatMul`` node into a fluid ``matmul`` layer.
+        When the last dimension of ``x`` is unknown but the first dimension
+        of ``y`` is known, ``x`` is first reshaped (in place, under its own
+        name) so that its last dimension matches.
+        """
+        x = self.graph.get_node(node.layer.input[0], copy=True)
+        y = self.graph.get_node(node.layer.input[1], copy=True)
+        trans_x = node.get_attr('transpose_a')
+        trans_y = node.get_attr('transpose_b')
+        operands = {"x": x, "y": y}
+        # Works around a Paddle shape-inference problem;
+        # should be removed after paddle 1.6.
+        if x.out_shapes[0][-1] < 0 and y.out_shapes[0][0] > 0:
+            patched_shape = x.out_shapes[0]
+            patched_shape[-1] = y.out_shapes[0][0]
+            node.fluid_code.add_layer(
+                "reshape",
+                inputs=x,
+                output=x,
+                param_attr={"shape": patched_shape})
+        matmul_attr = {"transpose_x": trans_x, "transpose_y": trans_y}
+        node.fluid_code.add_layer(
+            "matmul", inputs=operands, output=node, param_attr=matmul_attr)
+    def ArgMax(self, node):
+        """Translate a TF ``ArgMax`` node into a fluid ``argmax`` layer.
+        The axis must be a Const; it is converted with ``nhwc_dim_to_nchw``
+        for 4-D NHWC inputs.
+        """
+        source = self.graph.get_node(node.layer.input[0], copy=True)
+        axis_node = self.graph.get_node(node.layer.input[1], copy=True)
+        assert axis_node.layer_type == "Const", "ArgMax only support Const parameter"
+        self.add_omit_nodes(axis_node.layer_name, node.layer_name)
+        arg_axis = axis_node.value
+        if source.tf_data_format == "NHWC" and len(source.out_shapes[0]) == 4:
+            arg_axis = nhwc_dim_to_nchw(source, arg_axis)
+        node.fluid_code.add_layer(
+            "argmax",
+            inputs=source,
+            output=node,
+            param_attr={"axis": arg_axis})
+    def StridedSlice(self, node):
+        """Translate a TF ``StridedSlice`` node into a fluid ``slice`` layer.
+        Begin, end and strides must be Const and every stride must be 1.
+        An end value of 0 is replaced by 999999. For 5-D inputs with a
+        non-zero ``shrink_axis_mask`` a ``squeeze`` over the masked axes is
+        appended.
+        """
+        source = self.graph.get_node(node.layer.input[0], copy=True)
+        begin_node = self.graph.get_node(node.layer.input[1], copy=True)
+        end_node = self.graph.get_node(node.layer.input[2], copy=True)
+        stride_node = self.graph.get_node(node.layer.input[3], copy=True)
+        assert begin_node.layer_type == "Const"
+        assert end_node.layer_type == "Const"
+        assert stride_node.layer_type == "Const"
+        for operand in (begin_node, end_node, stride_node):
+            self.add_omit_nodes(operand.layer_name, node.layer_name)
+        steps = stride_node.value.tolist()
+        assert len(set(steps)) == 1 and steps[0] == 1
+        starts = begin_node.value.tolist()
+        stops = end_node.value.tolist()
+        if source.tf_data_format == "NHWC" and len(source.out_shapes[0]) == 4:
+            starts = [starts[i] for i in (0, 3, 1, 2)]
+            stops = [stops[i] for i in (0, 3, 1, 2)]
+        for pos, stop in enumerate(stops):
+            if stop == 0:
+                stops[pos] = 999999
+        slice_attr = {
+            "axes": list(range(len(steps))),
+            "starts": starts,
+            "ends": stops
+        }
+        shrink_axis_mask = node.get_attr('shrink_axis_mask')
+        # Axes whose bit is set in the mask are the ones to squeeze away.
+        squeeze_axes = [
+            axis for axis in range(len(starts))
+            if (shrink_axis_mask >> axis) & 1 == 1
+        ]
+        node.fluid_code.add_layer(
+            "slice", inputs=source, output=node, param_attr=slice_attr)
+        if shrink_axis_mask > 0 and len(source.out_shapes[0]) == 5:
+            node.fluid_code.add_layer(
+                "squeeze",
+                inputs=node,
+                output=node,
+                param_attr={"axes": squeeze_axes})
+    def Slice(self, node):
+        """Translate a TF ``Slice`` node into a fluid ``slice`` layer.
+        ``begin`` and ``size`` are read from Const nodes when possible and
+        otherwise evaluated through the decoder. Both are permuted into NCHW
+        order for 4-D NHWC inputs. Each size is turned into an end index
+        (``begin + size``); a negative size becomes the end value 99999999.
+        """
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        begin = self.graph.get_node(node.layer.input[1], copy=True)
+        size = self.graph.get_node(node.layer.input[2], copy=True)
+        self.add_omit_nodes(begin.layer_name, node.layer_name)
+        self.add_omit_nodes(size.layer_name, node.layer_name)
+        if begin.layer_type == "Const":
+            begin = begin.value.tolist()
+        else:
+            begin = self.decoder.infer_tensor(begin).tolist()
+        # Fixed: this compared against lowercase "const", while the layer type
+        # is spelled "Const" everywhere else in this mapper, so a constant
+        # size was always sent through the decoder instead of read directly.
+        if size.layer_type == "Const":
+            size = size.value.tolist()
+        else:
+            size = self.decoder.infer_tensor(size).tolist()
+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            size = [size[i] for i in [0, 3, 1, 2]]
+            begin = [begin[i] for i in [0, 3, 1, 2]]
+        # Convert sizes to end indices; a negative size becomes a huge end.
+        for i in range(len(size)):
+            if size[i] < 0:
+                size[i] = 99999999
+            else:
+                size[i] = size[i] + begin[i]
+        attr = {
+            "axes": [i for i in range(len(size))],
+            "starts": begin,
+            "ends": size
+        }
+        node.fluid_code.add_layer(
+            "slice", inputs=input, output=node, param_attr=attr)
+    def Conv2DBackpropInput(self, node):
+        """Translate a TF ``Conv2DBackpropInput`` node into ``conv2d_transpose``.
+        The kernel must be a Const; its value is stored in ``self.weights``
+        transposed with axes (3, 2, 0, 1). The output shape is read from a
+        Const node or inferred through the decoder, and ``out_shape[1:3]`` is
+        passed as ``output_size``.
+        """
+        out_shape = self.graph.get_node(node.layer.input[0], copy=True)
+        kernel = self.graph.get_node(node.layer.input[1], copy=True)
+        input = self.graph.get_node(node.layer.input[2], copy=True)
+        assert kernel.layer_type == "Const", "Kernel of Conv2DBackpropInput should be Const"
+        self.add_omit_nodes(kernel.layer_name, node.layer_name)
+        self.add_omit_nodes(out_shape.layer_name, node.layer_name)
+        if out_shape.layer_type == "Const":
+            out_shape = out_shape.value.tolist()
+        else:
+            out_shape = self.decoder.infer_shape_tensor(out_shape,
+                                                        node.out_shapes[0])
+        in_shape = input.out_shapes[0]
+        if in_shape.count(-1) > 2:
+            in_shape = self.decoder.infer_tensor(input).shape
+        k_size = kernel.out_shapes[0]
+        if k_size.count(-1) > 2:
+            k_size = self.decoder.infer_tensor(kernel).shape
+        pad_mode = node.get_attr("padding").decode()
+        strides = node.get_attr("strides")
+        dilations = node.get_attr("dilations")
+        data_format = node.get_attr("data_format").decode()
+        channel_first = data_format == "NCHW"
+        self.weights[kernel.layer_name.replace('/', '_')] = numpy.transpose(
+            kernel.value, (3, 2, 0, 1))
+        if not channel_first:
+            # Bring shape and per-axis attributes into NCHW order.
+            in_shape = [in_shape[i] for i in [0, 3, 1, 2]]
+            strides = [strides[i] for i in [0, 3, 1, 2]]
+            dilations = [dilations[i] for i in [0, 3, 1, 2]]
+        else:
+            # Fixed: propagate through the graph, as Conv2D and the pooling
+            # mappers do, instead of calling ``self.data_format_propagation``.
+            self.graph.data_format_propagation(node)
+        attr = {
+            "bias_attr": False,
+            "param_attr": string(kernel.layer_name),
+            "num_filters": k_size[2],
+            "filter_size": k_size[0:2],
+            "stride": strides[2:4],
+            "dilation": dilations[2:4],
+            "padding": string(pad_mode),
+            # NOTE(review): [1:3] selects H, W only for an NHWC-ordered
+            # out_shape -- confirm the NCHW case.
+            "output_size": out_shape[1:3]
+        }
+        node.fluid_code.add_layer(
+            "conv2d_transpose", inputs=input, output=node, param_attr=attr)
+    def Max(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        reduce_idx = self.graph.get_node(node.layer.input[1], copy=True)
+        assert reduce_idx.layer_type == "Const", "Only support Const parameter[reduce_idx]"
+        keep_dims = node.get_attr("keep_dims")
+        dim = reduce_idx.value.tolist()
+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            dim = nhwc_dim_to_nchw(input, dim)
+        attr = {"dim": dim, "keep_dim": keep_dims}
+        node.fluid_code.add_layer(
+            "reduce_max", inputs=input, output=node, param_attr=attr)
+    def Sum(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        reduce_idx = self.graph.get_node(node.layer.input[1], copy=True)
+        assert reduce_idx.layer_type == "Const", "Only support Const parameter[reduce_idx]"
+        keep_dims = node.get_attr("keep_dims")
+        dim = reduce_idx.value.tolist()
+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            dim = nhwc_dim_to_nchw(input, dim)
+        attr = {"dim": dim, "keep_dim": keep_dims}
+        node.fluid_code.add_layer(
+            "reduce_sum", inputs=input, output=node, param_attr=attr)
+    def Cast(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        dtype = node.dtype_map[node.get_attr('DstT')]
+        attr = {"dtype": string(dtype)}
+        node.fluid_code.add_layer(
+            "cast", inputs=input, output=node, param_attr=attr)
+    def Split(self, node):
+        dim = self.graph.get_node(node.layer.input[0], copy=True)
+        input = self.graph.get_node(node.layer.input[1], copy=True)
+        self.add_omit_nodes(dim.layer_name, node.layer_name)
+        num_split = node.get_attr('num_split')
+        dim = dim.value
+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            dim = nhwc_dim_to_nchw(input, dim)
+        attr = {"num_or_sections": num_split, "dim": dim}
+        node.fluid_code.add_layer(
+            "split", inputs=input, output=node, param_attr=attr)
+    def Squeeze(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        squeeze_dims = node.get_attr('squeeze_dims')
+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            for i in range(len(squeeze_dims)):
+                squeeze_dims[i] = nhwc_dim_to_nchw(input, squeeze_dims[i])
+        attr = {"axes": squeeze_dims}
+        node.fluid_code.add_layer(
+            "squeeze", inputs=input, output=node, param_attr=attr)
+    def Softmax(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        axis = node.get_attr("axis")
+        if axis is None:
+            axis = -1 + len(input.out_shapes[0])
+        if input.tf_data_format == "NHWC" and len(input.out_shapes[0]) == 4:
+            axis = nhwc_dim_to_nchw(input, axis)
+        attr = {"axis": axis}
+        node.fluid_code.add_layer(
+            "softmax", inputs=input, output=node, param_attr=attr)
+    def ResizeNearestNeighbor(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        resize_shape = self.graph.get_node(node.layer.input[1], copy=True)
+        self.add_omit_nodes(resize_shape.layer_name, node.layer_name)
+        if resize_shape.layer_type == "Const":
+            resize_shape = resize_shape.value.tolist()
+        else:
+            resize_shape = self.decoder.infer_shape_tensor(resize_shape,
+                                                           node.out_shapes[0])
+        align_corners = node.get_attr("align_corners")
+        attr = {"align_corners": align_corners, "out_shape": resize_shape}
+        node.fluid_code.add_layer(
+            "resize_nearest", inputs=input, output=node, param_attr=attr)
+    def ResizeBilinear(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        resize_shape = self.graph.get_node(node.layer.input[1], copy=True)
+        self.add_omit_nodes(resize_shape.layer_name, node.layer_name)
+        if resize_shape.layer_type == "Const":
+            resize_shape = resize_shape.value.tolist()
+        else:
+            resize_shape = self.decoder.infer_shape_tensor(resize_shape,
+                                                           node.out_shapes[0])
+        align_corners = node.get_attr("align_corners")
+        attr = {
+            "align_corners": align_corners,
+            "out_shape": resize_shape,
+            "align_mode": 1
+        }
+        node.fluid_code.add_layer(
+            "resize_bilinear", inputs=input, output=node, param_attr=attr)
+    def GreaterEqual(self, node):
+        x = self.graph.get_node(node.layer.input[0], copy=True)
+        y = self.graph.get_node(node.layer.input[1], copy=True)
+        inputs = {"x": x, "y": y}
+        node.fluid_code.add_layer(
+            "greater_equal", inputs=inputs, output=node, param_attr=None)
+    def RandomUniform(self, node):
+        shape = self.graph.get_node(node.layer.input[0], copy=True)
+        self.add_omit_nodes(shape.layer_name, node.layer_name)
+        if shape.layer_type == "Const":
+            shape = shape.value.tolist()
+        else:
+            shape = self.decoder.infer_shape_tensor(shape)
+        if len(shape) == 4 and node.tf_data_format == "NHWC":
+            shape = [shape[i] for i in [0, 3, 1, 2]]
+        attr = {"shape": shape, "min": 0.0, "max": 0.9999}
+        if shape[0] < 0:
+            input = self.batch_node
+            node.fluid_code.add_layer(
+                "uniform_random_batch_size_like",
+                inputs=input,
+                output=node,
+                param_attr=attr)
+        else:
+            node.fluid_code.add_layer(
+                "uniform_random", inputs=None, output=node, param_attr=attr)
+    def SquaredDifference(self, node):
+        x = self.graph.get_node(node.layer.input[0], copy=True)
+        y = self.graph.get_node(node.layer.input[1], copy=True)
+        inputs = {"x": x, "y": y}
+        node.fluid_code.add_layer(
+            "elementwise_sub", inputs=inputs, output=node, param_attr=None)
+        inputs = {"x": node, "y": node}
+        node.fluid_code.add_layer(
+            "elementwise_mul", inputs=inputs, output=node, param_attr=None)
--- a/x2paddle/x2paddle/op_mapper/tf_op_mapper_nhwc.py
+++ b/x2paddle/x2paddle/op_mapper/tf_op_mapper_nhwc.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from x2paddle.decoder.tf_decoder import TFGraph
+from x2paddle.core.op_mapper import OpMapper
+from x2paddle.core.util import *
+import math
+import inspect
+import numpy
+import sys
+# compute padding size for SAME mode
+def get_same_padding(in_size, kernel_size, stride):
+    new_size = int(math.ceil(in_size * 1.0 / stride))
+    pad_size = (new_size - 1) * stride + kernel_size - in_size
+    if pad_size < 0:
+        pad_size = 0
+    pad0 = int(pad_size / 2)
+    pad1 = pad_size - pad0
+    return [pad0, pad1]
+class TFOpMapperNHWC(OpMapper):
+    """Op mapper that turns decoded TensorFlow graph nodes into fluid layers.
+    Op types listed in ``directly_map_ops`` and ``elementwise_ops`` are handled
+    by ``directly_map`` and ``elementwise_map``; any other op type is handled
+    by the method of the same name, if one exists.
+    """
+    # TF op type -> [fluid layer name, optional {TF attr name: fluid attr name}].
+    # Read by directly_map().
+    directly_map_ops = {
+        'Relu': ['relu'],
+        'Relu6': ['relu6'],
+        'Shape': ['shape'],
+        'Abs': ['abs'],
+        'Sigmoid': ['sigmoid'],
+        'Exp': ['exp'],
+        'Rsqrt': ['rsqrt'],
+        'Sqrt': ['sqrt'],
+        'swish_f32': ['swish'],
+        'Tanh': ['tanh'],
+        'Softplus': ['softplus'],
+        'LeakyRelu': ['leaky_relu', {
+            'alpha': 'alpha'
+        }],
+        'Floor': ['floor'],
+        'Erf': ['erf']
+    }
+    # TF binary op type -> fluid layer name. Read by elementwise_map().
+    elementwise_ops = {
+        'Add': 'elementwise_add',
+        'AddV2': 'elementwise_add',
+        'RealDiv': 'elementwise_div',
+        'Sub': 'elementwise_sub',
+        'Maximum': 'elementwise_max',
+        'Minimum': 'elementwise_min',
+        'LessEqual': 'less_equal',
+        'Mul': 'elementwise_mul',
+        'FloorDiv': 'elementwise_floordiv'
+    }
+    def __init__(self, decoder):
+        super(TFOpMapperNHWC, self).__init__()
+        self.decoder = decoder
+        self.graph = decoder.tf_graph
+        self.weights = dict()
+        self.batch_node = None
+        self.omit_nodes = list()
+        self.used_custom_layers = dict()
+        not_placeholder = list()
+        for name in self.graph.input_nodes:
+            if self.graph.get_node(
+                    name).layer_type != "Placeholder" and self.graph.get_node(
+                        name
+                    ).layer_type != "OneShotIterator" and self.graph.get_node(
+                        name).layer_type != "IteratorV2":
+                not_placeholder.append(name)
+        for name in not_placeholder:
+            idx = self.graph.input_nodes.index(name)
+            del self.graph.input_nodes[idx]
+        unsupported_ops = set()
+        sys.stderr.write("Total nodes: {}\n".format(len(self.graph.topo_sort)))
+        for i, node_name in enumerate(self.graph.topo_sort):
+            sys.stderr.write("\rConverting node {} ...     ".format(i + 1))
+            node = self.graph.get_node(node_name)
+            op = node.layer_type
+            if op in self.directly_map_ops:
+                if len(unsupported_ops) > 0:
+                    continue
+                self.directly_map(node)
+            elif op in self.elementwise_ops:
+                if len(unsupported_ops) > 0:
+                    continue
+                self.elementwise_map(node)
+            elif hasattr(self, op):
+                if len(unsupported_ops) > 0:
+                    continue
+                func = getattr(self, op)
+                try:
+                    func(node)
+                except Exception as e:
+                    unsupported_ops.add(op)
+                    print(e)
+            else:
+                unsupported_ops.add(op)
+        if len(unsupported_ops) > 0:
+            print("========= {} OPs are not supported yet ===========".format(
+                len(unsupported_ops)))
+            for op in unsupported_ops:
+                print("========== {} ============".format(op))
+            sys.exit(-1)
+        sys.stderr.write("\nDone!\n")
+    def add_omit_nodes(self, in_node_name, out_node_name):
+        in_node = self.graph.get_node(in_node_name)
+        out_node = self.graph.get_node(out_node_name)
+        index = in_node.outputs.index(out_node_name)
+        del in_node.outputs[index]
+        index = out_node.inputs.index(in_node_name)
+        del out_node.inputs[index]
+        self.omit_nodes.append(in_node.layer_name)
+    def directly_map(self, node):
+        assert node.layer_type in self.directly_map_ops
+        op_info = self.directly_map_ops[node.layer_type]
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        attr = dict()
+        for param in op_info[1:]:
+            tf_param_name = list(param.keys())[0]
+            pd_param_name = list(param.values())[0]
+            tf_param = node.get_attr(tf_param_name)
+            attr[pd_param_name] = tf_param
+        if len(input.out_shapes[0]) == 4 and op_info[0] != 'shape':
+            attr1 = {"perm": [0, 3, 1, 2]}
+            node.fluid_code.add_layer(
+                'transpose', inputs=input, output=node, param_attr=attr1)
+            input = node
+            node.fluid_code.add_layer(
+                op_info[0], inputs=input, output=node, param_attr=attr)
+            input = node
+            attr2 = {"perm": [0, 2, 3, 1]}
+            node.fluid_code.add_layer(
+                'transpose', inputs=input, output=node, param_attr=attr2)
+        else:
+            node.fluid_code.add_layer(
+                op_info[0], inputs=input, output=node, param_attr=attr)
+    def elementwise_map(self, node):
+        assert node.layer_type in self.elementwise_ops
+        op_type = self.elementwise_ops[node.layer_type]
+        x = self.graph.get_node(node.layer.input[0], copy=True)
+        y = self.graph.get_node(node.layer.input[1], copy=True)
+        inputs = {"x": x, "y": y}
+        node.fluid_code.add_layer(
+            op_type, inputs=inputs, output=node, param_attr=None)
+    def Placeholder(self, node):
+        shape = node.out_shapes[0]
+        assert len(shape) != 0, "Unknown shape of input nodes[{}].".format(
+            node.layer_name)
+        dtype = node.dtype
+        if shape[0] < 0:
+            self.batch_node = node
+        attr = {
+            'dtype': string(dtype),
+            'shape': shape,
+            'name': string(node.layer_name),
+            'append_batch_size': False
+        }
+        node.fluid_code.add_layer(
+            "data", inputs=None, output=node, param_attr=attr)
+    def Const(self, node):
+        shape = node.out_shapes[0]
+        dtype = node.dtype
+        value = node.value
+        initializer = "Constant(0.0)"
+        if len(shape) == 0:
+            assert value.size == 1, "Unexpected situation happend"
+            shape = [1]
+            initializer = "Constant({})".format(value)
+        self.weights[node.layer_name] = node.value
+        attr = {
+            'dtype': string(dtype),
+            'shape': shape,
+            'name': string(node.layer_name),
+            'default_initializer': initializer
+        }
+        node.fluid_code.add_layer(
+            "create_parameter", inputs=None, output=node, param_attr=attr)
+    def Transpose(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        perm = self.graph.get_node(node.layer.input[1], copy=True)
+        assert perm.layer_type == "Const", "Perm of transpose OP should be Const"
+        del self.weights[perm.layer_name.replace('/', '_')]
+        perm.fluid_code.clear()
+        perm = perm.value.tolist()
+        attr = {'perm': perm}
+        node.fluid_code.add_layer(
+            "transpose", inputs=input, output=node, param_attr=attr)
+    def Fill(self, node):
+        dims = self.graph.get_node(node.layer.input[0], copy=True)
+        input_value = self.graph.get_node(node.layer.input[1], copy=True)
+        assert input_value.layer_type == "Const", "Value of fill OP should be Const"
+        self.add_omit_nodes(input_value.layer_name, node.layer_name)
+        input_value = input_value.value
+        input_dtype = string(input_value.dtype)
+        attr = {'value': input_value, 'dtype': input_dtype}
+        node.fluid_code.add_layer(
+            "fill_constant", inputs=dims, output=node, param_attr=attr)
+    def DepthToSpace(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        block_size = node.get_attr("block_size")
+        data_format = node.get_attr("data_format").decode()
+        if data_format == "NHWC":
+            attr = {"perm": [0, 3, 1, 2]}
+            node.fluid_code.add_layer(
+                "transpose", inputs=input, output=input, param_attr=attr)
+        n, h, w, c = input.out_shapes[0]
+        attr = {'shape': [0, block_size * block_size, -1, h, w]}
+        node.fluid_code.add_layer(
+            "reshape", inputs=input, output=input, param_attr=attr)
+        attr = {'perm': [0, 2, 1, 3, 4]}
+        node.fluid_code.add_layer(
+            "transpose", inputs=input, output=input, param_attr=attr)
+        attr = {'shape': [0, c, h, w]}
+        node.fluid_code.add_layer(
+            "reshape", inputs=input, output=input, param_attr=attr)
+        attr = {'upscale_factor': block_size}
+        node.fluid_code.add_layer(
+            "pixel_shuffle", inputs=input, output=node, param_attr=attr)
+        if data_format == "NHWC":
+            attr = {"perm": [0, 2, 3, 1]}
+            node.fluid_code.add_layer(
+                "transpose", inputs=node, output=node, param_attr=attr)
+    def MaxPool(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        k_size = node.get_attr("ksize")
+        strides = node.get_attr("strides")
+        data_format = node.get_attr("data_format").decode()
+        pad_mode = node.get_attr("padding").decode()
+        channel_first = data_format == "NCHW"
+        if not channel_first:
+            attr = {"perm": [0, 3, 1, 2]}
+            node.fluid_code.add_layer(
+                "transpose", inputs=input, output=node, param_attr=attr)
+            strides = [strides[i] for i in [0, 3, 1, 2]]
+            k_size = [k_size[i] for i in [0, 3, 1, 2]]
+            input = node
+        attr = {
+            "pool_size": k_size[2:4],
+            "pool_type": string("max"),
+            "pool_stride": strides[2:4],
+            "pool_padding": string(pad_mode)
+        }
+        node.fluid_code.add_layer(
+            "pool2d", inputs=input, output=node, param_attr=attr)
+        if not channel_first:
+            attr = {"perm": [0, 2, 3, 1]}
+            node.fluid_code.add_layer(
+                "transpose", inputs=node, output=node, param_attr=attr)
+    def Conv2D(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        kernel = self.graph.get_node(node.layer.input[1], copy=True)
+        self.add_omit_nodes(kernel.layer_name, node.layer_name)
+        k_size = kernel.out_shapes[0]
+        strides = node.get_attr("strides")
+        dilations = node.get_attr("dilations")
+        data_format = node.get_attr("data_format").decode()
+        pad_mode = node.get_attr("padding").decode()
+        channel_first = data_format == "NCHW"
+        if data_format == "NHWC":
+            n, h, w, c = input.out_shapes[0]
+        else:
+            n, c, h, w = input.out_shapes[0]
+        if kernel.layer_type == 'Const':
+            kernel_value = kernel.value
+            kernel_weight_name = kernel.layer_name.replace('/', '_')
+        else:
+            kernel_value = self.decoder.infer_tensor(kernel)
+            if kernel.layer_type == 'Split':
+                kernel_weight_name = "{}_{}_kernel".format(node.layer_name,
+                                                           kernel.layer_name)
+            else:
+                kernel_weight_name = kernel.layer_name.replace('/', '_')
+        self.weights[kernel_weight_name] = numpy.transpose(kernel_value,
+                                                           (3, 2, 0, 1))
+        if not channel_first:
+            strides = [strides[i] for i in [0, 3, 1, 2]]
+            dilations = [dilations[i] for i in [0, 3, 1, 2]]
+            attr = {"perm": [0, 3, 1, 2]}
+            node.fluid_code.add_layer(
+                "transpose", inputs=input, output=node, param_attr=attr)
+            input = node
+        attr = {
+            "bias_attr": False,
+            "param_attr": string(kernel_weight_name),
+            "num_filters": k_size[3],
+            "filter_size": k_size[0:2],
+            "stride": strides[2:4],
+            "dilation": dilations[2:4],
+            "padding": string(pad_mode)
+        }
+        if hasattr(node, 'dilation') and attr['dilation'] == [1, 1]:
+            if len(node.dilation) == 1:
+                attr['dilation'] = [1, node.dilation[0]]
+        if c == -1:
+            reshape_attr = {"shape": [0, k_size[2], 0, 0]}
+            node.fluid_code.add_layer(
+                "reshape", inputs=input, output=input, param_attr=reshape_attr)
+        node.fluid_code.add_layer(
+            "conv2d", inputs=input, output=node, param_attr=attr)
+        if not channel_first:
+            attr = {"perm": [0, 2, 3, 1]}
+            node.fluid_code.add_layer(
+                "transpose", inputs=node, output=node, param_attr=attr)
+    def BiasAdd(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        bias = self.graph.get_node(node.layer.input[1], copy=True)
+        inputs = {"x": input, "y": bias}
+        node.fluid_code.add_layer(
+            "elementwise_add", inputs=inputs, output=node, param_attr=None)
+    def FusedBatchNorm(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        gamma = self.graph.get_node(node.layer.input[1], copy=True)
+        beta = self.graph.get_node(node.layer.input[2], copy=True)
+        moving_mean = self.graph.get_node(node.layer.input[3], copy=True)
+        moving_var = self.graph.get_node(node.layer.input[4], copy=True)
+        data_format = node.get_attr("data_format").decode()
+        channel_first = data_format == "NCHW"
+        assert gamma.layer_type == "Const"
+        assert beta.layer_type == "Const"
+        assert moving_mean.layer_type == "Const"
+        assert moving_var.layer_type == "Const"
+        self.add_omit_nodes(gamma.layer_name, node.layer_name)
+        self.add_omit_nodes(beta.layer_name, node.layer_name)
+        self.add_omit_nodes(moving_mean.layer_name, node.layer_name)
+        self.add_omit_nodes(moving_var.layer_name, node.layer_name)
+        if not channel_first:
+            attr = {"perm": [0, 3, 1, 2]}
+            node.fluid_code.add_layer(
+                "transpose", inputs=input, output=node, param_attr=attr)
+            input = node
+        attr = {
+            "epsilon": node.get_attr("epsilon"),
+            "param_attr": string(gamma.layer_name),
+            "bias_attr": string(beta.layer_name),
+            "moving_mean_name": string(moving_mean.layer_name),
+            "moving_variance_name": string(moving_var.layer_name),
+            "is_test": True
+        }
+        node.fluid_code.add_layer(
+            "batch_norm", inputs=input, output=node, param_attr=attr)
+        if not channel_first:
+            attr = {"perm": [0, 2, 3, 1]}
+            node.fluid_code.add_layer(
+                "transpose", inputs=node, output=node, param_attr=attr)
+    def DepthwiseConv2dNative(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        kernel = self.graph.get_node(node.layer.input[1], copy=True)
+        assert kernel.layer_type == "Const", "Kernel of DepthwiseConv2DNative should be Const"
+        self.add_omit_nodes(kernel.layer_name, node.layer_name)
+        in_shape = input.out_shapes[0]
+        k_size = kernel.out_shapes[0]
+        strides = node.get_attr("strides")
+        dilations = node.get_attr("dilations")
+        data_format = node.get_attr("data_format").decode()
+        pad_mode = node.get_attr("padding").decode()
+        channel_first = data_format == "NCHW"
+        self.weights[kernel.layer_name.replace('/', '_')] = numpy.transpose(
+            kernel.value, (2, 3, 0, 1))
+        if not channel_first:
+            in_shape = [in_shape[i] for i in [0, 3, 1, 2]]
+            strides = [strides[i] for i in [0, 3, 1, 2]]
+            dilations = [dilations[i] for i in [0, 3, 1, 2]]
+            attr = {"perm": [0, 3, 1, 2]}
+            node.fluid_code.add_layer(
+                "transpose", inputs=input, output=node, param_attr=attr)
+            input = node
+        attr = {
+            "bias_attr": False,
+            "param_attr": string(kernel.layer_name),
+            "num_filters": in_shape[1],
+            "filter_size": k_size[0:2],
+            "stride": strides[2:4],
+            "dilation": dilations[2:4],
+            "groups": k_size[3] * in_shape[1],
+            "use_cudnn": False,
+            "padding": string(pad_mode)
+        }
+        node.fluid_code.add_layer(
+            "conv2d", inputs=input, output=node, param_attr=attr)
+        if not channel_first:
+            attr = {"perm": [0, 2, 3, 1]}
+            node.fluid_code.add_layer(
+                "transpose", inputs=node, output=node, param_attr=attr)
+    def Reshape(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        param = self.graph.get_node(node.layer.input[1], copy=True)
+        if param.layer_type == "Const":
+            self.add_omit_nodes(param.layer_name, node.layer_name)
+            shape = param.value.tolist()
+        else:
+            shape = param
+        inputs = {"x": input, "shape": shape}
+        node.fluid_code.add_layer(
+            "reshape", inputs=inputs, output=node, param_attr=None)
+        if param.layer_type != "Const":
+            out_shape = numpy.array(node.out_shapes[0])
+            if (out_shape > 0).any():
+                out_shape[out_shape < 0] = 0
+                attr = {'shape': out_shape.tolist()}
+                node.fluid_code.add_layer(
+                    "reshape", inputs=node, output=node, param_attr=attr)
+    def AvgPool(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        k_size = node.get_attr("ksize")
+        strides = node.get_attr("strides")
+        data_format = node.get_attr("data_format").decode()
+        pad_mode = node.get_attr("padding").decode()
+        channel_first = data_format == "NCHW"
+        if not channel_first:
+            strides = [strides[i] for i in [0, 3, 1, 2]]
+            k_size = [k_size[i] for i in [0, 3, 1, 2]]
+            attr = {"perm": [0, 3, 1, 2]}
+            node.fluid_code.add_layer(
+                "transpose", inputs=input, output=node, param_attr=attr)
+            input = node
+        attr = {
+            "pool_size": k_size[2:4],
+            "pool_type": string("avg"),
+            "pool_stride": strides[2:4],
+            "pool_padding": string(pad_mode)
+        }
+        node.fluid_code.add_layer(
+            "pool2d", inputs=input, output=node, param_attr=attr)
+        if not channel_first:
+            attr = {"perm": [0, 2, 3, 1]}
+            node.fluid_code.add_layer(
+                "transpose", inputs=node, output=node, param_attr=attr)
+    def SplitV(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        num_sections = self.graph.get_node(node.layer.input[1], copy=True)
+        dim = self.graph.get_node(node.layer.input[2], copy=True)
+        assert num_sections.layer_type == "Const"
+        assert dim.layer_type == "Const"
+        self.add_omit_nodes(num_sections.layer_name, node.layer_name)
+        self.add_omit_nodes(dim.layer_name, node.layer_name)
+        dim = dim.value
+        attr = {
+            "num_or_sections": num_sections.value.tolist(),
+            "dim": dim.value
+        }
+        node.fluid_code.add_layer(
+            "split", inputs=input, output=node, param_attr=attr)
+    def ConcatV2(self, node):
+        inputs = [
+            self.graph.get_node(
+                name, copy=True) for name in node.layer.input[:-1]
+        ]
+        axis = self.graph.get_node(node.layer.input[-1], copy=True)
+        assert axis.layer_type == "Const"
+        self.add_omit_nodes(axis.layer_name, node.layer_name)
+        axis = axis.value
+        if axis < 0:
+            axis += len(inputs[0].out_shapes[0])
+        attr = {"axis": axis}
+        node.fluid_code.add_layer(
+            "concat", inputs=inputs, output=node, param_attr=attr)
+    def Tile(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        expand_times = self.graph.get_node(node.layer.input[1], copy=True)
+        if expand_times.layer_type == "Const":
+            self.add_omit_nodes(expand_times.layer_name, node.layer_name)
+            expand_times = expand_times.value.tolist()
+        else:
+            expand_times = expand_times
+        inputs = {"x": input, "expand_times": expand_times}
+        node.fluid_code.add_layer(
+            "expand", inputs=inputs, output=node, param_attr=None)
+    def Pack(self, node):
+        inputs = [
+            self.graph.get_node(
+                name, copy=True) for name in node.layer.input
+        ]
+        reshape_shape = list()
+        for input_node in inputs:
+            k_size = input_node.out_shapes[0]
+            if len(k_size) and k_size[-1] != -1:
+                reshape_shape = [0] * len(k_size)
+                reshape_shape[-1] = k_size[-1]
+                break
+        if len(reshape_shape):
+            for i, input_node in enumerate(inputs):
+                node.fluid_code.add_layer(
+                    "reshape",
+                    inputs=input_node,
+                    output='tmp_{}'.format(i),
+                    param_attr={"shape": reshape_shape})
+        axis = node.get_attr("axis")
+        attr = {"axis": axis}
+        if len(reshape_shape):
+            inputs = ['tmp_{}'.format(i) for i in range(len(inputs))]
+        node.fluid_code.add_layer(
+            "stack", inputs=inputs, output=node, param_attr=attr)
+    def Pad(self, node):
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        paddings = self.graph.get_node(node.layer.input[1], copy=True)
+        assert paddings.layer_type == "Const", "Padding should be Const"
+        self.add_omit_nodes(paddings.layer_name, node.layer_name)
+        paddings = paddings.value.flatten().tolist()
+        data_format = input.tf_data_format
+        if len(input.out_shapes[0]) == 4:
+            new_padding = None
+            if input.tf_data_format == "NHWC":
+                if paddings[0] + paddings[1] + paddings[6] + paddings[7] == 0:
+                    new_padding = paddings[2:6]
+            else:
+                if paddings[0] + paddings[1] + paddings[2] + paddings[3] == 0:
+                    new_padding = paddings[4:]
+            if new_padding is not None:
+                if input.tf_data_format == "NHWC":
+                    attr = {"perm": [0, 3, 1, 2]}
+                    node.fluid_code.add_layer(
+                        "transpose", inputs=input, output=node, param_attr=attr)
+                    input = node
+                attr = {"paddings": new_padding}
+                node.fluid_code.add_layer(
+                    "pad2d", inputs=input, output=node, param_attr=attr)
+                if input.tf_data_format == "NHWC":
+                    attr = {"perm": [0, 2, 3, 1]}
+                    node.fluid_code.add_layer(
+                        "transpose", inputs=node, output=node, param_attr=attr)
+                return
+        attr = {"paddings": paddings}
+        node.fluid_code.add_layer(
+            "pad", inputs=input, output=node, param_attr=attr)
+    def Range(self, node):
+        start = self.graph.get_node(node.layer.input[0], copy=True)
+        limit = self.graph.get_node(node.layer.input[1], copy=True)
+        delta = self.graph.get_node(node.layer.input[2], copy=True)
+        if start.layer_type == "Const":
+            self.add_omit_nodes(start.layer_name, node.layer_name)
+            start = start.value
+        if limit.layer_type == "Const":
+            self.add_omit_nodes(limit.layer_name, node.layer_name)
+            limit = limit.value
+        if delta.layer_type == "Const":
+            self.add_omit_nodes(delta.layer_name, node.layer_name)
+            delta = delta.value
+        dtype = node.dtype
+        inputs = {
+            "start": start,
+            "end": limit,
+            "step": delta,
+        }
+        attr = {"dtype": string(node.dtype)}
+        node.fluid_code.add_layer(
+            "range", inputs=inputs, output=node, param_attr=attr)
+    def Mean(self, node):
+        """Emit ``reduce_mean`` for a TF ``Mean`` node.
+        The reduction indices (second input) must come from a ``Const``
+        node; ``keep_dims`` is forwarded as ``keep_dim``.
+        """
+        data = self.graph.get_node(node.layer.input[0], copy=True)
+        axes_node = self.graph.get_node(node.layer.input[1], copy=True)
+        assert axes_node.layer_type == "Const", "Only support Const parameter[reduce_idx]"
+        reduce_attr = {
+            "dim": axes_node.value.tolist(),
+            "keep_dim": node.get_attr("keep_dims"),
+        }
+        node.fluid_code.add_layer(
+            "reduce_mean", inputs=data, output=node, param_attr=reduce_attr)
+    def MatMul(self, node):
+        """Emit ``matmul`` for a TF ``MatMul`` node.
+        The transpose flags come from ``transpose_a``/``transpose_b`` and
+        fall back to ``adj_x``/``adj_y`` when those attributes are ``None``.
+        """
+        lhs = self.graph.get_node(node.layer.input[0], copy=True)
+        rhs = self.graph.get_node(node.layer.input[1], copy=True)
+        transpose_a = node.get_attr('transpose_a')
+        transpose_b = node.get_attr('transpose_b')
+        # fix paddle shape infer problem
+        # should be removed after paddle 1.6
+        lhs_shape = lhs.out_shapes[0]
+        if lhs_shape[-1] < 0 and rhs.out_shapes[0][0] > 0:
+            # NOTE(review): this writes into the list held in lhs.out_shapes,
+            # and always pairs lhs's last dim with rhs's first dim, whatever
+            # the transpose flags or rank are -- confirm this is intended.
+            lhs_shape[-1] = rhs.out_shapes[0][0]
+            node.fluid_code.add_layer(
+                "reshape",
+                inputs=lhs,
+                output=lhs,
+                param_attr={"shape": lhs_shape})
+        if transpose_a is None:
+            transpose_a = node.get_attr('adj_x')
+        if transpose_b is None:
+            transpose_b = node.get_attr('adj_y')
+        operands = {"x": lhs, "y": rhs}
+        matmul_attr = {"transpose_x": transpose_a, "transpose_y": transpose_b}
+        node.fluid_code.add_layer(
+            "matmul", inputs=operands, output=node, param_attr=matmul_attr)
+    def BatchMatMul(self, node):
+        """Handle ``BatchMatMul`` by delegating to ``MatMul``."""
+        result = self.MatMul(node)
+        return result
+    def BatchMatMulV2(self, node):
+        """Handle ``BatchMatMulV2`` by delegating to ``MatMul``."""
+        result = self.MatMul(node)
+        return result
+    def ArgMax(self, node):
+        """Emit ``argmax`` for a TF ``ArgMax`` node.
+        The axis (second input) must be a ``Const`` node; it is registered
+        through ``add_omit_nodes`` and its ``value`` is passed as ``axis``.
+        """
+        data = self.graph.get_node(node.layer.input[0], copy=True)
+        axis_node = self.graph.get_node(node.layer.input[1], copy=True)
+        assert axis_node.layer_type == "Const", "ArgMax only support Const parameter"
+        self.add_omit_nodes(axis_node.layer_name, node.layer_name)
+        node.fluid_code.add_layer(
+            "argmax",
+            inputs=data,
+            output=node,
+            param_attr={"axis": axis_node.value})
+    def StridedSlice(self, node):
+        """Map a TF ``StridedSlice`` node onto a ``slice`` layer.
+        ``begin``, ``end`` and ``strides`` must be ``Const`` nodes; only
+        strides of 1 and an ``ellipsis_mask`` of 0 are accepted. The slice
+        is followed by ``unsqueeze`` when ``new_axis_mask`` has bits set and
+        by ``squeeze`` when ``shrink_axis_mask`` has bits set.
+        """
+        input = self.graph.get_node(node.layer.input[0], copy=True)
+        begin = self.graph.get_node(node.layer.input[1], copy=True)
+        end = self.graph.get_node(node.layer.input[2], copy=True)
+        strides = self.graph.get_node(node.layer.input[3], copy=True)
+        assert begin.layer_type == "Const"
+        assert end.layer_type == "Const"
+        assert strides.layer_type == "Const"
+        self.add_omit_nodes(begin.layer_name, node.layer_name)
+        self.add_omit_nodes(end.layer_name, node.layer_name)
+        self.add_omit_nodes(strides.layer_name, node.layer_name)
+        strides = strides.value.tolist()
+        assert len(set(strides)) == 1 and strides[
+            0] == 1, "Only support strides be 1 in StridedSlice OP"
+        begin = begin.value.tolist()
+        end = end.value.tolist()
+        # An end value of 0 is replaced by a large sentinel (999999).
+        for i in range(len(end)):
+            if end[i] == 0:
+                end[i] = 999999
+        begin_mask = node.get_attr('begin_mask')
+        end_mask = node.get_attr('end_mask')
+        ellipsis_mask = node.get_attr('ellipsis_mask')
+        new_axis_mask = node.get_attr('new_axis_mask')
+        shrink_axis_mask = node.get_attr('shrink_axis_mask')
+        assert ellipsis_mask == 0, "(OP:{} Name:{})Only support ellipsis_mask be 0[now: {}] n StridedSlice OP".format(
+            node.layer_type, node.layer.name, ellipsis_mask)
+        # TODO codes without validation
+        # Use it carefully
+        new_begin = list()
+        new_end = list()
+        new_axes = list()
+        shrink_axes = list()
+        for i, item in enumerate(begin):
+            # Bit i of new_axis_mask set: record the position for unsqueeze
+            # and contribute no begin/end entry for it.
+            mask = (new_axis_mask >> i) & 1
+            if mask != 0:
+                new_axes.append(i)
+                continue
+            # Bit i of shrink_axis_mask set: record the position for squeeze.
+            mask = (shrink_axis_mask >> i) & 1
+            if mask != 0:
+                shrink_axes.append(i)
+            # Bit i of begin_mask set: start from 0 instead of begin[i].
+            mask = (begin_mask >> i) & 1
+            if mask != 0:
+                new_begin.append(0)
+            else:
+                new_begin.append(item)
+            # Bit i of end_mask set: use the large sentinel instead of end[i].
+            mask = (end_mask >> i) & 1
+            if mask != 0:
+                new_end.append(999999)
+            else:
+                new_end.append(end[i])
+        # The slice covers axes 0 .. len(new_begin) - 1.
+        attr = {
+            "axes": [i for i in range(len(new_begin))],
+            "starts": new_begin,
+            "ends": new_end
+        }
+        node.fluid_code.add_layer(
+            "slice", inputs=input, output=node, param_attr=attr)
+        if len(new_axes) > 0:
+            attr = {"axes": new_axes}
+            node.fluid_code.add_layer(
+                "unsqueeze", inputs=node, output=node, param_attr=attr)
+        if len(shrink_axes) > 0:
+            # No squeeze is emitted when the input rank plus the number of
+            # new axes is at most 1.
+            if len(input.out_shapes[0]) + len(new_axes) <= 1:
+                pass
+            else:
+                attr = {"axes": shrink_axes}
+                node.fluid_code.add_layer(
+                    "squeeze", inputs=node, output=node, param_attr=attr)
+    def Slice(self, node):
+        """Map a TF ``Slice`` node onto the ``crop_tensor`` layer.
+        A ``Const`` ``begin`` is omitted from the graph and used by value;
+        otherwise its value is obtained with ``decoder.infer_tensor``. A
+        ``Const`` ``size`` is used by value; otherwise the size node is
+        reshaped to its own recorded shape and passed on as a node.
+        """
+        data = self.graph.get_node(node.layer.input[0], copy=True)
+        begin = self.graph.get_node(node.layer.input[1], copy=True)
+        size = self.graph.get_node(node.layer.input[2], copy=True)
+        if begin.layer_type == "Const":
+            self.add_omit_nodes(begin.layer_name, node.layer_name)
+            offsets = begin.value.tolist()
+        else:
+            offsets = self.decoder.infer_tensor(begin).tolist()
+        if size.layer_type == "Const":
+            self.add_omit_nodes(size.layer_name, node.layer_name)
+            size = size.value.tolist()
+        else:
+            node.fluid_code.add_layer(
+                "reshape",
+                inputs=size,
+                output=size,
+                param_attr={"shape": size.out_shapes[0]})
+        crop_inputs = {"x": data, "offsets": offsets, "shape": size}
+        node.fluid_code.add_layer(
+            "crop_tensor", inputs=crop_inputs, output=node, param_attr=None)
+    def Conv2DBackpropInput(self, node):
+        """Map a TF ``Conv2DBackpropInput`` node onto ``conv2d_transpose``.
+        Inputs are, in order, the output-shape tensor, the kernel (must be
+        a ``Const`` node) and the data tensor. The kernel is stored in
+        ``self.weights`` transposed with ``(3, 2, 0, 1)``. When the data
+        format is not ``NCHW`` the data is transposed to channel-first
+        before the layer and transposed back afterwards.
+        """
+        out_shape = self.graph.get_node(node.layer.input[0], copy=True)
+        kernel = self.graph.get_node(node.layer.input[1], copy=True)
+        input = self.graph.get_node(node.layer.input[2], copy=True)
+        assert kernel.layer_type == "Const", "Kernel of Conv2DBackpropInput should be Const"
+        self.add_omit_nodes(kernel.layer_name, node.layer_name)
+        self.add_omit_nodes(out_shape.layer_name, node.layer_name)
+        if out_shape.layer_type == "Const":
+            out_shape = out_shape.value.tolist()
+        else:
+            out_shape = self.decoder.infer_shape_tensor(out_shape,
+                                                        node.out_shapes[0])
+        k_size = kernel.out_shapes[0]
+        if k_size.count(-1) > 2:
+            k_size = self.decoder.infer_tensor(kernel).shape
+        pad_mode = node.get_attr("padding").decode()
+        strides = node.get_attr("strides")
+        dilations = node.get_attr("dilations")
+        data_format = node.get_attr("data_format").decode()
+        channel_first = data_format == "NCHW"
+        self.weights[kernel.layer_name.replace('/', '_')] = numpy.transpose(
+            kernel.value, (3, 2, 0, 1))
+        if channel_first:
+            self.graph.data_format_propagation(node)
+            # Per the TF op documentation the requested shape follows
+            # data_format, so for NCHW the spatial dims sit at [2:4].
+            output_size = out_shape[2:4]
+        else:
+            # Reorder the per-axis attributes to channel-first so that
+            # [2:4] below selects the spatial entries.
+            strides = [strides[i] for i in [0, 3, 1, 2]]
+            dilations = [dilations[i] for i in [0, 3, 1, 2]]
+            attr = {"perm": [0, 3, 1, 2]}
+            node.fluid_code.add_layer(
+                "transpose", inputs=input, output=node, param_attr=attr)
+            input = node
+            # Channel-last requested shape: spatial dims sit at [1:3].
+            output_size = out_shape[1:3]
+        attr = {
+            "bias_attr": False,
+            "param_attr": string(kernel.layer_name),
+            "num_filters": k_size[2],
+            "filter_size": k_size[0:2],
+            "stride": strides[2:4],
+            "dilation": dilations[2:4],
+            "padding": string(pad_mode),
+            "output_size": output_size
+        }
+        node.fluid_code.add_layer(
+            "conv2d_transpose", inputs=input, output=node, param_attr=attr)
+        if not channel_first:
+            attr = {"perm": [0, 2, 3, 1]}
+            node.fluid_code.add_layer(
+                "transpose", inputs=node, output=node, param_attr=attr)
+    def Max(self, node):
+        """Emit ``reduce_max`` for a TF ``Max`` node.
+        The reduction indices (second input) must come from a ``Const``
+        node; ``keep_dims`` is forwarded as ``keep_dim``.
+        """
+        data = self.graph.get_node(node.layer.input[0], copy=True)
+        axes_node = self.graph.get_node(node.layer.input[1], copy=True)
+        assert axes_node.layer_type == "Const", "Only support Const parameter[reduce_idx]"
+        keep_dims = node.get_attr("keep_dims")
+        reduce_attr = {"dim": axes_node.value.tolist(), "keep_dim": keep_dims}
+        node.fluid_code.add_layer(
+            "reduce_max", inputs=data, output=node, param_attr=reduce_attr)
+    def Sum(self, node):
+        """Emit ``reduce_sum`` for a TF ``Sum`` node.
+        The reduction indices (second input) must come from a ``Const``
+        node; ``keep_dims`` is forwarded as ``keep_dim``.
+        """
+        data = self.graph.get_node(node.layer.input[0], copy=True)
+        axes_node = self.graph.get_node(node.layer.input[1], copy=True)
+        assert axes_node.layer_type == "Const", "Only support Const parameter[reduce_idx]"
+        keep_dims = node.get_attr("keep_dims")
+        reduce_attr = {"dim": axes_node.value.tolist(), "keep_dim": keep_dims}
+        node.fluid_code.add_layer(
+            "reduce_sum", inputs=data, output=node, param_attr=reduce_attr)
+    def Cast(self, node):
+        """Emit ``cast`` using the dtype that ``node.dtype_map`` gives for ``DstT``."""
+        source = self.graph.get_node(node.layer.input[0], copy=True)
+        target_dtype = node.dtype_map[node.get_attr('DstT')]
+        node.fluid_code.add_layer(
+            "cast",
+            inputs=source,
+            output=node,
+            param_attr={"dtype": string(target_dtype)})
+    def Split(self, node):
+        """Emit ``split`` for a TF ``Split`` node.
+        The first input is the split dimension (must be ``Const``), the
+        second is the data; ``num_split`` becomes ``num_or_sections``.
+        """
+        axis_node = self.graph.get_node(node.layer.input[0], copy=True)
+        data = self.graph.get_node(node.layer.input[1], copy=True)
+        assert axis_node.layer_type == "Const"
+        self.add_omit_nodes(axis_node.layer_name, node.layer_name)
+        split_attr = {
+            "num_or_sections": node.get_attr('num_split'),
+            "dim": axis_node.value,
+        }
+        node.fluid_code.add_layer(
+            "split", inputs=data, output=node, param_attr=split_attr)
+    def Squeeze(self, node):
+        """Emit ``squeeze`` with the node's ``squeeze_dims`` attribute as ``axes``."""
+        data = self.graph.get_node(node.layer.input[0], copy=True)
+        squeeze_attr = {"axes": node.get_attr('squeeze_dims')}
+        node.fluid_code.add_layer(
+            "squeeze", inputs=data, output=node, param_attr=squeeze_attr)
+    def Softmax(self, node):
+        """Emit ``softmax``, forwarding whatever ``get_attr("axis")`` returns."""
+        data = self.graph.get_node(node.layer.input[0], copy=True)
+        softmax_attr = {"axis": node.get_attr("axis")}
+        node.fluid_code.add_layer(
+            "softmax", inputs=data, output=node, param_attr=softmax_attr)
+    def ResizeNearestNeighbor(self, node):
+        """Emit transpose -> ``resize_nearest`` -> transpose for this node.
+        A ``Const`` target shape is omitted from the graph and used by
+        value; otherwise the shape node is reshaped to its own recorded
+        shape and passed on as a node.
+        """
+        data = self.graph.get_node(node.layer.input[0], copy=True)
+        target = self.graph.get_node(node.layer.input[1], copy=True)
+        if target.layer_type == "Const":
+            self.add_omit_nodes(target.layer_name, node.layer_name)
+            target = target.value.tolist()
+        else:
+            node.fluid_code.add_layer(
+                "reshape",
+                inputs=target,
+                output=target,
+                param_attr={"shape": target.out_shapes[0]})
+        align_corners = node.get_attr("align_corners")
+        # Permute with [0, 3, 1, 2] before resizing ...
+        node.fluid_code.add_layer(
+            "transpose",
+            inputs=data,
+            output=node,
+            param_attr={"perm": [0, 3, 1, 2]})
+        resize_inputs = {"input": node, "out_shape": target}
+        node.fluid_code.add_layer(
+            "resize_nearest",
+            inputs=resize_inputs,
+            output=node,
+            param_attr={"align_corners": align_corners})
+        # ... and with [0, 2, 3, 1] afterwards.
+        node.fluid_code.add_layer(
+            "transpose",
+            inputs=node,
+            output=node,
+            param_attr={"perm": [0, 2, 3, 1]})
+    def ResizeBilinear(self, node):
+        """Emit transpose -> ``resize_bilinear`` -> transpose for this node.
+        A ``Const`` target shape is omitted from the graph and used by
+        value; otherwise the shape node is reshaped to its own recorded
+        shape and passed on as a node. ``align_mode`` is fixed to 1.
+        """
+        data = self.graph.get_node(node.layer.input[0], copy=True)
+        target = self.graph.get_node(node.layer.input[1], copy=True)
+        if target.layer_type == "Const":
+            self.add_omit_nodes(target.layer_name, node.layer_name)
+            target = target.value.tolist()
+        else:
+            node.fluid_code.add_layer(
+                "reshape",
+                inputs=target,
+                output=target,
+                param_attr={"shape": target.out_shapes[0]})
+        align_corners = node.get_attr("align_corners")
+        # Permute with [0, 3, 1, 2] before resizing ...
+        node.fluid_code.add_layer(
+            "transpose",
+            inputs=data,
+            output=node,
+            param_attr={"perm": [0, 3, 1, 2]})
+        resize_inputs = {"input": node, "out_shape": target}
+        resize_attr = {"align_corners": align_corners, "align_mode": 1}
+        node.fluid_code.add_layer(
+            "resize_bilinear",
+            inputs=resize_inputs,
+            output=node,
+            param_attr=resize_attr)
+        # ... and with [0, 2, 3, 1] afterwards.
+        node.fluid_code.add_layer(
+            "transpose",
+            inputs=node,
+            output=node,
+            param_attr={"perm": [0, 2, 3, 1]})
+    def GreaterEqual(self, node):
+        """Emit ``greater_equal`` on the node's two inputs."""
+        lhs = self.graph.get_node(node.layer.input[0], copy=True)
+        rhs = self.graph.get_node(node.layer.input[1], copy=True)
+        node.fluid_code.add_layer(
+            "greater_equal",
+            inputs={"x": lhs, "y": rhs},
+            output=node,
+            param_attr=None)
+    def RandomUniform(self, node):
+        """Emit ``uniform_random`` with ``min`` 0.0 and ``max`` 0.9999.
+        A ``Const`` shape input is omitted from the graph and passed by
+        value; any other shape input is passed on as a graph node.
+        """
+        shape = self.graph.get_node(node.layer.input[0], copy=True)
+        if shape.layer_type == "Const":
+            self.add_omit_nodes(shape.layer_name, node.layer_name)
+            shape = shape.value.tolist()
+        bounds = {"min": 0.0, "max": 0.9999}
+        node.fluid_code.add_layer(
+            "uniform_random", inputs=shape, output=node, param_attr=bounds)
+    def SquaredDifference(self, node):
+        """Emit ``elementwise_sub`` followed by ``elementwise_mul`` of the result with itself."""
+        lhs = self.graph.get_node(node.layer.input[0], copy=True)
+        rhs = self.graph.get_node(node.layer.input[1], copy=True)
+        node.fluid_code.add_layer(
+            "elementwise_sub",
+            inputs={"x": lhs, "y": rhs},
+            output=node,
+            param_attr=None)
+        # Square the difference by multiplying the node's output with itself.
+        node.fluid_code.add_layer(
+            "elementwise_mul",
+            inputs={"x": node, "y": node},
+            output=node,
+            param_attr=None)
+    def ExpandDims(self, node):
+        """Emit ``unsqueeze`` for a TF ``ExpandDims`` node.
+        A ``Const`` axis is omitted from the graph and passed as a list
+        (a scalar is wrapped); otherwise the axis node itself is passed.
+        """
+        data = self.graph.get_node(node.layer.input[0], copy=True)
+        axis_node = self.graph.get_node(node.layer.input[1], copy=True)
+        if axis_node.layer_type != 'Const':
+            attr = {'axes': axis_node}
+        else:
+            self.add_omit_nodes(axis_node.layer_name, node.layer_name)
+            axes = axis_node.value.tolist()
+            axes = axes if isinstance(axes, list) else [axes]
+            attr = {'axes': axes}
+        node.fluid_code.add_layer(
+            "unsqueeze", inputs=data, output=node, param_attr=attr)
+    def BatchToSpaceND(self, node):
+        """Pass the input through when ``node.skip`` is set, otherwise raise.
+        Raises:
+            Exception: when the node has no truthy ``skip`` attribute.
+        """
+        data = self.graph.get_node(node.layer.input[0], copy=True)
+        # The second input is looked up but its value is not used.
+        self.graph.get_node(node.layer.input[1], copy=True)
+        if not getattr(node, 'skip', False):
+            raise Exception("BatchToSpaceND is not supported")
+        node.fluid_code.add_layer(
+            "=", inputs=data, output=node, param_attr=None)
+    def SpaceToBatchND(self, node):
+        """Pass the input through when ``node.skip`` is set, otherwise raise.
+        Raises:
+            Exception: when the node has no truthy ``skip`` attribute.
+        """
+        data = self.graph.get_node(node.layer.input[0], copy=True)
+        # The second input is looked up but its value is not used.
+        self.graph.get_node(node.layer.input[1], copy=True)
+        if not getattr(node, 'skip', False):
+            raise Exception("SpaceToBatchND is not supported")
+        node.fluid_code.add_layer(
+            "=", inputs=data, output=node, param_attr=None)
+    def OneHot(self, node):
+        """Emit ``one_hot`` for a TF ``OneHot`` node.
+        ``depth``, ``on_value`` and ``off_value`` must all be ``Const``
+        nodes, and only an on value of 1 and an off value of 0 (within
+        1e-06) are accepted.
+        """
+        indices = self.graph.get_node(node.layer.input[0], copy=True)
+        depth = self.graph.get_node(node.layer.input[1], copy=True)
+        on_value = self.graph.get_node(node.layer.input[2], copy=True)
+        off_value = self.graph.get_node(node.layer.input[3], copy=True)
+        const_params = (
+            (depth, 'Parameter depth should be Const in OneHot'),
+            (on_value, 'Parameter on_value should be Const in OneHot'),
+            (off_value, 'Parameter off_value should be Const in OneHot'),
+        )
+        for param, message in const_params:
+            assert param.layer_type == 'Const', message
+        for param, _ in const_params:
+            self.add_omit_nodes(param.layer_name, node.layer_name)
+        depth = depth.value
+        on_value = on_value.value
+        off_value = off_value.value
+        assert math.fabs(on_value -
+                         1.0) < 1e-06, "on_value should be 1 in OneHot"
+        assert math.fabs(off_value -
+                         0.0) < 1e-06, "off_value should be 0 in OneHot"
+        node.fluid_code.add_layer(
+            "one_hot",
+            inputs=indices,
+            output=node,
+            param_attr={'depth': depth},
+            use_fluid=True)
+    def Pow(self, node):
+        """Emit ``pow`` for a TF ``Pow`` node.
+        The exponent input is always omitted from the graph; its value is
+        taken from the ``Const`` node, or from ``decoder.infer_tensor``
+        when it is not a ``Const``.
+        """
+        base = self.graph.get_node(node.layer.input[0], copy=True)
+        exponent = self.graph.get_node(node.layer.input[1], copy=True)
+        self.add_omit_nodes(exponent.layer_name, node.layer_name)
+        if exponent.layer_type == 'Const':
+            factor = exponent.value.tolist()
+        else:
+            factor = self.decoder.infer_tensor(exponent)
+        node.fluid_code.add_layer(
+            "pow", inputs=base, output=node, param_attr={'factor': factor})
+    def All(self, node):
+        """Emit ``reduce_all`` for a TF ``All`` node.
+        The reduction indices (second input) are omitted from the graph
+        and must come from a ``Const`` node.
+        """
+        data = self.graph.get_node(node.layer.input[0], copy=True)
+        axes_node = self.graph.get_node(node.layer.input[1], copy=True)
+        self.add_omit_nodes(axes_node.layer_name, node.layer_name)
+        assert axes_node.layer_type == "Const", "Only support Const parameter[reduce_idx]"
+        reduce_attr = {
+            "dim": axes_node.value.tolist(),
+            "keep_dim": node.get_attr("keep_dims"),
+        }
+        node.fluid_code.add_layer(
+            "reduce_all", inputs=data, output=node, param_attr=reduce_attr)
+    def GatherV2(self, node):
+        """Emit ``gather`` for a TF ``GatherV2`` node (axis 0 only).
+        When the index is not 1-D it is flattened with ``reshape`` before
+        the gather, and the result is reshaped to the index shape plus the
+        last dimension of the embeddings.
+        """
+        embeddings = self.graph.get_node(node.layer.input[0], copy=True)
+        index = self.graph.get_node(node.layer.input[1], copy=True)
+        axis = self.graph.get_node(node.layer.input[2], copy=True)
+        self.add_omit_nodes(axis.layer_name, node.layer_name)
+        assert axis.layer_type == 'Const', "Only support Const parameter[axis]"
+        axis = axis.value.tolist()
+        assert axis == 0, "Only support axis=0 in GatherV2 OP"
+        last_dim = embeddings.out_shapes[0][-1]
+        index_shape = index.out_shapes[0]
+        needs_flatten = len(index_shape) != 1
+        if needs_flatten:
+            index_name = "{}_reshape".format(index.layer_name)
+            node.fluid_code.add_layer(
+                "reshape",
+                inputs=index,
+                output=index_name,
+                param_attr={"shape": [-1]})
+        else:
+            index_name = index.layer_name
+        gather_inputs = {'input': embeddings, 'index': index_name}
+        node.fluid_code.add_layer(
+            "gather",
+            inputs=gather_inputs,
+            output=node,
+            param_attr={'overwrite': False})
+        if needs_flatten:
+            # Restore the index's leading dimensions on the gathered result.
+            node.fluid_code.add_layer(
+                "reshape",
+                inputs=node,
+                output=node,
+                param_attr={"shape": index_shape + [last_dim]})
+    def OneShotIterator(self, node):
+        """Handle ``OneShotIterator`` by delegating to ``Placeholder``."""
+        result = self.Placeholder(node)
+        return result
+    def IteratorV2(self, node):
+        """Emit one ``data`` layer per output shape of an ``IteratorV2`` node.
+        A note first initialises ``<layer_name>`` as a list of zeros; each
+        data layer is then written to ``<layer_name>[i]``.
+        """
+        # Numeric type ids found in the ``output_types`` attribute.
+        type_names = {
+            1: "float32",
+            3: "int32",
+            4: "uint8",
+            9: "int64",
+            10: "bool"
+        }
+        out_shapes = node.out_shapes
+        type_ids = node.layer.attr['output_types'].list.type
+        note = "{} = [0] * {}".format(node.layer_name, len(out_shapes))
+        node.fluid_code.add_note(note)
+        for idx, shape in enumerate(out_shapes):
+            slot_attr = {
+                'dtype': string(type_names[type_ids[idx]]),
+                'shape': shape,
+                'name': string("{}_{}".format(node.layer_name, idx)),
+                'append_batch_size': False
+            }
+            node.fluid_code.add_layer(
+                "data",
+                inputs=None,
+                output="{}[{}]".format(node.layer_name, idx),
+                param_attr=slot_attr)
--- a/x2paddle/x2paddle/optimizer/__init__.py
+++ b/x2paddle/x2paddle/optimizer/__init__.py
--- a/x2paddle/x2paddle/optimizer/caffe_optimizer.py
+++ b/x2paddle/x2paddle/optimizer/caffe_optimizer.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from x2paddle.decoder.caffe_decoder import CaffeGraph
+from x2paddle.core.util import *
+class CaffeOptimizer(object):
+    """Fusion passes over the generated code of a mapped Caffe graph."""
+    # Layer types whose generated layer can take an ``act`` attribute.
+    layers_with_act = ['Convolution', 'Deconvolution', 'InnerProduct']
+    # Activation layer types that can be folded into their parent.
+    activation_ops = ['ReLU', 'Sigmoid']
+    def __init__(self, mapper):
+        """Keep a reference to the graph of ``mapper``."""
+        self.graph = mapper.graph
+    def merge_bn_scale(self):
+        """Fold each ``Scale`` node into its ``BatchNorm`` parent.
+        The fused ``batch_norm`` layer is emitted on the ``Scale`` node with
+        ``<scale_name>_scale`` / ``<scale_name>_offset`` as parameter names.
+        The parent's code is cleared only when it has exactly one output.
+        """
+        for node_name in self.graph.topo_sort:
+            node = self.graph.get_node(node_name)
+            if node.layer_type != 'Scale':
+                continue
+            parent_node = self.graph.get_bottom_node(node, idx=0)
+            if parent_node.layer_type != 'BatchNorm':
+                continue
+            parent_fluid_layer = parent_node.fluid_code.layers[0]
+            input = parent_fluid_layer.inputs
+            # Work on a copy: when the parent has other consumers its layer
+            # is kept, and it must not pick up the Scale parameter names.
+            merged_attr = dict(parent_fluid_layer.param_attr)
+            merged_attr['param_attr'] = string(node.layer_name + '_scale')
+            merged_attr['bias_attr'] = string(node.layer_name + '_offset')
+            if len(parent_node.outputs) == 1:
+                parent_node.fluid_code.clear()
+            node.fluid_code.clear()
+            node.fluid_code.add_layer(
+                "batch_norm",
+                inputs=input,
+                output=node,
+                param_attr=merged_attr)
+    def merge_op_activation(self):
+        """Fold each activation node into a parent listed in ``layers_with_act``.
+        The parent's op is re-emitted on the activation node with ``act``
+        set to the lower-cased activation type. The parent's code is
+        cleared only when it has exactly one output.
+        """
+        for node_name in self.graph.topo_sort:
+            node = self.graph.get_node(node_name)
+            if node.layer_type not in self.activation_ops:
+                continue
+            parent_node = self.graph.get_bottom_node(node, idx=0)
+            if parent_node.layer_type not in self.layers_with_act:
+                continue
+            parent_fluid_layer = parent_node.fluid_code.layers[0]
+            input = parent_fluid_layer.inputs
+            # Work on a copy: a parent kept for other consumers must keep
+            # producing its un-activated output.
+            merged_attr = dict(parent_fluid_layer.param_attr)
+            merged_attr['act'] = string(node.layer_type.lower())
+            op = parent_fluid_layer.op
+            if len(parent_node.outputs) == 1:
+                parent_node.fluid_code.clear()
+            node.fluid_code.clear()
+            node.fluid_code.add_layer(
+                op, inputs=input, output=node, param_attr=merged_attr)
--- a/x2paddle/x2paddle/optimizer/onnx_optimizer.py
+++ b/x2paddle/x2paddle/optimizer/onnx_optimizer.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# TODO: remove useless nodes
+class ONNXOptimizer(object):
+    """Clean-up pass over the generated code of a mapped ONNX graph."""
+    def __init__(self, op_mapper):
+        """Keep references to ``op_mapper`` and its graph."""
+        self.op_mapper = op_mapper
+        self.graph = op_mapper.graph
+    def delete_redundance_code(self):
+        """Clear the code of omitted nodes that have no remaining consumer.
+        A node qualifies when its number of outputs does not exceed the
+        number of times it appears in ``op_mapper.omit_nodes``.
+        """
+        omitted = self.op_mapper.omit_nodes
+        for name in self.graph.topo_sort:
+            if name not in omitted:
+                continue
+            node = self.graph.get_node(name)
+            if len(node.outputs) > omitted.count(name):
+                continue
+            node.fluid_code.clear()
--- a/x2paddle/x2paddle/optimizer/tf_optimizer.py
+++ b/x2paddle/x2paddle/optimizer/tf_optimizer.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# TODO: remove useless nodes
+from x2paddle.op_mapper.tf_op_mapper import TFOpMapper
+from x2paddle.core.fluid_code import Layer
+from x2paddle.core.util import *
+import six
+import numpy
+import copy as cp
+def exist_act(node):
+    """Return True if any layer of ``node`` has a non-None ``act`` in its ``param_attr``."""
+    return any(
+        layer.param_attr is not None and
+        layer.param_attr.get("act", None) is not None
+        for layer in node.fluid_code.layers)
+class TFOptimizer(object):
+    activation_ops = {
+        'Relu': 'relu',
+        'Sigmoid': 'sigmoid',
+        'Relu6': 'relu6',
+        'swish_f32': 'swish'
+    }
+    layers_with_act = [
+        'Conv2D', 'BiasAdd', 'DepthwiseConv2dNative', 'Conv2DBackpropInput',
+        'FusedBatchNorm', 'conv2d', 'elementwise_add', 'conv2d_transpose',
+        'batch_norm'
+    ]
+    layers_with_bias = [
+        'Conv2D', 'DepthwiseConv2dNative', 'Conv2DBackpropInput', 'conv2d',
+        'conv2d_transpose'
+    ]
+    def __init__(self, op_mapper):
+        """Keep references to ``op_mapper`` and its graph."""
+        self.op_mapper, self.graph = op_mapper, op_mapper.graph
+    def delete_redundance_code(self):
+        """Remove omitted nodes that have no remaining consumer.
+        A node qualifies when its number of outputs does not exceed the
+        number of times it appears in ``op_mapper.omit_nodes``. Its code is
+        cleared, it is unlinked from its neighbours and it is deleted from
+        ``graph.node_map``.
+        """
+        omitted = self.op_mapper.omit_nodes
+        for name in self.graph.topo_sort:
+            if name not in omitted:
+                continue
+            node = self.graph.get_node(name)
+            if node is None:
+                continue
+            if len(node.outputs) > omitted.count(name):
+                continue
+            node.fluid_code.clear()
+            # remove node from graph
+            for producer_name in node.inputs:
+                producer = self.graph.get_node(producer_name)
+                producer.outputs.remove(name)
+            for consumer_name in node.outputs:
+                consumer = self.graph.get_node(consumer_name)
+                consumer.inputs.remove(name)
+            del self.graph.node_map[name]
+    def strip_graph(self):
+        """Delete every node that no graph output depends on.
+        Nodes reachable from ``graph.output_nodes`` by following ``inputs``
+        are kept. Every other node in ``graph.topo_sort`` is unlinked from
+        its neighbours and deleted from ``graph.node_map``.
+        """
+        visited_nodes = set()
+        # Explicit stack instead of recursion, so that very deep graphs do
+        # not hit the interpreter's recursion limit.
+        pending = list(self.graph.output_nodes)
+        while pending:
+            node_name = pending.pop()
+            if node_name in visited_nodes:
+                continue
+            visited_nodes.add(node_name)
+            pending.extend(self.graph.get_node(node_name).inputs)
+        for node_name in self.graph.topo_sort:
+            if node_name in visited_nodes:
+                continue
+            node = self.graph.get_node(node_name)
+            if node is None:
+                continue
+            # Detach the node from its producers and consumers first.
+            for in_name in node.inputs:
+                in_node = self.graph.get_node(in_name)
+                in_node.outputs.remove(node_name)
+            for out_name in node.outputs:
+                out_node = self.graph.get_node(out_name)
+                out_node.inputs.remove(node_name)
+            del self.graph.node_map[node_name]
+    def optimize_elementwise_op(self):
+        """Shrink or drop the leading ``expand`` layer of elementwise nodes.
+        Only nodes whose generated code is exactly two layers, the first
+        being ``expand``, are considered. ``expand_times`` entries are reset
+        to 1 for trailing dimensions where ``y`` has size 1; when every
+        entry ends up 1 the ``expand`` layer is removed.
+        """
+        elementwise_ops = [
+            'Sub', 'Add', 'RealDiv', 'Maximum', 'Mul', 'FloorDiv',
+            'GreaterEqual'
+        ]
+        # Ops for which the x and y operands may be swapped below.
+        revertable_ops = ['Add', 'Mul']
+        for node_name in self.graph.topo_sort:
+            node = self.graph.get_node(node_name)
+            if node is None:
+                continue
+            if node.layer_type in elementwise_ops:
+                if len(node.fluid_code.layers) != 2:
+                    continue
+                if node.fluid_code.layers[0].op != "expand":
+                    continue
+                expand_out = node.fluid_code.layers[0].output
+                expand_in = node.fluid_code.layers[0].inputs
+                # This is the list stored in the expand layer's param_attr,
+                # so the writes further down modify the layer itself.
+                expand_times = node.fluid_code.layers[0].param_attr[
+                    "expand_times"]
+                x = node.fluid_code.layers[1].inputs["x"]
+                y = node.fluid_code.layers[1].inputs["y"]
+                # A string operand presumably names the expand output --
+                # TODO confirm. If it is x and the op allows it, swap the
+                # operands so that it becomes y.
+                if isinstance(
+                        x,
+                        six.string_types) and node.layer_type in revertable_ops:
+                    node.fluid_code.layers[1].inputs["y"] = x
+                    node.fluid_code.layers[1].inputs["x"] = y
+                    x = node.fluid_code.layers[1].inputs["x"]
+                    y = expand_in
+                elif isinstance(y, six.string_types):
+                    y = expand_in
+                else:
+                    continue
+                x_shape = x.out_shapes[0]
+                y_shape = y.out_shapes[0]
+                if len(x_shape) != len(y_shape):
+                    continue
+                # 4-D shapes are reordered with [0, 3, 1, 2] before comparing.
+                if len(x_shape) == 4:
+                    x_shape = [x_shape[i] for i in [0, 3, 1, 2]]
+                    y_shape = [y_shape[i] for i in [0, 3, 1, 2]]
+                # Walk from the last axis backwards; reset expand_times to 1
+                # while y's dimension is 1, and stop at the first that is not.
+                continue_flag = True
+                for i in range(len(x_shape)):
+                    if y_shape[-1 * (i + 1)] == 1 and continue_flag:
+                        expand_times[-1 * (i + 1)] = 1
+                    else:
+                        continue_flag = False
+                # Every factor is 1: feed the un-expanded input straight to
+                # the elementwise layer and drop the expand layer.
+                if expand_times.count(1) == len(expand_times):
+                    node.fluid_code.layers[1].inputs["y"] = expand_in
+                    del node.fluid_code.layers[0]
+    def merge_activation(self):
+        """Fold activation nodes into the layer that produces their input.
+        An activation is merged only when its producer's type is listed in
+        ``layers_with_act``, the producer has generated code, the producer's
+        last layer has no ``act`` set yet, and the producer has exactly one
+        output. The activation node is then removed from the graph.
+        """
+        candidates = list()
+        for name in self.graph.topo_sort:
+            current = self.graph.get_node(name)
+            if current is not None and current.layer_type in self.activation_ops:
+                candidates.append(name)
+        for act_name in candidates:
+            act_node = self.graph.get_node(act_name)
+            producer = self.graph.get_node(act_node.inputs[0])
+            if producer.layer_type not in self.layers_with_act:
+                continue
+            layers = producer.fluid_code.layers
+            if len(layers) == 0:
+                continue
+            last_attr = layers[-1].param_attr
+            if 'act' in last_attr and last_attr['act'] is not None:
+                continue
+            if len(producer.outputs) != 1:
+                continue
+            # First layer whose op can take an activation; -1 (the last
+            # layer) when none matches.
+            target = next((pos for pos, layer in enumerate(layers)
+                           if layer.op in self.layers_with_act), -1)
+            layers[target].param_attr['act'] = string(
+                self.activation_ops[act_node.layer_type])
+            # The producer now writes to the activation's output.
+            layers[-1].output = act_node.fluid_code.layers[0].output
+            self.graph.remove_node(act_name)
+    def merge_bias(self):
+        """Fold ``BiasAdd`` nodes into the ``bias_attr`` of their producer.
+
+        A ``BiasAdd`` node is merged into its first input node when that node's
+        type is listed in ``self.layers_with_bias``, it has exactly one output,
+        it has emitted layers, the two nodes do not both carry an activation,
+        and the target layer's ``bias_attr`` is still falsy. On a merge the
+        bias node's code is cleared, the node is removed from the graph and
+        recorded in ``self.graph.identity_map`` as an alias of the producer.
+        """
+        for node_name in self.graph.topo_sort:
+            bias_node = self.graph.get_node(node_name)
+            if bias_node is None or bias_node.layer_type != "BiasAdd":
+                continue
+            producer = self.graph.get_node(bias_node.inputs[0])
+            if producer.layer_type not in self.layers_with_bias:
+                continue
+            if len(producer.outputs) != 1:
+                continue
+            producer_layers = producer.fluid_code.layers
+            if not producer_layers:
+                continue
+            bias_layers = bias_node.fluid_code.layers
+            # Presence of the key is enough here, even if its value is None.
+            bias_with_act = 'act' in bias_layers[-1].param_attr
+            # First layer whose op is in layers_with_bias; -1 (the last layer)
+            # when none matches.
+            target = next(
+                (pos for pos, layer in enumerate(producer_layers)
+                 if layer.op in self.layers_with_bias), -1)
+            target_attr = producer_layers[target].param_attr
+            layer_with_act = target_attr.get('act') is not None
+            if bias_with_act and layer_with_act:
+                continue
+            if target_attr['bias_attr']:
+                continue
+            target_attr['bias_attr'] = string(bias_node.inputs[1])
+            producer_layers[-1].output = bias_layers[0].output
+            if bias_with_act:
+                target_attr['act'] = bias_layers[-1].param_attr['act']
+            bias_node.fluid_code.clear()
+            self.graph.remove_node(bias_node.layer_name)
+            self.graph.identity_map[
+                bias_node.layer_name] = producer.layer_name
+    def remove_transpose(self):
+        """Strip redundant transpose pairs from the generated layer code.
+
+        Four passes walk ``self.graph.topo_sort``. Each pass reads nodes from
+        a deep copy of the graph (``graph_copy``) to decide what may be
+        removed, and applies the edits to the nodes of ``self.graph``:
+
+        1. Nodes in ``can_be_optimized_ops`` whose last layer is a transpose
+           with perm [0, 2, 3, 1]: that transpose is dropped together with
+           the leading [0, 3, 1, 2] transpose of every consumer.
+        2. The same for element-wise nodes, whose consumers may hold the
+           matching transpose in layer 0 or layer 1.
+        3. Pass 1 again on a fresh copy (``Placeholder`` excluded), this time
+           also accepting element-wise consumers.
+        4. Element-wise nodes in which transposes outnumber the other
+           non-expand layers and both operands are transposed: the op is fed
+           the untransposed operands and one transpose is appended after it.
+        """
+        # Snapshot for the read-only checks of passes 1 and 2.
+        graph_copy = cp.deepcopy(self.graph)
+        # NOTE(review): 'GreateerEqual' looks like a misspelling of the
+        # TensorFlow op name 'GreaterEqual'; as written it presumably never
+        # matches a node type -- confirm before changing.
+        elementwise_ops = [
+            'Sub', 'Add', 'RealDiv', 'Maximum', 'Mul', 'FloorDiv',
+            'GreateerEqual'
+        ]
+        can_be_optimized_ops = [
+            'Conv2D', 'MaxPool', 'FusedBatchNorm', 'DepthwiseConv2dNative',
+            'AvgPool', 'Pad', 'Conv2DBackpropInput', 'ResizeNearestNeighbor',
+            'Placeholder', 'Relu', 'Relu6', 'Abs', 'Sigmoid', 'Exp', 'Rsqrt',
+            'swish_f32', 'LeakyRelu', 'Cast', 'Tanh'
+        ]
+        # These ops may have one more Variable input
+        can_be_optimized_special_ops = ['ResizeBilinear']
+        # Pass 1: optimizable nodes ending in a [0, 2, 3, 1] transpose.
+        for node_name in self.graph.topo_sort:
+            node = graph_copy.get_node(node_name)
+            if node is None:
+                continue
+            if node.layer_type in can_be_optimized_ops:
+                if node.fluid_code.layers[
+                        -1].op != "transpose" or node.fluid_code.layers[
+                            -1].param_attr["perm"] != [0, 2, 3, 1]:
+                    continue
+                can_be_removed = True
+                output_names = node.outputs
+                # Every consumer must start with the inverse transpose and
+                # must be neither element-wise nor a special op.
+                for out_name in output_names:
+                    out_node = graph_copy.get_node(out_name)
+                    # NOTE(review): nothing in this method assigns a
+                    # `can_be_removed` attribute to a node (only `removed` is
+                    # set below) -- confirm where it is expected to come from.
+                    if hasattr(out_node, "can_be_removed"):
+                        if not out_node.can_be_removed:
+                            can_be_removed = False
+                            break
+                    elif out_node.fluid_code.layers[
+                            0].op != "transpose" or out_node.fluid_code.layers[
+                                0].param_attr["perm"] != [0, 3, 1, 2]:
+                        can_be_removed = False
+                        break
+                    elif out_node.layer_type in elementwise_ops or out_node.layer_type in can_be_optimized_special_ops:
+                        can_be_removed = False
+                        break
+                if can_be_removed and len(node.fluid_code.layers) > 1:
+                    true_node = self.graph.get_node(node_name)
+                    if true_node.layer_type == "Placeholder":
+                        # Keep graph.input_nodes pointing at the name that the
+                        # remaining layer is about to take over.
+                        index = self.graph.input_nodes.index(
+                            true_node.fluid_code.layers[-2].output)
+                        if isinstance(true_node.fluid_code.layers[-1].output,
+                                      str):
+                            self.graph.input_nodes[
+                                index] = true_node.fluid_code.layers[-1].output
+                        else:
+                            self.graph.input_nodes[
+                                index] = true_node.fluid_code.layers[
+                                    -1].output.layer_name
+                    # The layer before the transpose inherits its output, then
+                    # the transpose itself is dropped.
+                    true_node.fluid_code.layers[
+                        -2].output = true_node.fluid_code.layers[-1].output
+                    # Set on the copy's node, not on true_node.
+                    node.removed = True
+                    del true_node.fluid_code.layers[-1]
+                    # Consumers: rewire layer 1 to the transpose's input and
+                    # drop the leading transpose.
+                    for out_name in output_names:
+                        out_node = self.graph.get_node(out_name)
+                        out_node.fluid_code.layers[
+                            1].inputs = out_node.fluid_code.layers[0].inputs
+                        del out_node.fluid_code.layers[0]
+        # Pass 2: element-wise nodes ending in a [0, 2, 3, 1] transpose. Still
+        # reads from the snapshot taken before pass 1.
+        for node_name in self.graph.topo_sort:
+            node = graph_copy.get_node(node_name)
+            if node is None:
+                continue
+            if node.layer_type in elementwise_ops:
+                can_be_removed = True
+                if node.fluid_code.layers[
+                        -1].op != "transpose" or node.fluid_code.layers[
+                            -1].param_attr["perm"] != [0, 2, 3, 1]:
+                    continue
+                can_be_removed = True
+                output_names = node.outputs
+                for out_name in output_names:
+                    out_node = graph_copy.get_node(out_name)
+                    if len(out_node.fluid_code.layers) < 3:
+                        can_be_removed = False
+                        break
+                    if hasattr(out_node, "can_be_removed"):
+                        if not out_node.can_be_removed:
+                            can_be_removed = False
+                            break
+                    if out_node.layer_type in can_be_optimized_ops:
+                        if out_node.fluid_code.layers[
+                                0].op != "transpose" or out_node.fluid_code.layers[
+                                    0].param_attr["perm"] != [0, 3, 1, 2]:
+                            can_be_removed = False
+                            break
+                    elif out_node.layer_type in elementwise_ops:
+                        # An element-wise consumer may transpose either
+                        # operand; any transpose among its first two layers
+                        # must use perm [0, 3, 1, 2].
+                        if out_node.fluid_code.layers[
+                                0].op != "transpose" and out_node.fluid_code.layers[
+                                    1].op != "transpose":
+                            can_be_removed = False
+                            break
+                        if out_node.fluid_code.layers[0].op == "transpose":
+                            if out_node.fluid_code.layers[0].param_attr[
+                                    "perm"] != [0, 3, 1, 2]:
+                                can_be_removed = False
+                                break
+                        if out_node.fluid_code.layers[1].op == "transpose":
+                            if out_node.fluid_code.layers[1].param_attr[
+                                    "perm"] != [0, 3, 1, 2]:
+                                can_be_removed = False
+                                break
+                if can_be_removed and len(node.fluid_code.layers) > 1:
+                    true_node = self.graph.get_node(node_name)
+                    true_node.fluid_code.layers[
+                        -2].output = true_node.fluid_code.layers[-1].output
+                    del true_node.fluid_code.layers[-1]
+                    for out_name in output_names:
+                        out_node = self.graph.get_node(out_name)
+                        if out_node.layer_type in can_be_optimized_ops:
+                            out_node.fluid_code.layers[
+                                1].inputs = out_node.fluid_code.layers[0].inputs
+                            del out_node.fluid_code.layers[0]
+                        elif out_node.layer_type in elementwise_ops:
+                            # NOTE(review): `in` is a containment test, not
+                            # equality; if both sides are strings this is a
+                            # substring match -- confirm that is intended.
+                            if out_node.inputs[0] in node.layer_name:
+                                if out_node.fluid_code.layers[
+                                        1].op == 'transpose':
+                                    out_node.fluid_code.layers[2].inputs[
+                                        'x'] = out_node.fluid_code.layers[
+                                            0].inputs
+                                    del out_node.fluid_code.layers[0]
+                                else:
+                                    out_node.fluid_code.layers[1].inputs[
+                                        'x'] = out_node.fluid_code.layers[
+                                            0].inputs
+                                    del out_node.fluid_code.layers[0]
+                            elif out_node.inputs[1] in node.layer_name:
+                                if out_node.fluid_code.layers[
+                                        1].op == 'transpose':
+                                    out_node.fluid_code.layers[2].inputs[
+                                        'y'] = out_node.fluid_code.layers[
+                                            1].inputs
+                                    del out_node.fluid_code.layers[1]
+                                else:
+                                    out_node.fluid_code.layers[1].inputs[
+                                        'y'] = out_node.fluid_code.layers[
+                                            0].inputs
+                                    del out_node.fluid_code.layers[0]
+        # Pass 3: fresh snapshot, so the edits of passes 1 and 2 are visible.
+        # Optimizable nodes again (Placeholder excluded), now also accepting
+        # element-wise consumers.
+        graph_copy = cp.deepcopy(self.graph)
+        for node_name in self.graph.topo_sort:
+            node = graph_copy.get_node(node_name)
+            if node is None or len(node.fluid_code.layers) < 2:
+                continue
+            if node.layer_type in can_be_optimized_ops and node.layer_type != "Placeholder":
+                if node.fluid_code.layers[
+                        -1].op != "transpose" or node.fluid_code.layers[
+                            -1].param_attr["perm"] != [0, 2, 3, 1]:
+                    continue
+                can_be_removed = True
+                output_names = node.outputs
+                for out_name in output_names:
+                    out_node = graph_copy.get_node(out_name)
+                    if hasattr(out_node, "can_be_removed"):
+                        if not out_node.can_be_removed:
+                            can_be_removed = False
+                            break
+                    if len(out_node.fluid_code.layers) < 2:
+                        can_be_removed = False
+                        break
+                    if out_node.layer_type in can_be_optimized_ops:
+                        if out_node.fluid_code.layers[
+                                0].op != "transpose" or out_node.fluid_code.layers[
+                                    0].param_attr["perm"] != [0, 3, 1, 2]:
+                            can_be_removed = False
+                            break
+                    elif out_node.layer_type in elementwise_ops:
+                        if out_node.fluid_code.layers[
+                                0].op != "transpose" and out_node.fluid_code.layers[
+                                    1].op != "transpose":
+                            can_be_removed = False
+                            break
+                        # An expand among the first two layers blocks removal.
+                        if out_node.fluid_code.layers[
+                                0].op == "expand" or out_node.fluid_code.layers[
+                                    1].op == "expand":
+                            can_be_removed = False
+                            break
+                        if out_node.fluid_code.layers[0].op == "transpose":
+                            if out_node.fluid_code.layers[0].param_attr[
+                                    "perm"] != [0, 3, 1, 2]:
+                                can_be_removed = False
+                                break
+                        if out_node.fluid_code.layers[1].op == "transpose":
+                            if out_node.fluid_code.layers[1].param_attr[
+                                    "perm"] != [0, 3, 1, 2]:
+                                can_be_removed = False
+                                break
+                    elif out_node.layer_type not in elementwise_ops and out_node.layer_type not in can_be_optimized_ops:
+                        can_be_removed = False
+                        break
+                if can_be_removed:
+                    true_node = self.graph.get_node(node_name)
+                    if len(true_node.fluid_code.layers) < 2:
+                        continue
+                    true_node.fluid_code.layers[
+                        -2].output = true_node.fluid_code.layers[-1].output
+                    del true_node.fluid_code.layers[-1]
+                    for out_name in output_names:
+                        out_node = self.graph.get_node(out_name)
+                        if out_node.layer_type in can_be_optimized_ops:
+                            out_node.fluid_code.layers[
+                                1].inputs = out_node.fluid_code.layers[0].inputs
+                            del out_node.fluid_code.layers[0]
+                        elif out_node.layer_type in elementwise_ops:
+                            # NOTE(review): containment test again (see the
+                            # same pattern in pass 2) -- confirm intent.
+                            if out_node.inputs[0] in node.layer_name:
+                                if out_node.fluid_code.layers[
+                                        1].op == 'transpose':
+                                    # With three leading transposes the op
+                                    # layer sits at index 3, otherwise at 2.
+                                    if out_node.fluid_code.layers[
+                                            2].op == 'transpose':
+                                        out_node.fluid_code.layers[3].inputs[
+                                            'x'] = out_node.fluid_code.layers[
+                                                0].inputs
+                                    else:
+                                        out_node.fluid_code.layers[2].inputs[
+                                            'x'] = out_node.fluid_code.layers[
+                                                0].inputs
+                                    del out_node.fluid_code.layers[0]
+                                else:
+                                    out_node.fluid_code.layers[1].inputs[
+                                        'x'] = out_node.fluid_code.layers[
+                                            0].inputs
+                                    del out_node.fluid_code.layers[0]
+                            elif out_node.inputs[1] in node.layer_name:
+                                if out_node.fluid_code.layers[
+                                        1].op == 'transpose':
+                                    out_node.fluid_code.layers[2].inputs[
+                                        'y'] = out_node.fluid_code.layers[
+                                            1].inputs
+                                    del out_node.fluid_code.layers[1]
+                                else:
+                                    out_node.fluid_code.layers[1].inputs[
+                                        'y'] = out_node.fluid_code.layers[
+                                            0].inputs
+                                    del out_node.fluid_code.layers[0]
+        # Pass 4: fresh snapshot. Element-wise nodes whose operands are both
+        # transposed get one transpose after the op instead.
+        graph_copy = cp.deepcopy(self.graph)
+        for node_name in self.graph.topo_sort:
+            node = graph_copy.get_node(node_name)
+            if node is None:
+                continue
+            if node.layer_type in elementwise_ops:
+                can_be_removed = True
+                if len(node.fluid_code.layers) < 3:
+                    continue
+                # Count transposes against all other layers; expand layers
+                # count for neither side.
+                numTranspose = 0
+                numNotTranspose = 0
+                for i in range(len(node.fluid_code.layers)):
+                    if node.fluid_code.layers[i].op == 'transpose':
+                        numTranspose += 1
+                    elif node.fluid_code.layers[i].op != 'expand':
+                        numNotTranspose += 1
+                if numTranspose > numNotTranspose:
+                    if node.fluid_code.layers[0].op == 'expand':
+                        # Layout: expand, transpose, transpose, op.
+                        if node.fluid_code.layers[
+                                1].op != 'transpose' or node.fluid_code.layers[
+                                    2].op != 'transpose':
+                            continue
+                        else:
+                            true_node = self.graph.get_node(node_name)
+                            true_node.fluid_code.layers[3].inputs[
+                                'x'] = true_node.fluid_code.layers[1].inputs
+                            true_node.fluid_code.layers[3].inputs[
+                                'y'] = true_node.fluid_code.layers[2].inputs
+                            # New transpose that reads and writes the op's
+                            # output.
+                            l = Layer()
+                            l.op = 'transpose'
+                            l.inputs = true_node.fluid_code.layers[3].output
+                            l.param_attr = {'perm': [0, 3, 1, 2]}
+                            if isinstance(l.inputs, six.string_types):
+                                l.output = l.inputs
+                            else:
+                                l.output = l.inputs.layer_name
+                            true_node.fluid_code.layers.append(l)
+                            # Deleting index 1 twice removes both operand
+                            # transposes.
+                            del true_node.fluid_code.layers[1]
+                            del true_node.fluid_code.layers[1]
+                    else:
+                        # Layout: transpose, transpose, op.
+                        if node.fluid_code.layers[
+                                0].op != 'transpose' or node.fluid_code.layers[
+                                    1].op != 'transpose':
+                            continue
+                        else:
+                            true_node = self.graph.get_node(node_name)
+                            true_node.fluid_code.layers[2].inputs[
+                                'x'] = true_node.fluid_code.layers[0].inputs
+                            true_node.fluid_code.layers[2].inputs[
+                                'y'] = true_node.fluid_code.layers[1].inputs
+                            l = Layer()
+                            l.op = 'transpose'
+                            l.inputs = true_node.fluid_code.layers[2].output
+                            l.param_attr = {'perm': [0, 3, 1, 2]}
+                            # NOTE(review): unlike the expand branch above,
+                            # there is no six.string_types check here, so a
+                            # string output would raise AttributeError --
+                            # confirm that cannot happen.
+                            l.output = l.inputs.layer_name
+                            true_node.fluid_code.layers.append(l)
+                            # Deleting index 0 twice removes both operand
+                            # transposes.
+                            del true_node.fluid_code.layers[0]
+                            del true_node.fluid_code.layers[0]
+    def make_nchw_input_output(self):
+        """Give 4-D NHWC inputs an NCHW feed and untranspose graph outputs.
+
+        For every graph input that is 4-D with ``tf_data_format == "NHWC"``,
+        the ``shape`` attribute of its first layer is reordered with axes
+        [0, 3, 1, 2], that layer's output is renamed to ``"nhwc_" + name``, a
+        transpose with perm [0, 2, 3, 1] producing the node is appended, and
+        the entry in ``self.graph.input_nodes`` is replaced by the new name.
+
+        For every graph output whose ``layer_type`` is not ``"transpose"`` but
+        whose last layer is a transpose, that layer is deleted and the layer
+        before it takes over the output name.
+        """
+        axis_order = [0, 3, 1, 2]
+        for slot, name in enumerate(self.graph.input_nodes):
+            node = self.graph.get_node(name)
+            if len(node.out_shapes[0]) != 4 or node.tf_data_format != "NHWC":
+                continue
+            feed_layer = node.fluid_code.layers[0]
+            old_shape = feed_layer.param_attr["shape"]
+            feed_layer.param_attr["shape"] = [
+                old_shape[axis] for axis in axis_order
+            ]
+            feed_name = "nhwc_" + name
+            feed_layer.output = feed_name
+            node.fluid_code.add_layer(
+                "transpose",
+                inputs=feed_name,
+                output=node,
+                param_attr={"perm": [0, 2, 3, 1]})
+            self.graph.input_nodes[slot] = feed_name
+        for name in self.graph.output_nodes:
+            node = self.graph.get_node(name)
+            if node.layer_type == "transpose":
+                continue
+            layers = node.fluid_code.layers
+            if layers[-1].op == "transpose":
+                layers[-2].output = name
+                del layers[-1]
+    def optimize_sub_graph(self):
+        """Run the sub-graph merge passes one after another, in fixed order."""
+        # Each pass is looked up immediately before it is called, in the order
+        # batch norm, PReLU, scale, affine channel.
+        for pass_name in ("merge_batch_norm", "merge_prelu", "merge_scale",
+                          "merge_affine_channel"):
+            getattr(self, pass_name)()
+    def merge_batch_norm(self):
+        """Collapse a decomposed batch-norm sub-graph into one ``batch_norm``.
+
+        The pattern matched, rooted at an ``Add`` node, is::
+
+            Add(Mul(x, scale), Sub(beta, Mul(moving_mean, scale)))
+            scale = Mul(Rsqrt(Add(moving_variance, epsilon)), gamma)
+
+        ``x`` must be 4-D with a non-negative last dimension, the five
+        parameters must be ``Const`` nodes (``epsilon`` of size 1, the others
+        as large as the last dimension of ``x``), ``exist_act`` must be false
+        for the intermediate nodes, and every matched node must have exactly
+        one output except ``scale``, which must have two. On a match the root
+        node becomes a ``FusedBatchNorm`` node emitting a single
+        ``batch_norm`` layer and the intermediate nodes are deleted from
+        ``self.graph.node_map``.
+        """
+        for name in self.graph.topo_sort:
+            node = self.graph.get_node(name)
+            if node is None or node.layer_type != "Add":
+                continue
+            # Root: Add(scaled_x, shift)
+            root_inputs = [self.graph.get_node(src) for src in node.inputs]
+            scaled_x = root_inputs[0]
+            if scaled_x.layer_type != "Mul":
+                continue
+            shift = root_inputs[1]
+            if shift.layer_type != "Sub":
+                continue
+            if exist_act(scaled_x) or exist_act(shift):
+                continue
+            scaled_x_inputs = [
+                self.graph.get_node(src) for src in scaled_x.inputs
+            ]
+            shift_inputs = [self.graph.get_node(src) for src in shift.inputs]
+            # scaled_x: Mul(x, scale)
+            x_node = scaled_x_inputs[0]
+            if len(x_node.out_shapes[0]) != 4:
+                continue
+            scale = scaled_x_inputs[1]
+            if scale.layer_type != "Mul" or exist_act(scale):
+                continue
+            # shift: Sub(beta, mean_term)
+            beta = shift_inputs[0]
+            if beta.layer_type != "Const":
+                continue
+            mean_term = shift_inputs[1]
+            if mean_term.layer_type != "Mul" or exist_act(mean_term):
+                continue
+            # scale: Mul(rsqrt, gamma)
+            scale_inputs = [self.graph.get_node(src) for src in scale.inputs]
+            rsqrt = scale_inputs[0]
+            if rsqrt.layer_type != "Rsqrt":
+                continue
+            gamma = scale_inputs[1]
+            if gamma.layer_type != "Const":
+                continue
+            # mean_term: Mul(moving_mean, scale) -- must reuse the same scale
+            mean_term_inputs = [
+                self.graph.get_node(src) for src in mean_term.inputs
+            ]
+            moving_mean = mean_term_inputs[0]
+            if moving_mean.layer_type != "Const":
+                continue
+            if mean_term_inputs[1].layer_name != scale.layer_name:
+                continue
+            # rsqrt operand: Add(moving_variance, epsilon)
+            var_eps = self.graph.get_node(rsqrt.inputs[0])
+            if var_eps.layer_type != "Add" or exist_act(var_eps):
+                continue
+            var_eps_inputs = [
+                self.graph.get_node(src) for src in var_eps.inputs
+            ]
+            moving_variance = var_eps_inputs[0]
+            if moving_variance.layer_type != "Const":
+                continue
+            epsilon = var_eps_inputs[1]
+            if epsilon.layer_type != "Const":
+                continue
+            # Required number of outputs per matched node; only scale is
+            # allowed two.
+            expected_fanout = (
+                (scaled_x, 1), (shift, 1), (scale, 2), (beta, 1),
+                (mean_term, 1), (rsqrt, 1), (gamma, 1), (moving_mean, 1),
+                (var_eps, 1), (moving_variance, 1), (epsilon, 1))
+            if any(len(member.outputs) != count
+                   for member, count in expected_fanout):
+                continue
+            channels = x_node.out_shapes[0][3]
+            if channels < 0:
+                continue
+            expected_size = (
+                (moving_variance, channels), (epsilon, 1), (gamma, channels),
+                (moving_mean, channels), (beta, channels))
+            if any(const.value.size != size for const, size in expected_size):
+                continue
+            # Rewire x -> root and re-emit the root as a single batch_norm.
+            slot = x_node.outputs.index(scaled_x.layer_name)
+            x_node.outputs[slot] = node.layer_name
+            node.layer_type = "FusedBatchNorm"
+            node.inputs = [x_node.layer_name]
+            # Keep whatever activation the root Add layer already carried.
+            act = node.fluid_code.layers[-1].param_attr.get("act", None)
+            node.fluid_code.clear()
+            attr = {
+                "epsilon": epsilon.value,
+                "param_attr": string(gamma.layer_name),
+                "bias_attr": string(beta.layer_name),
+                "moving_mean_name": string(moving_mean.layer_name),
+                "moving_variance_name": string(moving_variance.layer_name),
+                "is_test": True,
+                "act": act
+            }
+            node.fluid_code.add_layer(
+                "batch_norm",
+                inputs=x_node.fluid_code.layers[-1].output,
+                output=node,
+                param_attr=attr)
+            # Remove the intermediate nodes (and moving_mean) from the graph.
+            for dead in (scaled_x, shift, scale, mean_term, rsqrt,
+                         moving_mean, var_eps):
+                del self.graph.node_map[dead.layer_name]
+    def merge_prelu(self):
+        for i, name in enumerate(self.graph.topo_sort):
+            node = self.graph.get_node(name)
+            if node is None:
+                continue
+            is_prelu = True
+            if node.layer_type == "Add":
+                if exist_act(node):
+                    is_prelu = False
+                    continue
+                in_nodes0 = [
+                    self.graph.get_node(in_name) for in_name in node.inputs
+                ]
+                if in_nodes0[0].layer_type != "Relu" or in_nodes0[
+                        1].layer_type != "Mul":
+                    is_prelu = False
+                    continue
+                if exist_act(in_nodes0[1]):
+                    is_prelu = False
+                    continue
+                if len(in_nodes0[0].outputs) != 1 or len(in_nodes0[1]
+                                                         .outputs) != 1:
+                    is_prelu = False
+                    continue
+                in_nodes1 = self.graph.get_node(in_nodes0[0].inputs[0])
+                in_nodes2 = [
+                    self.graph.get_node(in_name)
+                    for in_name in in_nodes0[1].inputs
+                ]
+                if in_nodes2[1].layer_type != "Const" or numpy.fabs(in_nodes2[
+                        1].value - 0.5) > 1e-06:
+                    is_prelu = False
+                    continue
+                if in_nodes2[0].layer_type != "Mul":
+                    is_prelu = False
+                    continue
+                if exist_act(in_nodes2[0]):
+                    is_prelu = False
+                    continue
+                if len(in_nodes2[1].outputs) != 1 or len(in_nodes2[0]
+                                                         .outputs) != 1:
+                    is_prelu = False
+                    continue
+                in_nodes3 = [
+                    self.graph.get_node(in_name)
+                    for in_name in in_nodes2[0].inputs
+                ]
+                if in_nodes3[0].layer_type != "Const" or in_nodes3[
+                        1].layer_type != "Sub":
+                    is_prelu = False
+                    continue
+                if exist_act(in_nodes3[1]):
+                    is_prelu = False
+                    continue
+                if len(in_nodes3[0].outputs) != 1 or len(in_nodes3[1]
+                                                         .outputs) != 1:
+                    is_prelu = False
+                    continue
+                in_nodes4 = [
+                    self.graph.get_node(in_name)
+                    for in_name in in_nodes3[1].inputs
+                ]
+                if in_nodes4[0].layer_name != in_nodes1.layer_name or in_nodes4[
+                        1].layer_type != "Abs":
+                    is_prelu = False
+                    continue
+                if len(in_nodes4[1].outputs) != 1:
+                    is_prelu = False
+                    continue
+                in_nodes5 = self.graph.get_node(in_nodes4[1].inputs[0])
+                if in_nodes5.layer_name != in_nodes1.layer_name:
+                    is_prelu = False
+                    continue
+                if len(in_nodes0[0].outputs) != 1:
+                    is_prelu = false
+                    continue
+                if len(in_nodes0[1].outputs) != 1:
+                    is_prelu = False
+                    continue
+                if len(in_nodes1.outputs) < 3:
+                    is_prelu = False
+                    continue
+                if len(in_nodes2[0].outputs) != 1:
+                    is_prelu = false
+                    continue
+                if len(in_nodes2[1].outputs) != 1:
+                    is_prelu = False
+                    continue
+                if len(in_nodes3[0].outputs) != 1:
+                    is_prelu = False
+                    continue
+                if len(in_nodes3[1].outputs) != 1:
+                    is_prelu = false
+                    continue
+                if len(in_nodes4[1].outputs) != 1:
+                    is_prelu = False
+                    continue
+                mode = None
+                in_shape = in_nodes1.out_shapes[0]
+                if in_shape == list(in_nodes3[0].value.shape):
+                    mode = "element"
+                elif len(in_nodes3[0].value.shape) == 0:
+                    mode = "all"
+                elif len(in_nodes3[0].value.shape) == 1 and in_nodes3[
+                        0].value.shape[0] == 1:
+                    mode = "all"
+                elif len(in_shape) == 4 and len(in_nodes3[
+                        0].value.shape) == 1 and in_nodes3[0].value.shape[
+                            0] == in_shape[-1]:
+                    mode = "channel"
+                    weight = self.op_mapper.weights[in_nodes3[0].layer_name]
+                    weight = numpy.expand_dims(weight, 0)
+                    weight = numpy.expand_dims(weight, 2)
+                    weight = numpy.expand_dims(weight, 3)
+                    self.op_mapper.weights[in_nodes3[0].layer_name] = weight
+                    # fix bug in Paddle1.8.3 and may change in next version.
+                    # self.op_mapper.weights[in_nodes3[0].layer_name +
+                    #                        '_1'] = weight.reshape(1, -1)
+                    in_nodes3[0].fluid_code.layers[0].param_attr["shape"] = [
+                        1, in_shape[-1], 1, 1
+                    ]
+                else:
+                    is_prelu = False
+                    continue
+                if is_prelu:
+                    index = in_nodes1.outputs.index(in_nodes0[0].layer_name)
+                    del in_nodes1.outputs[index]
+                    index = in_nodes1.outputs.index(in_nodes3[1].layer_name)
+                    del in_nodes1.outputs[index]
+                    index = in_nodes1.outputs.index(in_nodes4[1].layer_name)
+                    del in_nodes1.outputs[index]
+                    in_nodes1.outputs.append(node.layer_name)
+                    node.layer_type = "Prelu"
+                    node.inputs = [in_nodes1.layer_name]
+                    act = node.fluid_code.layers[-1].param_attr.get("act", None)
+                    node.fluid_code.clear()
+                    attr = {
+                        "mode": string(mode),
+                        "param_attr": string(in_nodes3[0].layer_name)
+                    }
+                    node.fluid_code.add_layer(
+                        "prelu",
+                        inputs=in_nodes1.fluid_code.layers[-1].output,
+                        output=node,
+                        param_attr=attr)
+                del self.graph.node_map[in_nodes0[0].layer_name]
+                del self.graph.node_map[in_nodes0[1].layer_name]
+                del self.graph.node_map[in_nodes2[0].layer_name]
+                del self.graph.node_map[in_nodes2[1].layer_name]
+                del self.graph.node_map[in_nodes3[1].layer_name]
+                del self.graph.node_map[in_nodes4[1].layer_name]
+    def merge_scale(self):
+        for i, name in enumerate(self.graph.topo_sort):
+            node = self.graph.get_node(name)
+            if node is None:
+                continue
+            is_scale = True
+            if node.layer_type == "Sub":
+                in_nodes0 = [
+                    self.graph.get_node(in_name) for in_name in node.inputs
+                ]
+                if in_nodes0[0].layer_type != "Mul" or in_nodes0[
+                        1].layer_type != "Const" or in_nodes0[
+                            1].value.size != 1:
+                    is_scale = False
+                    continue
+                if exist_act(in_nodes0[0]):
+                    is_scale = False
+                    continue
+                if len(in_nodes0[0].outputs) != 1 or len(in_nodes0[1]
+                                                         .outputs) != 1:
+                    is_scale = False
+                    continue
+                in_nodes1 = [
+                    self.graph.get_node(in_name)
+                    for in_name in in_nodes0[0].inputs
+                ]
+                if in_nodes1[0].layer_type != "Const" or in_nodes1[
+                        1].layer_type != "RealDiv" or in_nodes1[
+                            0].value.size != 1:
+                    is_scale = False
+                    continue
+                if exist_act(in_nodes1[1]):
+                    is_scale = False
+                    continue
+                if len(in_nodes1[0].outputs) != 1 or len(in_nodes1[1]
+                                                         .outputs) != 1:
+                    is_scale = False
+                    continue
+                in_nodes2 = [
+                    self.graph.get_node(in_name)
+                    for in_name in in_nodes1[1].inputs
+                ]
+                if in_nodes2[1].layer_type != "Const" or in_nodes2[
+                        1].value.size != 1:
+                    is_scale = False
+                    continue
+                if is_scale:
+                    in_node = self.graph.get_node(in_nodes1[1].inputs[0])
+                    index = in_node.outputs.index(in_nodes1[1].layer_name)
+                    in_node.outputs[index] = node.layer_name
+                    node.layer_type = "Scale"
+                    node.inputs = [in_node.layer_name]
+                    scale = 1.0 / in_nodes2[1].value * in_nodes1[0].value
+                    act = None
+                    if node.fluid_code.layers[0].param_attr is not None:
+                        act = node.fluid_code.layers[0].param_attr.get("act",
+                                                                       None)
+                    node.fluid_code.clear()
+                    attr = {
+                        "scale": scale,
+                        "bias": in_nodes0[1].value,
+                        "bias_after_scale": True,
+                        "act": act
+                    }
+                    node.fluid_code.add_layer(
+                        "scale", inputs=in_node, output=node, param_attr=attr)
+                    del self.graph.node_map[in_nodes0[0].layer_name]
+                    del self.graph.node_map[in_nodes0[1].layer_name]
+                    del self.graph.node_map[in_nodes1[0].layer_name]
+                    del self.graph.node_map[in_nodes1[1].layer_name]
+                    del self.graph.node_map[in_nodes2[1].layer_name]
+    def merge_affine_channel(self):
+        for i, name in enumerate(self.graph.topo_sort):
+            node = self.graph.get_node(name)
+            if node is None:
+                continue
+            is_affine_channel = True
+            if node.layer_type == "RealDiv":
+                in_nodes0 = [
+                    self.graph.get_node(in_name) for in_name in node.inputs
+                ]
+                bias_add = True
+                if (in_nodes0[0].layer_type != "Sub" and in_nodes0[0].layer_type
+                        != "Add") or in_nodes0[1].layer_type != "Const" or len(
+                            in_nodes0[1].value.shape) != 3:
+                    is_affine_channel = False
+                    continue
+                if in_nodes0[0].layer_type == "Sub":
+                    bias_add = False
+                if exist_act(in_nodes0[0]):
+                    is_affine_channel = False
+                    continue
+                if len(in_nodes0[0].outputs) != 1 or len(in_nodes0[1]
+                                                         .outputs) != 1:
+                    is_affine_channel = False
+                    continue
+                in_nodes1 = [
+                    self.graph.get_node(in_name)
+                    for in_name in in_nodes0[0].inputs
+                ]
+                if len(in_nodes1[0].out_shapes[0]) != 4 or in_nodes1[
+                        1].layer_type != "Const" or len(in_nodes1[1]
+                                                        .value.shape) != 3:
+                    is_affine_channel = False
+                    continue
+                if len(in_nodes1[1].outputs) != 1:
+                    is_affine_channel = False
+                    continue
+                channel = in_nodes1[0].out_shapes[0][-1]
+                if channel < 0 or channel != in_nodes0[
+                        1].value.size or channel != in_nodes1[1].value.size:
+                    is_affine_channel = False
+                    continue
+                if in_nodes0[1].out_shapes[0][-1] != in_nodes0[
+                        1].value.size or in_nodes1[1].out_shapes[0][
+                            -1] != in_nodes1[1].value.size:
+                    is_affine_channel = False
+                    continue
+                if is_affine_channel:
+                    in_node = in_nodes1[0]
+                    index = in_node.outputs.index(in_nodes0[0].layer_name)
+                    in_node.outputs[index] = node.layer_name
+                    node.layer_type = "AffineChannel"
+                    node.inputs = [in_node.layer_name]
+                    scale = 1.0 / in_nodes0[1].value.flatten()
+                    bias = in_nodes1[1].value.flatten() / in_nodes0[
+                        1].value.flatten()
+                    if not bias_add:
+                        bias *= -1.0
+                    self.op_mapper.weights[node.layer_name + "_scale"] = scale
+                    self.op_mapper.weights[node.layer_name + "_bias"] = bias
+                    act = None
+                    if node.fluid_code.layers[0].param_attr is not None:
+                        act = node.fluid_code.layers[0].param_attr.get("act",
+                                                                       None)
+                    node.fluid_code.clear()
+                    attr = {
+                        "dtype": string(scale.dtype),
+                        "shape": [channel],
+                        "name": string(node.layer_name + "_scale")
+                    }
+                    node.fluid_code.add_layer(
+                        "create_parameter",
+                        inputs=None,
+                        output=node.layer_name + "_scale",
+                        param_attr=attr)
+                    attr = {
+                        "dtype": string(scale.dtype),
+                        "shape": [channel],
+                        "name": string(node.layer_name + "_bias")
+                    }
+                    node.fluid_code.add_layer(
+                        "create_parameter",
+                        inputs=None,
+                        output=node.layer_name + "_bias",
+                        param_attr=attr)
+                    inputs = {
+                        "x": in_node,
+                        "scale": node.layer_name + "_scale",
+                        "bias": node.layer_name + "_bias"
+                    }
+                    attr = {"act": act}
+                    node.fluid_code.add_layer(
+                        "affine_channel",
+                        inputs=inputs,
+                        output=node,
+                        param_attr=attr)
+                    del self.graph.node_map[in_nodes0[0].layer_name]
+                    del self.graph.node_map[in_nodes0[1].layer_name]
+                    del self.graph.node_map[in_nodes1[1].layer_name]
--- a/x2paddle/x2paddle/tests/__init__.py
+++ b/x2paddle/x2paddle/tests/__init__.py