Commit a6759829 authored by Channingss

Merge remote-tracking branch 'paddle/develop' into prior_box

# X2Paddle Supported OP List
> Currently X2Paddle supports 70+ TensorFlow OPs and 30+ Caffe Layers, covering most of the operations commonly used in CV classification models. The full list of OPs currently supported by X2Paddle is given below.

**Note:** Some OPs are not yet supported. If you hit an unsupported OP during conversion, you can add it yourself or report it to us. You are welcome to let us know by [filing an issue](https://github.com/PaddlePaddle/X2Paddle/issues/new) (model name, code implementation, or how to obtain the model), and we will follow up promptly :)
@@ -7,20 +7,24 @@
| No. | OP | No. | OP | No. | OP | No. | OP |
|------|------|------|------|------|------|------|------|
| 1 | Relu | 2 | Relu6 | 3 | Shape | 4 | Abs |
| 5 | Sigmoid | 6 | Exp | 7 | Rsqrt | 8 | swish_f32 |
| 9 | Tanh | 10 | LeakyRelu | 11 | Add | 12 | RealDiv |
| 13 | Sub | 14 | Maximum | 15 | Mul | 16 | FloorDiv |
| 17 | Placeholder | 18 | Const | 19 | Transpose | 20 | FusedBatchNorm |
| 21 | Conv2D | 22 | BiasAdd | 23 | MaxPool | 24 | DepthwiseConv2dNative |
| 25 | Reshape | 26 | AvgPool | 27 | SplitV | 28 | SquaredDifference |
| 29 | Tile | 30 | Pack | 31 | Pad | 32 | ResizeBilinear |
| 33 | Mean | 34 | MatMul | 35 | ArgMax | 36 | StridedSlice |
| 37 | Slice | 38 | Sum | 39 | Max | 40 | Conv2DBackpropInput |
| 41 | Cast | 42 | Split | 43 | Squeeze | 44 | ResizeNearestNeighbor |
| 45 | Softmax | 46 | Range | 47 | ConcatV2 | 48 | MirrorPad |
| 49 | Identity | 50 | GreaterEqual | 51 | StopGradient | 52 | Minimum |
| 53 | RandomUniform | 54 | Fill | 55 | Floor | 56 | DepthToSpace |
| 57 | Sqrt | 58 | Softplus | 59 | Erf | 60 | AddV2 |
| 61 | LessEqual | 62 | BatchMatMul | 63 | BatchMatMulV2 | 64 | ExpandDims |
| 65 | BatchToSpaceND | 66 | SpaceToBatchND | 67 | OneHot | 68 | Pow |
| 69 | All | 70 | GatherV2 | 71 | IteratorV2 | | |
## Caffe
...
@@ -267,9 +267,8 @@ class SymbolicShapeInference:
        if pending_nodes and self.verbose_ > 0:
            print('SymbolicShapeInference: orphaned nodes discarded: ')
            print('\n'.join(
                [n.op_type + ': ' + n.output[0] for n in pending_nodes]))
        if input_shapes is not None:
            for input_name, shape in input_shapes.items():
                for idx in range(len(self.out_mp_.graph.input)):
...
@@ -17,7 +17,7 @@ def normalize_layer(inputs,
    scale_param = fluid.layers.create_parameter(
        shape=[1] if channel_shared else [1, 1, 1, input_shape[0][1]],
        dtype=input.dtype,
        attr=fluid.ParamAttr(name=name + '_scale'))
    scale_param = fluid.layers.reshape(x=scale_param, \
        shape=[1] if channel_shared else [input_shape[0][1]])
    out = fluid.layers.elementwise_mul(
...
@@ -32,15 +32,33 @@ import shutil
_logger = _logging.getLogger(__name__)


def _const_weight_or_none(node, necessary=False):
    if 'Constant' in node.layer_type:
        return node.value
    if isinstance(node, ONNXGraphDataNode):
        return node.weight
    if necessary:
        assert False, '{} should be an initializer or Constant operator.'.format(
            node.layer_name)
    return None


def _is_static_shape(shape):
    negtive_dims = 0
    error_dims = 0
    for dim in shape:
        if dim < 0:
            negtive_dims += 1
        if dim < -1:
            error_dims += 1
    if negtive_dims > 1:
        return False
    if error_dims > 0:
        return False
    return True


def _get_same_padding(in_size, kernel_size, stride):
    new_size = int(math.ceil(in_size * 1.0 / stride))
    pad_size = (new_size - 1) * stride + kernel_size - in_size
    pad0 = int(pad_size / 2)
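For intuition, the SAME-padding arithmetic above can be checked by hand. The following standalone sketch (not part of the diff; the helper name and example values are illustrative) mirrors the computation and splits the total padding into a leading and trailing part:

```python
import math

def same_padding(in_size, kernel_size, stride):
    # Mirrors _get_same_padding: the output size is ceil(in_size / stride),
    # and the total padding is whatever is needed to reach that size.
    new_size = int(math.ceil(in_size * 1.0 / stride))
    pad_size = (new_size - 1) * stride + kernel_size - in_size
    pad0 = pad_size // 2          # padding added before
    pad1 = pad_size - pad0        # padding added after
    return [pad0, pad1]

# e.g. a 224-wide input with a 3x3 kernel and stride 2 -> output 112, pads [0, 1]
print(same_padding(224, 3, 2))
```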
@@ -228,42 +246,9 @@ class OpSet9():
        val_x = self.graph.get_input_node(node, idx=0, copy=True)
        val_y = self.graph.get_input_node(node, idx=1, copy=True)
        inputs = {'x': val_x, 'y': val_y}
        node.fluid_code.add_layer(
            op_type, inputs=inputs, output=node, param_attr=None)

    @print_mapping_info
    def place_holder(self, node):
@@ -476,8 +461,21 @@ class OpSet9():
                    output=node,
                    param_attr={'shape': [1]})
        else:
            if str(val_x.dtype) == 'bool':
                val_x_cast = val_x.layer_name + '_cast'
                node.fluid_code.add_layer(
                    'cast',
                    inputs=val_x,
                    output=val_x_cast,
                    param_attr={'dtype': string('int64')})
                node.fluid_code.add_layer(
                    'unsqueeze',
                    inputs=val_x_cast,
                    output=node,
                    param_attr=attr)
            else:
                node.fluid_code.add_layer(
                    'unsqueeze', inputs=val_x, output=node, param_attr=attr)

    @print_mapping_info
    def Shrink(self, node):
@@ -597,12 +595,35 @@ class OpSet9():
        #assert len(
        #    indices_shape) <= 2, "Gather op don't support dim of indice >2 "
        if axis == 0 and len(indices_shape) <= 1:
            if len(val_x.out_shapes[0]) <= 1:
                node.fluid_code.add_layer(
                    'gather',
                    inputs={'input': val_x,
                            'index': indices},
                    output=node,
                    param_attr=None)
            elif len(val_x.out_shapes[0]) > 1:
                if len(indices_shape) == 0:
                    gather_ = node.layer_name + '_1'
                    node.fluid_code.add_layer(
                        'gather',
                        inputs={'input': val_x,
                                'index': indices},
                        output=gather_,
                        param_attr=None)
                    node.fluid_code.add_layer(
                        'squeeze',
                        inputs={'input': gather_,
                                'axes': [0]},
                        output=node,
                        param_attr=None)
                else:
                    node.fluid_code.add_layer(
                        'gather',
                        inputs={'input': val_x,
                                'index': indices},
                        output=node,
                        param_attr=None)
        elif axis > 0 and len(indices_shape) <= 1:
            perm = list(range(len(val_x.out_shapes[0])))
            perm = [axis] + perm[:axis] + perm[axis + 1:]
@@ -621,6 +642,13 @@ class OpSet9():
                param_attr=None)
            node.fluid_code.add_layer(
                'transpose', inputs=node, output=node, param_attr=attr_trans)
            if len(indices_shape) < 1:
                node.fluid_code.add_layer(
                    'squeeze',
                    inputs={'input': node,
                            'axes': [0]},
                    output=node,
                    param_attr=None)
        elif axis == 0 and len(indices_shape) > 1:
            if val_x.out_shapes[0] is not None and isinstance(
                    val_x, ONNXGraphDataNode):
@@ -701,6 +729,86 @@ class OpSet9():
                output=node,
                param_attr={'shape': reshaped_shape})
@print_mapping_info
def ScatterND(self, node):
val_x = self.graph.get_input_node(node, idx=0, copy=True)
indices = self.graph.get_input_node(node, idx=1, copy=True)
updates = self.graph.get_input_node(node, idx=2, copy=True)
if len(indices.out_shapes[0]) == 1:
node.fluid_code.add_layer(
'scatter',
inputs={'input': val_x,
'index': indices,
'updates': updates},
output=node,
param_attr=None)
else:
input_inner_indices = node.layer_name + '_input_inner_indices'
node.fluid_code.add_layer(
'scatter_nd',
inputs={
'shape': val_x.out_shapes[0],
'index': indices,
'updates': updates
},
output=input_inner_indices,
param_attr=None)
constant_minus_one = node.layer_name + '_constant_minus_one'
node.fluid_code.add_layer(
'fill_constant',
inputs=None,
output=constant_minus_one,
param_attr={
'shape': updates.out_shapes[0],
'dtype': string(updates.dtype),
'value': -1
})
indices_mask = node.layer_name + '_indices_mask'
node.fluid_code.add_layer(
'scatter_nd',
inputs={
'shape': val_x.out_shapes[0],
'index': indices,
'updates': constant_minus_one
},
output=indices_mask,
param_attr=None)
constant_1 = node.layer_name + '_constant_1'
node.fluid_code.add_layer(
'fill_constant',
inputs=None,
output=constant_1,
param_attr={
'shape': val_x.out_shapes[0],
'dtype': string(val_x.dtype),
'value': 1
})
input_out_indices_mask = node.layer_name + '_input_out_indices_mask'
node.fluid_code.add_layer(
"elementwise_add",
inputs={"x": indices_mask,
"y": constant_1},
output=input_out_indices_mask,
param_attr=None)
input_out_indices = node.layer_name + '_input_out_indices'
node.fluid_code.add_layer(
"elementwise_mul",
inputs={"x": val_x,
"y": input_out_indices_mask},
output=input_out_indices,
param_attr=None)
node.fluid_code.add_layer(
"elementwise_add",
inputs={"x": input_inner_indices,
"y": input_out_indices},
output=node,
param_attr=None)
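For readers unfamiliar with this decomposition: fluid's scatter_nd produces a tensor of the given shape with the updates at the indexed positions and zeros elsewhere, so the mapper rebuilds ONNX ScatterND from two scatter_nd calls plus a 0/1 mask. A minimal NumPy sketch of the same trick (illustration only, not part of the converter; names and example values are arbitrary):

```python
import numpy as np

def scatter_nd_like(x, indices, updates):
    # inner part: updates scattered into zeros (mirrors the first scatter_nd above)
    inner = np.zeros_like(x)
    inner[tuple(indices.T)] = updates
    # mask: -1 at the touched positions, then +1 -> 0 there and 1 elsewhere
    # (mirrors scatter_nd of constant -1 followed by elementwise_add of 1)
    mask = np.zeros_like(x)
    mask[tuple(indices.T)] = -1
    mask = mask + 1
    # keep x where the mask is 1, take the scattered updates elsewhere
    return inner + x * mask

x = np.arange(12, dtype=np.float32).reshape(3, 4)
indices = np.array([[0, 1], [2, 3]])           # overwrite x[0, 1] and x[2, 3]
updates = np.array([10., 20.], dtype=np.float32)
print(scatter_nd_like(x, indices, updates))
```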
    @print_mapping_info
    def Range(self, node):
        val_start = self.graph.get_input_node(node, idx=0, copy=True)
@@ -724,7 +832,7 @@ class OpSet9():
        ends = self.graph.get_input_node(node, idx=2, copy=True)
        if len(node.inputs) > 3:
            axes = self.graph.get_input_node(node, idx=3, copy=True)
            axes = _const_weight_or_none(axes, necessary=True)
        if len(node.inputs) > 4:
            steps = self.graph.get_input_node(node, idx=4, copy=True)
            steps = _const_weight_or_none(steps)
@@ -828,6 +936,14 @@ class OpSet9():
                inputs={'x': val_x},
                output=node,
                param_attr={'shape': shape_value.tolist()})
        elif len(node.out_shapes[0]) > 0 and _is_static_shape(node.out_shapes[
                0]):
            node.fluid_code.add_layer(
                'reshape',
                inputs={'x': val_x,
                        'shape': node.out_shapes[0]},
                output=node,
                param_attr=attr)
        elif val_shape.dtype == 'int64':
            val_shape_cast = val_shape.layer_name + '_cast'
            node.fluid_code.add_layer(
@@ -879,6 +995,11 @@ class OpSet9():
        node.fluid_code.add_layer(
            'cast', inputs=val_input, output=node, param_attr=attr)

    @print_mapping_info
    def Not(self, node):
        val_input = self.graph.get_input_node(node, idx=0, copy=True)
        node.fluid_code.add_layer('logical_not', inputs=val_input, output=node)

    @print_mapping_info
    def AveragePool(self, node):
        val_x = self.graph.get_input_node(node, idx=0, copy=True)
@@ -897,11 +1018,11 @@ class OpSet9():
        if auto_pad == "SAME_UPPER" or auto_pad == "SAME_LOWER":
            input_shape = val_x.out_shapes[0]
            pad_h = _get_same_padding(input_shape[2], kernel_shape[0],
                                      strides[0])
            pad_w = _get_same_padding(input_shape[3], kernel_shape[1],
                                      strides[1])
            paddings = pad_h + pad_w

        attr = {
            "pool_size": kernel_shape,
@@ -1171,7 +1292,6 @@ class OpSet9():
    def NonZero(self, node):
        val_x = self.graph.get_input_node(node, idx=0, copy=True)
        val_x_dim = len(val_x.out_shapes[0])
        if val_x_dim == 1:
            node.fluid_code.add_layer("nonzero", inputs=val_x, output=val_x)
            node.fluid_code.add_layer(
@@ -1232,11 +1352,11 @@ class OpSet9():
        if auto_pad == "SAME_UPPER" or auto_pad == "SAME_LOWER":
            input_shape = val_x.out_shapes[0]
            pad_h = _get_same_padding(input_shape[2], kernel_shape[0],
                                      strides[0])
            pad_w = _get_same_padding(input_shape[3], kernel_shape[1],
                                      strides[1])
            paddings = pad_h + pad_w

        attr = {
            "pool_size": kernel_shape,
@@ -1293,23 +1413,23 @@ class OpSet9():
        kernel_shape = node.get_attr('kernel_shape')
        convnd = len(kernel_shape)
        assert 2 <= convnd <= 3, 'only conv2d and conv3d is supported'
        num_out_channels = val_w.out_shapes[0][0]
        fluid_op = 'conv{}d'.format(convnd)

        num_groups = node.get_attr('group', 1)
        strides = node.get_attr('strides', [1] * convnd)
        dilations = node.get_attr('dilations', [1] * convnd)
        pads = node.get_attr('pads', [0] * (convnd * 2))

        input_shape = val_x.out_shapes[0]
        paddings, val_x = self._pad_if_asymmetric(node, pads, val_x)

        if auto_pad == "SAME_UPPER" or auto_pad == "SAME_LOWER":
            pad_h = _get_same_padding(input_shape[2], kernel_shape[0],
                                      strides[0])
            pad_w = _get_same_padding(input_shape[3], kernel_shape[1],
                                      strides[1])
            paddings = pad_h + pad_w

        attr = {
            "num_filters": num_out_channels,
@@ -1379,183 +1499,3 @@ class OpSet9():
        }
        node.fluid_code.add_layer(
            fluid_op, inputs=val_x, output=node, param_attr=attr)
@print_mapping_info
def GRU(self, node):
val_x = self.graph.get_input_node(node, idx=0, copy=True)
val_w = self.graph.get_input_node(node, idx=1, copy=True)
val_r = self.graph.get_input_node(node, idx=2, copy=True)
val_b = None
val_len = None
val_xh = None
miss_arg_num = 0
num_ipt = len(node.layer.input)
if num_ipt > 3 and node.layer.input[3] != '':
val_b = self.graph.get_input_node(node, idx=3, copy=True)
else:
miss_arg_num += 1
if num_ipt > 4 and node.layer.input[4] != '':
val_len = self.graph.get_input_node(
node, idx=4 - miss_arg_num, copy=True)
else:
miss_arg_num += 1
if num_ipt > 5 and node.layer.input[5] != '':
val_xh = self.graph.get_input_node(
node, idx=5 - miss_arg_num, copy=True)
x_shape = val_x.out_shapes[0]
assert x_shape[1] == 1, 'only X with batch_size = 1 supported'
assert node.get_attr('clip', None) is None, 'clipping not supported'
hidden_size = node.get_attr('hidden_size', None)
if hidden_size is None:
r_shape = val_r.out_shapes[0]
if r_shape:
hidden_size = r_shape[-1]
if hidden_size is None:
w_shape = var_w.out_shapes[0]
if w_shape:
hidden_size = w_shape[-2] // 3
if hidden_size is None and val_b:
b_shape = val_b.out_shapes[0]
if b_shape:
hidden_size = b_shape[-1] // 6
if hidden_size is None and val_xh:
xh_shape = val_xh.out_shapes[0]
if xh_shape:
hidden_size = xh_shape[-1]
direction = node.get_attr('direction', 'forward')
assert direction != 'bidirectional', 'direction = bidirectional not supported'
activations = node.get_attr('activations', ['Sigmoid', 'Tanh'])
assert len(activations) == 2, 'bidirectional operation not supported'
assert node.get_attr('linear_before_reset',
0) == 0, 'only linear_before_reset = 0 supported'
activations = [s.lower() for s in activations]
gate_activation, candidate_activation = activations
is_reverse = direction == 'reverse'
var_x0 = node.layer_name + '_x0'
node.fluid_code.add_layer(
'squeeze',
inputs=val_x,
output=var_x0,
param_attr={'axes': [1],
'name': string(var_x0)})
var_w0 = node.layer_name + '_w0'
node.fluid_code.add_layer(
'squeeze',
inputs=val_w,
output=var_w0,
param_attr={'axes': [0],
'name': string(var_w0)})
var_fc = node.layer_name + '_fc'
var_mm = (node.layer_name + '_mm') if val_b else var_fc
node.fluid_code.add_layer(
'matmul',
inputs={'x': var_x0,
'y': var_w0},
output=var_mm,
param_attr={
'transpose_x': 0,
'transpose_y': 1,
'name': string(var_mm)
})
var_r0 = node.layer_name + '_r0'
node.fluid_code.add_layer(
'squeeze',
inputs=val_r,
output=var_r0,
param_attr={'axes': [0],
'name': string(var_r0)})
var_r0t = node.layer_name + '_r0t'
node.fluid_code.add_layer(
'transpose',
inputs=var_r0,
output=var_r0t,
param_attr={'perm': [1, 0],
'name': string(var_r0t)})
if val_b:
var_bi = node.layer_name + '_bi'
var_bh = node.layer_name + '_bh'
node.fluid_code.add_layer(
'split',
inputs=val_b,
output=var_bi + ',' + var_bh,
param_attr={
'dim': 1,
'num_or_sections': [hidden_size * 3, hidden_size * 3],
'name': string(node.layer_name + '.b/split')
})
var_bi0 = node.layer_name + '_bi0'
node.fluid_code.add_layer(
'squeeze',
inputs=var_bi,
output=var_bi0,
param_attr={'axes': [0],
'name': string(var_bi0)})
node.fluid_code.add_layer(
'elementwise_add',
inputs=[var_mm, var_bi0],
output=var_fc,
param_attr={
'axes': 1,
'name': string(node.layer_name + '.i/bias')
})
if val_xh:
var_xh0 = node.layer_name + '_xh0'
node.fluid_code.add_layer(
'squeeze',
inputs=val_xh,
output=var_xh0,
param_attr={'axes': [1],
'name': string(var_xh0)})
var_y00 = node.layer_name + '_y00'
attr = {
'origin_mode': True,
'h_0': var_xh0 if val_xh else None,
'is_reverse': is_reverse,
'gate_activation': string(gate_activation),
'candidate_activation': string(candidate_activation),
'param_attr': string(var_r0t),
'bias_attr': string(var_bh) if val_b else False,
}
node.fluid_code.add_layer(
'dynamic_gru',
inputs=var_fc + ',' + str(hidden_size),
output=var_y00,
param_attr=attr)
num_opt = len(node.layer.output)
if num_opt > 0 and node.layer.output[0] != '':
node.fluid_code.add_layer(
'unsqueeze',
inputs=var_y00,
output=node.layer.output[0],
param_attr={
'axes': [1, 1],
'name': string(node.layer.output[0])
})
if num_opt > 1 and node.layer.output[1] != '':
node.fluid_code.add_layer(
'unsqueeze',
inputs=var_y00,
output=node.layer.output[1],
param_attr={
'axes': [1, 1],
'name': string(node.layer.output[1])
})
@@ -875,6 +875,14 @@ class OpSet9(object):
            axes=op.attr('axes'))
        return node

    def cast(self, op, block):
        node = helper.make_node(
            'Cast',
            inputs=op.input('X'),
            outputs=op.output('Out'),
            to=self.paddle_onnx_dtype_map[op.attr('out_dtype')])
        return node

    def arg_max(self, op, block):
        node = helper.make_node(
            'ArgMax',
...
@@ -299,6 +299,10 @@ class TFOpMapperNHWC(OpMapper):
        data_format = node.get_attr("data_format").decode()
        pad_mode = node.get_attr("padding").decode()
        channel_first = data_format == "NCHW"
        if data_format == "NHWC":
            n, h, w, c = input.out_shapes[0]
        else:
            n, c, h, w = input.out_shapes[0]

        if kernel.layer_type == 'Const':
            kernel_value = kernel.value
@@ -329,10 +333,15 @@ class TFOpMapperNHWC(OpMapper):
            "dilation": dilations[2:4],
            "padding": string(pad_mode)
        }

        if hasattr(node, 'dilation') and attr['dilation'] == [1, 1]:
            if len(node.dilation) == 1:
                attr['dilation'] = [1, node.dilation[0]]

        if c == -1:
            reshape_attr = {"shape": [0, k_size[2], 0, 0]}
            node.fluid_code.add_layer(
                "reshape", inputs=input, output=input, param_attr=reshape_attr)

        node.fluid_code.add_layer(
            "conv2d", inputs=input, output=node, param_attr=attr)
        if not channel_first:
@@ -748,11 +757,12 @@ class TFOpMapperNHWC(OpMapper):
            self.add_omit_nodes(begin.layer_name, node.layer_name)
            begin = begin.value.tolist()
        else:
            begin = self.decoder.infer_tensor(begin).tolist()
            # shape = begin.out_shapes[0]
            # attr = {"shape": shape}
            # node.fluid_code.add_layer(
            #     "reshape", inputs=begin, output=begin, param_attr=attr)
        if size.layer_type == "Const":
            self.add_omit_nodes(size.layer_name, node.layer_name)
            size = size.value.tolist()
...
@@ -863,6 +863,9 @@ class TFOptimizer(object):
                    weight = numpy.expand_dims(weight, 2)
                    weight = numpy.expand_dims(weight, 3)
                    self.op_mapper.weights[in_nodes3[0].layer_name] = weight
                    # fix bug in Paddle1.8.3 and may change in next version.
                    self.op_mapper.weights[in_nodes3[0].layer_name +
                                           '_1'] = weight.reshape(1, -1)
                    in_nodes3[0].fluid_code.layers[0].param_attr["shape"] = [
                        1, in_shape[-1], 1, 1
                    ]
@@ -885,7 +888,7 @@ class TFOptimizer(object):
                node.fluid_code.clear()
                attr = {
                    "mode": string(mode),
                    "param_attr": string(in_nodes3[0].layer_name + "_1")
                }
                node.fluid_code.add_layer(
...
# X2Paddle Model Test Library
> Currently X2Paddle supports 70+ TensorFlow OPs and 40+ Caffe Layers, covering most of the operations commonly used in CV classification models. We have tested X2Paddle's conversion on the model list below.

**Note:** Due to differences between frameworks, some models may not be convertible at the moment, e.g. TensorFlow models containing control flow, NLP models, etc. For common CV models, if you find that a model cannot be converted, the conversion fails, or the result shows a large diff, you are welcome to let us know by [filing an issue](https://github.com/PaddlePaddle/X2Paddle/issues/new) (model name, code implementation, or how to obtain the model), and we will follow up promptly :)
@@ -20,10 +20,13 @@
| ResNet_V1_101 | [code](https://github.com/tensorflow/models/tree/master/research/slim/nets) |-|
| ResNet_V2_101 | [code](https://github.com/tensorflow/models/tree/master/research/slim/nets) |-|
| UNet | [code1](https://github.com/jakeret/tf_unet )/[code2](https://github.com/lyatdawn/Unet-Tensorflow) |-|
| MTCNN | [code](https://github.com/AITTSMD/MTCNN-Tensorflow) |-|
| YOLO-V3 | [code](https://github.com/YunYang1994/tensorflow-yolov3) | Conversion requires disabling the NHWC->NCHW optimization; see [Q2 in the FAQ](FAQ.md) |
| FALSR | [code](https://github.com/xiaomi-automl/FALSR) | Requires the without_data_format_optimization option (see the sketch below this table) |
| DCSCN | [code](https://modelzoo.co/model/dcscn-super-resolution) | Requires the without_data_format_optimization option |
| Bert(albert) | [code](https://github.com/google-research/albert#pre-trained-models) | Requires the without_data_format_optimization option |
| Bert(chinese_L-12_H-768_A-12) | [code](https://github.com/google-research/bert#pre-trained-models) | Requires the without_data_format_optimization option |
| Bert(multi_cased_L-12_H-768_A-12) | [code](https://github.com/google-research/bert#pre-trained-models) | Requires the without_data_format_optimization option |
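The without_data_format_optimization switch mentioned above is passed to X2Paddle at conversion time. The sketch below shows one way this might look; the programmatic entry point and exact argument names are assumptions that may differ between X2Paddle releases (check `x2paddle -h` and x2paddle/convert.py of your installed version), and the model path is a placeholder:

```python
# Sketch only: assumes x2paddle.convert.tf2paddle exists with this signature
# in the installed X2Paddle release.
from x2paddle.convert import tf2paddle

tf2paddle(
    model_path="falsr_frozen.pb",           # placeholder: frozen TensorFlow graph
    save_dir="pd_model",                    # output directory for the Paddle model
    without_data_format_optimization=True)  # disable the NHWC->NCHW data-format optimization

# Rough CLI equivalent (confirm the flag with `x2paddle -h`):
#   x2paddle --framework=tensorflow --model=falsr_frozen.pb \
#            --save_dir=pd_model --without_data_format_optimization
```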
## Caffe
...